In [33]:
import pandas as pd
import numpy as np

## Create a Series from List, Array and Dict

In [7]:
iList=list("abcdefghijklmnopqrstuvwxyz")
iArr=np.arange(26)
iDict=dict(zip(iList,iArr))
#print(iList)
#print(iArr)
#print(iDict)
ps1=pd.Series(iList)
ps2=pd.Series(iArr)
ps3=pd.Series(iDict)
print("Series 3: {}".format(ps3.head()))

Series 3: a    0
b    1
c    2
d    3
e    4
dtype: int64


## Convert index of series into column of a dataframe

In [11]:
df=ps3.to_frame().reset_index()
print("Data Frame is \n {}".format(df.head()))

Data Frame is 
   index  0
0     a  0
1     b  1
2     c  2
3     d  3
4     e  4


## Combine Series to form a dataframe

In [18]:
ps1=pd.Series(list("abcdefghijklmnopqrstuvwxyz"))
ps2=pd.Series(np.arange(26))

df=pd.concat([ps1,ps2],axis=1)
print(df.head())

df=pd.DataFrame({'col1': ps1,'col2':ps2})
print(df.head())

   0  1
0  a  0
1  b  1
2  c  2
3  d  3
4  e  4
  col1  col2
0    a     0
1    b     1
2    c     2
3    d     3
4    e     4


## Assign name to the series index 

In [20]:
ps1=pd.Series(list("abcdefghijklmnopqrstuvwxyz"))
ps1.name="Albhabets"
ps1.head()

0    a
1    b
2    c
3    d
4    e
Name: Albhabets, dtype: object

## Get items of series 1 not present in series 2

In [25]:
ps1=pd.Series([1,2,3,4,5])
ps2=pd.Series([4,5,6,7,8])
ps1[~ps1.isin(ps2)]

0    1
1    2
2    3
dtype: int64

## Get items not common to both Series 1 and Series 2

In [30]:
ps1=pd.Series([1,2,3,4,5])
ps2=pd.Series([4,5,6,7,8])
psUnion=pd.Series(np.union1d(ps1,ps2))
psIntersection=pd.Series(np.intersect1d(ps1,ps2))
psUnion[~psUnion.isin(psIntersection)]

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

## Compute Min, 25th percentile, median, 75th percentile and Max of a numeric series

In [44]:
ps1=pd.Series(np.random.normal(10,5,25))
#print(ps1)
state=np.random.RandomState(100)
ps2=pd.Series(state.normal(10,5,25))
#print(ps2)
print(np.percentile(ps1,q=[0,25,50,75,100]))
print(np.percentile(ps2,q=[0,25,50,75,100]))

[ 0.44029696  6.02726793  9.15643782 12.24956544 18.41626156]
[ 1.25117263  7.70986507 10.92259345 13.36360403 18.0949083 ]


## Get frequency counts of unique items of a series

In [48]:
ps1=pd.Series(np.take(list("abcdefgh"),np.random.randint(8,size=30)))
#print(ps1)
print("Value Count: \n {}".format(ps1.value_counts()))

Value Count: 
 c    7
f    6
a    4
h    4
e    3
d    3
b    2
g    1
dtype: int64


### How to keep only top 2 most frequent values as it is and replace everything else as ‘Other’?

In [16]:
#np.random.RandomState(100)
ps1=pd.Series(np.random.randint(1,5,12))
print("Top 2 Frequent: \n {}".format(ps1.value_counts()))
ps1[~ps1.isin(ps1.value_counts().index[:2])]="Other"
print("Series {}".format(ps1))

Top 2 Frequent: 
 3    4
2    3
1    3
4    2
dtype: int64
Int64Index([3, 2], dtype='int64')
Series 0         2
1         2
2     Other
3     Other
4         3
5     Other
6         3
7     Other
8     Other
9         3
10        2
11        3
dtype: object


## How to bin a numeric series to 10 groups of equal size?

In [27]:
ps1=pd.Series(np.random.random(20))
print(ps1.head())
print(pd.qcut(ps1,q=[0,.10,.20,.30,.40,.50,.60,.70,.80,.90,1],labels=['1st','2nd','3rd','4th','5th','6th','7th','8th','9th','10th']).head())

state=np.random.RandomState(100)
ps2=pd.Series(state.normal(0,100,10))
print(ps2.head())
print(pd.qcut(ps2,q=[0,.10,.20,.30,.40,.50,.60,.70,.80,.90,1],labels=['1st','2nd','3rd','4th','5th','6th','7th','8th','9th','10th']).head())

0    0.119329
1    0.583995
2    0.806151
3    0.012224
4    0.647320
dtype: float64
0     2nd
1     7th
2    10th
3     1st
4     8th
dtype: category
Categories (10, object): [1st < 2nd < 3rd < 4th ... 7th < 8th < 9th < 10th]
0   -174.976547
1     34.268040
2    115.303580
3    -25.243604
4     98.132079
dtype: float64
0     1st
1     7th
2    10th
3     3rd
4     9th
dtype: category
Categories (10, object): [1st < 2nd < 3rd < 4th ... 7th < 8th < 9th < 10th]


## Convert Numpy array to a dataframe of given shape

In [29]:
ps1=pd.Series(np.random.randint(1,10,35))
df=pd.DataFrame(ps1.values.reshape(7,5))
print(df)

   0  1  2  3  4
0  6  1  6  7  8
1  8  9  5  4  4
2  9  9  4  5  8
3  6  6  9  3  8
4  1  2  8  7  3
5  5  4  5  6  6
6  8  8  2  1  7


### Find the positions of numbers that are multiples of 3 from a series

In [40]:
ps1=pd.Series(np.random.randint(1,10,7))
print(ps1)
np.argwhere(ps1%3==0)

0    7
1    8
2    5
3    6
4    4
5    5
6    6
dtype: int32


ValueError: Length of passed values is 1, index implies 7.

## Extract items at given positions from a series

In [41]:
ps1=pd.Series(list("abcdefghijklmnopqrstuvwxyz"))
positions=[0,4,6,8,14,20]
ps1.take(positions)

0     a
4     e
6     g
8     i
14    o
20    u
dtype: object

## Stack two series vertically and horizontally

In [43]:
ps1=pd.Series(range(5))
ps2=pd.Series(list("abcde"))
ps1.append(ps2)
df=pd.concat([ps1,ps2],axis=1)
print(df)

   0  1
0  0  a
1  1  b
2  2  c
3  3  d
4  4  e


## Get the positions of items of Series A in another Series B

In [49]:
ps1=pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ps2=pd.Series([1,3,10,13])
[np.where(i==ps1)[0].tolist()[0] for i in ps2]

[5, 4, 0, 8]

### Compute mean squared error on a truth and predicted series

In [50]:
actual=pd.Series(np.arange(10))
pred=pd.Series(np.arange(10))+np.random.random(10)
np.mean((actual-pred)**2)

0.17540457935776732

### Convert first character of each element in a Series to Upper case

In [52]:
ps1=pd.Series(["this","is","chaitanya","subha","sagar"])
print(ps1.map(lambda x: x.title()))
print(ps1.map(lambda x: x[0].upper()+x[1:]))
print([i.title() for i in ps1])

0         This
1           Is
2    Chaitanya
3        Subha
4        Sagar
dtype: object
0         This
1           Is
2    Chaitanya
3        Subha
4        Sagar
dtype: object
['This', 'Is', 'Chaitanya', 'Subha', 'Sagar']


### Calculate number of characters in each word in a series

In [53]:
ps1=pd.Series(["This","is","Chaitanya","Subha","Sagar"])
ps1.map(lambda x: len(x))

0    4
1    2
2    9
3    5
4    5
dtype: int64

### Compute difference of differences between consequtive numbers of a series

In [55]:
ps1=pd.Series([1,9,10,5,4,6,8,9,12,15,18,21,24,27])
print(ps1.diff().tolist())
print(ps1.diff().diff().tolist())

[nan, 8.0, 1.0, -5.0, -1.0, 2.0, 2.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0]
[nan, nan, -7.0, -6.0, 4.0, 3.0, 0.0, -1.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0]
