In [1]:
import pandas as pd
import numpy as np

In [2]:
# Small example roster. The index labels are not in sorted order (1, 3, 4, 2),
# which makes the difference between label-based and position-based
# selection visible in the slicing examples below.
df = pd.DataFrame(
    {
        "Name": ["James", "Ken", "Tom", "Helen"],
        "ID": [11, 12, 13, 14],
        "Age": [16, 17, 15, 18],
        "Score": [80, 78, 98, 87],
    },
    index=[1, 3, 4, 2],
)
df

Unnamed: 0,Age,ID,Name,Score
1,16,11,James,80
3,17,12,Ken,78
4,15,13,Tom,98
2,18,14,Helen,87


## Slicing rows 
See [this Stack Overflow explanation of `iloc` vs `ix` vs `loc`](http://stackoverflow.com/questions/31593201/pandas-iloc-vs-ix-vs-loc-explanation) for background.

In [4]:
# by index label: .loc slices on labels and includes the end label,
# so this returns every row from the start up to and including label 3
df.loc[:3]

Unnamed: 0,Age,ID,Name,Score
1,16,11,James,80
3,17,12,Ken,78


In [5]:
# by natural row number: .iloc slices on integer position with the end
# excluded, so this returns the first three rows regardless of their labels
df.iloc[:3]

Unnamed: 0,Age,ID,Name,Score
1,16,11,James,80
3,17,12,Ken,78
4,15,13,Tom,98


In [6]:
# a bare slice inside [] selects rows; with no start/stop/step it returns all of them
df[ :: ]

Unnamed: 0,Age,ID,Name,Score
1,16,11,James,80
3,17,12,Ken,78
4,15,13,Tom,98
2,18,14,Helen,87


## Slicing columns

In [7]:
# indexing with a list of column labels returns a sub-frame with just those columns
wanted_columns = ['Name', 'Score']
df[wanted_columns]

Unnamed: 0,Name,Score
1,James,80
3,Ken,78
4,Tom,98
2,Helen,87


## Manipulating data - map, apply, applymap

In [8]:
# df['Score'] is a Series, so apply calls the lambda on each value in turn
# (row/column-wise application only happens when apply is used on a DataFrame)
df3 = df['Score'].apply(lambda x: x+2)
df3

1     82
3     80
4    100
2     89
Name: Score, dtype: int64

In [9]:
# On a DataFrame, apply hands the lambda one whole column at a time (default axis=0);
# x+2 is then a vectorised addition over that column
df4 = df[['Score', 'Age']].apply(lambda x: x+2)
df4

Unnamed: 0,Score,Age
1,82,18
3,80,19
4,100,17
2,89,20


In [10]:
# map is the element-wise method on a Series; here it produces the same
# values as the Series.apply call above (and rebinds df3)
df3 = df['Score'].map(lambda x: x+2)
df3

1     82
3     80
4    100
2     89
Name: Score, dtype: int64

In [11]:
# applymap is the element-wise method on a DataFrame: the lambda receives one cell at a time
# NOTE(review): newer pandas releases offer DataFrame.map as the replacement
# name for applymap -- confirm against the installed version
df2 = df[['Score', 'Age']].applymap(lambda x: x+2)
df2

Unnamed: 0,Score,Age
1,82,18
3,80,19
4,100,17
2,89,20


## Filtering data

In [12]:
# boolean mask: True for the rows whose Score is above 85
above_85 = df['Score'] > 85
df[above_85]

Unnamed: 0,Age,ID,Name,Score
4,15,13,Tom,98
2,18,14,Helen,87


In [13]:
# combine two boolean masks element-wise with & (both conditions must hold)
high_score = df['Score'] > 85
under_18 = df['Age'] < 18
df[high_score & under_18]

Unnamed: 0,Age,ID,Name,Score
4,15,13,Tom,98


In [14]:
# string comparison is lexicographic, so this keeps the names that sort at or after 'K'
# ('Ken' and 'Tom' here; 'James' and 'Helen' sort before 'K')
df[df['Name'] >='K']

Unnamed: 0,Age,ID,Name,Score
3,17,12,Ken,78
4,15,13,Tom,98


## JOIN data

In [15]:
# Lookup table from student ID to class, indexed with the labels 1, 3, 4, 2
df_class = pd.DataFrame(
    {
        "ID": [11, 12, 13, 14],
        "class": ["A", "A", "B", "C"],
    },
    index=[1, 3, 4, 2],
)
df_class

Unnamed: 0,ID,class
1,11,A
3,12,A
4,13,B
2,14,C


In [43]:
# Student records (name, ID, age, score) that the join examples combine with df_class
df_student = pd.DataFrame(
    {
        "Name": ["James", "Ken", "Tom", "Helen"],
        "ID": [11, 12, 13, 14],
        "Age": [16, 17, 15, 18],
        "Score": [80, 78, 98, 87],
    },
    index=[1, 3, 4, 2],
)

In [17]:
# Use merge: a SQL-style join on the shared key column.
# Rows are matched on 'ID', so the result carries a single ID column
# and gets a fresh 0..n-1 index.
student = df_student.merge(df_class, on='ID')
student

Unnamed: 0,Age,ID,Name,Score,class
0,16,11,James,80,A
1,17,12,Ken,78,A
2,15,13,Tom,98,B
3,18,14,Helen,87,C


In [18]:
# Use concat
# axis=1 must be given to glue the frames side by side (rows aligned on the index).
# Nothing is matched on ID, so the new dataframe has two ID columns.
pd.concat([df_student, df_class], axis=1)

Unnamed: 0,Age,ID,Name,Score,ID.1,class
1,16,11,James,80,11,A
3,17,12,Ken,78,12,A
4,15,13,Tom,98,13,B
2,18,14,Helen,87,14,C


In [21]:
# Use join
# DataFrame.join matches rows on the index, not on the ID column, so both
# frames contribute an ID column; lsuffix/rsuffix rename them to ID_l and ID_r.

df_student.join(df_class, how='inner', lsuffix='_l', rsuffix='_r')

Unnamed: 0,Age,ID_l,Name,Score,ID_r,class
1,16,11,James,80,11,A
3,17,12,Ken,78,12,A
4,15,13,Tom,98,13,B
2,18,14,Helen,87,14,C


In [53]:
# Score column containing one non-numeric entry ('A') alongside the integers
scores = pd.DataFrame(
    {
        "ID": [11, 12, 13, 14],
        "Score": [80, 78, 98, "A"],
    },
    index=[1, 3, 4, 2],
)

In [54]:
# display the raw frame: the last Score entry is the string 'A'
scores

Unnamed: 0,ID,Score
1,11,80
3,12,78
4,13,98
2,14,A


In [58]:
# Use coerce: entries that cannot be parsed as numbers (here the string 'A')
# become NaN instead of raising an error.
# The column is assigned with scores['Score'] = ... rather than
# scores.loc[:, 'Score'] = ...: replacing the column lets it take the new
# float dtype, whereas the .loc form writes into the existing object column
# on recent pandas versions and leaves the dtype as object.
scores['Score'] = pd.to_numeric(scores['Score'], errors='coerce')

In [59]:
# display the frame after coercion: the non-numeric entry now shows as missing
scores

Unnamed: 0,ID,Score
1,11,80.0
3,12,78.0
4,13,98.0
2,14,


## Use crosstab

In [22]:
# frequency table: rows are classes, columns are scores, cells count matching students
pd.crosstab(student['class'], student['Score'])

Score,78,80,87,98
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,1,1,0,0
B,0,0,0,1
C,0,0,1,0


## Use Unique

In [23]:
# distinct class labels, returned as a NumPy array in order of first appearance
student['class'].unique()

array(['A', 'B', 'C'], dtype=object)

## Convert a Series to a Dictionary 
See [this Stack Overflow question on converting a Series to a list of dicts](http://stackoverflow.com/questions/27034794/convert-series-to-list-of-dict-in-pandas).

In [24]:
# Series of single-letter labels with repeats (A x3, B x2, C x1, D x3)
s = pd.Series(list("ABCDABDAD"))
s

0    A
1    B
2    C
3    D
4    A
5    B
6    D
7    A
8    D
dtype: object

In [30]:
# distinct labels in the Series, returned as a NumPy array
s.unique()

array(['A', 'B', 'C', 'D'], dtype=object)

In [31]:
# convert to a dictionary: value_counts tallies each distinct label,
# and to_dict turns that tally into {label: count}
cnt = s.value_counts().to_dict()
cnt

{'A': 3, 'B': 2, 'C': 1, 'D': 3}

In [27]:
# convert to a list of dictionaries: one single-entry {label: count} dict per label
lst = []
for label, count in cnt.items():
    lst.append({label: count})
lst

[{'D': 3}, {'C': 1}, {'A': 3}, {'B': 2}]

In [35]:
# first step of converting back to a series: fold the list of single-entry
# dictionaries into one {label: count} dictionary

dct = {}
for item in lst:
    dct.update(item)
print(dct)

{'D': 3, 'C': 1, 'A': 3, 'B': 2}


In [15]:
# Expand the {label: count} dictionary back into a flat list in which
# every label appears `count` times, then wrap that list in a Series.
lst1 = []
for k, v in dct.items():  # .items() exists on Python 2 and 3; .iteritems() is Python 2 only
    # [k] * v repeats the label itself; list(k * v) would instead split a
    # multi-character label into single characters
    lst1.extend([k] * v)

print(pd.Series(lst1))  # print(...) with parentheses is valid on Python 2 and 3

0    A
1    A
2    A
3    C
4    B
5    B
6    D
7    D
8    D
dtype: object


### NumPy

In [28]:
# build a 1-D array from a Python list
arr = np.array([1, 2, 3])

In [29]:
# np.array returns a numpy.ndarray
type(arr)

numpy.ndarray

In [38]:
# np.diff returns the difference between each element and the one before it,
# so the result is one element shorter than the input
x = np.array([1, 2, 4, 7, 0])
np.diff(x)

array([ 1,  2,  3, -7])

In [40]:
# for a 2-D array np.diff works along the last axis by default,
# i.e. it differences neighbouring elements within each row
x = np.array([
    [1, 2, 4, 7, 0],
    [0, 4, 5, 19, 0],
])
np.diff(x, axis=-1)

array([[  1,   2,   3,  -7],
       [  4,   1,  14, -19]])

In [41]:
# axis=0 differences down the columns instead (row 1 minus row 0),
# so the two input rows collapse into a single output row
np.diff(x, axis=0)

array([[-1,  2,  1, 12,  0]])