In [2]:
import pandas as pd
import numpy as np

In [3]:
from numpy.random import randn

In [6]:
# Build a 5x4 frame of random normal draws: rows are labelled A-E, columns W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [7]:
# Display the freshly built frame.
df

Unnamed: 0,W,X,Y,Z
A,-0.981312,2.854186,-0.867664,0.464549
B,-1.64905,-1.35705,-0.205019,0.276337
C,0.847949,1.401675,-1.758419,0.066553
D,0.244302,0.066007,-0.846044,0.525563
E,-0.39488,-1.563112,-0.265559,0.738181


In [8]:
# Bracket selection with a single column label returns that column as a Series.
df['W']

A   -0.981312
B   -1.649050
C    0.847949
D    0.244302
E   -0.394880
Name: W, dtype: float64

In [11]:
# Passing a list of column labels returns a DataFrame with just those columns.
df[['W','Z']]

Unnamed: 0,W,Z
A,-0.981312,0.464549
B,-1.64905,0.276337
C,0.847949,0.066553
D,0.244302,0.525563
E,-0.39488,0.738181


In [9]:
# Label-based row lookup: returns row 'A' as a Series keyed by column name.
df.loc['A']

W   -0.981312
X    2.854186
Y   -0.867664
Z    0.464549
Name: A, dtype: float64

In [10]:
# Position-based row lookup: position 0 is the same row as label 'A'.
df.iloc[0]

W   -0.981312
X    2.854186
Y   -0.867664
Z    0.464549
Name: A, dtype: float64

In [12]:
# Assigning to a new column label adds a column; here it is column 'W' shifted by 5.
df['new'] = df['W'] + 5

In [13]:
# Display the frame, which now includes the added 'new' column.
df

Unnamed: 0,W,X,Y,Z,new
A,-0.981312,2.854186,-0.867664,0.464549,4.018688
B,-1.64905,-1.35705,-0.205019,0.276337,3.35095
C,0.847949,1.401675,-1.758419,0.066553,5.847949
D,0.244302,0.066007,-0.846044,0.525563,5.244302
E,-0.39488,-1.563112,-0.265559,0.738181,4.60512


In [14]:
# Remove the temporary 'new' column (axis=1 targets columns). drop() returns a
# new frame, so rebind df rather than mutating the existing object in place.
df = df.drop('new', axis=1)

In [15]:
# Display the frame again: the 'new' column is gone.
df

Unnamed: 0,W,X,Y,Z
A,-0.981312,2.854186,-0.867664,0.464549
B,-1.64905,-1.35705,-0.205019,0.276337
C,0.847949,1.401675,-1.758419,0.066553
D,0.244302,0.066007,-0.846044,0.525563
E,-0.39488,-1.563112,-0.265559,0.738181


In [17]:
# A (row label, column label) pair selects a single scalar cell.
df.loc['A','W']

-0.98131204717066522

In [18]:
# Lists of row labels and column labels select the sub-frame at their intersection.
df.loc[['A','B'], ['W','X']]

Unnamed: 0,W,X
A,-0.981312,2.854186
B,-1.64905,-1.35705


# MultiIndex (hierarchical index)

In [4]:
# Index Levels
outside = ['G1','G1','G1','G2','G2','G2']
inside = [1,2,3,1,2,3]
hier_index = list(zip(outside,inside))
hier_index = pd.MultiIndex.from_tuples(hier_index)

In [6]:
# Show the (outer, inner) label pairs that the MultiIndex is built from.
list(zip(outside,inside))

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [10]:
# Build a 6x2 frame of random normal draws, indexed by the two-level hier_index
# and with columns A and B.
df = pd.DataFrame(
    data=randn(6, 2),
    index=hier_index,
    columns=['A', 'B'],
)

In [11]:
# Display the multi-indexed frame.
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,1.022948,-0.230619
G1,2,0.753914,-1.177435
G1,3,-1.643079,-0.219445
G2,1,0.951251,-0.041247
G2,2,-0.400898,-0.434137
G2,3,1.152536,0.61406


In [12]:
# Selecting an outer-level label returns the rows under it, indexed by the inner level.
df.loc['G1']

Unnamed: 0,A,B
1,1.022948,-0.230619
2,0.753914,-1.177435
3,-1.643079,-0.219445


In [13]:
# Look up one cell with a single .loc call: the (outer, inner) tuple picks the
# row and 'B' picks the column. This avoids building two intermediate objects
# through chained indexing and does not rely on attribute-style column access.
df.loc[('G1', 2), 'B']

-1.1774346111930529

In [16]:
# Name the two index levels so they can be referred to by name rather than position.
df.index.names = ['group','num']

In [17]:
# Display the frame; the index levels now carry the names 'group' and 'num'.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
group,num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,1.022948,-0.230619
G1,2,0.753914,-1.177435
G1,3,-1.643079,-0.219445
G2,1,0.951251,-0.041247
G2,2,-0.400898,-0.434137
G2,3,1.152536,0.61406


In [19]:
# Cross-section on one index level: select the rows whose 'num' level equals 1.
df.xs(1,level='num')

Unnamed: 0_level_0,A,B
group,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,1.022948,-0.230619
G2,0.951251,-0.041247


# Missing Data

In [20]:
# Column data with deliberately missing entries (np.nan) in 'A' and 'B'.
d = {
    'A': [1, 2, np.nan],
    'B': [5, np.nan, np.nan],
    'C': [1, 2, 3],
}

In [21]:
# Build a frame from the dict: each key becomes a column name.
df = pd.DataFrame(d)

In [22]:
# Display the frame; missing entries appear as NaN.
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


In [23]:
# Drop every column that contains at least one NaN; only 'C' survives.
df.dropna(axis=1)

Unnamed: 0,C
0,1
1,2
2,3


In [24]:
# Default behaviour: drop every row that contains at least one NaN; only row 0 survives.
df.dropna()

Unnamed: 0,A,B,C
0,1.0,5.0,1


In [25]:
# Keep only rows with at least 2 non-NaN values; row 2 has just one and is dropped.
df.dropna(thresh=2)

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2


In [26]:
# Replace every NaN with the literal string 'value'; the result is a new frame, df itself is unchanged.
df.fillna(value='value')

Unnamed: 0,A,B,C
0,1,5,1
1,2,value,2
2,value,value,3


In [28]:
# Fill the missing entry in column 'A' with the column mean (mean() skips NaN, giving 1.5 here).
df['A'].fillna(value=df['A'].mean())

0    1.0
1    2.0
2    1.5
Name: A, dtype: float64