##删除，填充0，均值，向前填补，向后填补，内插法等

In [3]:
import pandas as pd
import numpy  as np
# Sample data set with missing values (np.nan) scattered across all three
# columns; used by the following cells to demonstrate dropna/fillna.
records = [
    ['frank', 'M', np.nan],
    [np.nan, np.nan, np.nan],
    ['tom', 'M', 35],
    ['ted', 'M', 33],
    ['jean', np.nan, 21],
    ['lisa', 'F', 20],
]
df = pd.DataFrame(records, columns=['name', 'gender', 'age'])
df

Unnamed: 0,name,gender,age
0,frank,M,
1,,,
2,tom,M,35.0
3,ted,M,33.0
4,jean,,21.0
5,lisa,F,20.0


In [4]:
# Drop every row of df that contains at least one NaN value.
df.dropna(axis=0, how='any')

Unnamed: 0,name,gender,age
2,tom,M,35.0
3,ted,M,33.0
5,lisa,F,20.0


In [5]:
# Drop only the rows of df in which every column is NaN.
df.dropna(axis='index', how='all')

Unnamed: 0,name,gender,age
0,frank,M,
2,tom,M,35.0
3,ted,M,33.0
4,jean,,21.0
5,lisa,F,20.0


In [6]:
# IPython introspection syntax: show the documentation for DataFrame.dropna.
?df.dropna

In [7]:
# Keep only the rows of df that have at least 2 non-NaN values (thresh counts
# the non-missing cells required to keep a row). With the 3 columns present at
# this point, that drops the rows holding 2 or more NaNs.
df.dropna(thresh=2)
# TODO: drop the 'age' column of df (noted here, but this cell contains no code for it)

Unnamed: 0,name,gender,age
0,frank,M,
2,tom,M,35.0
3,ted,M,33.0
4,jean,,21.0
5,lisa,F,20.0


In [8]:
# Add a new column 'employee' to df whose value is NaN in every row.
df['employee'] = np.nan
df

Unnamed: 0,name,gender,age,employee
0,frank,M,,
1,,,,
2,tom,M,35.0,
3,ted,M,33.0,
4,jean,,21.0,
5,lisa,F,20.0,


In [9]:
# Drop the columns of df in which every value is NaN.
df.dropna(axis='columns', how='all')

Unnamed: 0,name,gender,age
0,frank,M,
1,,,
2,tom,M,35.0
3,ted,M,33.0
4,jean,,21.0
5,lisa,F,20.0


In [10]:
# Replace every NaN cell of df with 0.
df.fillna(value=0)

Unnamed: 0,name,gender,age,employee
0,frank,M,0.0,0.0
1,0,0,0.0,0.0
2,tom,M,35.0,0.0
3,ted,M,33.0,0.0
4,jean,0,21.0,0.0
5,lisa,F,20.0,0.0


In [11]:
# Fill the NaN entries of df's 'age' column with the mean of that column
# (the mean is computed over the non-missing ages only).
age_mean = df['age'].mean()
df['age'].fillna(age_mean)

0    27.25
1    27.25
2    35.00
3    33.00
4    21.00
5    20.00
Name: age, dtype: float64

In [12]:
# Fill the NaN entries of df's 'age' column with the mean age of the row's
# gender group. Rows whose gender is itself NaN get no group mean, so their
# age stays NaN.
age_by_gender = df.groupby('gender')['age'].transform('mean')
df['age'].fillna(age_by_gender)

0    34.0
1     NaN
2    35.0
3    33.0
4    21.0
5    20.0
Name: age, dtype: float64

In [13]:
# Display df again: the dropna/fillna calls above returned new objects and
# were not assigned back, so df itself still holds its NaN values.
df

Unnamed: 0,name,gender,age,employee
0,frank,M,,
1,,,,
2,tom,M,35.0,
3,ted,M,33.0,
4,jean,,21.0,
5,lisa,F,20.0,


In [14]:
# Backward fill: replace each NaN in df with the next valid value below it in
# the same column. NaNs with no valid value below them stay NaN.
# DataFrame.bfill() is used because fillna(method=...) is deprecated in
# pandas 2.1+; the result is the same.
df.bfill()

Unnamed: 0,name,gender,age,employee
0,frank,M,35.0,
1,tom,M,35.0,
2,tom,M,35.0,
3,ted,M,33.0,
4,jean,F,21.0,
5,lisa,F,20.0,


In [15]:
# Forward fill: replace each NaN in df with the last valid value above it in
# the same column. NaNs with no valid value above them stay NaN.
# DataFrame.ffill() is used because fillna(method=...) is deprecated in
# pandas 2.1+; the result is the same.
df.ffill()

Unnamed: 0,name,gender,age,employee
0,frank,M,,
1,frank,M,,
2,tom,M,35.0,
3,ted,M,33.0,
4,jean,M,21.0,
5,lisa,F,20.0,


In [16]:
# Small (time, val) series whose third row is entirely missing; used below to
# demonstrate interpolation.
samples = [
    [1, 870],
    [2, 900],
    [np.nan, np.nan],
    [4, 950],
    [5, 1080],
    [6, 1200],
]
df2 = pd.DataFrame(samples, columns=['time', 'val'])
df2

Unnamed: 0,time,val
0,1.0,870.0
1,2.0,900.0
2,,
3,4.0,950.0
4,5.0,1080.0
5,6.0,1200.0


In [17]:
# Fill the missing values of df2 by linear interpolation between the
# neighbouring rows of each column.
df2.interpolate(method='linear')

Unnamed: 0,time,val
0,1.0,870.0
1,2.0,900.0
2,3.0,925.0
3,4.0,950.0
4,5.0,1080.0
5,6.0,1200.0
