In [6]:
import pandas as pd
from io import StringIO


# Toy CSV with two empty fields: column C in the second data row and
# column D in the third. pandas parses empty fields as NaN.
csv_data = '''A,B,C,D
1.0,2.0,3.0,4.0
5.0,6.0,,8.0
10.0,11.0,12.0,'''

# read_csv expects a path or file-like object, so wrap the text in an
# in-memory buffer before parsing it.
csv_buffer = StringIO(csv_data)
df = pd.read_csv(csv_buffer)
print(df)

      A     B     C    D
0   1.0   2.0   3.0  4.0
1   5.0   6.0   NaN  8.0
2  10.0  11.0  12.0  NaN


In [7]:
# Count the missing (NaN) entries in each column.
df.isna().sum()

A    0
B    0
C    1
D    1
dtype: int64

In [10]:
# axis='index' (same as 0): drop every row that contains at least one NaN.
print(df.dropna(axis='index'))
# axis='columns' (same as 1): drop every column that contains at least one NaN.
print(df.dropna(axis='columns'))

     A    B    C    D
0  1.0  2.0  3.0  4.0
      A     B
0   1.0   2.0
1   5.0   6.0
2  10.0  11.0


In [12]:
# Drop only the rows in which every column is NaN.
# (This returns the whole DataFrame here, since no row
# consists entirely of NaN values.)
print(df.dropna(axis='index', how='all'))

      A     B     C    D
0   1.0   2.0   3.0  4.0
1   5.0   6.0   NaN  8.0
2  10.0  11.0  12.0  NaN


In [13]:
# Keep only the rows that have at least 4 non-NaN values
# (i.e., drop rows with fewer than 4).
print(df.dropna(axis='index', thresh=4))

     A    B    C    D
0  1.0  2.0  3.0  4.0


In [17]:
# Drop a row only when NaN appears in one of the listed columns (here: 'C');
# NaN values in the other columns do not cause a row to be dropped.
print(df.dropna(axis='index', subset=['C']))

      A     B     C    D
0   1.0   2.0   3.0  4.0
2  10.0  11.0  12.0  NaN


# Imputing missing values

In [20]:
from sklearn.impute import SimpleImputer


# Mean imputation: each NaN is replaced by the mean of the non-missing
# values in the same column of the input array.
imr = SimpleImputer(strategy='mean')
# fit_transform learns the per-column means and applies them in one step;
# `imr` is left fitted, exactly as after a separate fit() + transform().
imputed_data = imr.fit_transform(df.values)
print(imputed_data)

[[ 1.   2.   3.   4. ]
 [ 5.   6.   7.5  8. ]
 [10.  11.  12.   6. ]]
