In [2]:
import pandas as pd
import numpy as np

In [3]:
# Toy frame with deliberately mixed missing-value markers: np.nan, None,
# and the placeholder strings '?' and '--' in col_c.
df = pd.DataFrame(
    dict(
        col_a=[1, 2, 4, 4, np.nan, np.nan, 6],
        col_b=[1.2, 1.4, np.nan, 6.2, None, 1.1, 4.3],
        col_c=['a', '?', 'c', 'd', '--', np.nan, 'd'],
        col_d=[True, True, np.nan, None, False, True, True],
    )
)

In [4]:
# Display the frame to see where the missing values and placeholders sit.
df

Unnamed: 0,col_a,col_b,col_c,col_d
0,1.0,1.2,a,True
1,2.0,1.4,?,True
2,4.0,,c,
3,4.0,6.2,d,
4,,,--,False
5,,1.1,,True
6,6.0,4.3,d,True


In [5]:
# Nullable-integer column: the Int64 extension dtype keeps integers alongside
# missing entries. The Series holds six values while df has seven rows, so the
# index-aligned assignment leaves the last row of col_e missing as well.
new_col = pd.Series([1, 2, np.nan, 4, np.nan, 5], dtype='Int64')
df['col_e'] = new_col

In [6]:
# Display the frame again, now including col_e.
df

Unnamed: 0,col_a,col_b,col_c,col_d,col_e
0,1.0,1.2,a,True,1.0
1,2.0,1.4,?,True,2.0
2,4.0,,c,,
3,4.0,6.2,d,,4.0
4,,,--,False,
5,,1.1,,True,5.0
6,6.0,4.3,d,True,


In [7]:
# Boolean mask of the same shape as df: True wherever a value is missing.
pd.isna(df)

Unnamed: 0,col_a,col_b,col_c,col_d,col_e
0,False,False,False,False,False
1,False,False,False,False,False
2,False,True,False,True,True
3,False,False,False,True,False
4,True,True,False,False,True
5,True,False,True,False,False
6,False,False,False,False,True


In [8]:
# Per column: does it contain at least one missing value?
df.isna().any(axis=0)

col_a    True
col_b    True
col_c    True
col_d    True
col_e    True
dtype: bool

In [9]:
# Per column: number of missing values.
df.isna().sum(axis=0)

col_a    2
col_b    2
col_c    1
col_d    2
col_e    3
dtype: int64

In [None]:
# Read a CSV file, treating the listed placeholder strings as missing values.
# NOTE(review): this rebinds df to the file contents and the cell carries no
# execution count -- confirm whether it is meant to run in this notebook.
mis_val = ['?', '--']
df = pd.read_csv(filepath_or_buffer='data.csv', na_values=mis_val)

In [14]:
# Turn the placeholder strings '?' and '--' into real missing values.
# Rebinding df instead of using inplace=True keeps the step explicit and
# chainable; the cell gives the same result when re-run.
df = df.replace({'?': np.nan, '--': np.nan})

In [15]:
# Display the frame after the '?' / '--' replacement.
df

Unnamed: 0,col_a,col_b,col_c,col_d,col_e
0,1.0,1.2,a,True,1.0
1,2.0,1.4,,True,2.0
2,4.0,,c,,
3,4.0,6.2,d,,4.0
4,,,,False,
5,,1.1,,True,5.0
6,6.0,4.3,d,True,


In [29]:
# Drop only the rows in which every column is missing, and keep the result.
# dropna(..., inplace=True) returns None, so binding its return value would
# leave df_drop as None; the non-inplace call returns the filtered frame.
df_drop = df.dropna(axis=0, how='all')
df_drop

In [31]:
# Show the current value bound to df_drop.
df_drop

In [32]:
# New frame without the rows whose values are all missing; df itself is untouched.
df_drop = df.dropna(how='all', axis='index')

In [33]:
# Display the result of dropping all-missing rows.
df_drop

Unnamed: 0,col_a,col_b,col_c,col_d,col_e
0,1.0,1.2,a,True,1.0
1,2.0,1.4,,True,2.0
2,4.0,,c,,
3,4.0,6.2,d,,4.0
4,,,,False,
5,,1.1,,True,5.0
6,6.0,4.3,d,True,


In [26]:
# Keep only the rows with no missing value at all (how='any' is the default).
# Rebinding the name replaces the in-place mutation, so the step is explicit
# and gives the same result when the cell is re-run.
df_drop = df_drop.dropna(axis=0)

In [25]:
# Display df_drop after removing every row that contains a missing value.
df_drop

Unnamed: 0,col_a,col_b,col_c,col_d,col_e
0,1.0,1.2,a,True,1


In [28]:
# Keep only the rows that have at least 3 non-missing values (thresh counts
# non-NaN entries). With the 5 columns here, that removes rows holding 3 or
# more NaNs.
# Runs on df rather than df_drop: an earlier cell reduces df_drop to its fully
# populated rows, so on a top-to-bottom run this filter would otherwise have
# nothing left to remove.
df.dropna(axis=0, thresh=3)

Unnamed: 0,col_a,col_b,col_c,col_d,col_e
0,1.0,1.2,a,True,1.0
1,2.0,1.4,,True,2.0
3,4.0,6.2,d,,4.0
5,,1.1,,True,5.0
6,6.0,4.3,d,True,


In [34]:
# Replace every missing entry, in every column, with the constant 25.
df_fill = df.fillna(value=25)
df_fill

In [40]:
# Impute col_a with its own mean. The filled Series is only displayed;
# it is not written back into df.
mean = df.col_a.mean()
df['col_a'].fillna(value=mean)

0    1.0
1    2.0
2    4.0
3    4.0
4    3.4
5    3.4
6    6.0
Name: col_a, dtype: float64

In [42]:
# Fill missing values with the value from the previous row (forward fill).
# To fill from the next row instead, use bfill.
# DataFrame.ffill is used because the `method` argument of fillna is deprecated.
df.ffill(axis=0)

Unnamed: 0,col_a,col_b,col_c,col_d,col_e
0,1.0,1.2,a,True,1
1,2.0,1.4,a,True,2
2,4.0,1.4,c,True,2
3,4.0,6.2,d,True,4
4,4.0,6.2,d,False,4
5,4.0,1.1,d,True,5
6,6.0,4.3,d,True,5


In [45]:
# Backward-fill from the next row, filling at most one consecutive missing
# value per gap (limit=1); longer runs of NaNs are only partially filled.
# DataFrame.bfill is used because the `method` argument of fillna is deprecated.
df.bfill(axis=0, limit=1)

Unnamed: 0,col_a,col_b,col_c,col_d,col_e
0,1.0,1.2,a,True,1.0
1,2.0,1.4,c,True,2.0
2,4.0,6.2,c,,4.0
3,4.0,6.2,d,False,4.0
4,,1.1,,False,5.0
5,6.0,1.1,d,True,5.0
6,6.0,4.3,d,True,
