## Handling missing values
* `df.dropna()`: Removes rows (or columns) that contain missing values.
* `df.fillna()`: Fills missing values with the provided value.

In [2]:
import numpy as np
import pandas as pd 

In [3]:
# Small demo frame: columns A and B contain NaN entries, column C is complete.
df1 = pd.DataFrame(
    {
        'A': [1, 2, np.nan],
        'B': [1, np.nan, np.nan],
        'C': [5, 10, 15],
    }
)

In [155]:
# Display the frame to see where the NaN entries are
df1

Unnamed: 0,A,B,C
0,1.0,1.0,5
1,2.0,,10
2,,,15


In [5]:
# Boolean mask of the frame: True marks a missing (NaN) entry
df1.isna()

Unnamed: 0,A,B,C
0,False,False,False
1,False,True,False
2,True,True,False


In [10]:
# NOTE(review): this cell repeats the isna() check from the previous cell and could be removed
df1.isna()

Unnamed: 0,A,B,C
0,False,False,False
1,False,True,False
2,True,True,False


In [13]:
# Print a summary of the frame: non-null count and dtype for each column
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A       2 non-null      float64
 1   B       1 non-null      float64
 2   C       3 non-null      int64  
dtypes: float64(2), int64(1)
memory usage: 200.0 bytes


In [156]:
# Drop every row that contains at least one NaN.
# axis=0 (rows) is the default; it is spelled out here for clarity.
df1.dropna(axis=0)

Unnamed: 0,A,B,C
0,1.0,1.0,5


In [157]:
# dropna() returned a new frame; df1 itself is unchanged because inplace defaults to False
df1

Unnamed: 0,A,B,C
0,1.0,1.0,5
1,2.0,,10
2,,,15


In [158]:
# Drop every column that contains at least one NaN (only the complete columns survive).
df1.dropna(axis='columns')

Unnamed: 0,C
0,5
1,10
2,15


In [160]:
# thresh=2 keeps only the columns that have at least 2 non-NaN values,
# so column B (a single non-null value) is dropped while A and C are kept
df1.dropna(axis=1,thresh=2)

Unnamed: 0,A,C
0,1.0,5
1,2.0,10
2,,15


In [161]:
# Fill every NaN with a fixed placeholder value (returns a new frame; df1 is not modified)
df1.fillna(value='SomeValue')

Unnamed: 0,A,B,C
0,1,1,5
1,2,SomeValue,10
2,SomeValue,SomeValue,15


A common choice is to fill the missing values in a column with that column's mean.

In [162]:
# Fill the missing values in column A with the mean of that column.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on df1['A']: that chained in-place call acts on an
# intermediate object, emits a FutureWarning in pandas 2.x, and leaves df1
# unchanged once Copy-on-Write is enabled (the default from pandas 3.0).
df1['A'] = df1['A'].fillna(df1['A'].mean())

In [163]:
# Column A no longer contains NaN; column B has not been filled yet
df1

Unnamed: 0,A,B,C
0,1.0,1.0,5
1,2.0,,10
2,1.5,,15


In [164]:
# Fill the missing values in column B with the mean of that column.
# Assigning the result back (rather than a chained fillna(..., inplace=True))
# avoids the pandas 2.x FutureWarning and still updates df1 when
# Copy-on-Write is enabled (the default from pandas 3.0).
df1['B'] = df1['B'].fillna(df1['B'].mean())
# Show the frame after both columns have been filled
df1

Unnamed: 0,A,B,C
0,1.0,1.0,5
1,2.0,1.0,10
2,1.5,1.0,15
