In [26]:
import pandas as pd
import numpy as np 
# Small demo frame with missing values, defined column by column:
# P and Q are partly null, R is entirely null, S is complete.
df = pd.DataFrame({
    'P': [np.nan, 3, 5, np.nan],
    'Q': [2, 4, np.nan, 4],
    'R': [np.nan, np.nan, np.nan, np.nan],
    'S': [0, 1, 6, 5],
})
df.head()

Unnamed: 0,P,Q,R,S
0,,2.0,,0
1,3.0,4.0,,1
2,5.0,,,6
3,,4.0,,5


In [21]:
# Count the null values in each column and list the most affected columns first.
# The chain returns a new Series rather than sorting in place, so re-running
# the cell always starts from the same input.
null_check = df.isnull().sum().sort_values(ascending=False)
null_check.head()

R    4
P    2
Q    1
S    0
dtype: int64

In [28]:
# Remove every column that holds at least one null value.
# dropna returns a new frame; df itself is left untouched.
df.dropna(axis='columns', how='any')


Unnamed: 0,S
0,0
1,1
2,6
3,5


In [29]:
# Remove every row that holds at least one null value.
# dropna returns a new frame; df itself is left untouched.
df.dropna(axis='index', how='any')

Unnamed: 0,P,Q,R,S


In [31]:
# Imputing by backfill.
# axis=1 fills across columns within each row: a null takes the next valid
# value to its right. Any value still missing afterwards is set to 0.
# DataFrame.bfill replaces fillna(method='bfill'), which is deprecated in
# recent pandas releases.
df.bfill(axis=1).fillna(0)

Unnamed: 0,P,Q,R,S
0,2.0,2.0,0.0,0.0
1,3.0,4.0,1.0,1.0
2,5.0,6.0,6.0,6.0
3,4.0,4.0,5.0,5.0


In [33]:
# Same result as dropna(axis=1), done by hand: first collect the names of the
# columns that contain any null, then drop exactly those columns.
na_columns = df.columns[df.isnull().any()].tolist()
df.drop(columns=na_columns)

Unnamed: 0,S
0,0
1,1
2,6
3,5


In [39]:
# Imputing with the mean, the median, a constant, or the most frequent value
from sklearn.impute import SimpleImputer

# One imputer per strategy, each under its own name. Previously all four were
# assigned to `imp_mean`, so only the last one (most_frequent) survived and
# the name no longer matched the strategy actually applied.
imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
imp_median = SimpleImputer(missing_values=np.nan, strategy='median')
imp_constant = SimpleImputer(missing_values=np.nan, strategy='constant', fill_value=7)
imp_most_frequent = SimpleImputer(missing_values=np.nan, strategy='most_frequent')

# The result is a NumPy array, not a DataFrame. It has three columns instead
# of four: column R is entirely null, so there is no statistic to fill it with
# and the imputer leaves it out of the output.
imp_most_frequent.fit_transform(df)

array([[3., 2., 0.],
       [3., 4., 1.],
       [5., 4., 6.],
       [3., 4., 5.]])

In [16]:
# Date parsing: the raw strings are month/day/two-digit-year, so the format is
# given explicitly instead of letting pandas guess it.
df = pd.DataFrame(
    {'date': ['3/2/07', '3/22/07', '4/06/07', '5/06/07']}
).assign(
    date_parsed=lambda frame: pd.to_datetime(frame['date'], format='%m/%d/%y')
)
df

Unnamed: 0,date,date_parsed
0,3/2/07,2007-03-02
1,3/22/07,2007-03-22
2,4/06/07,2007-04-06
3,5/06/07,2007-05-06


In [17]:
# A cell renders only its last expression, so the year and day components are
# kept in named variables instead of being computed and silently discarded.
parsed_dates = df['date_parsed']
parsed_year = parsed_dates.dt.year
parsed_day = parsed_dates.dt.day

# Displayed result: the month of each parsed date.
parsed_dates.dt.month

0    3
1    3
2    4
3    5
Name: date_parsed, dtype: int64