In [1]:
import pandas as pd
import numpy as np

In [3]:
s = pd.Series(["Tim", np.nan, "Kim", "Tom"])
s

0    Tim
1    NaN
2    Kim
3    Tom
dtype: object

In [4]:
s.isna()

0    False
1     True
2    False
3    False
dtype: bool

In [6]:
s.isnull()

0    False
1     True
2    False
3    False
dtype: bool

In [9]:
s.notnull()

0     True
1    False
2     True
3     True
dtype: bool

In [10]:
s.notnull().sum()

np.int64(3)

In [11]:
s[3] = None
s

0     Tim
1     NaN
2     Kim
3    None
dtype: object

In [12]:
s.isna()

0    False
1     True
2    False
3     True
dtype: bool

In [13]:
s.isnull()

0    False
1     True
2    False
3     True
dtype: bool

In [14]:
s.dropna()

0    Tim
2    Kim
dtype: object

In [15]:
s

0     Tim
1     NaN
2     Kim
3    None
dtype: object

In [17]:
df = pd.DataFrame([[1,2,3], 
                  [4,np.nan,6], 
                  [np.nan, np.nan, np.nan]])
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,,6.0
2,,,


In [20]:
df.dropna()

Unnamed: 0,0,1,2
0,1.0,2.0,3.0


df.dropna(how='all') removes rows from the DataFrame df where all the values are NaN (missing values). If a row contains at least one non-null value, it will be kept.

In [21]:
df.dropna(how='all')

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,,6.0


In [22]:
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,,6.0
2,,,


In [23]:
df[1] = np.nan
df

Unnamed: 0,0,1,2
0,1.0,,3.0
1,4.0,,6.0
2,,,


In [25]:
df.dropna(axis = 1, how = "all")

Unnamed: 0,0,2
0,1.0,3.0
1,4.0,6.0
2,,


In [26]:
df

Unnamed: 0,0,1,2
0,1.0,,3.0
1,4.0,,6.0
2,,,


A row will not be dropped if it has atleast 2 not null values

In [35]:
df.dropna(thresh=2)


Unnamed: 0,0,1,2
0,1.0,,3.0
1,4.0,,6.0


In [36]:
df

Unnamed: 0,0,1,2
0,1.0,,3.0
1,4.0,,6.0
2,,,


In [37]:
df.fillna(0)

Unnamed: 0,0,1,2
0,1.0,0.0,3.0
1,4.0,0.0,6.0
2,0.0,0.0,0.0


In [39]:
df.fillna({0 : 27, 1 : 15, 2 : 9})

# Fill null values in columns : 0 as 27, in columns 1 : 15 and in column 2 as 9

Unnamed: 0,0,1,2
0,1.0,15.0,3.0
1,4.0,15.0,6.0
2,27.0,15.0,9.0


In [40]:
from numpy import nan as NA

In [41]:
df=pd.DataFrame([[1,2,3],[4,NA,5],
                 [NA,NA,NA]])
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,,5.0
2,,,


df.fillna(method="ffill") (forward fill) replaces NaN values with the previous non-null value in the same column. If there's no previous value (e.g., at the beginning of the column), the NaN remains unchanged.

In [43]:
df.fillna(method="ffill")

  df.fillna(method="ffill")


Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,4.0,2.0,5.0
2,4.0,2.0,5.0


In [44]:
df[4] = [np.nan, np.nan, np.nan]

In [45]:
df

Unnamed: 0,0,1,2,4
0,1.0,2.0,3.0,
1,4.0,,5.0,
2,,,,


In [46]:
df.fillna(method="ffill")

  df.fillna(method="ffill")


Unnamed: 0,0,1,2,4
0,1.0,2.0,3.0,
1,4.0,2.0,5.0,
2,4.0,2.0,5.0,


In [47]:
df.fillna(method="ffill",limit=1)

# Will do a max of 1 fill 

  df.fillna(method="ffill",limit=1)


Unnamed: 0,0,1,2,4
0,1.0,2.0,3.0,
1,4.0,2.0,5.0,
2,4.0,,5.0,


In [51]:
data=pd.Series([1,0,NA,5])
data

0    1.0
1    0.0
2    NaN
3    5.0
dtype: float64

In [53]:
data.fillna(data.mean())

0    1.0
1    0.0
2    2.0
3    5.0
dtype: float64

In [54]:
df

Unnamed: 0,0,1,2,4
0,1.0,2.0,3.0,
1,4.0,,5.0,
2,,,,


In [55]:
df.fillna(df.mean())

Unnamed: 0,0,1,2,4
0,1.0,2.0,3.0,
1,4.0,2.0,5.0,
2,2.5,2.0,4.0,
