   Content: 1. Working with NaN
            2. Handling NaN Values

In [40]:
import pandas as pd
import numpy as np

# Create the DataFrame
data = {
    'Name': ['Bidhan', 'John', 'Alex', 'Bishowjith', 'Michel', 'Jackson'],
    'Age': [35, 38, np.nan, 40, 38, 37],
    'Salary': [90000, 85000, 87000, np.nan, 86000, 89000]
}

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,Salary
0,Bidhan,35.0,90000.0
1,John,38.0,85000.0
2,Alex,,87000.0
3,Bishowjith,40.0,
4,Michel,38.0,86000.0
5,Jackson,37.0,89000.0


In [43]:
df.dtypes

Name       object
Age       float64
Salary    float64
dtype: object

In [47]:
# Convert columns to nullable integer type
df['Age'] = df['Age'].astype('Int64')
df['Salary'] = df['Salary'].astype('Int64')
df

Unnamed: 0,Name,Age,Salary
0,Bidhan,35.0,90000.0
1,John,38.0,85000.0
2,Alex,,87000.0
3,Bishowjith,40.0,
4,Michel,38.0,86000.0
5,Jackson,37.0,89000.0


In [48]:
df.dtypes

Name      object
Age        Int64
Salary     Int64
dtype: object

In [49]:
df.head(6)

Unnamed: 0,Name,Age,Salary
0,Bidhan,35.0,90000.0
1,John,38.0,85000.0
2,Alex,,87000.0
3,Bishowjith,40.0,
4,Michel,38.0,86000.0
5,Jackson,37.0,89000.0


In [52]:
#checking with isnull().sum()
df.isnull()

Unnamed: 0,Name,Age,Salary
0,False,False,False
1,False,False,False
2,False,True,False
3,False,False,True
4,False,False,False
5,False,False,False


In [53]:
df.isnull().sum()

Name      0
Age       1
Salary    1
dtype: int64

In [54]:
#checking with notnull().sum()
df.notnull().sum()

Name      6
Age       5
Salary    5
dtype: int64

In [55]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    6 non-null      object
 1   Age     5 non-null      Int64 
 2   Salary  5 non-null      Int64 
dtypes: Int64(2), object(1)
memory usage: 288.0+ bytes


In [56]:
df

Unnamed: 0,Name,Age,Salary
0,Bidhan,35.0,90000.0
1,John,38.0,85000.0
2,Alex,,87000.0
3,Bishowjith,40.0,
4,Michel,38.0,86000.0
5,Jackson,37.0,89000.0


# Handling NaN Values

In [57]:
df.isnull().sum()

Name      0
Age       1
Salary    1
dtype: int64

In [58]:
df1 = df.copy()  #copying df as df1 with null values

In [59]:
df1.fillna(0, inplace=True)

In [60]:
df1.isnull().sum()

Name      0
Age       0
Salary    0
dtype: int64

In [61]:
df1.head()

Unnamed: 0,Name,Age,Salary
0,Bidhan,35,90000
1,John,38,85000
2,Alex,0,87000
3,Bishowjith,40,0
4,Michel,38,86000


In [62]:
df1['Age'] = df1['Age'].replace(0, 35)
df1['Salary'] = df1['Salary'].replace(0, 35000)
df1

Unnamed: 0,Name,Age,Salary
0,Bidhan,35,90000
1,John,38,85000
2,Alex,35,87000
3,Bishowjith,40,35000
4,Michel,38,86000
5,Jackson,37,89000


In [63]:
df2 = df.copy()     #copying df as df2 with null values
df2

Unnamed: 0,Name,Age,Salary
0,Bidhan,35.0,90000.0
1,John,38.0,85000.0
2,Alex,,87000.0
3,Bishowjith,40.0,
4,Michel,38.0,86000.0
5,Jackson,37.0,89000.0


In [64]:
df2.fillna(0, inplace=True)
df2

Unnamed: 0,Name,Age,Salary
0,Bidhan,35,90000
1,John,38,85000
2,Alex,0,87000
3,Bishowjith,40,0
4,Michel,38,86000
5,Jackson,37,89000


In [65]:
# Forward Fill and backwordfill (Fill with the forward for forward fill and previous value for backword fill):
df2['Age'] = df2['Age'].replace(0, method='ffill')
df2['Salary'] = df2['Salary'].replace(0, method='bfill')
df2

  df2['Age'] = df2['Age'].replace(0, method='ffill')
  df2['Salary'] = df2['Salary'].replace(0, method='bfill')


Unnamed: 0,Name,Age,Salary
0,Bidhan,35,90000
1,John,38,85000
2,Alex,38,87000
3,Bishowjith,40,86000
4,Michel,38,86000
5,Jackson,37,89000


In [72]:
df3 = df.copy()     #copying df as df3 with null values
df3

Unnamed: 0,Name,Age,Salary
0,Bidhan,35.0,90000.0
1,John,38.0,85000.0
2,Alex,,87000.0
3,Bishowjith,40.0,
4,Michel,38.0,86000.0
5,Jackson,37.0,89000.0


In [73]:
df3.fillna(
{
    'Age' : 40,
    'Salary': 85000
})

Unnamed: 0,Name,Age,Salary
0,Bidhan,35,90000
1,John,38,85000
2,Alex,40,87000
3,Bishowjith,40,85000
4,Michel,38,86000
5,Jackson,37,89000


In [82]:
df4 = df.copy()     #copying df as df2 with null values
df4

Unnamed: 0,Name,Age,Salary
0,Bidhan,35.0,90000.0
1,John,38.0,85000.0
2,Alex,,87000.0
3,Bishowjith,40.0,
4,Michel,38.0,86000.0
5,Jackson,37.0,89000.0


In [83]:
df4.dropna(how='any')

Unnamed: 0,Name,Age,Salary
0,Bidhan,35,90000
1,John,38,85000
4,Michel,38,86000
5,Jackson,37,89000


In [91]:
df5 = df.copy()     #copying df as df2 with null values
df5

Unnamed: 0,Name,Age,Salary
0,Bidhan,35.0,90000.0
1,John,38.0,85000.0
2,Alex,,87000.0
3,Bishowjith,40.0,
4,Michel,38.0,86000.0
5,Jackson,37.0,89000.0


In [93]:
# Fill NaN with the mean and median of the column

median_value = df5['Age'].median()
df5['Age'] = df5['Age'].fillna(median_value)

mean_value = df5['Salary'].mean()
df5['Salary'] = df5['Salary'].fillna(mean_value)
df5

Unnamed: 0,Name,Age,Salary
0,Bidhan,35,90000
1,John,38,85000
2,Alex,38,87000
3,Bishowjith,40,87400
4,Michel,38,86000
5,Jackson,37,89000
