# Missing Values

## Setup

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the dataset into a DataFrame.
# NOTE(review): the '/content/' prefix looks like a Google Colab path —
# adjust it when running the notebook elsewhere.
df = pd.read_csv('/content/Economy_of_US_na.csv')

## Detect and report missing values

In [None]:
# Show the DataFrame as loaded; missing entries appear as empty/NaN cells.
df

Unnamed: 0,Year,GDP_Nominal,GDP_Growth
0,1980.0,2857.3,
1,1981.0,3207.0,0.025
2,1982.0,3343.8,-0.018
3,1983.0,,
4,1984.0,4037.7,0.072
5,1985.0,4339.0,
6,1986.0,,
7,1987.0,4855.3,
8,1988.0,5236.4,0.042
9,1989.0,,


In [None]:
# Boolean mask with the same shape as df: True wherever a value is missing.
df.isnull()

Unnamed: 0,Year,GDP_Nominal,GDP_Growth
0,False,False,True
1,False,False,False
2,False,False,False
3,False,True,True
4,False,False,False
5,False,False,True
6,False,True,True
7,False,False,True
8,False,False,False
9,False,True,True


In [None]:
# Tally the missing entries of every column in one pass, then report each
# column's count on its own line.
missing_counts = df.isnull().sum()
for column, n_missing in missing_counts.items():
    print("{} has {} missing value(s)".format(column, n_missing))
  

Year has 1 missing value(s)
GDP_Nominal has 4 missing value(s)
GDP_Growth has 7 missing value(s)


## Dropping missing values

In [None]:
# Default dropna(): discard every row that contains at least one missing
# value. The result is a new DataFrame; df itself is not modified.
df2 = df.dropna()
df2

Unnamed: 0,Year,GDP_Nominal,GDP_Growth
1,1981.0,3207.0,0.025
2,1982.0,3343.8,-0.018
4,1984.0,4037.7,0.072
8,1988.0,5236.4,0.042
10,1990.0,5963.1,0.019
12,1992.0,6520.3,0.035


In [None]:
# Re-display df to confirm the dropna() call above left the original intact.
df

Unnamed: 0,Year,GDP_Nominal,GDP_Growth
0,1980.0,2857.3,
1,1981.0,3207.0,0.025
2,1982.0,3343.8,-0.018
3,1983.0,,
4,1984.0,4037.7,0.072
5,1985.0,4339.0,
6,1986.0,,
7,1987.0,4855.3,
8,1988.0,5236.4,0.042
9,1989.0,,


In [None]:
# axis=1 drops columns instead of rows: any column holding a missing value
# is removed. Every column here has at least one, so only the index is left.
df2 = df.dropna(axis=1)
df2

0
1
2
3
4
5
6
7
8
9
10


In [None]:
# thresh=3 keeps only rows with at least 3 non-missing values. With the
# three columns loaded above, this is the same as a plain dropna().
df2 = df.dropna(thresh=3)
df2

Unnamed: 0,Year,GDP_Nominal,GDP_Growth
1,1981.0,3207.0,0.025
2,1982.0,3343.8,-0.018
4,1984.0,4037.7,0.072
8,1988.0,5236.4,0.042
10,1990.0,5963.1,0.019
12,1992.0,6520.3,0.035


In [None]:
# thresh=2 keeps rows with at least 2 non-missing values, so rows that lack
# only one field survive.
df2 = df.dropna(thresh=2)
df2

Unnamed: 0,Year,GDP_Nominal,GDP_Growth
0,1980.0,2857.3,
1,1981.0,3207.0,0.025
2,1982.0,3343.8,-0.018
4,1984.0,4037.7,0.072
5,1985.0,4339.0,
7,1987.0,4855.3,
8,1988.0,5236.4,0.042
10,1990.0,5963.1,0.019
12,1992.0,6520.3,0.035


In [None]:
# thresh=1 keeps rows with at least 1 non-missing value; only rows that are
# entirely empty would be dropped.
df2 = df.dropna(thresh=1)
df2

Unnamed: 0,Year,GDP_Nominal,GDP_Growth
0,1980.0,2857.3,
1,1981.0,3207.0,0.025
2,1982.0,3343.8,-0.018
3,1983.0,,
4,1984.0,4037.7,0.072
5,1985.0,4339.0,
6,1986.0,,
7,1987.0,4855.3,
8,1988.0,5236.4,0.042
9,1989.0,,


In [None]:
# Column-wise threshold: keep only columns with at least 7 non-missing
# values. GDP_Growth falls short of that and is removed.
df2 = df.dropna(axis = 1, thresh=7)
df2

Unnamed: 0,Year,GDP_Nominal
0,1980.0,2857.3
1,1981.0,3207.0
2,1982.0,3343.8
3,1983.0,
4,1984.0,4037.7
5,1985.0,4339.0
6,1986.0,
7,1987.0,4855.3
8,1988.0,5236.4
9,1989.0,


In [None]:
# Stricter column-wise threshold: a column needs at least 10 non-missing
# values to be kept. Only Year satisfies this.
df2 = df.dropna(axis = 1, thresh=10)
df2

Unnamed: 0,Year
0,1980.0
1,1981.0
2,1982.0
3,1983.0
4,1984.0
5,1985.0
6,1986.0
7,1987.0
8,1988.0
9,1989.0


## Filling with a constant

In [None]:
# Replace every missing value in the whole frame with the string 'NA'.
# NOTE(review): putting a string into numeric columns presumably turns them
# into object dtype — check df2.dtypes before doing arithmetic on the result.
df2 = df.fillna('NA')
df2

Unnamed: 0,Year,GDP_Nominal,GDP_Growth
0,1980.0,2857.3,
1,1981.0,3207.0,0.025
2,1982.0,3343.8,-0.018
3,1983.0,,
4,1984.0,4037.7,0.072
5,1985.0,4339.0,
6,1986.0,,
7,1987.0,4855.3,
8,1988.0,5236.4,0.042
9,1989.0,,


In [None]:
# Fill a single column and store the result as a new column, leaving the
# original 'Year' column untouched. The placeholder is the string 'YEAR'.
df['Year_filled'] = df['Year'].fillna('YEAR')
df

Unnamed: 0,Year,GDP_Nominal,GDP_Growth,Year_filled
0,1980.0,2857.3,,1980.0
1,1981.0,3207.0,0.025,1981.0
2,1982.0,3343.8,-0.018,1982.0
3,1983.0,,,1983.0
4,1984.0,4037.7,0.072,1984.0
5,1985.0,4339.0,,1985.0
6,1986.0,,,1986.0
7,1987.0,4855.3,,1987.0
8,1988.0,5236.4,0.042,1988.0
9,1989.0,,,1989.0


## Filling with forward fill (ffill)

In [None]:
# Forward fill: each missing value takes the last valid value above it.
# Series.ffill() replaces fillna(method='ffill'), which is deprecated since
# pandas 2.1; the result is the same.
df['GDP_filled_ffill'] = df['GDP_Nominal'].ffill()
# Show the original and filled columns side by side for comparison.
df[['GDP_Nominal', 'GDP_filled_ffill']]

Unnamed: 0,GDP_Nominal,GDP_filled_ffill
0,2857.3,2857.3
1,3207.0,3207.0
2,3343.8,3343.8
3,,3343.8
4,4037.7,4037.7
5,4339.0,4339.0
6,,4339.0
7,4855.3,4855.3
8,5236.4,5236.4
9,,5236.4


## Filling with backward fill (bfill)

In [None]:
# Backward fill: each missing value takes the next valid value below it.
# Series.bfill() replaces fillna(method='bfill'), which is deprecated since
# pandas 2.1; the result is the same.
df['GDP_filled_bfill'] = df['GDP_Nominal'].bfill()
# Show the original and filled columns side by side for comparison.
df[['GDP_Nominal', 'GDP_filled_bfill']]

Unnamed: 0,GDP_Nominal,GDP_filled_bfill
0,2857.3,2857.3
1,3207.0,3207.0
2,3343.8,3343.8
3,,4037.7
4,4037.7,4037.7
5,4339.0,4339.0
6,,4855.3
7,4855.3,4855.3
8,5236.4,5236.4
9,,5963.1


## Filling with the mean

In [None]:
# Fill gaps with the column mean. mean() skips NaN by default, so the
# average is computed from the observed values only.
df['GDP_Nominal_filled_mean'] = df['GDP_Nominal'].fillna(df['GDP_Nominal'].mean())
# Show the original and filled columns side by side for comparison.
df[['GDP_Nominal', 'GDP_Nominal_filled_mean']]

Unnamed: 0,GDP_Nominal,GDP_Nominal_filled_mean
0,2857.3,2857.3
1,3207.0,3207.0
2,3343.8,3343.8
3,,4484.433333
4,4037.7,4037.7
5,4339.0,4339.0
6,,4484.433333
7,4855.3,4855.3
8,5236.4,5236.4
9,,4484.433333


## Filling with the mode

In [None]:
# Fill gaps with the most frequent value. mode() returns a Series (several
# values can tie), so [0] picks its first entry.
# NOTE(review): the values here look unique, in which case every value ties
# and [0] is simply the first one returned (2857.3) — mode is of limited use
# for continuous data like this.
df['GDP_Nominal_filled_mode'] = df['GDP_Nominal'].fillna(df['GDP_Nominal'].mode()[0])
# Show the original and filled columns side by side for comparison.
df[['GDP_Nominal', 'GDP_Nominal_filled_mode']]

Unnamed: 0,GDP_Nominal,GDP_Nominal_filled_mode
0,2857.3,2857.3
1,3207.0,3207.0
2,3343.8,3343.8
3,,2857.3
4,4037.7,4037.7
5,4339.0,4339.0
6,,2857.3
7,4855.3,4855.3
8,5236.4,5236.4
9,,2857.3


## Summary

In [None]:
# Apply every filling strategy from this notebook to GDP_Growth, one new
# column per strategy, so the results can be compared side by side.

# Constant fills: a string placeholder and a numeric zero.
df['GDP_Growth_fill_NA'] = df['GDP_Growth'].fillna('NA')
df['GDP_Growth_fill_0'] = df['GDP_Growth'].fillna(0)

# Directional fills. Series.ffill()/bfill() replace fillna(method=...),
# which is deprecated since pandas 2.1. A leading gap stays missing under
# ffill because there is no earlier value to carry forward.
df['GDP_Growth_fill_ffill'] = df['GDP_Growth'].ffill()
df['GDP_Growth_fill_bfill'] = df['GDP_Growth'].bfill()

# Statistical fills: the column mean (NaN skipped by default) and the first
# entry of the mode.
df['GDP_Growth_fill_mean'] = df['GDP_Growth'].fillna(df['GDP_Growth'].mean())
df['GDP_Growth_fill_mode'] = df['GDP_Growth'].fillna(df['GDP_Growth'].mode()[0])

# Display the original column next to each filled variant.
df[['GDP_Growth', 'GDP_Growth_fill_NA', 'GDP_Growth_fill_0',
    'GDP_Growth_fill_ffill', 'GDP_Growth_fill_bfill',
    'GDP_Growth_fill_mean', 'GDP_Growth_fill_mode']]

Unnamed: 0,GDP_Growth,GDP_Growth_fill_NA,GDP_Growth_fill_0,GDP_Growth_fill_ffill,GDP_Growth_fill_bfill,GDP_Growth_fill_mean,GDP_Growth_fill_mode
0,,,0.0,,0.025,0.029167,-0.018
1,0.025,0.025,0.025,0.025,0.025,0.025,0.025
2,-0.018,-0.018,-0.018,-0.018,-0.018,-0.018,-0.018
3,,,0.0,-0.018,0.072,0.029167,-0.018
4,0.072,0.072,0.072,0.072,0.072,0.072,0.072
5,,,0.0,0.072,0.042,0.029167,-0.018
6,,,0.0,0.072,0.042,0.029167,-0.018
7,,,0.0,0.072,0.042,0.029167,-0.018
8,0.042,0.042,0.042,0.042,0.042,0.042,0.042
9,,,0.0,0.042,0.019,0.029167,-0.018
