## Data Cleaning

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset from a CSV in the current working directory and preview
# the first rows to confirm the columns parsed as expected.
df = pd.read_csv("WHI_Inflation.csv")
df.head()

Unnamed: 0,Country,Year,Headline Consumer Price Inflation,Energy Consumer Price Inflation,Food Consumer Price Inflation,Official Core Consumer Price Inflation,Producer Price Inflation,GDP deflator Index growth rate,Continent/Region,Score,GDP per Capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption
0,Afghanistan,2015,-0.66,-4.25,-0.84,0.219999,,2.66509,South Asia,3.575,0.31982,0.30285,0.30335,0.23414,0.3651,0.09719
1,Afghanistan,2016,4.38,2.07,5.67,5.19276,,-2.409509,South Asia,3.36,0.38227,0.11037,0.17344,0.1643,0.31268,0.07112
2,Afghanistan,2017,4.976,4.44,6.94,5.423228,,2.404,South Asia,3.794,0.401477,0.581543,0.180747,0.10618,0.311871,0.061158
3,Afghanistan,2018,0.63,1.474185,-1.045952,-0.126033,,2.071208,South Asia,3.632,0.332,0.537,0.255,0.085,0.191,0.036
4,Afghanistan,2019,2.302,-2.494359,3.79477,,,6.520928,South Asia,3.203,0.35,0.517,0.361,0.0,0.158,0.025


In [3]:
# Check each column's dtype and non-null count to see which columns need
# type conversion or missing-value handling.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1232 entries, 0 to 1231
Data columns (total 16 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Country                                 1232 non-null   object 
 1   Year                                    1232 non-null   int64  
 2   Headline Consumer Price Inflation       1200 non-null   float64
 3   Energy Consumer Price Inflation         1090 non-null   float64
 4   Food Consumer Price Inflation           1130 non-null   float64
 5   Official Core Consumer Price Inflation  734 non-null    float64
 6   Producer Price Inflation                769 non-null    float64
 7   GDP deflator Index growth rate          1211 non-null   float64
 8   Continent/Region                        1232 non-null   object 
 9   Score                                   1232 non-null   float64
 10  GDP per Capita                          1232 non-null   floa

In [4]:
# Parse the integer year into a timestamp (each year becomes January 1st of
# that year), then re-check the dtypes to confirm the conversion.
df = df.assign(Year=pd.to_datetime(df['Year'], format='%Y'))
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1232 entries, 0 to 1231
Data columns (total 16 columns):
 #   Column                                  Non-Null Count  Dtype         
---  ------                                  --------------  -----         
 0   Country                                 1232 non-null   object        
 1   Year                                    1232 non-null   datetime64[ns]
 2   Headline Consumer Price Inflation       1200 non-null   float64       
 3   Energy Consumer Price Inflation         1090 non-null   float64       
 4   Food Consumer Price Inflation           1130 non-null   float64       
 5   Official Core Consumer Price Inflation  734 non-null    float64       
 6   Producer Price Inflation                769 non-null    float64       
 7   GDP deflator Index growth rate          1211 non-null   float64       
 8   Continent/Region                        1232 non-null   object        
 9   Score                                   1232 non-nul

In [5]:
# Count missing values per column to decide which rows/columns to drop.
df.isna().sum()

Country                                     0
Year                                        0
Headline Consumer Price Inflation          32
Energy Consumer Price Inflation           142
Food Consumer Price Inflation             102
Official Core Consumer Price Inflation    498
Producer Price Inflation                  463
GDP deflator Index growth rate             21
Continent/Region                            0
Score                                       0
GDP per Capita                              0
Social support                              0
Healthy life expectancy at birth            0
Freedom to make life choices                0
Generosity                                  0
Perceptions of corruption                   1
dtype: int64

In [6]:
# Discard rows that lack either the headline CPI or the GDP deflator growth
# rate; the other inflation measures are handled separately per measure.
required_measures = [
    'Headline Consumer Price Inflation',
    'GDP deflator Index growth rate',
]
df = df.dropna(subset=required_measures)
df

Unnamed: 0,Country,Year,Headline Consumer Price Inflation,Energy Consumer Price Inflation,Food Consumer Price Inflation,Official Core Consumer Price Inflation,Producer Price Inflation,GDP deflator Index growth rate,Continent/Region,Score,GDP per Capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption
0,Afghanistan,2015-01-01,-0.660000,-4.250000,-0.840000,0.219999,,2.665090,South Asia,3.5750,0.319820,0.302850,0.303350,0.234140,0.365100,0.097190
1,Afghanistan,2016-01-01,4.380000,2.070000,5.670000,5.192760,,-2.409509,South Asia,3.3600,0.382270,0.110370,0.173440,0.164300,0.312680,0.071120
2,Afghanistan,2017-01-01,4.976000,4.440000,6.940000,5.423228,,2.404000,South Asia,3.7940,0.401477,0.581543,0.180747,0.106180,0.311871,0.061158
3,Afghanistan,2018-01-01,0.630000,1.474185,-1.045952,-0.126033,,2.071208,South Asia,3.6320,0.332000,0.537000,0.255000,0.085000,0.191000,0.036000
4,Afghanistan,2019-01-01,2.302000,-2.494359,3.794770,,,6.520928,South Asia,3.2030,0.350000,0.517000,0.361000,0.000000,0.158000,0.025000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1227,Zimbabwe,2019-01-01,255.292007,90.808071,86.124371,,,-4.035235,Sub-Saharan Africa,3.6630,0.366000,1.114000,0.433000,0.361000,0.151000,0.089000
1228,Zimbabwe,2020-01-01,557.210000,306.431673,601.020236,,,568.971862,Sub-Saharan Africa,3.2992,0.425564,1.047835,0.375038,0.377405,0.151349,0.080929
1229,Zimbabwe,2021-01-01,98.546000,69.820000,105.800000,,,113.294981,Sub-Saharan Africa,3.1450,0.457000,0.649000,0.243000,0.359000,0.157000,0.075000
1230,Zimbabwe,2022-01-01,104.705171,97.246550,149.967034,,,113.018434,Sub-Saharan Africa,2.9950,0.947000,0.690000,0.270000,0.329000,0.106000,0.105000


In [7]:
# Build one frame per inflation measure, each keeping only the rows where
# that particular measure is present. `df` itself is not modified.
ecpi, fcpi, occpi, ppi = (
    df.dropna(subset=[measure])
    for measure in (
        'Energy Consumer Price Inflation',
        'Food Consumer Price Inflation',
        'Official Core Consumer Price Inflation',
        'Producer Price Inflation',
    )
)

occpi.head()

Unnamed: 0,Country,Year,Headline Consumer Price Inflation,Energy Consumer Price Inflation,Food Consumer Price Inflation,Official Core Consumer Price Inflation,Producer Price Inflation,GDP deflator Index growth rate,Continent/Region,Score,GDP per Capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption
0,Afghanistan,2015-01-01,-0.66,-4.25,-0.84,0.219999,,2.66509,South Asia,3.575,0.31982,0.30285,0.30335,0.23414,0.3651,0.09719
1,Afghanistan,2016-01-01,4.38,2.07,5.67,5.19276,,-2.409509,South Asia,3.36,0.38227,0.11037,0.17344,0.1643,0.31268,0.07112
2,Afghanistan,2017-01-01,4.976,4.44,6.94,5.423228,,2.404,South Asia,3.794,0.401477,0.581543,0.180747,0.10618,0.311871,0.061158
3,Afghanistan,2018-01-01,0.63,1.474185,-1.045952,-0.126033,,2.071208,South Asia,3.632,0.332,0.537,0.255,0.085,0.191,0.036
9,Albania,2015-01-01,1.910179,-0.52,4.319489,-0.156957,,0.564278,Central and Eastern Europe,4.959,0.87867,0.80434,0.81325,0.35733,0.14272,0.06413


In [8]:
# Frame without the energy, food, headline and producer inflation columns,
# leaving core inflation, the GDP deflator and the remaining columns.
# NOTE(review): this starts from `df`, not `occpi`, so rows with a missing
# core inflation value are still present — confirm that is intended.
non_core_measures = [
    'Energy Consumer Price Inflation',
    'Food Consumer Price Inflation',
    'Headline Consumer Price Inflation',
    'Producer Price Inflation',
]
core_i = df.drop(columns=non_core_measures)
core_i.head()

Unnamed: 0,Country,Year,Official Core Consumer Price Inflation,GDP deflator Index growth rate,Continent/Region,Score,GDP per Capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption
0,Afghanistan,2015-01-01,0.219999,2.66509,South Asia,3.575,0.31982,0.30285,0.30335,0.23414,0.3651,0.09719
1,Afghanistan,2016-01-01,5.19276,-2.409509,South Asia,3.36,0.38227,0.11037,0.17344,0.1643,0.31268,0.07112
2,Afghanistan,2017-01-01,5.423228,2.404,South Asia,3.794,0.401477,0.581543,0.180747,0.10618,0.311871,0.061158
3,Afghanistan,2018-01-01,-0.126033,2.071208,South Asia,3.632,0.332,0.537,0.255,0.085,0.191,0.036
4,Afghanistan,2019-01-01,,6.520928,South Asia,3.203,0.35,0.517,0.361,0.0,0.158,0.025


## Inflation Rates vs Happiness Score

## Regional Analysis

## Election Correlation