## How Does Economic Health Affect People’s Sleep?
***

## Imports

In [1]:
import pandas as pd
import numpy as np

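First we create an empty DataFrame for each data category (education, HDI, inequality, finance, income, and employment); the CSV files under `data_file` are loaded into these frames in the cells below.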

In [2]:
# Start every dataset category with its own, independent empty DataFrame;
# each one is populated further down in the notebook.
(
    education_df,
    hdi_df,
    inequality_df,
    finance_df,
    income_df,
    employment_df,
) = (pd.DataFrame() for _ in range(6))

In [3]:
def load_dataframe_merge(category_df, dataset_category, dataset, col_drop,
                         country=' United States',
                         base_dir='../data_file/raw_data_csv/economic_impact_datasets'):
    """Load one indicator CSV, keep a single country's row, and append it as a column.

    The file ``<base_dir>/<dataset_category>/<dataset>.csv`` is read with
    cp1252 encoding, filtered to the rows whose ``Country`` value equals
    ``country``, transposed so the CSV's column headers become the index,
    stripped of rows that contain NaN, and concatenated column-wise onto
    ``category_df``.

    Parameters
    ----------
    category_df : pandas.DataFrame
        Frame to extend. It is not modified; a new frame is returned.
    dataset_category : str
        Sub-directory of ``base_dir`` that holds the CSV.
    dataset : str
        CSV file name without the ``.csv`` extension. Also used as the name
        of the newly added column.
    col_drop : int or list-like
        Passed straight to ``pandas.read_csv(skiprows=...)``. Despite the
        name, this selects leading *rows* of the file to skip, not columns.
    country : str, optional
        Exact ``Country`` value to keep. The default keeps the leading space
        of the original filter (presumably how the raw CSVs store the name;
        verify against the data files).
    base_dir : str, optional
        Root directory of the economic-impact CSV folders.

    Returns
    -------
    pandas.DataFrame
        ``category_df`` with the selected country's data appended as a column
        named ``dataset``.

    Raises
    ------
    IndexError
        If no row of the CSV matches ``country``.
    """
    loc = f'{base_dir}/{dataset_category}/{dataset}.csv'
    df = pd.read_csv(loc, encoding='cp1252', skiprows=col_drop)

    df = df.loc[df['Country'] == country]
    if df.empty:
        # Without this guard the failure surfaces later as a bare
        # "index 0 is out of bounds" from df.columns[0]; keep the same
        # exception type but say what actually went wrong.
        raise IndexError(f'no row with Country == {country!r} found in {loc}')

    # After the transpose the CSV headers are the index and the country's
    # values sit in a single column; dropna removes the empty cells.
    df = df.T.dropna()

    # The transposed column is labelled with the row's original position in
    # the CSV; give it the dataset name instead.
    df = df.rename(columns={df.columns[0]: dataset})

    return pd.concat([category_df, df], axis=1)


In [4]:
def convert_type(df, iloc_begin, iloc_end):
    """Slice rows by position, cast the row labels to int, and coerce values to numbers.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose selected row labels can be converted to integers.
    iloc_begin, iloc_end : int or None
        Positional bounds used as ``df.iloc[iloc_begin:iloc_end]``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the selected rows, with an integer index and
        every column passed through ``pandas.to_numeric(errors='coerce')``,
        so cells that cannot be parsed become NaN. ``df`` itself is left
        untouched.

    Notes
    -----
    Errors raised by ``Index.astype(int)`` propagate when a selected row
    label cannot be converted to an integer.
    """
    # Copy the slice so that re-labelling it cannot affect the caller's frame.
    selected = df.iloc[iloc_begin:iloc_end].copy()

    # Cast the row labels directly instead of transposing twice just to reach
    # them through `.columns`; a transpose round-trip can also upcast columns
    # to a common dtype.
    selected.index = selected.index.astype(int)

    return selected.apply(lambda column: pd.to_numeric(column, errors='coerce'))

In [5]:
education_file_list = ['expected_years_of_schooling', 'gross_enrolment_ratio_pre-primary', 'gross_enrolment_ratio_primary',
                       'gross_enrolment_ratio_secondary', 'gross_enrolment_ratio_tertiary']

# Build the frame from a fresh DataFrame rather than the current value of
# `education_df`, so re-running this cell does not stack duplicate columns on
# top of an already-converted frame.
# education_index is read with 5 leading rows skipped; the other files with 6.
education_df = load_dataframe_merge(pd.DataFrame(), 'education_datasets', 'education_index', 5)
for dataset_name in education_file_list:
    education_df = load_dataframe_merge(education_df, 'education_datasets', dataset_name, 6)

# Keep positions 22 up to (not including) the last three rows of the
# transposed frame; the displayed result covers the years 2010-2019.
education_df = convert_type(education_df, 22, -3)
display(education_df)

Unnamed: 0,education_index,expected_years_of_schooling,gross_enrolment_ratio_pre-primary,gross_enrolment_ratio_primary,gross_enrolment_ratio_secondary,gross_enrolment_ratio_tertiary
2010,0.892,16.2,70.0,100.0,95.0,93.0
2011,0.897,16.3,70.0,99.0,96.0,94.0
2012,0.898,16.3,71.0,99.0,96.0,93.0
2013,0.891,16.1,72.0,99.0,96.0,89.0
2014,0.892,16.1,73.0,100.0,97.0,89.0
2015,0.893,16.2,72.0,100.0,98.0,89.0
2016,0.896,16.3,72.0,101.0,99.0,89.0
2017,0.899,16.3,73.0,102.0,99.0,88.0
2018,0.899,16.3,,,,
2019,0.9,16.3,,,,


In [6]:
# Load into a fresh DataFrame rather than the current `hdi_df`, so re-running
# this cell does not append a duplicate column to an already-converted frame.
hdi_df = load_dataframe_merge(pd.DataFrame(), 'hdi_dataset', 'human_development_index', 5)

# Keep everything from position 22 to the end of the transposed frame; the
# displayed result covers the years 2010-2019.
hdi_df = convert_type(hdi_df, 22, len(hdi_df))
display(hdi_df)

Unnamed: 0,human_development_index
2010,0.916
2011,0.919
2012,0.92
2013,0.918
2014,0.92
2015,0.921
2016,0.922
2017,0.924
2018,0.925
2019,0.926


In [7]:
inequality_file_list = ['inequality_adjusted_education_index', 'inequality_adjusted_hdi', 'inequality_adjusted_income_index']

# Reset the accumulator inside the cell so that re-running it does not stack
# duplicate columns on top of an already-converted frame.
inequality_df = pd.DataFrame()
for dataset_name in inequality_file_list:
    inequality_df = load_dataframe_merge(inequality_df, 'inequality_datasets', dataset_name, 5)

# Keep everything from position 2 to the end of the transposed frame; the
# displayed result covers the years 2010-2019.
inequality_df = convert_type(inequality_df, 2, len(inequality_df))
display(inequality_df)

Unnamed: 0,inequality_adjusted_education_index,inequality_adjusted_hdi,inequality_adjusted_income_index
2010,0.864,0.812,0.73
2011,0.864,0.778,0.646
2012,0.851,0.806,0.727
2013,0.831,0.759,0.618
2014,0.844,0.763,0.62
2015,0.843,0.796,0.705
2016,0.846,0.799,0.705
2017,0.85,0.797,0.697
2018,0.849,0.801,0.714
2019,0.875,0.808,0.711


In [8]:
# Start from a fresh DataFrame rather than the current `employment_df`, so
# re-running this cell does not stack duplicate columns on top of an
# already-converted frame.
# employment_% is read with 5 leading rows skipped, unemployed_% with 6.
employment_df = load_dataframe_merge(pd.DataFrame(), 'employment_datasets', 'employment_%', 5)
employment_df = load_dataframe_merge(employment_df, 'employment_datasets', 'unemployed_%', 6)

# Keep positions 6 up to (not including) the last two rows of the transposed
# frame; the displayed result covers the years 2010-2019.
employment_df = convert_type(employment_df, 6, -2)
display(employment_df)

Unnamed: 0,employment_%,unemployed_%
2010,63.6,9.6
2011,63.0,8.9
2012,63.0,8.1
2013,62.5,7.4
2014,62.2,6.2
2015,62.0,5.3
2016,62.2,4.9
2017,62.3,4.4
2018,62.3,3.9
2019,62.0,3.7


In [9]:
# Start from a fresh DataFrame rather than the current `finance_df`, so
# re-running this cell does not stack duplicate columns on top of an
# already-converted frame.
# foreign_direct_investment is read with 6 leading rows skipped,
# private_capital_flow with 5.
finance_df = load_dataframe_merge(pd.DataFrame(), 'finance_datasets', 'foreign_direct_investment', 6)
finance_df = load_dataframe_merge(finance_df, 'finance_datasets', 'private_capital_flow', 5)

# Keep positions 6 up to (not including) the last three rows of the
# transposed frame; the displayed result covers the years 2010-2019.
finance_df = convert_type(finance_df, 6, -3)
finance_df

Unnamed: 0,foreign_direct_investment,private_capital_flow
2010,1.8,-3.6
2011,1.7,-0.3
2012,1.5,-2.3
2013,1.7,0.4
2014,1.4,0.1
2015,2.8,-1.4
2016,2.6,-2.0
2017,1.8,-1.0
2018,1.3,-1.6
2019,1.5,-1.4


In [10]:
# Start from a fresh DataFrame rather than the current `income_df`, so
# re-running this cell does not stack duplicate columns on top of an
# already-converted frame.
# GNI is read with 6 leading rows skipped, GDP with 5.
income_df = load_dataframe_merge(pd.DataFrame(), 'income_datasets', 'GNI', 6)
income_df = load_dataframe_merge(income_df, 'income_datasets', 'GDP', 5)

# Keep everything from position 22 to the end of the transposed frame; the
# displayed result covers the years 2010-2019.
income_df = convert_type(income_df, 22, len(income_df))
income_df

Unnamed: 0,GNI,GDP
2010,55421,16838.6
2011,55992,17099.7
2012,56790,17484.4
2013,57409,17806.5
2014,58393,18243.1
2015,59559,18768.6
2016,60024,19062.8
2017,61019,19485.4
2018,62667,20105.8
2019,63826,20575.0


In [11]:
# Collect every index-style series (education, HDI and the three
# inequality-adjusted indices) side by side in a single frame.
index_df = pd.concat(
    [
        education_df['education_index'],
        hdi_df['human_development_index'],
        inequality_df[[
            'inequality_adjusted_education_index',
            'inequality_adjusted_hdi',
            'inequality_adjusted_income_index',
        ]],
    ],
    axis=1,
)

In [12]:
# Persist the cleaned finance, employment and income frames as pickles.
for frame, target in (
    (finance_df, '../data_file/cleaned_data_pickle/finance.pickle'),
    (employment_df, '../data_file/cleaned_data_pickle/employment.pickle'),
    (income_df, '../data_file/cleaned_data_pickle/income.pickle'),
):
    frame.to_pickle(target)

In [13]:
# Persist the combined index frame and the education, HDI and inequality
# frames as pickles.
for frame, target in (
    (index_df, '../data_file/cleaned_data_pickle/index.pickle'),
    (education_df, '../data_file/cleaned_data_pickle/education.pickle'),
    (hdi_df, '../data_file/cleaned_data_pickle/hdi.pickle'),
    (inequality_df, '../data_file/cleaned_data_pickle/inequality.pickle'),
):
    frame.to_pickle(target)