## This notebook is for testing functions before adding them to src

In [1]:
import numpy as np
import pandas as pd
from functools import reduce
import plotly.express as px

In [2]:
def _strip_if_str(value):
    """Return *value* without surrounding whitespace if it is a string, else unchanged."""
    return value.strip() if isinstance(value, str) else value


def _load_indicator(csv_path, value_name):
    """Read one raw indicator CSV and return it as a long (Country, Year, value) table.

    The first six lines of the file are skipped, so the seventh line is used
    as the header. Rows without a country are dropped, and every string cell
    (country names, year labels, raw values) is stripped of surrounding
    whitespace.
    """
    wide = pd.read_csv(csv_path, skiprows=6, sep=',', encoding='latin-1')
    # Keep column 1 (expected to be 'Country') plus every second column from
    # position 2 onwards. The selection is computed per file so that files
    # with a different number of columns do not break the positional lookup.
    # NOTE(review): presumably the skipped in-between columns are
    # annotation/empty columns - confirm against the raw files.
    keep_cols = np.r_[1:2, 2:wide.shape[1]:2]
    wide = wide.iloc[:, keep_cols].dropna(axis=0, subset=['Country'])
    tidy = wide.melt(id_vars='Country', var_name='Year', value_name=value_name)
    # Strip *before* any matching/merging: otherwise a country padded
    # differently in two files would silently fall out of the inner join.
    # Series.map is used per column because DataFrame.applymap is deprecated.
    for column in tidy.columns:
        tidy[column] = tidy[column].map(_strip_if_str)
    return tidy


def process_raw_data(path_raw_data='../data/raw/'):
    """Load the four raw indicator CSVs and merge them into one tidy table.

    Parameters
    ----------
    path_raw_data : str or path-like, default '../data/raw/'
        Directory containing the four indicator CSV files.

    Returns
    -------
    pandas.DataFrame
        One row per (Country, Year) pair present in all four files, with
        columns ``Country``, ``Year`` (int32) and the float columns
        ``Expected_years_of_schooling``, ``Gross_national_income_per_capita``,
        ``Life_expectancy_at_birth`` and ``Mean_years_of_schooling``.
        Cells equal to ``'..'`` in the raw files become NaN.

    Raises
    ------
    FileNotFoundError
        If one of the expected CSV files is missing from *path_raw_data*.
    ValueError
        If a value (other than ``'..'``) cannot be converted to a number.
    """
    from pathlib import Path  # local import keeps this block self-contained

    raw_dir = Path(path_raw_data)
    # Output column name -> raw file name (order defines the column order).
    sources = {
        'Expected_years_of_schooling': 'Expected years of schooling (years).csv',
        'Gross_national_income_per_capita': 'Gross national income (GNI) per capita (constant 2017 PPP$).csv',
        'Life_expectancy_at_birth': 'Life expectancy at birth (years).csv',
        'Mean_years_of_schooling': 'Mean years of schooling (years).csv',
    }
    tables = [_load_indicator(raw_dir / file_name, column)
              for column, file_name in sources.items()]

    # Only countries reported in every indicator file are kept.
    common_countries = set.intersection(*(set(table['Country']) for table in tables))
    tables = [table[table['Country'].isin(common_countries)] for table in tables]

    merged = reduce(
        lambda left, right: pd.merge(left, right, on=['Country', 'Year'], how='inner'),
        tables,
    )

    # '..' is the placeholder used for missing values in the raw files.
    dtypes = {'Year': 'int32', **{column: 'float' for column in sources}}
    return merged.replace('..', np.nan).astype(dtypes)



In [3]:
# Run the raw-data pipeline once; `res` holds the merged per-country, per-year indicator table.
res = process_raw_data()

In [4]:
# Restrict the merged table to a small set of countries used for the experiments below.
countries = ['Netherlands', 'Germany', 'Finland']
is_selected = res['Country'].isin(countries)
curr = res[is_selected]

In [8]:
def add_indices(df):
    """Return a copy of *df* extended with the three dimension indices.

    The input frame is left untouched. The added columns are:

    * ``LEI`` - ``(Life_expectancy_at_birth - 20) / (85 - 20)``
    * ``EI``  - mean of ``Mean_years_of_schooling / 15`` and
      ``Expected_years_of_schooling / 18``
    * ``II``  - ``Gross_national_income_per_capita`` on a log scale,
      normalised between 100 and 75000
    """
    life_expectancy = df['Life_expectancy_at_birth']
    mean_schooling = df['Mean_years_of_schooling']
    expected_schooling = df['Expected_years_of_schooling']
    income = df['Gross_national_income_per_capita']

    log_income_floor = np.log(100)
    log_income_cap = np.log(75000)

    return df.assign(
        LEI=(life_expectancy - 20) / (85 - 20),
        EI=(mean_schooling / 15 + expected_schooling / 18) / 2,
        II=(np.log(income) - log_income_floor) / (log_income_cap - log_income_floor),
    )

In [9]:
# Add the LEI, EI and II columns to the selected countries; the bare name below displays the result.
curr_ind = add_indices(curr)
curr_ind

Unnamed: 0,Country,Year,Expected_years_of_schooling,Gross_national_income_per_capita,Life_expectancy_at_birth,Mean_years_of_schooling,LEI,EI,II
59,Finland,1990,15.0,32063.0,75.3,7.5,0.850769,0.666667,0.871635
64,Germany,1990,14.4,37232.0,75.5,8.8,0.853846,0.693333,0.894213
121,Netherlands,1990,14.7,36548.0,77.0,10.2,0.876923,0.748333,0.891412
265,Finland,1991,15.2,29767.0,75.5,7.7,0.853846,0.678889,0.860411
270,Germany,1991,14.7,38644.0,75.7,8.9,0.856923,0.705000,0.899835
...,...,...,...,...,...,...,...,...,...
5832,Germany,2018,17.0,55155.0,81.2,14.1,0.941538,0.942222,0.953574
5889,Netherlands,2018,18.5,57014.0,82.1,12.3,0.955385,0.923889,0.958582
6033,Finland,2019,19.4,48511.0,81.9,12.8,0.952308,0.965556,0.934185
6038,Germany,2019,17.0,55314.0,81.3,14.2,0.943077,0.945556,0.954009


In [14]:
def plot_index(df,index_name):
    """Build a line chart of one index over the years, one line per country.

    Parameters
    ----------
    df : DataFrame with ``Year`` and ``Country`` columns and a column named
        *index_name*.
    index_name : one of ``'EI'``, ``'II'`` or ``'LEI'``; selects both the
        plotted column and the chart title.

    Returns the plotly figure. Raises ``KeyError`` when *index_name* has no
    entry in the title table.
    """
    index_titles = {
        'LEI': 'Life Expectancy Index (LEI)',
        'EI': 'Education Index (EI)',
        'II': 'Income Index (II)',
    }
    transparent = 'rgba(0, 0, 0, 0)'

    figure = px.line(
        df,
        x="Year",
        y=index_name,
        color='Country',
        title=index_titles[index_name],
        color_discrete_sequence=px.colors.qualitative.Dark2,
    )
    # Centre the title and make both the plot area and the surrounding paper transparent.
    figure.update_layout(
        title_x=0.5,
        plot_bgcolor=transparent,
        paper_bgcolor=transparent,
    )
    return figure

In [15]:
# Plot the Education Index over time for the selected countries.
plot_index(curr_ind,'EI')