In [1]:
import pandas as pd
import numpy as np

In [2]:
def clean_data(merged_df, path, renamed_column, quad=False):
    df = pd.read_csv(f'../inputs_extended/{path}.csv', delimiter=',')[['LOCATION', 'TIME', 'Value']]
    if quad: 
        df['TIME'] = df['TIME'].str.slice(0,4).astype('int64')
        
    df.drop_duplicates(subset=['LOCATION', 'TIME'], keep='first', inplace=True)
    df_final = pd.merge(merged_df, df, on=['LOCATION', 'TIME'], how="outer")
    df_final.rename(columns={'Value': renamed_column}, inplace=True)
    df_final = df_final.dropna(thresh=9).reset_index(drop=True)
    df_final.reset_index(drop=True, inplace=True)
    return df_final

In [3]:
#main dataframe
main_df = pd.read_excel('../inputs_extended/happiness.xls')
main_df.rename(columns={'year': 'TIME'}, inplace=True)

In [4]:
# merge with country codes
df = pd.read_csv('../inputs_extended/codes.csv', delimiter=',')[['English short name lower case', 'Alpha-3 code']]
df.rename(columns={'Alpha-3 code': 'LOCATION', 'English short name lower case': 'Country name'}, inplace=True)
df_v1 = pd.merge(main_df, df, on=['Country name'], how="outer").dropna(thresh=4)
df_v1 = df_v1.dropna().reset_index(drop=True)

In [5]:
# inflation
df_v2 = clean_data(df_v1, 'inflation', 'Inflation rate', True)

In [6]:
# adult education
df_v3 = clean_data(df_v2, 'adult-education', 'Adult education')
df_v3

Unnamed: 0,Country name,TIME,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect,LOCATION,Inflation rate,Adult education
0,Afghanistan,2008.0,3.723590,7.370100,0.450662,50.799999,0.718114,0.167640,0.881686,0.517637,0.258195,AFG,,
1,Afghanistan,2009.0,4.401778,7.539972,0.552308,51.200001,0.678896,0.190099,0.850035,0.583926,0.237092,AFG,,
2,Afghanistan,2010.0,4.758381,7.646709,0.539075,51.599998,0.600127,0.120590,0.706766,0.618265,0.275324,AFG,,
3,Afghanistan,2011.0,3.831719,7.619532,0.521104,51.919998,0.495901,0.162427,0.731109,0.611387,0.267175,AFG,,
4,Afghanistan,2012.0,3.782938,7.705479,0.520637,52.240002,0.530935,0.236032,0.775620,0.710385,0.267919,AFG,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1640,Zimbabwe,2016.0,3.735400,7.984372,0.768425,54.400002,0.732971,-0.094634,0.723612,0.737636,0.208555,ZWE,,
1641,Zimbabwe,2017.0,3.638300,8.015738,0.754147,55.000000,0.752826,-0.097645,0.751208,0.806428,0.224051,ZWE,,
1642,Zimbabwe,2018.0,3.616480,8.048798,0.775388,55.599998,0.762675,-0.068427,0.844209,0.710119,0.211726,ZWE,,
1643,Zimbabwe,2019.0,2.693523,7.950132,0.759162,56.200001,0.631908,-0.063791,0.830652,0.716004,0.235354,ZWE,,


In [7]:
# housing prices
df_v4 = clean_data(df_v3, 'housing-prices', 'Housing prices', True)
df_v4

Unnamed: 0,Country name,TIME,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect,LOCATION,Inflation rate,Adult education,Housing prices
0,Afghanistan,2008.0,3.723590,7.370100,0.450662,50.799999,0.718114,0.167640,0.881686,0.517637,0.258195,AFG,,,
1,Afghanistan,2009.0,4.401778,7.539972,0.552308,51.200001,0.678896,0.190099,0.850035,0.583926,0.237092,AFG,,,
2,Afghanistan,2010.0,4.758381,7.646709,0.539075,51.599998,0.600127,0.120590,0.706766,0.618265,0.275324,AFG,,,
3,Afghanistan,2011.0,3.831719,7.619532,0.521104,51.919998,0.495901,0.162427,0.731109,0.611387,0.267175,AFG,,,
4,Afghanistan,2012.0,3.782938,7.705479,0.520637,52.240002,0.530935,0.236032,0.775620,0.710385,0.267919,AFG,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1640,Zimbabwe,2016.0,3.735400,7.984372,0.768425,54.400002,0.732971,-0.094634,0.723612,0.737636,0.208555,ZWE,,,
1641,Zimbabwe,2017.0,3.638300,8.015738,0.754147,55.000000,0.752826,-0.097645,0.751208,0.806428,0.224051,ZWE,,,
1642,Zimbabwe,2018.0,3.616480,8.048798,0.775388,55.599998,0.762675,-0.068427,0.844209,0.710119,0.211726,ZWE,,,
1643,Zimbabwe,2019.0,2.693523,7.950132,0.759162,56.200001,0.631908,-0.063791,0.830652,0.716004,0.235354,ZWE,,,


In [8]:
# adult education
df_v5 = clean_data(df_v4, 'trust-in-gov', 'Trust in government')

In [9]:
# adult education
df_v6 = clean_data(df_v5, 'self-employment', 'Self-employment rate')

In [10]:
# poverty rates
df_v7 = clean_data(df_v6, 'poverty', 'Poverty rate')

In [11]:
# unemployed youth rates
df_v8 = clean_data(df_v7, 'unemployed-youth', 'Unemployed youth rate')

In [12]:
# housing overcrowding
df = clean_data(df_v8, 'overcrowding', 'Households overcrowding')

In [13]:
# Access to the internet on any device
df = clean_data(df, 'netaccess', 'Internet access')
df

Unnamed: 0,Country name,TIME,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,LOCATION,Inflation rate,Adult education,Housing prices,Trust in government,Self-employment rate,Poverty rate,Unemployed youth rate,Households overcrowding,Internet access
0,Afghanistan,2008.0,3.723590,7.370100,0.450662,50.799999,0.718114,0.167640,0.881686,0.517637,...,AFG,,,,,,,,,
1,Afghanistan,2009.0,4.401778,7.539972,0.552308,51.200001,0.678896,0.190099,0.850035,0.583926,...,AFG,,,,,,,,,
2,Afghanistan,2010.0,4.758381,7.646709,0.539075,51.599998,0.600127,0.120590,0.706766,0.618265,...,AFG,,,,,,,,,
3,Afghanistan,2011.0,3.831719,7.619532,0.521104,51.919998,0.495901,0.162427,0.731109,0.611387,...,AFG,,,,,,,,,
4,Afghanistan,2012.0,3.782938,7.705479,0.520637,52.240002,0.530935,0.236032,0.775620,0.710385,...,AFG,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1640,Zimbabwe,2016.0,3.735400,7.984372,0.768425,54.400002,0.732971,-0.094634,0.723612,0.737636,...,ZWE,,,,,,,,,
1641,Zimbabwe,2017.0,3.638300,8.015738,0.754147,55.000000,0.752826,-0.097645,0.751208,0.806428,...,ZWE,,,,,,,,,
1642,Zimbabwe,2018.0,3.616480,8.048798,0.775388,55.599998,0.762675,-0.068427,0.844209,0.710119,...,ZWE,,,,,,,,,
1643,Zimbabwe,2019.0,2.693523,7.950132,0.759162,56.200001,0.631908,-0.063791,0.830652,0.716004,...,ZWE,,,,,,,,,


In [14]:
# Access to a computer in a house
df = clean_data(df, 'comps', 'Home access to computers')

In [15]:
# the proportion of previous in-work household income
df = clean_data(df, 'benefitsunemp', 'Unemployment benefits')
df

Unnamed: 0,Country name,TIME,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,Adult education,Housing prices,Trust in government,Self-employment rate,Poverty rate,Unemployed youth rate,Households overcrowding,Internet access,Home access to computers,Unemployment benefits
0,Afghanistan,2008.0,3.723590,7.370100,0.450662,50.799999,0.718114,0.167640,0.881686,0.517637,...,,,,,,,,,,
1,Afghanistan,2009.0,4.401778,7.539972,0.552308,51.200001,0.678896,0.190099,0.850035,0.583926,...,,,,,,,,,,
2,Afghanistan,2010.0,4.758381,7.646709,0.539075,51.599998,0.600127,0.120590,0.706766,0.618265,...,,,,,,,,,,
3,Afghanistan,2011.0,3.831719,7.619532,0.521104,51.919998,0.495901,0.162427,0.731109,0.611387,...,,,,,,,,,,
4,Afghanistan,2012.0,3.782938,7.705479,0.520637,52.240002,0.530935,0.236032,0.775620,0.710385,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1640,Zimbabwe,2016.0,3.735400,7.984372,0.768425,54.400002,0.732971,-0.094634,0.723612,0.737636,...,,,,,,,,,,
1641,Zimbabwe,2017.0,3.638300,8.015738,0.754147,55.000000,0.752826,-0.097645,0.751208,0.806428,...,,,,,,,,,,
1642,Zimbabwe,2018.0,3.616480,8.048798,0.775388,55.599998,0.762675,-0.068427,0.844209,0.710119,...,,,,,,,,,,
1643,Zimbabwe,2019.0,2.693523,7.950132,0.759162,56.200001,0.631908,-0.063791,0.830652,0.716004,...,,,,,,,,,,


In [16]:
# Wages on average in unified currency (USD)
df = clean_data(df, 'averagewages', 'Average wage')
df

Unnamed: 0,Country name,TIME,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,Housing prices,Trust in government,Self-employment rate,Poverty rate,Unemployed youth rate,Households overcrowding,Internet access,Home access to computers,Unemployment benefits,Average wage
0,Afghanistan,2008.0,3.723590,7.370100,0.450662,50.799999,0.718114,0.167640,0.881686,0.517637,...,,,,,,,,,,
1,Afghanistan,2009.0,4.401778,7.539972,0.552308,51.200001,0.678896,0.190099,0.850035,0.583926,...,,,,,,,,,,
2,Afghanistan,2010.0,4.758381,7.646709,0.539075,51.599998,0.600127,0.120590,0.706766,0.618265,...,,,,,,,,,,
3,Afghanistan,2011.0,3.831719,7.619532,0.521104,51.919998,0.495901,0.162427,0.731109,0.611387,...,,,,,,,,,,
4,Afghanistan,2012.0,3.782938,7.705479,0.520637,52.240002,0.530935,0.236032,0.775620,0.710385,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1640,Zimbabwe,2016.0,3.735400,7.984372,0.768425,54.400002,0.732971,-0.094634,0.723612,0.737636,...,,,,,,,,,,
1641,Zimbabwe,2017.0,3.638300,8.015738,0.754147,55.000000,0.752826,-0.097645,0.751208,0.806428,...,,,,,,,,,,
1642,Zimbabwe,2018.0,3.616480,8.048798,0.775388,55.599998,0.762675,-0.068427,0.844209,0.710119,...,,,,,,,,,,
1643,Zimbabwe,2019.0,2.693523,7.950132,0.759162,56.200001,0.631908,-0.063791,0.830652,0.716004,...,,,,,,,,,,


In [17]:
# consumption of health care goods and services
df = clean_data(df, 'healthspending', 'Health spending')
df

Unnamed: 0,Country name,TIME,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,Trust in government,Self-employment rate,Poverty rate,Unemployed youth rate,Households overcrowding,Internet access,Home access to computers,Unemployment benefits,Average wage,Health spending
0,Afghanistan,2008.0,3.723590,7.370100,0.450662,50.799999,0.718114,0.167640,0.881686,0.517637,...,,,,,,,,,,
1,Afghanistan,2009.0,4.401778,7.539972,0.552308,51.200001,0.678896,0.190099,0.850035,0.583926,...,,,,,,,,,,
2,Afghanistan,2010.0,4.758381,7.646709,0.539075,51.599998,0.600127,0.120590,0.706766,0.618265,...,,,,,,,,,,
3,Afghanistan,2011.0,3.831719,7.619532,0.521104,51.919998,0.495901,0.162427,0.731109,0.611387,...,,,,,,,,,,
4,Afghanistan,2012.0,3.782938,7.705479,0.520637,52.240002,0.530935,0.236032,0.775620,0.710385,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1640,Zimbabwe,2016.0,3.735400,7.984372,0.768425,54.400002,0.732971,-0.094634,0.723612,0.737636,...,,,,,,,,,,
1641,Zimbabwe,2017.0,3.638300,8.015738,0.754147,55.000000,0.752826,-0.097645,0.751208,0.806428,...,,,,,,,,,,
1642,Zimbabwe,2018.0,3.616480,8.048798,0.775388,55.599998,0.762675,-0.068427,0.844209,0.710119,...,,,,,,,,,,
1643,Zimbabwe,2019.0,2.693523,7.950132,0.759162,56.200001,0.631908,-0.063791,0.830652,0.716004,...,,,,,,,,,,


In [18]:
df.rename(columns={'Country name': 'Country', 'TIME': 'Year', 'Life Ladder': 'Happiness index', 'Log GDP per capita': 'GDP per capita', 'LOCATION': 'Country code'}, inplace=True)
df

Unnamed: 0,Country,Year,Happiness index,GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,Trust in government,Self-employment rate,Poverty rate,Unemployed youth rate,Households overcrowding,Internet access,Home access to computers,Unemployment benefits,Average wage,Health spending
0,Afghanistan,2008.0,3.723590,7.370100,0.450662,50.799999,0.718114,0.167640,0.881686,0.517637,...,,,,,,,,,,
1,Afghanistan,2009.0,4.401778,7.539972,0.552308,51.200001,0.678896,0.190099,0.850035,0.583926,...,,,,,,,,,,
2,Afghanistan,2010.0,4.758381,7.646709,0.539075,51.599998,0.600127,0.120590,0.706766,0.618265,...,,,,,,,,,,
3,Afghanistan,2011.0,3.831719,7.619532,0.521104,51.919998,0.495901,0.162427,0.731109,0.611387,...,,,,,,,,,,
4,Afghanistan,2012.0,3.782938,7.705479,0.520637,52.240002,0.530935,0.236032,0.775620,0.710385,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1640,Zimbabwe,2016.0,3.735400,7.984372,0.768425,54.400002,0.732971,-0.094634,0.723612,0.737636,...,,,,,,,,,,
1641,Zimbabwe,2017.0,3.638300,8.015738,0.754147,55.000000,0.752826,-0.097645,0.751208,0.806428,...,,,,,,,,,,
1642,Zimbabwe,2018.0,3.616480,8.048798,0.775388,55.599998,0.762675,-0.068427,0.844209,0.710119,...,,,,,,,,,,
1643,Zimbabwe,2019.0,2.693523,7.950132,0.759162,56.200001,0.631908,-0.063791,0.830652,0.716004,...,,,,,,,,,,


In [19]:
df1 = df.copy()

In [20]:
countries = df1['Country'].unique()
cols = df1.columns.values.tolist()
df_v10 = pd.DataFrame(columns=cols)

# fill the NaN values with latest available value in the column for the country
for country in countries:
    temp_df = df.loc[df['Country'] == country]
    rows = len(temp_df)
    temp_df = temp_df.loc[:, cols].ffill()
    temp_df = temp_df.loc[:, cols].bfill()
    df_v10 = pd.concat([df_v10, temp_df])

# discard countries that did not hold anything in more than 4 added categories 
# fyi: 8 cols are filled for sure for each country by default, dictated by the base dataset
df_v10.dropna(thresh=17, inplace=True)
df_v10.reset_index(drop=True, inplace=True)
display(df_v10)

Unnamed: 0,Country,Year,Happiness index,GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,Trust in government,Self-employment rate,Poverty rate,Unemployed youth rate,Households overcrowding,Internet access,Home access to computers,Unemployment benefits,Average wage,Health spending
0,Australia,2007.0,7.285391,10.702894,0.965276,71.720001,0.890682,0.347052,0.512578,0.826251,...,60.975292,14.08298,0.129,6.48775,,67.0,75.0,36.0,50279.248277,8.068
1,Australia,2008.0,7.253757,10.71878,0.946635,71.879997,0.915733,0.30529,0.430811,0.826391,...,60.975292,13.71566,0.129,6.310053,,72.0,78.0,34.0,50684.769303,8.256
2,Australia,2010.0,7.450047,10.722262,0.95452,72.199997,0.932059,0.316744,0.366127,0.834236,...,60.975292,13.77405,0.129,8.059968,,78.92,82.57,32.0,51957.406114,8.431
3,Australia,2011.0,7.405616,10.732697,0.967029,72.300003,0.944586,0.36934,0.381772,0.81586,...,53.078672,13.2206,0.129,7.839816,,78.92,82.57,31.0,53600.679845,8.542
4,Australia,2012.0,7.195586,10.753672,0.944599,72.400002,0.935146,0.273635,0.368252,0.810742,...,42.041918,12.21689,0.129,7.231065,,83.0,82.57,31.0,53919.328244,8.676
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
550,United States,2016.0,6.8036,10.985777,0.896751,68.5,0.757893,0.144048,0.73892,0.805674,...,29.720566,7.466977,0.209,8.533521,4.244195,73.37,72.03,9.0,63941.857045,16.844
551,United States,2017.0,6.991759,11.001395,0.921003,68.400002,0.868497,0.197317,0.681191,0.826555,...,38.653502,7.109162,0.212,7.45546,4.18607,77.97,72.03,8.0,64618.452265,16.806
552,United States,2018.0,6.882685,11.025024,0.903856,68.300003,0.824607,0.116116,0.709928,0.815383,...,31.381609,7.262526,0.211,7.458875,4.188988,77.97,72.03,8.0,65302.940142,16.687
553,United States,2019.0,6.943701,11.043353,0.916691,68.199997,0.836139,0.144299,0.706716,0.814985,...,36.277444,6.994946,0.21,7.903465,4.236565,79.88,72.03,8.0,66382.505552,16.767


In [21]:
df_v10.isna().sum()

Country                               0
Year                                  0
Happiness index                       0
GDP per capita                        0
Social support                        0
Healthy life expectancy at birth      0
Freedom to make life choices          0
Generosity                            0
Perceptions of corruption             0
Positive affect                       0
Negative affect                       0
Country code                          0
Inflation rate                       14
Adult education                       0
Housing prices                        0
Trust in government                  14
Self-employment rate                 63
Poverty rate                         15
Unemployed youth rate                28
Households overcrowding             126
Internet access                      43
Home access to computers             43
Unemployment benefits                84
Average wage                         99
Health spending                       0


In [22]:
# since there is no value available in the entire column for a country, lets fill it with mean. 
# those countries may still hold valuable info for other columns so discarding them based on omne NaN value
# would be a loss of meaninful data.

mean_infl = df_v10['Inflation rate'].mean()
mean_gov_trust = df_v10['Trust in government'].mean()
mean_self_emp=df_v10['Self-employment rate'].mean()
mean_poverty=df_v10['Poverty rate'].mean()
mean_unemp_youth=df_v10['Unemployed youth rate'].mean()
mean_overcr=df_v10['Households overcrowding'].mean()
mean_childcare=df_v10['Home access to computers'].mean()
mean_internet=df_v10['Internet access'].mean()
mean_unempbenefits=df_v10['Unemployment benefits'].mean()
mean_wage=df_v10['Average wage'].mean()
mean_health=df_v10['Health spending'].mean()


df_v10['Trust in government'].fillna(mean_gov_trust, inplace=True)
df_v10['Self-employment rate'].fillna(mean_self_emp, inplace=True)
df_v10['Poverty rate'].fillna(mean_poverty, inplace=True)
df_v10['Unemployed youth rate'].fillna(mean_unemp_youth, inplace=True)
df_v10['Inflation rate'].fillna(mean_infl, inplace=True)
df_v10['Households overcrowding'].fillna(mean_overcr, inplace=True)
df_v10['Home access to computers'].fillna(mean_childcare, inplace=True)
df_v10['Internet access'].fillna(mean_internet, inplace=True)
df_v10['Unemployment benefits'].fillna(mean_unempbenefits, inplace=True)
df_v10['Average wage'].fillna(mean_wage, inplace=True)
df_v10['Health spending'].fillna(mean_health, inplace=True)


In [23]:
df_v10.isna().sum()

Country                             0
Year                                0
Happiness index                     0
GDP per capita                      0
Social support                      0
Healthy life expectancy at birth    0
Freedom to make life choices        0
Generosity                          0
Perceptions of corruption           0
Positive affect                     0
Negative affect                     0
Country code                        0
Inflation rate                      0
Adult education                     0
Housing prices                      0
Trust in government                 0
Self-employment rate                0
Poverty rate                        0
Unemployed youth rate               0
Households overcrowding             0
Internet access                     0
Home access to computers            0
Unemployment benefits               0
Average wage                        0
Health spending                     0
dtype: int64

In [24]:
df_v10['Year'] = df['Year'].astype(int)
df_v10['Happy country'] = (df['Happiness index'] > 6).astype(bool).astype(int)
df_v10.sort_values(by=['Year', 'Country'], inplace=True)
#df_v10.drop('Country code', axis=1, inplace=True)
df_v10.reset_index(drop=True, inplace=True)
df_v10['Happy country'].value_counts()
df_v10

Unnamed: 0,Country,Year,Happiness index,GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,...,Self-employment rate,Poverty rate,Unemployed youth rate,Households overcrowding,Internet access,Home access to computers,Unemployment benefits,Average wage,Health spending,Happy country
0,Italy,2005,5.954524,10.622244,0.927213,73.199997,0.623742,-0.080541,0.902801,0.685423,...,28.149700,0.173000,9.351012,18.734644,78.510000,72.543600,59.000000,40192.935976,8.725,1
1,Belgium,2006,6.948936,10.81917,0.928964,71.599998,0.865759,-0.055826,0.496659,0.76459,...,18.449340,0.092000,4.333516,2.639921,84.785700,82.091500,81.000000,55525.989944,10.773,1
2,Brazil,2006,6.190922,9.58852,0.881505,66.400002,0.750609,-0.117002,0.763251,0.749728,...,36.786170,0.341000,18.686466,11.405780,66.662982,41.758582,42.076433,40640.148718,9.465,0
3,Chile,2006,6.579056,10.074142,0.841388,69.699997,0.65229,0.102443,0.858125,0.869229,...,25.639640,0.211000,11.449327,9.253412,79.297795,63.629382,37.000000,28252.106420,8.519,1
4,Colombia,2006,6.448789,9.546183,0.907403,66.580002,0.801191,-0.090322,0.886646,0.84708,...,53.237450,0.151515,18.539675,11.405780,37.980998,44.466586,42.076433,40640.148718,1.170,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
550,Sweden,2020,7.265977,10.740421,0.902533,71.760002,0.864005,0.220528,0.292112,0.819678,...,14.740480,0.090000,5.543694,12.030388,85.982300,87.579900,67.000000,41016.965655,8.770,1
551,Sweden,2020,7.286805,10.873111,0.914017,72.5,0.934582,0.170274,0.239367,0.813548,...,13.568590,0.093000,3.632011,13.328673,94.727300,92.816400,63.000000,45680.513436,10.785,0
552,Switzerland,2020,7.508435,11.080893,0.946316,74.699997,0.917343,-0.063502,0.280367,0.768705,...,16.207310,0.108000,5.271247,5.054440,95.509064,90.250000,72.000000,64824.381095,11.291,1
553,Turkey,2020,4.861554,10.219084,0.85673,67.599998,0.510386,-0.110889,0.774417,0.384292,...,29.791150,0.227000,17.049297,11.405780,90.731600,49.999900,0.000000,40640.148718,4.344,1


In [25]:
# final export
df_v10.to_csv('../cleaned_data/clean_v4_fillednans.csv', index=False)

relative povert
GDP
extend. by years(?)
quality of life in eiuropean cities