In [None]:
import pandas as pd
import pycountry
import numpy as np
import json
import re
from pprint import pprint

### Extract CSVs into DataFrames

In [None]:
# Read the World Happiness Report CSV (path is relative to the notebook's working directory)
happiness_file = "Resources/world-happiness-report.csv"
happiness_df = pd.read_csv(filepath_or_buffer=happiness_file)

### Transform DataFrames

In [None]:
# Keep only the rows from 2011 onwards (2011 itself is included)
from_2011 = happiness_df["year"] >= 2011
happiness_df = happiness_df.loc[from_2011]
happiness_df

In [None]:
# Give the report's columns short snake_case names, drop the two affect
# columns and renumber the rows. "year" already has its final name, so it
# needs no entry in the mapping.
happiness_transformed = (
    happiness_df
    .rename(columns={
        "Country name": "country",
        "Life Ladder": "happiness_rating",
        "Log GDP per capita": "gdp_per_capita",
        "Social support": "social_support",
        "Healthy life expectancy at birth": "life_expectancy",
        "Freedom to make life choices": "freedom",
        "Generosity": "generosity",
        "Perceptions of corruption": "corruption",
    })
    .drop(columns=["Positive affect", "Negative affect"])
    # drop=True discards the old row labels outright. Materialising them as
    # an "index" column and dropping that by name only works while the index
    # is unnamed.
    .reset_index(drop=True)
)
happiness_transformed

In [None]:
# Define a function to add the alpha_2 id code corresponding to each country name
def findCountry(country_name):
    """Return the alpha_2 code pycountry holds for ``country_name``.

    The name is looked up with ``pycountry.countries.get(name=...)``.

    Parameters
    ----------
    country_name : str
        Country name to look up. Anything that is not a string (for example
        a NaN from a missing cell) is treated as "no match".

    Returns
    -------
    str
        The matching record's ``alpha_2`` attribute, or the sentinel string
        ``"Not found"`` when there is no match.
    """
    if not isinstance(country_name, str):
        return "Not found"
    try:
        match = pycountry.countries.get(name=country_name)
    except LookupError:
        # Some pycountry releases raise KeyError (a LookupError) for an unknown
        # name instead of returning None. Only that case is treated as
        # "no match"; any other exception is a real problem and propagates.
        match = None
    if match is None:
        return "Not found"
    return match.alpha_2

In [None]:
# Attach the alpha_2 code for every row. Only the "country" value is needed,
# so map over that one column rather than applying a function row by row
# across the whole frame.
happiness_transformed["id"] = happiness_transformed["country"].map(findCountry)

# happiness_master is the same table without the code column;
# happiness_transformed itself keeps "id".
happiness_master = happiness_transformed.drop(columns=["id"])
happiness_master

In [None]:
# Remove the secondary indicator columns from the master table
unused_indicators = ["social_support", "life_expectancy", "freedom", "generosity", "corruption"]
happiness_data = happiness_master.drop(unused_indicators, axis=1)
happiness_data

In [None]:
# Build the country table keyed by alpha_2 code: select the five columns
# below and discard rows that are exact duplicates of an earlier row.
country_id_cols = ["id", "country", "year", "happiness_rating", "gdp_per_capita"]
country_id = happiness_transformed.loc[:, country_id_cols].drop_duplicates()
country_id

In [None]:
# Average the two metrics for every (id, country, year) combination;
# the grouping keys become the index of the result.
group_keys = ["id", "country", "year"]
metric_cols = ["happiness_rating", "gdp_per_capita"]
new_country_id_missing = country_id.groupby(group_keys)[metric_cols].mean()
new_country_id_missing

In [None]:
# Round both metrics to one decimal place
new_country_id_missing_rnd = new_country_id_missing.round(decimals=1)

In [None]:
# Save the rounded table (index levels included) to the Resources folder
new_country_id_missing_rnd.to_csv(path_or_buf='Resources/new_country_id_missing.csv')

### Load the filled country-id CSV and export per-year files

In [None]:
# NOTE(review): new_country_id_filled.csv is not written by any cell shown in this
# notebook - presumably a hand-completed copy of new_country_id_missing.csv; confirm
# where it comes from.
new_country_id_filled = pd.read_csv(filepath_or_buffer='Resources/new_country_id_filled.csv')
new_country_id_filled

In [None]:
# Use the "id" column as the row index of the filled table
master_country_id_filled = new_country_id_filled.set_index(keys='id')

In [None]:
def split_out_year(frame, year):
    """Return the rows of ``frame`` for one year, with and without the year column.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table that has a ``year`` column.
    year : int
        Value to match in ``frame["year"]``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The matching rows unchanged, and the same rows with ``year`` dropped.
    """
    rows = frame.loc[frame["year"] == year, :]
    return rows, rows.drop(columns=["year"])


# One pair of frames per year, 2011 through 2020. Both names of each pair are
# assigned so every variable this section has always defined still exists.
y2k11, y2k11_df = split_out_year(master_country_id_filled, 2011)
y2k12, y2k12_df = split_out_year(master_country_id_filled, 2012)
y2k13, y2k13_df = split_out_year(master_country_id_filled, 2013)
y2k14, y2k14_df = split_out_year(master_country_id_filled, 2014)
y2k15, y2k15_df = split_out_year(master_country_id_filled, 2015)
y2k16, y2k16_df = split_out_year(master_country_id_filled, 2016)
y2k17, y2k17_df = split_out_year(master_country_id_filled, 2017)
y2k18, y2k18_df = split_out_year(master_country_id_filled, 2018)
y2k19, y2k19_df = split_out_year(master_country_id_filled, 2019)
y2k20, y2k20_df = split_out_year(master_country_id_filled, 2020)

y2k11_df


In [None]:
def _json_fallback(value):
    """``json.dumps`` hook: convert NumPy scalars to plain Python numbers."""
    if isinstance(value, np.generic):
        return value.item()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def write_year_js(year_df, year):
    """Write one year's rows to ``<year>.json`` as a JavaScript array literal.

    The file contains ``var y2kNN = [ ... ];`` where ``NN`` is the last two
    digits of ``year``. Each row becomes one object on its own line: the
    row's index label under ``"id"``, followed by every column of ``year_df``.

    Parameters
    ----------
    year_df : pandas.DataFrame
        Rows for a single year; the index labels are written as ``id``.
    year : int
        Four-digit year, used for the file name and the variable name.
    """
    # Build one record per row. Going row by row keeps rows whose index label
    # repeats; a transposed to_dict() would keep only one of them.
    records = year_df.rename_axis("id").reset_index().to_dict(orient="records")
    with open(f"{year}.json", "w") as f:
        f.write(f"var y2k{year % 100:02d} = [\n")
        for record in records:
            # json.dumps keeps the quotes around keys and string values, so
            # text such as a country name stays a valid JavaScript string
            # literal instead of becoming a bare identifier.
            f.write(json.dumps(record, default=_json_fallback) + ",\n")
        f.write("];")


# Every year, 2011 included, is written in the same
# {"id": ..., <column>: ...} layout.
for year, year_df in {
    2011: y2k11_df,
    2012: y2k12_df,
    2013: y2k13_df,
    2014: y2k14_df,
    2015: y2k15_df,
    2016: y2k16_df,
    2017: y2k17_df,
    2018: y2k18_df,
    2019: y2k19_df,
    2020: y2k20_df,
}.items():
    write_year_js(year_df, year)

In [None]:
# data for Geochart
#master_df = new_country_id_filled.drop(columns=['id', 'gdp_per_capita'])
#master_df

In [None]:
# Nested list holding every row of the filled table, one inner list per row
filled_values = new_country_id_filled.values
happiness_list = filled_values.tolist()

In [None]:
# Column-oriented dict of the filled table: {column: {row label: value}}.
# The name previously read here, `new_dict_df`, is not defined by any cell in
# this notebook, so the cell failed with NameError on a fresh kernel.
# NOTE(review): new_country_id_filled is assumed to be the intended source - confirm.
happiness_dict = new_country_id_filled.to_dict()

In [None]:
# Year lists to create a map for each year

In [None]:
# Nested lists of row values, one list per year from 2011 to 2020.
# Targets and source frames are listed in the same year order.
(
    happiness_2011_list,
    happiness_2012_list,
    happiness_2013_list,
    happiness_2014_list,
    happiness_2015_list,
    happiness_2016_list,
    happiness_2017_list,
    happiness_2018_list,
    happiness_2019_list,
    happiness_2020_list,
) = (
    year_frame.values.tolist()
    for year_frame in (
        y2k11_df,
        y2k12_df,
        y2k13_df,
        y2k14_df,
        y2k15_df,
        y2k16_df,
        y2k17_df,
        y2k18_df,
        y2k19_df,
        y2k20_df,
    )
)