In [1]:
import numpy as np
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

pd.options.display.max_columns = None

## Importing the data

In [2]:
# Folder that holds the main analytical base table.
DATASET_PATH = '../datasets/'

# Main table is read from DATASET_PATH; the three auxiliary files are read
# from the notebook's working directory.
df = pd.read_csv(f'{DATASET_PATH}df_abt.csv')
df_desc = pd.read_excel('Column descriptions.xlsx')
df_efa = pd.read_csv('EFA_2.csv')
df_wb = pd.read_csv('wellbeing_by_country_3.csv')

# Lookup table: column name -> human-readable description.
dict_desc = df_desc.set_index('Column Name')['Description'].to_dict()

## Vega plots

- Data preparation for building the dataframes used by the Vega / Vega-Lite plots

### Trust aspects

In [7]:
# Preview the first five rows of the column-description table.
df_desc.head(5)

Unnamed: 0,Column Name,Description,scale,scale description
0,stflife,How satisfied with life as a whole,0-1-2-3-4-…10,10 extremely satisfied
1,happy,How happy are you,0-1-2-3-4-…10,10 extremely happy
2,fltsd,"Felt sad, how often past week",1-2-3-4,4 most of the time
3,fltdpr,"felt depressed, how often past week",1-2-3-4,4 most of the time
4,enjlf,"Enjoyed life, how often past week",1-2-3-4,4 most of the time


In [6]:
# Collect every description that mentions "trust in" (case-insensitive).
# At this point `trust_cols` holds description texts, not column names.
trust_cols = []
for description in df_desc['Description']:
    if 'trust in' in description.lower():
        trust_cols.append(description)

In [7]:
# Translate the matched descriptions into their column names.
trust_cols = df_desc.loc[df_desc['Description'].isin(trust_cols), 'Column Name'].tolist()

In [8]:
# Mapping column name -> description, restricted to the trust columns.
is_trust_col = df_desc['Column Name'].isin(trust_cols)
trust_dict = dict(zip(df_desc.loc[is_trust_col, 'Column Name'],
                      df_desc.loc[is_trust_col, 'Description']))

In [9]:
# Keep only the trust columns and label them with their descriptions.
df_trust = df.loc[:, trust_cols].rename(columns=trust_dict)

In [10]:
# Wide -> long: one row per (respondent, trust item) pair.
# reset_index() exposes the row label as `level_0`, which is not needed afterwards.
df_trust = df_trust.stack().reset_index().drop(columns='level_0')

In [11]:
# Name the two remaining columns: the trust item and the answer given.
df_trust = df_trust.set_axis(['Trust', 'Values'], axis=1)

In [12]:
# Preview the first five rows of the long-format trust table.
df_trust.head(5)

Unnamed: 0,Trust,Values
0,Trust in the European Parliament,2
1,Trust in the legal system,0
2,Trust in the police,2
3,Trust in politicians,0
4,Trust in country's parliament,0


In [13]:
# Sentinel codes to exclude (presumably survey non-response codes -- confirm against the codebook).
MISSING_CODES = [77, 88, 99]

# `Values` is an integer column (its unique() output reports dtype=int64), so the codes
# must be compared as numbers: the former string list ['77', '88', '99'] could never match
# an integer and therefore filtered nothing. pd.to_numeric keeps the comparison correct
# whether the column arrives as integers or as digit strings.
df_trust = df_trust[~pd.to_numeric(df_trust['Values'], errors='coerce').isin(MISSING_CODES)]

In [14]:
# Sanity check: list the distinct answer values left after filtering.
pd.unique(df_trust['Values'])

array([ 2,  0, 10,  8,  5,  7,  1,  9,  6,  4,  3], dtype=int64)

In [129]:
# Export the long-format trust table for the Vega plot (the row index is written as the first column).
df_trust.to_csv(path_or_buf='datasets/trust_aspects.csv')

**Gist can be found at:** https://gist.github.com/ossamum/94a10bec61d48e0263b026957539ac17

### Happiness by country

In [15]:
# Independent copy of the two columns needed for the per-country happiness view.
df_country = df.loc[:, ['cntry', 'happy']].copy()

In [16]:
# Keep answers of at most 10; larger values are presumably
# non-response codes (confirm against the codebook).
df_country = df_country.loc[df_country['happy'].le(10)]

In [17]:
from geopy.exc import GeocoderTimedOut
from geopy.geocoders import Nominatim
from tqdm import tqdm

In [166]:
# Accumulators for the geocoded coordinates, filled in country order.
longitude, latitude = [], []
   
def findGeocode(city, max_retries=3):
    """Look up the geographic location of a place name with Nominatim.

    Parameters
    ----------
    city : str
        Free-text place name to geocode (this notebook passes country names).
    max_retries : int, optional
        How many additional attempts to make after a ``GeocoderTimedOut``.
        The previous implementation retried through unbounded recursion,
        which ends in ``RecursionError`` if the service keeps timing out.

    Returns
    -------
    The object returned by ``geolocator.geocode`` (``None`` when the place is
    not found), or ``None`` when every attempt timed out. Callers therefore
    need to handle ``None`` in both situations.
    """
    # NOTE(review): "your_app_name" is a placeholder user agent -- replace it
    # with an identifier for this project.
    geolocator = Nominatim(user_agent="your_app_name")

    # One initial attempt plus up to `max_retries` retries.
    for _ in range(max_retries + 1):
        try:
            return geolocator.geocode(city)
        except GeocoderTimedOut:
            # Transient timeout: try again until the attempts are used up.
            continue

    # Every attempt timed out; report "no result" instead of recursing forever.
    return None
  

In [167]:
# Distinct country labels, in order of first appearance.
countries = df_country['cntry'].drop_duplicates().tolist()

In [156]:
# Start from empty accumulators inside this cell: re-running it then yields
# exactly one entry per country instead of appending duplicates, which would
# break the DataFrame construction that pairs these lists with `countries`.
latitude = []
longitude = []

for country in tqdm(countries):

    # Geocode once per country and reuse the result; the earlier version
    # called findGeocode twice, doubling the number of network requests.
    loc = findGeocode(country)

    if loc is not None:
        # Coordinates are stored in two parallel lists, aligned with `countries`.
        latitude.append(loc.latitude)
        longitude.append(loc.longitude)
    else:
        # No result for this country: record NaN as the missing-value marker.
        latitude.append(np.nan)
        longitude.append(np.nan)

100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [00:46<00:00,  1.60s/it]


In [159]:
# One row per country with its geocoded coordinates.
df_country_latlon = pd.DataFrame(dict(cntry=countries,
                                      latitude=latitude,
                                      longitude=longitude))

In [198]:
# Attach the coordinates to every respondent row. With no `on=` given,
# pandas joins on the columns the two frames have in common.
df_country = df_country.merge(df_country_latlon)
df_country.head()

Unnamed: 0,cntry,happy,latitude,longitude
0,Albania,8,41.000028,19.999962
1,Albania,0,41.000028,19.999962
2,Albania,5,41.000028,19.999962
3,Albania,4,41.000028,19.999962
4,Albania,7,41.000028,19.999962


In [202]:
# Average happiness per country, keeping the coordinates as ordinary columns.
country_happiness = df_country.groupby(
    ['cntry', 'latitude', 'longitude'], as_index=False
)['happy'].mean()

# Export for the Vega map, without the row index.
country_happiness.to_csv('datasets/country_happiness.csv', index=False)