## Imports

In [15]:
import numpy as np
import pandas as pd
import life_quality_and_government.utils.paths as path

## Load Data

In [16]:
# Read the Numbeo Life Quality Index table (LQI_2022.csv) from the location
# returned by path.data_raw_dir.
lqi_dir = path.data_raw_dir('LQI_2022.csv')
lqi = pd.read_csv(filepath_or_buffer=lqi_dir)




## View and prepare

In [17]:
# Preview the first five rows of the table.
lqi.head(n=5)

Unnamed: 0,Rank,Country,Quality of Life Index,Purchasing Power Index,Safety Index,Health Care Index,Cost of Living Index,Property Price to Income Ratio,Traffic Commute Time Index,Pollution Index,Climate Index
0,1,Switzerland,195.27,118.44,78.32,74.85,123.35,8.29,28.5,19.59,80.21
1,2,Denmark,192.36,99.45,73.44,80.07,84.12,6.78,28.52,20.97,81.8
2,3,Netherlands,185.38,87.99,72.12,75.56,75.66,7.15,27.4,25.07,87.11
3,4,Finland,184.96,91.02,72.75,76.31,73.2,7.95,27.8,12.09,56.64
4,5,Australia,183.81,104.63,56.15,78.14,77.75,7.2,34.78,23.85,92.7


In [18]:
# Drop the 'Rank' column and rename the remaining key columns to the names
# used by the rest of the project ('Country Name', 'LQI').

try:
    lqi.drop(columns='Rank', inplace=True)
except KeyError:
    # DataFrame.drop raises KeyError when the label is missing (for example
    # when this cell is run a second time). Only that case is tolerated; any
    # other error is left to surface instead of being silently swallowed.
    print("This columns does not exist")
lqi.rename(columns={"Country": "Country Name", "Quality of Life Index": "LQI"}, inplace=True)
lqi.head()

Unnamed: 0,Country Name,LQI,Purchasing Power Index,Safety Index,Health Care Index,Cost of Living Index,Property Price to Income Ratio,Traffic Commute Time Index,Pollution Index,Climate Index
0,Switzerland,195.27,118.44,78.32,74.85,123.35,8.29,28.5,19.59,80.21
1,Denmark,192.36,99.45,73.44,80.07,84.12,6.78,28.52,20.97,81.8
2,Netherlands,185.38,87.99,72.12,75.56,75.66,7.15,27.4,25.07,87.11
3,Finland,184.96,91.02,72.75,76.31,73.2,7.95,27.8,12.09,56.64
4,Australia,183.81,104.63,56.15,78.14,77.75,7.2,34.78,23.85,92.7


In [19]:
# Min-max scale the 'LQI' column so that its values span 0 to 1, which keeps
# the later graphs on a 0-1 axis.

min_1 = lqi['LQI'].min()
range_2 = lqi['LQI'].max() - min_1
lqi['LQI'] = lqi['LQI'].sub(min_1).div(range_2)
lqi[['Country Name', 'LQI']]

Unnamed: 0,Country Name,LQI
0,Switzerland,1.000000
1,Denmark,0.979626
2,Netherlands,0.930757
3,Finland,0.927816
4,Australia,0.919765
...,...,...
82,Venezuela,0.174963
83,Sri Lanka,0.108101
84,Bangladesh,0.106070
85,Iran,0.087167


In [20]:
# Append two summary rows, 'Mean' and 'Median', below the existing rows for
# future plots. Numeric columns receive the column statistic; every other
# column (e.g. 'Country Name') receives the row label instead.
lqi_len = len(lqi.index)
lqi_means = list()
lqi_medians = list()
for c in lqi.columns:
    # Explicit numeric check rather than `dtype != object`: text columns are
    # not guaranteed to have object dtype (pandas string dtypes), and calling
    # .mean() on such a column raises.
    if pd.api.types.is_numeric_dtype(lqi[c]):
        lqi_means.append(lqi[c].mean())
        lqi_medians.append(lqi[c].median())
    else:
        lqi_means.append('Mean')
        lqi_medians.append('Median')

# NOTE(review): assumes the index labels lqi_len and lqi_len + 1 are not
# already in use (true for a default 0..n-1 index); otherwise these
# assignments would overwrite existing rows instead of appending.
lqi.loc[lqi_len] = lqi_means
lqi.loc[lqi_len+1] = lqi_medians
# Keep only the first row per 'Country Name'. When this cell is re-run, the
# newly appended 'Mean'/'Median' rows are discarded here, so the summary rows
# from the first run are the ones that remain.
lqi.drop_duplicates(subset=['Country Name'], inplace=True)
lqi

Unnamed: 0,Country Name,LQI,Purchasing Power Index,Safety Index,Health Care Index,Cost of Living Index,Property Price to Income Ratio,Traffic Commute Time Index,Pollution Index,Climate Index
0,Switzerland,1.000000,118.440000,78.320000,74.850000,123.350000,8.290000,28.500000,19.590000,80.210000
1,Denmark,0.979626,99.450000,73.440000,80.070000,84.120000,6.780000,28.520000,20.970000,81.800000
2,Netherlands,0.930757,87.990000,72.120000,75.560000,75.660000,7.150000,27.400000,25.070000,87.110000
3,Finland,0.927816,91.020000,72.750000,76.310000,73.200000,7.950000,27.800000,12.090000,56.640000
4,Australia,0.919765,104.630000,56.150000,78.140000,77.750000,7.200000,34.780000,23.850000,92.700000
...,...,...,...,...,...,...,...,...,...,...
84,Bangladesh,0.106070,25.300000,36.550000,42.250000,33.130000,13.540000,55.920000,84.990000,71.290000
85,Iran,0.087167,17.980000,50.890000,52.300000,37.390000,33.000000,47.120000,75.400000,65.690000
86,Nigeria,0.000000,9.340000,36.160000,48.490000,30.490000,16.110000,61.970000,88.320000,60.750000
87,Mean,0.557843,55.884828,59.274598,64.473103,52.019425,13.238621,35.825747,53.128506,78.327586


In [21]:
# Write the processed table to 'lqi_processed.csv' at the location returned
# by path.data_interim_dir, without the index column.
lqi.to_csv(path_or_buf=path.data_interim_dir('lqi_processed.csv'), index=False)