# Statistics of Country

This data is derived from the [UN Human Development Report](https://hdr.undp.org/en/content/human-development-index-hdi). It provides human-development scores and rankings for each country.

## Necessary Country List

In [1]:
import pandas as pd
pd.options.display.max_columns = None

import numpy as np

# Location of the university table, relative to this notebook.
UNIV_PATHNAME = '../univ-program/university.csv'

# Read the university table and preview its first rows.
df_univ = pd.read_csv(filepath_or_buffer=UNIV_PATHNAME)
df_univ.head(n=5)

Unnamed: 0,sequence,name,region,country,city,official-link,established,student-number,faculty-number,campus-size,map-iframe
0,1,GE3(Global Engineering Education Exchange),Others,,,https://globale3.studioabroad.com,,,,,
1,2,ISEP(International Student Exchange Programs),Others,,,,,,,,
2,3,University of Twente,Europe,Netherlands,Enschede,http://www.utwente.nl/,1961.0,12544.0,3150.0,146.0,https://www.google.com/maps/embed?pb=!1m18!1m1...
3,4,Amsterdam University of Applied Sciences,Europe,Netherlands,,,,,,,
4,5,Amsterdam University of Applied Sciences,Europe,Netherlands,,,,,,,


In [2]:
# Unique, alphabetically sorted country names taken from the university table,
# kept as a one-column DataFrame ('country') so it can be merged on that column.
# Built as a single chain: no inplace sort, and `country_list` is bound once
# to its final DataFrame form instead of being a Series first.
country_list = (
    df_univ['country']
    .dropna()
    .drop_duplicates()
    .sort_values(ignore_index=True)
    .to_frame()
)
country_list.head()
# Optional export of the bare country names (left disabled):
# country_list.to_csv('./country-list.csv', index=False, header=False)

Unnamed: 0,country
0,Australia
1,Austria
2,Belgium
3,Brazil
4,Brunei


## Extract Statistics

In [3]:
# Location of the full statistics table, relative to this notebook.
STAT_PATHNAME = './country-stat-all.csv'

# Read the statistics table and preview its first rows.
df_stat = pd.read_csv(filepath_or_buffer=STAT_PATHNAME)
df_stat.head(n=5)

Unnamed: 0,hdi-rank,group,label,gii-value,gii-rank,gii-parliament,population,life-expectancy,health-expend,edu-secondary,edu-tertiary,gdp-total,gdp-capita,price-index,labor-rate,unemployment,homicide,suicide-female,suicide-male,trade,foreign-invest,intl-student,intl-tourist,fossil-fuel,co2-gdp
0,1.0,Countries,Norway,0.045,6.0,40.828,5.379,70.837,10.446,94.921,81.992,340.303,63633.102,120.27,63.804,3.346,0.468,6.5,13.6,72.147,0.445,-3.079,5688,56.952,0.109
1,2.0,Countries,Ireland,0.093,23.0,24.312,4.882,70.352,7.184,81.12,77.781,428.825,86781.39,106.584,62.067,4.928,0.872,4.2,17.6,239.215,-20.393,2.24,10926,85.343,0.112
2,2.0,Countries,Switzerland,0.025,1.0,38.618,8.591,72.024,12.346,96.149,59.563,588.472,68627.829,99.547,68.253,4.581,0.586,6.9,15.8,119.439,-2.615,12.985,10362,50.168,0.08
3,4.0,Countries,"Hong Kong, China (SAR)",,,,7.436,,,79.4,76.922,449.64,59892.866,134.593,60.073,3.629,0.651,,,352.822,14.526,-0.697,29263,93.173,0.108
4,4.0,Countries,Iceland,0.058,9.0,38.095,0.339,72.315,8.326,100.0,71.845,20.188,55873.951,129.003,75.033,2.842,0.891,4.7,21.7,85.917,-1.801,-7.831,2344,11.289,0.135


In [4]:
# List the required countries that have no identically named row among the
# statistics rows whose group is 'Countries'.
stat_country_labels = df_stat.loc[df_stat['group'] == 'Countries', 'label']
bools = ~country_list['country'].isin(stat_country_labels)
country_list.loc[bools]

Unnamed: 0,country
4,Brunei
11,Hong Kong
22,Swiss
24,UK
27,United States of America


In [5]:
# Rename country labels in the statistics table so they match the spelling
# used in the university country list (statistics name -> university-list name).
mapper = {
    'Brunei Darussalam': 'Brunei',
    'Hong Kong, China (SAR)': 'Hong Kong',
    'Switzerland': 'Swiss',
    'United Kingdom': 'UK',
    'United States': 'United States of America'
}

# Only rows in the 'Countries' group are renamed. A vectorised replace is a
# no-op for labels that were already renamed, so this cell can be re-run
# safely (looking up `.index[0]` per key raised IndexError on a second run).
is_country = df_stat['group'] == 'Countries'
df_stat.loc[is_country, 'label'] = df_stat.loc[is_country, 'label'].replace(mapper)

# Fail loudly if any target name is still absent, i.e. its source label was
# never present among the 'Countries' rows.
missing_labels = sorted(set(mapper.values()) - set(df_stat.loc[is_country, 'label']))
assert not missing_labels, f'labels not found in statistics: {missing_labels}'

In [6]:
# Sanity check: every required country must have an identically named row
# among the statistics rows whose group is 'Countries'.
stat_country_labels = df_stat.loc[df_stat['group'] == 'Countries', 'label']
assert country_list['country'].isin(stat_country_labels).all()

In [8]:
# Join each required country to its statistics row. Only rows whose group is
# 'Countries' are used on the right side, the same subset the earlier matching
# checks validate, so a row from any other group that happens to share a
# label cannot be joined by accident.
stat_countries = df_stat[df_stat['group'] == 'Countries']
df_ctry = pd.merge(left=country_list, right=stat_countries, how='inner',
                   left_on='country', right_on='label')

# The inner join must yield exactly one row per required country:
# fewer rows means an unmatched name, more rows means duplicated labels.
assert df_ctry.shape[0] == country_list.shape[0], (
    f'expected {country_list.shape[0]} rows after merge, got {df_ctry.shape[0]}'
)

df_ctry.tail()

Unnamed: 0,country,hdi-rank,group,label,gii-value,gii-rank,gii-parliament,population,life-expectancy,health-expend,edu-secondary,edu-tertiary,gdp-total,gdp-capita,price-index,labor-rate,unemployment,homicide,suicide-female,suicide-male,trade,foreign-invest,intl-student,intl-tourist,fossil-fuel,co2-gdp
23,Turkey,54.0,Countries,Turkey,0.306,68.0,17.45,83.43,67.814,4.216,60.346,,2349.995,28167.397,234.437,52.829,13.49,2.59,3.2,11.3,61.395,1.117,0.859,45768,86.843,0.192
24,UK,13.0,Countries,UK,0.118,31.0,28.909,67.53,69.109,9.632,78.799,59.996,3121.12,46699.302,119.623,62.774,3.851,1.205,3.5,11.9,64.254,0.956,16.468,36316,80.352,0.138
25,Ukraine,74.0,Countries,Ukraine,0.234,52.0,20.519,43.994,64.622,6.995,94.597,82.671,538.388,12810.289,281.659,54.151,8.882,6.184,4.7,34.5,90.174,1.979,-1.492,14104,75.35,0.52
26,United Arab Emirates,31.0,Countries,United Arab Emirates,0.079,18.0,50.0,9.771,64.379,3.334,78.918,,655.789,67119.133,114.525,82.094,2.348,0.464,0.8,3.5,160.936,2.46,27.742,21286,86.128,0.317
27,United States of America,17.0,Countries,United States of America,0.204,46.0,23.729,329.065,65.432,17.061,96.029,88.167,20574.972,62682.8,117.244,62.049,3.682,4.957,6.4,21.1,26.389,1.454,4.724,79746,82.428,0.274


In [9]:
# Write the merged table to CSV without the row index, leaving out the
# 'group' and 'label' columns.
export_columns = [col for col in df_ctry.columns if col not in ('group', 'label')]
df_ctry.to_csv('./country-stat.csv', columns=export_columns, index=False)