# Atlas data updates and growth projection notes

Methodological notes, data sources and interactive visualization from http://atlas.cid.harvard.edu/rankings/growth-predictions/

Important links
* [U.N. Comtrade](http://comtrade.un.org) website
* [Atlas Data](https://github.com/cid-harvard/atlas-data) processing

In [161]:
# Required modules to run this notebook
from IPython.display import IFrame
import pandas as pd
import numexpr

In [None]:
# Locations of the raw inputs used throughout this notebook.
# Remote: per-country metadata as JSON (fetched over HTTP further down).
url_countries = 'https://raw.githubusercontent.com/mledoze/countries/master/dist/countries.json'
# Local: CSV files, given relative to the notebook's working directory.
file_rank_eci = 'sourceData/ECI_rankings_2014.csv'
file_projections = 'sourceData/Growth_proj_rankings_2014.csv'

# Load countries data

In [210]:
import json
# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that `urllib.request` is loaded.
import urllib.request

# Download the countries metadata. The context manager closes the HTTP
# response once the payload has been read.
with urllib.request.urlopen(url_countries) as r:
    # Decode with the charset announced by the server, defaulting to UTF-8.
    charset = r.info().get_param('charset') or 'utf-8'
    data = json.loads(r.read().decode(charset))

In [211]:
# Flatten the countries dataset (nested keys become dotted column names,
# e.g. `name.common`, which is used later in this notebook).
# Prefer the top-level `pd.json_normalize`; fall back to the historical
# `pandas.io.json` location when running on an older pandas.
try:
    json_normalize = pd.json_normalize
except AttributeError:
    from pandas.io.json import json_normalize
result = json_normalize(data)

In [213]:
# Wrap the flattened records in the frame used for later country lookups,
# then preview its first rows.
df_countries = pd.DataFrame(data=result)
df_countries.head(5)

Unnamed: 0,altSpellings,area,borders,callingCode,capital,cca2,cca3,ccn3,cioc,currency,...,translations.jpn.common,translations.jpn.official,translations.nld.common,translations.nld.official,translations.por.common,translations.por.official,translations.rus.common,translations.rus.official,translations.spa.common,translations.spa.official
0,[AW],180,[],[297],Oranjestad,AW,ABW,533,ARU,[AWG],...,アルバ,アルバ,Aruba,Aruba,Aruba,Aruba,Аруба,Аруба,Aruba,Aruba
1,"[AF, Afġānistān]",652230,"[IRN, PAK, TKM, UZB, TJK, CHN]",[93],Kabul,AF,AFG,4,AFG,[AFN],...,アフガニスタン,アフガニスタン·イスラム共和国,Afghanistan,Islamitische Republiek Afghanistan,Afeganistão,República Islâmica do Afeganistão,Афганистан,Исламская Республика Афганистан,Afganistán,República Islámica de Afganistán
2,"[AO, República de Angola, ʁɛpublika de an'ɡɔla]",1246700,"[COG, COD, ZMB, NAM]",[244],Luanda,AO,AGO,24,ANG,[AOA],...,アンゴラ,アンゴラ共和国,Angola,Republiek Angola,Angola,República de Angola,Ангола,Республика Ангола,Angola,República de Angola
3,[AI],91,[],[1264],The Valley,AI,AIA,660,,[XCD],...,アンギラ,アングィラ,Anguilla,Anguilla,Anguilla,Anguilla,Ангилья,Ангилья,Anguilla,Anguila
4,"[AX, Aaland, Aland, Ahvenanmaa]",1580,[],[358],Mariehamn,AX,ALA,248,,[EUR],...,オーランド諸島,オーランド諸島,Ålandeilanden,Åland eilanden,Alândia,Ilhas Åland,Аландские острова,Аландские острова,Alandia,Islas Åland


# Load growth prediction data

In [203]:
# Read the growth projections, then drop the pre-computed `rank2014` column:
# rankings are re-generated for every year further down.
df = pd.read_csv(file_projections)
del df['rank2014']

In [204]:
# Reshape wide -> long: every non-identifier column becomes a (year, value) row
df = pd.melt(
    df,
    id_vars=["iso", "countryname"],
    var_name="year",
    value_name="value",
)

In [205]:
# Formatting year (time) column: drop the first two characters of every label.
# Done as a single vectorized string slice instead of a row-by-row
# iterrows()/.loc loop, which performs one indexed write per row.
df["year"] = df["year"].str[2:]

In [206]:
# Rank countries within each year, largest value first.
# method='first' breaks ties by order of appearance, so ranks are unique per year.
values_by_year = df.groupby('year')['value']
df['rank'] = values_by_year.rank(method='first', ascending=False)

In [207]:
# Sanity check only: the sorted frame is not assigned back and the trailing
# `;` hides it. `DataFrame.sort` no longer exists in current pandas, so use
# `sort_values`, which takes the same column list and `ascending` flag.
df.sort_values(['rank'], ascending=False);

In [208]:
# Show the rows for which no rank could be computed
rank_is_missing = df['rank'].isnull()
df[rank_is_missing]

Unnamed: 0,iso,countryname,year,value,rank
38,SRB,Serbia,2004,,
614,SYR,Syrian Arab Republic,2008,,
737,SYR,Syrian Arab Republic,2009,,
860,SYR,Syrian Arab Republic,2010,,
983,SYR,Syrian Arab Republic,2011,,
1106,SYR,Syrian Arab Republic,2012,,
1229,SYR,Syrian Arab Republic,2013,,
1352,SYR,Syrian Arab Republic,2014,,


In [209]:
# Collect the ISO codes of countries with at least one unranked year;
# these countries are removed from the rankings in the next step.
unranked_rows = df[df['rank'].isnull()]
countries_null = list(set(unranked_rows['iso']))

In [173]:
# Remove every country that has a missing rank for some year.
# The codes come from `countries_null` (computed in the previous cell) rather
# than a hard-coded list, so the filter follows the data if the input changes.
# `.copy()` gives an independent frame, so the column assignments in the
# following cells do not write into a slice of the original.
df = df[~df['iso'].isin(countries_null)].copy()

In [174]:
# Ranks come out of `rank()` as floats; store them as whole numbers.
# This fails if a missing rank is still present, so it must run after the filter.
df['rank'] = df['rank'].astype('int64')

In [175]:
# First rows of the cleaned, ranked projections
df.head(5)

Unnamed: 0,iso,countryname,year,value,rank
0,IND,India,2004,7.15,1
1,UGA,Uganda,2004,6.11,3
2,KEN,Kenya,2004,6.32,2
3,TZA,Tanzania,2004,6.11,4
4,EGY,"Egypt, Arab Rep.",2004,5.0,14


## Load ECI data

In [260]:
# Read the ECI rankings (one `rank_eci<year>` column per year) and preview them
df_eci = pd.read_csv(filepath_or_buffer=file_rank_eci)
df_eci.head(5)

Unnamed: 0,iso,rank_eci2003,rank_eci2004,rank_eci2005,rank_eci2006,rank_eci2007,rank_eci2008,rank_eci2009,rank_eci2010,rank_eci2011,rank_eci2012,rank_eci2013,rank_eci2014
0,JPN,1,1,1,1,1,1,1,1,1,1,1,1
1,DEU,2,2,2,2,2,2,2,3,3,3,2,2
2,CHE,3,4,3,4,3,3,3,2,2,2,3,3
3,KOR,17,15,9,10,9,9,9,8,8,4,4,4
4,SWE,4,3,4,3,4,4,4,5,4,5,5,5


In [261]:
# Reshape wide -> long: each `rank_eci<year>` column becomes a (year, rank_eci) row
df_eci = pd.melt(
    df_eci,
    id_vars=["iso"],
    var_name="year",
    value_name="rank_eci",
)
df_eci.head(5)

Unnamed: 0,iso,year,rank_eci
0,JPN,rank_eci2003,1
1,DEU,rank_eci2003,2
2,CHE,rank_eci2003,3
3,KOR,rank_eci2003,17
4,SWE,rank_eci2003,4


In [262]:
# Formatting year (time) column: strip the 8-character "rank_eci" prefix so
# that e.g. "rank_eci2003" becomes "2003".
# Done as a single vectorized string slice instead of a row-by-row
# iterrows()/.loc loop, which performs one indexed write per row.
df_eci["year"] = df_eci["year"].str[8:]
df_eci.head()

Unnamed: 0,iso,year,rank_eci
0,JPN,2003,1
1,DEU,2003,2
2,CHE,2003,3
3,KOR,2003,17
4,SWE,2003,4


In [263]:
# Attach the common country name to every ECI row, matching on the
# three-letter code. A left join keeps ECI rows that have no metadata match.
country_names = df_countries[['cca3', 'name.common']]
df_eci_countries = df_eci.merge(country_names, how='left', left_on='iso', right_on='cca3')

In [264]:
# The join key from the metadata side duplicates `iso`; remove it in place
del df_eci_countries['cca3']

In [265]:
# Use a plain `name` column instead of the flattened JSON key
renamed_columns = {'name.common': 'name'}
df_eci_countries = df_eci_countries.rename(columns=renamed_columns)

In [266]:
# Collect the ISO codes of countries with a missing ECI rank for at least one
# year (this rebinds `countries_null`, previously used for the projections)
rows_without_eci = df_eci_countries[df_eci_countries['rank_eci'].isnull()]
countries_null = list(set(rows_without_eci['iso']))
countries_null

['SRB']

In [267]:
# Remove every country with a missing ECI rank. The codes come from
# `countries_null` (computed in the previous cell) rather than a hard-coded
# list, so the filter follows the data if the input changes.
# `.copy()` gives an independent frame, so the column assignment in the next
# cell does not write into a slice of the original.
df_eci_countries = df_eci_countries[~df_eci_countries['iso'].isin(countries_null)].copy()

In [268]:
# Store ECI ranks as whole numbers (fails if a missing value is still present)
df_eci_countries['rank_eci'] = df_eci_countries['rank_eci'].astype('int64')
df_eci_countries.head(5)

Unnamed: 0,iso,year,rank_eci,name
0,JPN,2003,1,Japan
1,DEU,2003,2,Germany
2,CHE,2003,3,Switzerland
3,KOR,2003,17,South Korea
4,SWE,2003,4,Sweden


In [269]:
# Write the cleaned ECI rankings to disk for the visualizations embedded below.
# NOTE(review): this is an absolute path on one developer's machine; the cell
# only runs elsewhere if that directory exists.
eci_rankings_csv = '/Users/rvuillemot/Dev/vis-toolkit-datasets/data/atlas_eci_rankings_2004_2014.csv'
df_eci_countries.to_csv(eci_rankings_csv)

# TODO: generate data for download using this format
The resulting download file will go in the sidebar of the page http://beta.atlas.cid.harvard.edu/rankings/
<pre>
rank,abbrv,country,eci_value,delta,year,growth_proj_annual_2024
1,JPN,Japan,2.348182,0,2013,2.13
2,CHE,Switzerland,2.331362,0,2013,3.62
3,DEU,Germany,2.03559,0,2013,-1.33
4,KOR,"Korea, Rep.",1.92968,1,2013,3.85
5,SWE,Sweden,1.817256,-1,2013,2.6
</pre>

In [155]:
# Export rank, country name and ISO code of the growth projections as CSV,
# without the index column.
# NOTE(review): `df` holds one row per country and year at this point and the
# year column is not exported - confirm whether only 2014 rows were intended.
url_rankings = '/Users/rvuillemot/Dev/atlas-economic-complexity/media/growth_projections/country_rankings_and_projections_2014.csv'
export_columns = ['rank', 'countryname', 'iso']
df_export = df[export_columns]
df_export.to_csv(url_rankings, index=False)

# Projections of GDP Growth to 2024 Rankings: Selected Top Countries

In [13]:
# Duplicate `countryname` under the column name `name`, then write the full
# growth-projection table (index included) to disk.
df['name'] = df['countryname']
growth_projections_csv = '/Users/rvuillemot/Dev/vis-toolkit-datasets/data/atlas_growth_projections_2024.csv'
df.to_csv(growth_projections_csv)

In [14]:
# Generate a pretty table
# Import only the helpers used here instead of `from ipy_table import *`.
from ipy_table import make_table, apply_theme, set_row_style
import numpy as np

# First 10 rows for 2014. The second reset_index() turns the fresh 0..9 index
# into a leading column of the table.
# NOTE(review): head(10) is the top-10 only if rows already appear in rank
# order within the year - confirm against the input file.
df_table = df[(df['year'] == '2014')].head(10).reset_index(drop=True).reset_index()
# `.values` replaces `as_matrix()`, which no longer exists in current pandas.
table = df_table.values

# Copy the column labels before editing them: np.asarray() may return the
# array backing the (immutable) column Index, so writing into it could either
# corrupt df_table's columns or fail on a read-only buffer.
header = np.array(df_table.columns, dtype=object)
header[0] = 'Index'
header[1] = 'Country'
table_with_header = np.concatenate(([header], table))

# Basic themes
# Details http://nbviewer.ipython.org/github/epmoyer/ipy_table/blob/master/ipy_table-Introduction.ipynb
make_table(table_with_header)
apply_theme('basic')
# Highlight the first data row (row 0 is the header)
set_row_style(1, color='yellow')

0,1,2,3,4,5,6
Index,Country,countryname,year,value,rank,name
0,IND,India,2014,6.98,1,India
1,UGA,Uganda,2014,6.04,2,Uganda
2,KEN,Kenya,2014,6.0,3,Kenya
3,TZA,Tanzania,2014,5.96,4,Tanzania
4,EGY,"Egypt, Arab Rep.",2014,5.83,5,"Egypt, Arab Rep."
5,MDG,Madagascar,2014,5.78,6,Madagascar
6,SEN,Senegal,2014,5.77,7,Senegal
7,PHL,Philippines,2014,5.68,8,Philippines
8,MWI,Malawi,2014,5.66,9,Malawi


# Economic Complexity Index: Rank of Expected GDP Growth to 2024

The geo-map below shows the expected GDP growth to 2024

In [21]:
# Embed the hosted geo-map example in the notebook
geomap_url = 'https://cid-harvard.github.io/vis-toolkit/examples/geomap_and_tick.html'
IFrame(geomap_url, width=900, height=450)

# Biggest Winners and Losers in Economic Complexity: 2004-2014


In [16]:
# Embed the hosted slopegraph example in the notebook
slopegraph_url = 'https://cid-harvard.github.io/vis-toolkit/examples/slopegraph_projections.html'
IFrame(slopegraph_url, width=900, height=350)

# Economic Complexity Index: 2004-2014 Country Rankings

The line chart below shows the changes in ECI between 2004 and 2014.

In [17]:
# Embed the hosted line-chart example in the notebook
linechart_url = 'https://cid-harvard.github.io/vis-toolkit/examples/linechart_projections.html'
IFrame(linechart_url, width=900, height=350)

# Ranking of ECI by country and grid of countries

In [18]:
# Embed the hosted vertical bar-chart example in the notebook
barchart_url = 'https://cid-harvard.github.io/vis-toolkit/examples/barchart_vertical_projections.html'
IFrame(barchart_url, width=900, height=550)

# How many countries report their data every year?

In [19]:
# Embed the hosted histogram example in the notebook
histogram_url = 'https://cid-harvard.github.io/vis-toolkit/examples/barchart_histogram.html'
IFrame(histogram_url, width=900, height=550)