# Atlas of Economic Complexity: 2015 data updates

Related links
* [U.N. Comtrade](http://comtrade.un.org) source of trade data
* [Atlas Data](https://github.com/cid-harvard/atlas-data) processing scripts for trade data

# How many countries report their data every year?
The data used in The Atlas use a sample of countries that has been restricted to countries with average exports of US$200 million (if reported) between 2005-2014.

In [180]:
IFrame('https://cid-harvard.github.io/vis-toolkit/examples/barchart_histogram.html', width=900, height=550)

# Load countries metadata

In [130]:
import json
import urllib

r = urllib.request.urlopen(url_countries)
data = json.loads(r.read().decode(r.info().get_param('charset') or 'utf-8'))

In [131]:
# Flatten the countries dataset
from pandas.io.json import json_normalize
result = json_normalize(data)

In [132]:
df_countries = pd.DataFrame(result)
df_countries.head()

Unnamed: 0,altSpellings,area,borders,callingCode,capital,cca2,cca3,ccn3,cioc,currency,...,translations.jpn.common,translations.jpn.official,translations.nld.common,translations.nld.official,translations.por.common,translations.por.official,translations.rus.common,translations.rus.official,translations.spa.common,translations.spa.official
0,[AW],180,[],[297],Oranjestad,AW,ABW,533,ARU,[AWG],...,アルバ,アルバ,Aruba,Aruba,Aruba,Aruba,Аруба,Аруба,Aruba,Aruba
1,"[AF, Afġānistān]",652230,"[IRN, PAK, TKM, UZB, TJK, CHN]",[93],Kabul,AF,AFG,4,AFG,[AFN],...,アフガニスタン,アフガニスタン·イスラム共和国,Afghanistan,Islamitische Republiek Afghanistan,Afeganistão,República Islâmica do Afeganistão,Афганистан,Исламская Республика Афганистан,Afganistán,República Islámica de Afganistán
2,"[AO, República de Angola, ʁɛpublika de an'ɡɔla]",1246700,"[COG, COD, ZMB, NAM]",[244],Luanda,AO,AGO,24,ANG,[AOA],...,アンゴラ,アンゴラ共和国,Angola,Republiek Angola,Angola,República de Angola,Ангола,Республика Ангола,Angola,República de Angola
3,[AI],91,[],[1264],The Valley,AI,AIA,660,,[XCD],...,アンギラ,アングィラ,Anguilla,Anguilla,Anguilla,Anguilla,Ангилья,Ангилья,Anguilla,Anguila
4,"[AX, Aaland, Aland, Ahvenanmaa]",1580,[],[358],Mariehamn,AX,ALA,248,,[EUR],...,オーランド諸島,オーランド諸島,Ålandeilanden,Åland eilanden,Alândia,Ilhas Åland,Аландские острова,Аландские острова,Alandia,Islas Åland


# Load growth prediction data

In [133]:
df = pd.read_csv(file_projections)
# Removing ranking column (will be re-generated later on)
df.pop('rank2014');
df.pop('countryname');

In [134]:
# Turning temporal columns into rows+year
df = pd.melt(df, id_vars=["iso"], var_name="year", value_name="value")

In [135]:
df.head()

Unnamed: 0,iso,year,value
0,IND,tg2004,7.15
1,UGA,tg2004,6.11
2,KEN,tg2004,6.32
3,TZA,tg2004,6.11
4,EGY,tg2004,5.0


In [136]:
# Formatting year (time) column
for index, row in df.iterrows():
    df.loc[index, "year"] = df.loc[index, "year"][2:]

In [137]:
# Adding countries ranks for each year
df['rank'] = df.groupby('year')['value'].rank(ascending=False, method='first')

In [138]:
df.sort(['rank'], ascending=False);

In [139]:
df[df['rank'].isnull()]

Unnamed: 0,iso,year,value,rank
38,SRB,2004,,
614,SYR,2008,,
737,SYR,2009,,
860,SYR,2010,,
983,SYR,2011,,
1106,SYR,2012,,
1229,SYR,2013,,
1352,SYR,2014,,


In [140]:
# Discard countries that don't have data for every time point between 2004 and 2014
countries_null = list(set(df[df['rank'].isnull()]['iso']))

In [141]:
# TODO: use the countries_null variable
df = df.query("iso not in ['SRB', 'SYR']")

In [142]:
# Convert rank to integer
df['rank'] = df['rank'].apply(lambda x: int(x))

In [143]:
# Preview of the resulting rankings
df.head()

Unnamed: 0,iso,year,value,rank
0,IND,2004,7.15,1
1,UGA,2004,6.11,3
2,KEN,2004,6.32,2
3,TZA,2004,6.11,4
4,EGY,2004,5.0,14


In [144]:
df = pd.merge(df, df_countries[['cca3', 'name.common']], how='left', left_on='iso', right_on='cca3')

In [145]:
df = df.rename(columns = {'name.common':'name'})
df.head()

Unnamed: 0,iso,year,value,rank,cca3,name
0,IND,2004,7.15,1,IND,India
1,UGA,2004,6.11,3,UGA,Uganda
2,KEN,2004,6.32,2,KEN,Kenya
3,TZA,2004,6.11,4,TZA,Tanzania
4,EGY,2004,5.0,14,EGY,Egypt


In [146]:
# Export
url_export_projections = '/Users/rvuillemot/Dev/vis-toolkit-datasets/data/atlas_growth_projections_2024.csv'
df_export = df[['rank', 'value', 'name', 'iso', 'year']]
df_export.to_csv(url_export_projections, index=False)

## Load ECI data

In [147]:
df_eci = pd.read_csv(file_rank_eci)
df_eci.head()

Unnamed: 0,iso,rank_eci2003,rank_eci2004,rank_eci2005,rank_eci2006,rank_eci2007,rank_eci2008,rank_eci2009,rank_eci2010,rank_eci2011,rank_eci2012,rank_eci2013,rank_eci2014
0,JPN,1,1,1,1,1,1,1,1,1,1,1,1
1,DEU,2,2,2,2,2,2,2,3,3,3,2,2
2,CHE,3,4,3,4,3,3,3,2,2,2,3,3
3,KOR,17,15,9,10,9,9,9,8,8,4,4,4
4,SWE,4,3,4,3,4,4,4,5,4,5,5,5


In [148]:
# Turning temporal columns into rows+year
df_eci = pd.melt(df_eci, id_vars=["iso"], var_name="year", value_name="rank_eci")
df_eci.head()

Unnamed: 0,iso,year,rank_eci
0,JPN,rank_eci2003,1
1,DEU,rank_eci2003,2
2,CHE,rank_eci2003,3
3,KOR,rank_eci2003,17
4,SWE,rank_eci2003,4


In [149]:
# Formatting year (time) column
for index, row in df_eci.iterrows():
    df_eci.loc[index, "year"] = df_eci.loc[index, "year"][8:]
df_eci.head()

Unnamed: 0,iso,year,rank_eci
0,JPN,2003,1
1,DEU,2003,2
2,CHE,2003,3
3,KOR,2003,17
4,SWE,2003,4


In [150]:
# Merging with countries metadata
df_eci_countries = pd.merge(df_eci, df_countries[['cca3', 'name.common']], how='left', left_on='iso', right_on='cca3')

In [151]:
df_eci_countries.pop('cca3');

In [152]:
df_eci_countries = df_eci_countries.rename(columns = {'name.common':'name'})

In [153]:
# Discard countries that don't have data for every time point between 2004 and 2014
countries_null = list(set(df_eci_countries[df_eci_countries['rank_eci'].isnull()]['iso']))
countries_null

['SRB']

In [154]:
df_eci_countries = df_eci_countries.query("iso not in ['SRB']")

In [171]:
df_eci_countries['rank_eci'] = df_eci_countries['rank_eci'].apply(lambda x: int(x))
df_eci_countries.head()

Unnamed: 0,iso,year,rank_eci,name
0,JPN,2003,1,Japan
1,DEU,2003,2,Germany
2,CHE,2003,3,Switzerland
3,KOR,2003,17,South Korea
4,SWE,2003,4,Sweden


# Projections of GDP Growth to 2024 Rankings: Selected Top Countries

In [157]:
# from IPython.display import display, HTML
# HTML(df.head().to_html())
df['name'] = df['countryname']
df.to_csv('/Users/rvuillemot/Dev/vis-toolkit-datasets/data/atlas_growth_projections_2024.csv')

KeyError: 'countryname'

In [None]:
# Generate a pretty table
from ipy_table import *
import numpy as np

df_table = df[(df['year'] == '2014')].head(10).reset_index(drop=True).reset_index()
df_table = df_table[['rank', 'name', 'value']]
table = df_table.as_matrix()

header = np.asarray(df_table.columns)
header[0] = 'Rank'
header[2] = 'Value'
# df.rename(columns=lambda x: x[1:], inplace=True)
table_with_header = np.concatenate(([header], table))

# Basic themes
# Detais http://nbviewer.ipython.org/github/epmoyer/ipy_table/blob/master/ipy_table-Introduction.ipynb
make_table(table_with_header)
apply_theme('basic')
# Only show the top-10
set_row_style(1, color='yellow')

# Economic Complexity Index: Rank of Expected GDP Growth to 2024

The geo-map below shows the expected GDP growth to 2024

In [160]:
IFrame('https://cid-harvard.github.io/vis-toolkit/examples/geomap_and_tick.html', width=900, height=500)

# Biggest Winners and Losers in Economic Complexity: 2004-2014


In [158]:
IFrame('https://cid-harvard.github.io/vis-toolkit/examples/slopegraph_eci_rankings.html', width=900, height=550)

# Economic Complexity Index: 2004-2014 Country Rankings

The line chart below shows the changes in ECI rankings between 2004 and 2014.

In [159]:
IFrame('https://cid-harvard.github.io/vis-toolkit/examples/linechart_eci_rankings.html', width=900, height=550)

# Ranking of ECI by country and grid of countries

In [36]:
IFrame('https://cid-harvard.github.io/vis-toolkit/examples/barchart_vertical_projections.html', width=900, height=550)