TODO
* ~~Load `vistk.py`~~
* ~~Load products quality dataset~~
 * ~~Drop some unused columns~~
* ~~List of products (SITC4)~~
  * ~~Products colors~~
  * ~~Full / short names~~
* ~~Caterplot~~
* Tooltips
  * ~~Name is displayed when hover~~
  * ~~Click on items makes it persistent~~
* Add the description of the quality dataset (e.g role of variables, ..)
* Generate descriptive statistics
 * Use matlibplot
* Load and merge atlas datasets
 * List of countries
 * Countries and products metadata
  * 1-digit category names
* Allow a shared x domain accross charts
  * ~~Dot plots~~
* Generate visualizations we find on The Atlas
 * Country treemap
 * Geo-maps
* ~~Scatterplot~~
 * ~~Products treemap~~
   * Add categories labels
* Stacked graph
* Product space
* ~~Add user-friendly controls (e.g. widgets)~~
 * Make the widget **refresh** the chart and not re-generate it
* Test with other dataset SITC4 rev. 1 (from Papagiorgio et. al.)
* https://www.dropbox.com/s/9qkd76a0hmn58sx/Export%20Quality%20IMF%20Master.csv?dl=0

LIVE VERSION
* http://nbviewer.ipython.org/github/cid-harvard/visualization-notebook-templates/blob/master/atlas-export-quality.ipynb

NOTES FROM MEETING
* Upgrading quality allows to increase export
* High quality product allows you to jump to other products
* Ranking chart of average quality


In [1]:
import sys
sys.path.append("./modules")
import vistk
import pandas as pd
import json
import numpy as np

In [2]:
vistk.__radius_min = 2
vistk.__radius_max = 5

In [3]:
# Loading metadata files
metadata = pd.read_json('sourceData/sitc_metadata_int_atlas.csv') 

In [4]:
# Make sure the product code is formatted properly
metadata.code = metadata.code.astype(int).astype(str).str.zfill(4)

# In case we want to change the name of the column
# df.columns.values[3] = "sitc4"

In [5]:
# Loading data files (exports quality)
# SITC4 rev. 2 (from Feenstra et. al.)
# https://www.dropbox.com/s/p1nc7jzz77hnodg/master_data.csv?dl=0
data = pd.read_csv('sourceData/master_data.csv', 
                 usecols=["year", "iso3", "sitc4", "imports", "exports", "quality_imp", "quality_exp"]).sort(columns='year')

In [6]:
# Formatting produts codes for data
data.sitc4 = data.sitc4.astype(int).astype(str).str.zfill(4)

In [7]:
# Merging data and metadata
df = pd.merge(data, metadata, how='left', left_on='sitc4', right_on='code')
df.head()

Unnamed: 0,year,iso3,sitc4,exports,quality_exp,imports,quality_imp,code,color,community_id,name
0,1984,DOM,11,,,52.08442,0.702034,11,#ffe999,81,Live bovines
1,1984,JOR,7264,,,882.2307,1.248837,7264,#9edae5,10,Printing presses
2,1984,MLT,7264,33.6984,1.1567,338.3002,0.937385,7264,#9edae5,10,Printing presses
3,1984,BGD,7264,,,1146.318,0.902277,7264,#9edae5,10,Printing presses
4,1984,GHA,7264,,,31.4641,1.159864,7264,#9edae5,10,Printing presses


In [8]:
# Generate 1-digit as categories names
df['category'] = df['sitc4'].map(lambda x: str(x)[0])

In [9]:
# Example of data subset for the year 1984 and the country France
df[(df['year'] == 1984) & (df['iso3'] == 'FRA')].head()

Unnamed: 0,year,iso3,sitc4,exports,quality_exp,imports,quality_imp,code,color,community_id,name,category
91,1984,FRA,7264,57485.28,1.098981,80330.4,1.02687,7264,#9edae5,10,Printing presses,7
165,1984,FRA,7267,20834.8,1.022593,31882.92,1.11999,7267,#9edae5,10,Other printing machines,7
179,1984,FRA,7264,303.1721,1.896705,,,7264,#9edae5,10,Printing presses,7
308,1984,FRA,7263,14285.87,0.322501,36011.96,0.892491,7263,#9edae5,10,Type-setting machines,7
330,1984,FRA,7263,9.545205,0.111916,,,7263,#9edae5,10,Type-setting machines,7


In [10]:
# Retrieve the unique list of countries
list_countries = list(set(df['iso3']))

In [11]:
from ipywidgets import interact, interactive, fixed

def f(x=1984):
    dotplot = vistk.Dotplot(id='sitc4', name='name', x='quality_exp', year=x, color='color', 
                        group='category')
    dotplot.draw(df[(df['year'] == x) & (df['iso3'] == 'FRA')])

interact(f, x=[1965, 2000]);
f(1984)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
scatterplot = vistk.Scatterplot(id='sitc4', color='color', name='name', x='imports', 
                                y='exports', r='quality_exp', year=1984, group='category')
scatterplot.draw(df[(df['year'] == 1984) & (df['iso3'] == 'FRA')])

In [None]:
scatterplot = vistk.Scatterplot(id='sitc4', color='color', name='name', x='quality_imp', 
                                y='quality_exp', r='exports', year=1984, group='category')
scatterplot.draw(df[(df['year'] == 1984) & (df['iso3'] == 'FRA')])

In [None]:
linechart = vistk.Linechart(id='sitc4', x='year', y='quality_exp', color='sitc4', name='name',
                           group='category')
linechart.draw(df[(df['iso3'] == 'FRA')])

In [None]:
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets

def f(x):
    grid = vistk.Grid(id='sitc4', sort='quality_exp', color='color', name='name', group='category', 
                  r='quality_imp', year=1984)

    grid.draw(df[(df['year'] == 1984) & (df['iso3'] == x)])

interact(f, x=list_countries);

# Default chart
f('FRA')

In [None]:
country = 'FRA'
year = 1984
title= 'Export quality for %s in %s' % (country, year)
treemap = vistk.Treemap(id='sitc4', color='color', name='name', size='quality_exp', sort='quality_exp',
                        group='category', year=year, title=title)
treemap.draw(df[(df['year'] == year) & (df['iso3'] == country)])

In [None]:
# WORK IN PROGRESS
#stackedgraph = vistk.Stackedgraph(id='sitc4', x='year', y='quality_exp', color='color', 
#                                  name='name')
#stackedgraph.draw(df[df['iso3'] == 'FRA'])

In [None]:
country = 'FRA'
year = 1984
caterplot = vistk.Caterplot(id='sitc4', color='color', name='name', x='category', 
                            y='quality_exp', r='exports', year=year, group='category')
caterplot.draw(df[(df['year'] == year) & (df['iso3'] == country)])

In [None]:
# Work in progress
geomap = vistk.Geomap(id='iso3', color='eci', name='name', year=1984)
geomap.draw(df[(df['year'] == 1984)])

In [13]:
x_domain = [0, df[(df['year'] == 1984)].quality_exp.max()]

In [14]:
# Generates serveral charts automatically

for i in range(10):
    print(list_countries[i])
    dotplot = vistk.Dotplot(id='sitc4', name='name', x='quality_exp', year=1984, color='color', 
                        group='category', x_domain=x_domain)
    dotplot.draw(df[(df['year'] == 1984) & (df['iso3'] == list_countries[i])])

LBN


<IPython.core.display.Javascript object>

COL


<IPython.core.display.Javascript object>

MKD


<IPython.core.display.Javascript object>

BLZ


<IPython.core.display.Javascript object>

ETH


<IPython.core.display.Javascript object>

NAM


<IPython.core.display.Javascript object>

VEN


<IPython.core.display.Javascript object>

GEO


<IPython.core.display.Javascript object>

URY


<IPython.core.display.Javascript object>

ARG


<IPython.core.display.Javascript object>