## Volatility Plots

In [1]:
import pandas as pd
import numpy as np
import requests
import ujson
import os

import plotly
from plotly import tools

import plotly
with open('/Users/timlee/Dropbox/keys/plotly_apikey.txt','r') as f:
    api_key = f.read()
plotly.tools.set_credentials_file(username='tdlee', api_key=api_key)

import plotly.plotly as ply
import plotly.graph_objs as go
from plotly.grid_objs import Grid, Column

## Common Functions

In [2]:
country_codes = ['chn', 'idn', 'ind', 'jpn', 'sau', 'tur', 'twn',
                 'bel', 'che', 'deu', 'esp', 'fra', 'gbr', 'ita',
                 'nld', 'pol', 'rus', 'swe', 'can', 'mex', 'usa',
                 'aus', 'arg', 'bra']


def get_countries_lkup():
    countries_url = 'https://atlas.media.mit.edu/attr/country/'
    resp = requests.get(countries_url)
    resp_json = ujson.loads(resp.content)['data']
    resp_df = pd.DataFrame(resp_json)
    country_id_lkup = {idx: name for idx, name in resp_df[['display_id','name']].values}
    return country_id_lkup


def get_data(countrycode):
    imports_url = './OEC_%s_imports.feather' % countrycode
    exports_url = './OEC_%s_exports.feather' % countrycode
    
    if not os.path.isfile(imports_url):
        return None, None
    
    if not os.path.isfile(exports_url):
        return None, None
        
    
    df_exports = pd.read_feather(exports_url)
    df_exports = df_exports[df_exports['export_val'].isna()==False].copy()
    df_exports = df_exports.sort_values(by='export_val')
    
    df_imports = pd.read_feather(imports_url)
    df_imports = df_imports[df_imports['import_val'].isna() == False].copy()
    df_imports = df_imports.sort_values(by = 'import_val')
    
    return df_imports, df_exports



def all_ports(country_codes):
    import_collector = []
    export_collector = []
    for countrycode in country_codes:
        df_imports, df_exports = get_data(countrycode)
        if df_imports is not None:
            df_imp_tmp = df_imports[['year','product_name','import_val']].copy()
            df_imp_tmp['countrycode'] = countrycode
            df_exp_tmp = df_exports[['year','product_name','export_val']].copy()
            df_exp_tmp['countrycode'] = countrycode
            import_collector.append(df_imp_tmp)
            export_collector.append(df_exp_tmp)
        
    return pd.concat(import_collector), pd.concat(export_collector)
    

## Top Importer / Exporter Bar Plots

In [None]:
country_id_lkup = get_countries_lkup()
import_collector, export_collector = all_ports(country_codes)

In [164]:
mask = import_collector['year'] == 2014
imp_summary = import_collector[mask].groupby('countrycode')['import_val'].sum().reset_index().sort_values('import_val')
tr = go.Bar(
    x = imp_summary['import_val'],
    y = imp_summary['countrycode'],
    orientation='h'
)
font_dict = dict(
                family='Roboto',
                size=14
            )
layout = go.Layout(
    font=font_dict,
    width = 600,
    height = 600,
    title = 'Top Importers by Country'
)
fig = go.Figure(data=[tr], layout=layout)
ply.iplot(fig, filename='total_importers_2014')

In [165]:
mask = export_collector['year'] == 2014
exp_summary = export_collector[mask].groupby('countrycode')['export_val'].sum().reset_index().sort_values('export_val')
tr = go.Bar(
    x = exp_summary['export_val'],
    y = exp_summary['countrycode'],
    orientation='h'
)
font_dict = dict(
                family='Roboto',
                size=14
            )
layout = go.Layout(
    font=font_dict,
    width = 600,
    height = 600,
    title = 'Top Exporters by Country'
)
fig = go.Figure(data=[tr], layout=layout)
ply.iplot(fig, filename='total_exporters_2014')

## Create Changes in Export Values (delta)

In [102]:
country_id_lkup = get_countries_lkup()
import_collector, export_collector = all_ports(country_codes)

imask = import_collector['product_name'].isin(['Crude Petroleum','Petroleum oils, oils from bituminous minerals, crude']) == False
emask = export_collector['product_name'].isin(['Crude Petroleum','Petroleum oils, oils from bituminous minerals, crude']) == False

import_collector = import_collector[imask].copy()
export_collector = export_collector[emask].copy()

In [123]:
import_collector.sort_values(by=['countrycode','product_name','year'], inplace=True)
export_collector.sort_values(by=['countrycode','product_name','year'], inplace=True)
import_collector['delta'] = import_collector['import_val'].diff()
import_collector = import_collector[import_collector['year']!=1995].copy()
export_collector['delta'] = export_collector['export_val'].diff()
export_collector = export_collector[export_collector['year']!=1995].copy()

In [125]:
top_import_changes_neg = import_collector.sort_values('delta').head(10)
top_import_changes_pos = import_collector.sort_values('delta', ascending=False).head(10)

top_export_changes_neg = export_collector.sort_values('delta').head(10)
top_export_changes_pos = export_collector.sort_values('delta', ascending=False).head(10)

In [127]:
def get_combos(df):
    combos = []
    for row in df.values:
        combos.append((row[1], row[3]))
    return list(set(combos))

export_neg_combo = get_combos(top_export_changes_neg)
export_pos_combo = get_combos(top_export_changes_pos)
import_neg_combo = get_combos(top_import_changes_neg)
import_pos_combo = get_combos(top_export_changes_pos)

## Plotting exports and imports that have drastically changed 

In [140]:
def get_traces(df, filter_combos, val_field, title):
    traces = []
    for product, countrycode in filter_combos:
        mask = (df['product_name'] == product) & (df['countrycode'] == countrycode)
        tmp = df[mask].copy()
        tmp.sort_values('year')
        tr = go.Scatter(
            x = tmp['year'],
            y = tmp[val_field],
            mode = 'lines',
            line = dict(width=5),
            name = country_id_lkup[countrycode] + ' ' + product
        )
        traces.append(tr)

    font_dict = dict(
                family='Roboto',
                size=20
            )

    layout = go.Layout(
        title = title,
        hovermode='closest',
        width=1000,
        height=600,
        xaxis=dict(
            title='Year',
            titlefont=font_dict,
            tickfont=font_dict,        
        ),
        yaxis=dict(
            title=val_field,
            titlefont=font_dict,
            tickfont=font_dict,
        ),
    )

    fig = dict(data=traces, layout=layout)
    return fig

export_neg_fig = get_traces(export_collector, export_neg_combo, 'export_val', 'Largest Drops in Exports')
export_pos_fig = get_traces(export_collector, export_pos_combo, 'export_val', 'Largest Jumps in Exports')
import_neg_fig = get_traces(import_collector, import_neg_combo, 'import_val', 'Largest Drops in Imports')
import_pos_fig = get_traces(import_collector, import_pos_combo, 'import_val', 'Largest Jumps in Imports')

In [172]:
ply.iplot(export_neg_fig, filename='largest_drops_exports')

In [173]:
ply.iplot(export_pos_fig, filename='Largest_jumps_in_exports')

In [174]:
ply.iplot(import_pos_fig, filename='largest_jumps_imports')

In [175]:
ply.iplot(import_neg_fig, filename='largest_drops_imports')

In [80]:
traces

[{'type': 'scatter', 'x': 4799      1995.0
  10863     1996.0
  16925     1997.0
  22972     1998.0
  29017     1999.0
  35126     2000.0
  41316     2001.0
  47482     2002.0
  53646     2003.0
  59803     2004.0
  65968     2005.0
  72136     2006.0
  78292     2007.0
  84420     2008.0
  90521     2009.0
  96576     2010.0
  102610    2011.0
  108657    2012.0
  114690    2013.0
  120704    2014.0
  Name: year, dtype: float64, 'y': 4799      4.098790e+07
  10863     5.202274e+07
  16925     4.236958e+07
  22972     5.452878e+07
  29017     5.845065e+07
  35126     2.337116e+07
  41316     3.202624e+07
  47482     6.191575e+07
  53646     5.158936e+07
  59803     1.078894e+08
  65968     4.494443e+07
  72136     2.840680e+07
  78292     4.049030e+07
  84420     6.757842e+07
  90521     6.039019e+07
  96576     5.259883e+07
  102610    6.971555e+07
  108657    4.836816e+07
  114690    5.045121e+07
  120704    3.473077e+07
  Name: import_val, dtype: float64, 'mode': 'lines', 'name': 'C