## Food-Related Plots

In [2]:
import pandas as pd
import plotly
from plotly import tools

import plotly
# with open('/Users/timlee/Dropbox/keys/plotly_apikey.txt','r') as f:
#     api_key = f.read()
# plotly.tools.set_credentials_file(username='tdlee', api_key=api_key)

import plotly.plotly as ply
import plotly.graph_objs as go
from plotly.grid_objs import Grid, Column

# Six-colour palette indexed by rank when drawing per-country line traces.
COUNTRY_COLORS = [
    '#1b9e77',
    '#d95f02',
    '#7570b3',
    '#e7298a',
    '#66a61e',
    '#e6ab02',
]

# Six-colour palette indexed by rank when drawing per-food line traces.
FOOD_COLORS = [
    '#a6cee3',
    '#1f78b4',
    '#b2df8a',
    '#33a02c',
    '#fb9a99',
    '#e31a1c',
]

# Single fill colour for the horizontal bar charts.
BAR_COLOR = '#e31a1c'

### Shared Functions

In [3]:
def get_data(path='FAO.csv'):
    """Load the FAO CSV and reshape it from wide to long format.

    The source file has one column per year, named ``'Y1961'``, ``'Y1962'``, ...
    This function melts those columns into two: an integer ``year`` column and
    a ``value`` column, keeping the descriptive columns as identifiers.

    Parameters
    ----------
    path : str, optional
        Location of the CSV file. Defaults to ``'FAO.csv'`` in the working
        directory, which is what the notebook has always read.

    Returns
    -------
    pandas.DataFrame
        One row per (identifier columns, year) with the columns listed in
        ``index_cols`` followed by ``year`` (int) and ``value``.
    """
    index_cols = ['Area Abbreviation', 'Area Code', 'Area', 'Item Code', 'Item',
                  'Element Code', 'Element', 'Unit', 'latitude', 'longitude']

    df_raw = pd.read_csv(path, encoding="ISO-8859-1")

    # Detect the year columns instead of hard-coding 1961-2013. Restricting the
    # melt to them means a stray column (e.g. a saved index) is ignored rather
    # than crashing the integer conversion below.
    year_cols = [col for col in df_raw.columns
                 if col not in index_cols and col[:1] == 'Y' and col[1:].isdigit()]

    df = pd.melt(df_raw, id_vars=index_cols, value_vars=year_cols, var_name='year')

    # 'Y1961' -> 1961
    df['year'] = df['year'].str[1:].astype('int64')

    return df



def _sum_by_year(data, field, names):
    """Total ``value`` per (field, year) for the rows whose ``field`` is in ``names``."""
    return (data[data[field].isin(names)]
            .groupby([field, 'year'])['value']
            .sum()
            .reset_index()
            .sort_values('year'))


def _build_line_traces(over_time, field, names):
    """Build one thick line trace per entry of ``names``, coloured by position."""
    palette = FOOD_COLORS if field == 'Item' else COUNTRY_COLORS
    traces = []
    for idx, name in enumerate(names):
        series = over_time[over_time[field] == name]
        traces.append(go.Scatter(
            x=series['year'].values,
            # The stored values are scaled by 1000 before plotting (the charts
            # label this axis 'Tonnes').
            y=series['value'].values * 1000,
            mode='lines',
            name=name,
            # Wrap around the palette so more lines than colours cannot raise
            # an IndexError.
            line=dict(color=palette[idx % len(palette)], width=10),
        ))
    return traces


def get_topbot_n_by(field, n, data=None):
    """Build line traces for the ``n`` largest and ``n`` smallest groups of ``field``.

    Groups are ranked by their summed ``value`` in 2013. The "bottom" groups
    are the smallest ones whose 2013 total exceeds 1000, so near-empty groups
    are skipped. Each selected group becomes one line of yearly totals.

    Parameters
    ----------
    field : str
        Column to group by. ``'Item'`` selects the food palette; any other
        value selects the country palette.
    n : int
        Number of groups in each of the two selections.
    data : pandas.DataFrame, optional
        Long-format table with ``field``, ``'year'`` and ``'value'`` columns.
        Defaults to the module-level ``df`` so existing two-argument calls
        keep working.

    Returns
    -------
    (list, list)
        ``(top_traces, bot_traces)``, each ordered from the largest 2013 total
        to the smallest within its selection.
    """
    if data is None:
        data = df

    latest = data[data['year'] == 2013]
    summary = latest.groupby([field])['value'].sum().reset_index().sort_values('value')

    bot_n = summary[summary['value'] > 1000].head(n)
    # Honour n here as well; this used to be hard-coded to 6.
    top_n = summary.tail(n)

    # Reverse the ascending order so the largest group is drawn (and listed) first.
    top_names = top_n[field].values[::-1]
    bot_names = bot_n[field].values[::-1]

    top_traces = _build_line_traces(_sum_by_year(data, field, top_names), field, top_names)
    bot_traces = _build_line_traces(_sum_by_year(data, field, bot_names), field, bot_names)

    return top_traces, bot_traces
    
# Number of entries requested per chart; also interpolated into the chart titles.
n = 6
# Long-format table returned by get_data(); read by the cells that follow.
df = get_data()
# (top, bottom) line traces grouped by food item and by area respectively.
top_food_traces, bot_food_traces = get_topbot_n_by('Item', n)
top_cust_traces, bot_cust_traces = get_topbot_n_by('Area', n)

## Plotting US Grown Foods over time

In [15]:
# Base typography reused for the figure, the axis titles and the tick labels.
font_dict = {'family': 'Roboto', 'size': 20}

# Both axes share the same fonts; only the axis title differs.
_axis_fonts = {'titlefont': font_dict, 'tickfont': font_dict}

# Legend pinned to the top-left corner with a smaller font on a grey box.
_legend_style = {
    'x': 0,
    'y': 1,
    'traceorder': 'normal',
    'font': {'family': 'Roboto', 'size': 12, 'color': '#000'},
    'bgcolor': '#E2E2E2',
    'bordercolor': '#FFFFFF',
    'borderwidth': 2,
}

# Shared layout for the line charts; the title is filled in per chart.
layout = go.Layout(
    font=font_dict,
    hovermode='closest',
    width=900,
    height=600,
    xaxis=dict(title='Year', **_axis_fonts),
    yaxis=dict(title='Tonnes', **_axis_fonts),
    legend=_legend_style,
)

In [16]:
# Title and upload name both follow n, so they cannot drift apart if n changes.
layout['title'] = 'Top %d US Grown Foods' % n
fig = go.Figure(data=top_food_traces, layout=layout)
ply.iplot(fig, filename='top_%d_food_over_time_scatter' % n)

In [17]:
# Title and upload name both follow n, so they cannot drift apart if n changes.
layout['title'] = 'Bottom %d US Grown Foods' % n
fig = go.Figure(data=bot_food_traces, layout=layout)
ply.iplot(fig, filename='bot_%d_food_over_time_scatter' % n)

In [18]:
# Title and upload name both follow n, so they cannot drift apart if n changes.
layout['title'] = 'Top %d Eaters of US Grown Foods' % n
fig = go.Figure(data=top_cust_traces, layout=layout)
ply.iplot(fig, filename='top_%d_cust_over_time_scatter' % n)

In [19]:
# To save the current figure as a standalone HTML file instead of uploading it:
#plotly.offline.plot(fig, filename='top_6_cust_over_time_scatter.html')

In [20]:
# Title and upload name both follow n, so they cannot drift apart if n changes.
layout['title'] = 'Bottom %d Eaters of US Grown Foods' % n
fig = go.Figure(data=bot_cust_traces, layout=layout)
ply.iplot(fig, filename='bot_%d_cust_over_time_scatter' % n)

## Top Consumers of US Foods

In [21]:

# Horizontal bar chart of the areas with the largest 2013 totals.
field = 'Area'
mask = df['year'] == 2013
summary = df[mask].groupby([field])['value'].sum().reset_index().sort_values('value')

# Keep only totals above 100 million once scaled by 1000. Filtering preserves
# the ascending order from the sort above, so no second sort is needed.
summary = summary[summary['value'] * 1000 > 100e6]

# Five largest areas (ascending), reused by the country-vs-food grid.
top5_area = summary[field].tail(5)

tr = go.Bar(
    y=summary[field].values,
    x=summary['value'].values * 1000,
    marker=dict(color=BAR_COLOR),
    orientation='h'
)

layout = go.Layout(
    title="2013's Top Consumers",
    font=font_dict,
    # Figure size must be numeric; the layout schema does not accept strings.
    width=800,
    height=800,
    # A plain dict replaces the deprecated go.Margin alias. The wide left
    # margin leaves room for long area names.
    margin=dict(
        l=300,
        r=10,
    ),
    xaxis=dict(
        title='Tonnes',
        type='log',
        tickfont=font_dict,
        autorange=True
    ),
    yaxis=dict(
        tickfont=font_dict
    )
)
fig = go.Figure(data=[tr], layout=layout)
ply.iplot(fig, filename='2013_top_US_Consumers_of_Grown_Foods')

## Top US Produced Foods

In [22]:

# Horizontal bar chart of the food items with the largest 2013 totals.
field = 'Item'
mask = df['year'] == 2013
summary = df[mask].groupby([field])['value'].sum().reset_index().sort_values('value')

# Keep only totals above 100 million once scaled by 1000. Filtering preserves
# the ascending order from the sort above, so no second sort is needed.
summary = summary[summary['value'] * 1000 > 100e6]

# Five largest items (ascending), reused by the country-vs-food grid.
top5_item = summary[field].tail(5)

tr = go.Bar(
    y=summary[field].values,
    x=summary['value'].values * 1000,
    marker=dict(color=BAR_COLOR),
    orientation='h'
)

layout = go.Layout(
    title="2013's Top US Grown Foods",
    font=font_dict,
    # Figure size must be numeric; the layout schema does not accept strings.
    width=800,
    height=800,
    # A plain dict replaces the deprecated go.Margin alias. The wide left
    # margin leaves room for long item names.
    margin=dict(
        l=300,
        r=10,
    ),
    xaxis=dict(
        title='Tonnes',
        type='log',
        tickfont=font_dict,
        autorange=True
    ),
    yaxis=dict(
        tickfont=font_dict
    )
)
fig = go.Figure(data=[tr], layout=layout)
ply.iplot(fig, filename='2013_top_US_Grown_Foods')

In [23]:
# Preview the first rows of the long-format frame (identifier columns plus year and value).
df.head()

Unnamed: 0,Area Abbreviation,Area Code,Area,Item Code,Item,Element Code,Element,Unit,latitude,longitude,year,value
0,AFG,2,Afghanistan,2511,Wheat and products,5142,Food,1000 tonnes,33.94,67.71,1961,1928.0
1,AFG,2,Afghanistan,2805,Rice (Milled Equivalent),5142,Food,1000 tonnes,33.94,67.71,1961,183.0
2,AFG,2,Afghanistan,2513,Barley and products,5521,Feed,1000 tonnes,33.94,67.71,1961,76.0
3,AFG,2,Afghanistan,2513,Barley and products,5142,Food,1000 tonnes,33.94,67.71,1961,237.0
4,AFG,2,Afghanistan,2514,Maize and products,5521,Feed,1000 tonnes,33.94,67.71,1961,210.0


## Top Country Consumers vs. Top US Grown Foods

In [24]:


# Small-multiples grid: one row per area in top5_area, one column per item in
# top5_item. The grid size comes from the selections themselves, so it still
# lines up if either holds fewer than five entries.
n_rows, n_cols = len(top5_area), len(top5_item)

# Subplot titles in row-major order, matching the order traces are added below.
titles = ['%s <br> %s' % (cust, prod)
          for cust in top5_area
          for prod in top5_item]

subplots = tools.make_subplots(rows=n_rows, cols=n_cols, subplot_titles=titles)

# Filter once instead of re-scanning the full frame for every subplot.
positive = df[df['value'] > 0]

for i, cust in enumerate(top5_area):
    cust_rows = positive[positive['Area'] == cust]
    for j, prod in enumerate(top5_item):
        tmp = (cust_rows[cust_rows['Item'] == prod]
               .groupby('year')['value']
               .sum()
               .reset_index())
        tr = go.Scatter(
            x=tmp['year'].values,
            # NOTE(review): unlike the other charts, values are plotted as
            # stored (not multiplied by 1000) - confirm this is intended.
            y=tmp['value'].values,
            mode='lines',
            name='%s eats %s' % (cust, prod),
            line=dict(width=7, color=FOOD_COLORS[j % len(FOOD_COLORS)])
        )

        subplots.append_trace(tr, i + 1, j + 1)

        # Axes are numbered row-major from 1, so the axis index follows from
        # the grid position. All panels share one y-range for comparison.
        axis_number = i * n_cols + j + 1
        subplots['layout']['yaxis' + str(axis_number)].update(range=[0, 2e5])

subplots['layout'].update(title="Top Countries vs. Top Foods")
subplots['layout'].update(font=dict(family='Roboto', size=12))

# Subplot titles are stored as layout annotations; shrink them to fit the grid.
for d in subplots['layout']['annotations']:
    d['font']['size'] = 12

subplots['layout'].update(height=1000, width=1000)
subplots['layout'].update(showlegend=False)
ply.iplot(subplots, filename='plotly_multiples')

This is the format of your plot grid:
[ (1,1) x1,y1 ]    [ (1,2) x2,y2 ]    [ (1,3) x3,y3 ]    [ (1,4) x4,y4 ]    [ (1,5) x5,y5 ]  
[ (2,1) x6,y6 ]    [ (2,2) x7,y7 ]    [ (2,3) x8,y8 ]    [ (2,4) x9,y9 ]    [ (2,5) x10,y10 ]
[ (3,1) x11,y11 ]  [ (3,2) x12,y12 ]  [ (3,3) x13,y13 ]  [ (3,4) x14,y14 ]  [ (3,5) x15,y15 ]
[ (4,1) x16,y16 ]  [ (4,2) x17,y17 ]  [ (4,3) x18,y18 ]  [ (4,4) x19,y19 ]  [ (4,5) x20,y20 ]
[ (5,1) x21,y21 ]  [ (5,2) x22,y22 ]  [ (5,3) x23,y23 ]  [ (5,4) x24,y24 ]  [ (5,5) x25,y25 ]

