# Pie Scatter Notebook


The goal of this notebook is to provide a series of examples:

* Using the International Atlas data
* Using the Subnational Atlas data
* Customizing the chart
  * Colors
  * Size
  * Cutoff value
 


In [1]:
import sys
sys.path.append("./modules")
import vistk
import pandas as pd
from linnaeus import classification
import json

# International Atlas Data

In [2]:
# Parse the Nigeria 2013 exports file (per its name) into `data`.
# NOTE(review): `data` is reassigned by the Kuwait loading cell further down
# before any cell shown here reads this value — confirm it is still needed.
nigeria_exports_path = "sourceData/nigeria_exports_2013.json"
with open(nigeria_exports_path) as exports_file:
    data = json.load(exports_file)

In [3]:
# Parse the Nigeria 2013 partners file (per its name) into `partners`; the
# following cells read its 'data' and 'attr_data' keys.
nigeria_partners_path = "sourceData/nigeria_partners_2013.json"
with open(nigeria_partners_path) as partners_file:
    partners = json.load(partners_file)

In [4]:
# Tabulate the records stored under the 'data' key of the partners JSON.
partner_records = partners['data']
df_partners = pd.DataFrame(partner_records)

In [5]:
# Tabulate the records stored under the 'attr_data' key of the partners JSON
# (presumably per-country attributes: they are joined on `name_3char` next).
partner_attributes = partners['attr_data']
df_attr_data = pd.DataFrame(partner_attributes)

In [6]:
# Left join: keep every partner row and attach the attribute row whose
# `name_3char` equals the partner's `abbrv`. Rows without a match keep
# missing values in the attribute columns.
df_countries = df_partners.merge(
    df_attr_data,
    how='left',
    left_on='abbrv',
    right_on='name_3char',
)

In [7]:
# Parse the Kuwait 2013 exports file (per its name), then tabulate the
# records stored under its 'data' key.
kuwait_exports_path = "sourceData/kuwait_exports_2013.json"
with open(kuwait_exports_path) as kuwait_file:
    data = json.load(kuwait_file)

kuwait_records = data['data']
df_kuwait = pd.DataFrame(kuwait_records)

## Pie-Scatter countries

Note: `df_countries_cutoff` is the dataframe filtered to a given year, with an added `cutoff` column: the binary variable the pie chart relies upon.

In [8]:
# Keep only the 2013 rows. The explicit .copy() makes this an independent
# frame, so adding a column below is unambiguous and raises no
# SettingWithCopyWarning. It replaces the former `is_copy = False` trick,
# which relied on a non-public attribute that pandas deprecated and later
# removed.
df_countries_cutoff = df_countries[df_countries.year == 2013].copy()

# Binary flag the pie chart relies upon: 1 when `share` is above 0.05, else 0.
# Missing shares compare as False and therefore get 0, as before.
df_countries_cutoff['cutoff'] = (df_countries_cutoff['share'] > .05).astype(int)

In [9]:
# Pie-scatter of partner countries, one keyword per line for readability.
# `color` points at the binary `cutoff` column; `group` uses `continent_x`,
# so there is one circle per distinct continent value.
pie_scatterplot = vistk.PieScatterplot(
    id='abbrv',
    color='cutoff',
    name='abbrv',
    group='continent_x',
    x='id_x',
    y='value',
    r='value',
    year=2013,
)
pie_scatterplot.draw(df_countries_cutoff)

<IPython.core.display.Javascript object>

In [10]:
# Note: we have more circles than continents due to some missing metadata
# (rows without a continent show up as None among the distinct values).
# Series.unique() returns the distinct values in order of appearance, the
# same result as pd.unique(series.ravel()) but without Series.ravel(), which
# recent pandas releases deprecate.
df_countries_cutoff['continent_x'].unique()

array(['Europe', 'Africa', 'Asia', 'Americas', 'Oceania', None], dtype=object)

## Pie Scatter Products

Note: `df_kuwait_cutoff` is the dataframe filtered to a given year, with an added `cutoff` column: the binary variable the pie chart relies upon.

In [11]:
# Keep only the 2013 rows. The explicit .copy() makes this an independent
# frame, so adding a column below is unambiguous and raises no
# SettingWithCopyWarning. It replaces the former `is_copy = False` trick,
# which relied on a non-public attribute that pandas deprecated and later
# removed.
df_kuwait_cutoff = df_kuwait[df_kuwait.year == 2013].copy()

# Binary flag the pie chart relies upon: 1 when `rca` is above 1, else 0.
# Missing values compare as False and therefore get 0, as before.
df_kuwait_cutoff['cutoff'] = (df_kuwait_cutoff['rca'] > 1).astype(int)

In [12]:
# Pie-scatter of Kuwait's products, one keyword per line for readability.
# `color` points at the binary `cutoff` column derived from `rca`; circles
# are grouped by `community_name`.
pie_scatterplot = vistk.PieScatterplot(
    id='name',
    color='cutoff',
    name='name',
    x='distance',
    y='pci',
    r='value',
    group='community_name',
    year=2013,
)
pie_scatterplot.draw(df_kuwait_cutoff)

<IPython.core.display.Javascript object>

# Mexican Atlas Data

## What products have the most potential for Mexico (Country)?

* Dataset: [Complexity and opportunity What products have the most potential for this Mexico.csv](sourceData/Complexity%20and%20opportunity%20What%20products%20have%20the%20most%20potential%20for%20this%20Mexico.csv)
* Downloaded from [this page](http://complejidad.datos.gob.mx/#/graph_builder/location-0/source/products/visualization/scatter?endDate=2014&locale=en-mex&startDate=2014)



In [13]:
# Load the Mexico "complexity and opportunity" CSV export into `df`.
mexico_csv_path = 'sourceData/Complexity and opportunity What products have the most potential for this Mexico.csv'
df = pd.read_csv(mexico_csv_path)

In [14]:
# Preview the first five rows to check the column names used further down.
df.head(n=5)

Unnamed: 0,Code,Export,Parent,Year,"Exports, US$","Imports, US$",Revealed comparative advantage,Complexity,Distance,Opportunity gain
0,101,Horses,"Vegetables, foodstuffs and wood",2014,3689122.0,90508832,0.073635,1.859006,0.82377,0.848664
1,102,Bovine,"Vegetables, foodstuffs and wood",2014,744430200.0,52783524,3.622679,0.431213,0.77277,0.0
2,103,Swine,"Vegetables, foodstuffs and wood",2014,0.0,4444363,0.0,2.03478,0.800336,0.814991
3,104,Sheep,"Vegetables, foodstuffs and wood",2014,354930.0,2696730,0.012162,-1.060665,0.797307,0.200314
4,105,Fowl,"Vegetables, foodstuffs and wood",2014,0.0,22986096,0.0,0.692765,0.788141,0.727899


In [15]:
# Copy the 2014 rows and add a cutoff attribute. The explicit .copy() makes
# this an independent frame, so adding a column below is unambiguous and
# raises no SettingWithCopyWarning. It replaces the former `is_copy = False`
# trick, which relied on a non-public attribute that pandas deprecated and
# later removed.
df_cutoff = df[df.Year == 2014].copy()

# Binary flag the pie chart relies upon: 1 when the revealed comparative
# advantage is above 1, else 0. Missing values compare as False and get 0.
df_cutoff['cutoff'] = (df_cutoff['Revealed comparative advantage'] > 1).astype(int)

In [17]:
# Pie-scatter of Mexico's products, one keyword per line for readability.
# The time column is called 'Year' in this CSV, so it is passed explicitly
# through `var_time`; circles are grouped by the 'Parent' column.
pie_scatterplot = vistk.PieScatterplot(
    id='Export',
    color='cutoff',
    name='Export',
    x='Distance',
    y='Opportunity gain',
    r='Exports, US$',
    group='Parent',
    year=2014,
    var_time='Year',
)
pie_scatterplot.draw(df_cutoff)

<IPython.core.display.Javascript object>

## What products have the most potential for Mexico (City)?


In [None]:
# TODO: placeholder for the "What products have the most potential for
# Mexico (City)?" section — this cell currently holds only the section
# title and has not been executed.
