In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
#find the current working directory (uncomment the two lines below to run)
#import os
#os.getcwd()

In [3]:
#import dataset
#The CSV location is kept in one named constant so it is easy to spot and adjust.
DATA_PATH = '/Users/chrismader/Python/Indicators.csv'
df = pd.read_csv(DATA_PATH)
print(df.shape)  #(rows, columns) of the loaded frame
#df.head()

(5656458, 6)


In [4]:
#inspect dataset: collect the distinct values of three columns of the loaded frame
#and print how many countries/indicators there are and which years are covered
countries, indicators, years = (
    df[col].unique().tolist() for col in ('CountryName', 'IndicatorName', 'Year')
)
print('#countries:', len(countries))
print('#indicators:', len(indicators))
print('#years:', min(years), 'to', max(years))

#countries: 247
#indicators: 1344
#years: 1960 to 2015


In [5]:
#select indicator (from list of indicators) and discard unnecessary columns
sel = 'GDP per capita (current US$)'

#Remember the unfiltered frame the first time this cell runs. `df` is overwritten
#below with a frame that no longer has an IndicatorName column, so without this
#a second run of the cell (e.g. after changing `sel`) would fail.
#Trade-off: the unfiltered frame stays referenced for the lifetime of the kernel.
if 'IndicatorName' in df.columns:
    df_all = df
df = df_all[df_all.IndicatorName == sel].drop(['CountryCode','IndicatorName','IndicatorCode'], axis=1)

#Fail early on a misspelled indicator instead of producing all-NaN series later on.
if df.empty:
    raise ValueError('No rows found for indicator {!r}'.format(sel))

In [6]:
#define functions to retrieve time series for 1st quartile, 3rd quartile, chosen country

def find_1st_quartile(year, data=None):
    """Return the 25th percentile of ``Value`` over all rows of one year.

    Parameters
    ----------
    year : int
        Year to select (compared against the ``Year`` column).
    data : pandas.DataFrame, optional
        Frame with ``Year`` and ``Value`` columns. Defaults to the
        notebook-level ``df``.

    Returns
    -------
    float
        First quartile of ``Value`` for ``year``; NaN when no row matches.
    """
    if data is None:
        data = df
    # Pick the Value column by name. Calling quantile() on the whole frame and
    # taking position 1 of the result depends on column order and on the string
    # column being skipped silently, which pandas >= 2.0 no longer does.
    return data.loc[data['Year'] == year, 'Value'].quantile(0.25)

def find_3rd_quartile(year, data=None):
    """Return the 75th percentile of ``Value`` over all rows of one year.

    Parameters
    ----------
    year : int
        Year to select (compared against the ``Year`` column).
    data : pandas.DataFrame, optional
        Frame with ``Year`` and ``Value`` columns. Defaults to the
        notebook-level ``df``.

    Returns
    -------
    float
        Third quartile of ``Value`` for ``year``; NaN when no row matches.
    """
    if data is None:
        data = df
    # Pick the Value column by name. Calling quantile() on the whole frame and
    # taking position 1 of the result depends on column order and on the string
    # column being skipped silently, which pandas >= 2.0 no longer does.
    return data.loc[data['Year'] == year, 'Value'].quantile(0.75)

def find_ctry(year, ctry, data=None):
    """Return the ``Value`` of one country in one year.

    Parameters
    ----------
    year : int
        Year to select (compared against the ``Year`` column).
    ctry : str
        Country to select (compared against the ``CountryName`` column).
    data : pandas.DataFrame, optional
        Frame with ``CountryName``, ``Year`` and ``Value`` columns. Defaults
        to the notebook-level ``df``.

    Returns
    -------
    float
        The first matching ``Value``; NaN when there is no row for that
        country/year combination.
    """
    if data is None:
        data = df
    # One combined mask and a named column replace the double year filter and
    # the positional iloc[0, 2] lookup.
    matches = data.loc[(data['Year'] == year) & (data['CountryName'] == ctry), 'Value']
    # "No data" is the only case mapped to NaN; a bare except would also hide
    # genuine errors such as a missing column or a keyboard interrupt.
    if matches.empty:
        return np.nan
    return matches.iloc[0]

def update_ctry(ctry, years, data=None): #creates a time series for a chosen indicator and country
    """Build the per-year series used by the chart for one country.

    Parameters
    ----------
    ctry : str
        Country whose values are extracted (matched against ``CountryName``).
    years : list
        Years to report, in the order they should appear in the result.
    data : pandas.DataFrame, optional
        Frame with ``CountryName``, ``Year`` and ``Value`` columns. Defaults
        to the notebook-level ``df``.

    Returns
    -------
    tuple
        ``(x, q1, q3, c, r1)`` where ``x`` is the index of years (named
        ``'Year'``) and the rest are Series on that index: the 1st quartile,
        the 3rd quartile, the chosen country's value, and the 3rd/1st
        quartile ratio. Years or country/year pairs without data hold NaN.
    """
    if data is None:
        data = df

    year_index = pd.Index(years, name='Year')
    res = pd.DataFrame(index=year_index)

    # One grouped pass over the data instead of re-filtering the frame for every
    # year and every series.
    value_by_year = data.groupby('Year')['Value']
    res['1stQ'] = value_by_year.quantile(0.25).reindex(year_index)
    res['3rdQ'] = value_by_year.quantile(0.75).reindex(year_index)
    res['3rdQ/1stQ'] = res['3rdQ'] / res['1stQ']

    # First row per year for the chosen country; years without a row become NaN.
    ctry_rows = data.loc[data['CountryName'] == ctry].drop_duplicates(subset='Year', keep='first')
    res[ctry] = ctry_rows.set_index('Year')['Value'].reindex(year_index)

    # The index deliberately stays as plain year numbers: the earlier
    # pd.to_datetime(...) call discarded its result and never changed it.
    x = res.index
    q1 = res['1stQ']
    q3 = res['3rdQ']
    c = res[ctry]
    r1 = res['3rdQ/1stQ']

    return (x, q1, q3, c, r1)

In [7]:
#Default selection used for the initial chart: United States
#x = years, q1/q3 = quartile series, c = country series, r1 = 3rd/1st quartile ratio
x, q1, q3, c, r1 = update_ctry(ctry='United States', years=years)

In [8]:
#create dropdown menu for bokeh chart
#`drop` is a list of (option value, label) pairs: the option value is the country's
#position in `countries` as a string, the label is the country name
df_drop = pd.DataFrame({0: countries})
df_drop['No'] = df_drop.index.astype(str)
drop = [(no, name) for name, no in df_drop.itertuples(index=False, name=None)]

In [12]:
from bokeh.layouts import column
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Select, LinearAxis, Range1d
from bokeh.io import output_notebook

output_notebook()
ctry = 'United States' #default
def bokeh_plot(doc):
    """Bokeh application: country line vs. global quartiles with a country selector.

    Reads the notebook-level ``x``, ``q1``, ``q3``, ``c``, ``r1`` (series for the
    default country), ``sel`` (chart title), ``drop`` (menu options) and ``ctry``
    (default country), and adds a Select menu plus the plot to ``doc``. Choosing
    a country in the menu replaces the country line's data and its legend label.

    NOTE(review): ``legend=``, ``plot_width`` and ``plot_height`` are the spellings
    used by older bokeh releases; newer ones use ``legend_label``/``width``/``height``.
    They are kept as-is because the installed version is not visible here.
    """
    source = ColumnDataSource(data={'x': x, 'y': c})

    plot = figure(x_axis_label='Date', y_axis_label='US Dollars', title=sel, plot_width=600, plot_height=400)

    #first y_axis: 1st/3rd quartiles, selected country
    plot.line(x='x', y='y', legend=ctry, source=source, line_width=2, color='blue')
    plot.line(x, q1, legend='1st quartile, global', line_width=2, color='red')
    plot.line(x, q3, legend='3rd quartile, global', line_width=2, color='green')
    plot.legend.location = "top_left"
    plot.title.text_font_size = "16px"

    #2nd y_axis: ratio
    plot.extra_y_ranges = {'Ratio': Range1d(start=0, end=20)}
    plot.add_layout(LinearAxis(y_range_name='Ratio', axis_label='Ratio'), 'right')
    plot.line(x, r1, legend='3rd to 1st quartile ratio (RHS)', y_range_name="Ratio", line_width=1, color='black', line_dash='dotted')

    #The country line is the first glyph given a legend entry, so it is the first
    #legend item; keep a handle on it so the label can follow the selection.
    #NOTE(review): relies on bokeh listing legend items in creation order - confirm.
    country_item = plot.legend[0].items[0]

    #Look the default option up by name instead of hard-coding its position ('236').
    value_by_label = {label: value for value, label in drop}
    label_by_value = {value: label for value, label in drop}
    menu = Select(title='Selected country:', value=value_by_label[ctry], options=drop)

    def callback(attr, old, new):
        """Swap the country line's data and legend label to the newly chosen option."""
        selected = str(label_by_value[new])
        years = df.Year.unique().tolist()
        new_x, _, _, new_c, _ = update_ctry(selected, years)
        source.data = {'x': new_x, 'y': new_c}
        #Without this the legend keeps showing the default country's name.
        country_item.label = {'value': selected}

    menu.on_change('value', callback)

    layout = column(menu, plot)
    doc.add_root(layout)

show(bokeh_plot)

In [13]:
#Report: 3rd to 1st quartile

year1, year2, year3 = 1960, 1999, 2014

def _quartile_ratio(year):
    """Return the 3rd-quartile / 1st-quartile ratio of Value for one year of `df`."""
    values = df[df.Year == year].Value
    return values.quantile(0.75) / values.quantile(0.25)

ratio1, ratio2, ratio3 = (_quartile_ratio(yr) for yr in (year1, year2, year3))

print ('3rd to 1st quartile ratios')
for yr, ratio in zip((year1, year2, year3), (ratio1, ratio2, ratio3)):
    print (yr, round(ratio, 1))

3rd to 1st quartile ratios
1960 5.5
1999 16.3
2014 8.4


In [14]:
#Report: chosen ctry to 1st quartile

ctry='United States'

year1, year2, year3 = 1960, 1999, 2014

def _ctry_to_q1_ratio(year):
    """Return the chosen country's value divided by the 1st quartile for one year of `df`."""
    in_year = df[df.Year == year]
    ctry_value = in_year[in_year.CountryName == ctry].iloc[0,2]
    return ctry_value / in_year.Value.quantile(0.25)

ratio1, ratio2, ratio3 = (_ctry_to_q1_ratio(yr) for yr in (year1, year2, year3))

print (ctry + ' to 1st quartile ratio')
for yr, ratio in zip((year1, year2, year3), (ratio1, ratio2, ratio3)):
    print (yr, round(ratio, 1))

United States to 1st quartile ratio
1960 25.9
1999 54.6
2014 29.6
