In [2]:
# Install the CFEDemands package with pip; --pre lets pip select pre-release versions.
# Presumably this package provides the `cfe` module imported in the next cell -- TODO confirm.
!pip install CFEDemands --pre

You are using pip version 9.0.3, however version 20.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.


In [3]:
import datascience as ds
import numpy as np
import plotly.offline as py
import plotly.graph_objs as go
import pandas as pd

from ipywidgets import interact, interactive, Dropdown, IntSlider
import ipywidgets as widgets
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.cm as cm
import cfe

In [18]:
# Calculates total expenditure on one product for a given survey year.
def product_expenditure(product, year):
    """Return the summed expenditure on ``product`` for one survey year.

    Parameters
    ----------
    product : object
        Column label of the product; it is converted with ``str`` before
        the column lookup.
    year : int or str
        2010, 2012, 2015 or 2018 to use the matching per-year frame
        (``x_2010`` ... ``x_2018``), or the string ``'All'`` to use the
        full expenditure frame ``x``.

    Returns
    -------
    The result of ``.sum()`` on the selected column.

    Raises
    ------
    ValueError
        If ``year`` is not one of the supported values. Previously this
        case fell through every branch and failed with an
        ``UnboundLocalError`` on ``data_frame``.
    """
    # The frames are module-level names defined in other cells; they are only
    # looked up inside the matching branch, so an unsupported year is rejected
    # without touching any of them.
    if year == 2010:
        data_frame = x_2010
    elif year == 2012:
        data_frame = x_2012
    elif year == 2015:
        data_frame = x_2015
    elif year == 2018:
        data_frame = x_2018
    elif year == 'All':
        data_frame = x
    else:
        raise ValueError(
            "Unsupported year {!r}; expected 2010, 2012, 2015, 2018 or 'All'".format(year)
        )
    return data_frame[str(product)].sum()

def product_total_expenditure(product):
    """Return the sum of the ``product`` column over the full expenditure frame ``x``.

    ``product`` is converted with ``str`` before it is used as the column label.
    """
    column_label = str(product)
    return x[column_label].sum()

def expenditure_percent_change(product):
    """Return the 2010 -> 2018 percent change in total expenditure on ``product``.

    The change is measured relative to the 2010 total, rounded to two
    decimals, and returned as a string with a trailing ``'%'``.
    """
    start_total = product_expenditure(product, 2010)
    end_total = product_expenditure(product, 2018)
    change_pct = ((end_total - start_total) / start_total) * 100
    return f"{round(change_pct, 2)!s}%"

def percent_change_graph(product):
    """Plot the period-over-period percent change in expenditure on ``product``.

    Totals are taken from ``product_expenditure`` for 2010, 2012, 2015 and
    2018. Each plotted point is the change from the previous survey year to
    the labelled year, expressed as a percentage of the PREVIOUS year's total
    and rounded to two decimals.

    Parameters
    ----------
    product : object
        Product column label, passed through to ``product_expenditure``.

    Returns
    -------
    The object returned by ``DataFrame.plot.line`` (x = 'Year',
    y = 'Percent Change').
    """
    survey_years = [2010, 2012, 2015, 2018]
    totals = [product_expenditure(product, year) for year in survey_years]

    # Percent change is (new - old) / old. The earlier code divided the 2nd
    # and 3rd points by the NEW value, which was inconsistent with the first
    # point and with expenditure_percent_change().
    percent_changes = [
        round(((current - previous) / previous) * 100, 2)
        for previous, current in zip(totals, totals[1:])
    ]

    graph_df = pd.DataFrame({
        'Product': [product] * len(percent_changes),
        'Year': survey_years[1:],
        'Percent Change': percent_changes,
    })
    return graph_df.plot.line(
        x='Year',
        y='Percent Change',
        title=str(product) + ' Expenditure Percent Change',
    )
def ln_graph(product):
    """Plot the natural log of total expenditure on ``product`` per survey year.

    One point is drawn for each of 2010, 2012, 2015 and 2018, using the
    totals returned by ``product_expenditure``.

    Returns the object produced by ``DataFrame.plot.line`` (x = 'Year',
    y = 'Log').
    """
    survey_years = (2010, 2012, 2015, 2018)
    yearly_totals = [product_expenditure(product, yr) for yr in survey_years]
    log_totals = [np.log(total) for total in yearly_totals]

    graph_df = pd.DataFrame({
        'Product': ds.make_array(*([product] * len(survey_years))),
        'Year': ds.make_array(*survey_years),
        'Log': ds.make_array(*log_totals),
    })
    plot_title = str(product) + ' Demand Log Change'
    return graph_df.plot.line(x='Year', y='Log', title=plot_title)

def gdp_per_capita_df():
    """Return a small reference table of hard-coded macro figures.

    The frame has one row per survey year (2010, 2012, 2015, 2018) and the
    columns 'Year', 'GDP per Capita (LCU)' and 'Inflation Rate' (the latter
    stored as percent strings).
    """
    # (year, GDP per capita, inflation rate) -- one tuple per row.
    rows = [
        (2010, 344549.92, '13.72%'),
        (2012, 432649.56, '12.22%'),
        (2015, 525444.83, '9.01%'),
        (2018, 659159.14, '12.1%'),
    ]
    years, gdp_values, inflation_rates = (list(column) for column in zip(*rows))
    return pd.DataFrame({
        'Year': years,
        'GDP per Capita (LCU)': gdp_values,
        'Inflation Rate': inflation_rates,
    })

def gdp_per_capita_percent_change():
    """Return the percent change in GDP per capita from the first to the last table row.

    The values come from ``gdp_per_capita_df()`` (rows 0 and 3); the result
    is rounded to two decimals and returned as a string ending in ``'%'``.
    """
    gdp_values = gdp_per_capita_df().loc[:, 'GDP per Capita (LCU)'].values
    start_gdp, end_gdp = gdp_values[0], gdp_values[3]
    growth_pct = ((end_gdp - start_gdp) / start_gdp) * 100
    return f"{round(growth_pct, 2)!s}%"

def gdp_per_capita_percent_change_graph():
    """Plot the period-over-period percent change in GDP per capita.

    Values are read from ``gdp_per_capita_df()``. Each plotted point is the
    change from the previous table row to the labelled year, expressed as a
    percentage of the PREVIOUS row's value and rounded to two decimals.

    Returns
    -------
    The object returned by ``DataFrame.plot.line`` (x = 'Year',
    y = 'Percent Change').
    """
    gdp_table = gdp_per_capita_df()
    years = gdp_table['Year'].tolist()
    gdp_values = gdp_table['GDP per Capita (LCU)'].tolist()

    # Percent change is (new - old) / old. The earlier code divided the 2nd
    # and 3rd points by the NEW value, understating growth and disagreeing
    # with the first point and with gdp_per_capita_percent_change().
    percent_changes = [
        round(((current - previous) / previous) * 100, 2)
        for previous, current in zip(gdp_values, gdp_values[1:])
    ]

    gdp_graph_df = pd.DataFrame({
        'GDP per Capita (LCU)': gdp_values[1:],
        'Year': years[1:],
        'Percent Change': percent_changes,
    })
    return gdp_graph_df.plot.line(
        x='Year',
        y='Percent Change',
        title='GDP per Capita (LCU) Percent Change',
    )

def interactable_estimate_graph(estimate_rate):
    """Scatter log actual against log predicted expenditures and add a reference line.

    Reads the module-level ``result`` object. Actual values are
    ``exp(result.y)`` summed over the 'm' and 'i' dimensions and multiplied
    by ``estimate_rate``; predicted values are
    ``result.get_predicted_expenditures()`` summed the same way. Zeros are
    replaced with NaN before taking logs.

    Returns the list produced by ``plt.plot`` for the reference line.
    """
    actual_totals = (
        np.exp(result.y)
        .sum(['m', 'i'])
        .to_dataframe('xbar')
        .replace(0, np.nan)
        .squeeze()
    )
    predicted_totals = (
        result.get_predicted_expenditures()
        .sum(['m', 'i'])
        .to_dataframe('xhat')
        .replace(0, np.nan)
        .squeeze()
    )
    log_frame = pd.DataFrame({
        'Actual': np.log(estimate_rate * actual_totals),
        'Predicted': np.log(predicted_totals),
    })
    log_frame.plot.scatter(x='Predicted', y='Actual')

    # Draw a y = x line using the current axis limits.
    # NOTE(review): both endpoints are taken with np.max (larger of the two
    # lower limits, larger of the two upper limits) -- confirm this is intended.
    x_low, x_high, y_low, y_high = plt.axis()
    line_start = np.max([x_low, y_low])
    line_end = np.max([x_high, y_high])
    return plt.plot([line_start, line_end], [line_start, line_end])

def household_mean_df():
    """Return a one-row frame of mean household composition.

    Means of the 'Girls', 'Boys', 'Women' and 'Men' columns are read from the
    module-level ``average_df`` (not defined in the cells shown here). The
    'Average Household Size' column is the sum of those four means.
    """
    group_names = ('Girls', 'Boys', 'Women', 'Men')
    # Each mean is wrapped in a one-element array so it becomes a one-row column.
    girls_avg, boys_avg, women_avg, men_avg = (
        ds.make_array(average_df[name].mean()) for name in group_names
    )
    stacked_means = ds.make_array(girls_avg, boys_avg, women_avg, men_avg)
    total_size = sum(stacked_means)
    return pd.DataFrame({
        'Average Girls': girls_avg,
        'Average Boys': boys_avg,
        'Average Women': women_avg,
        'Average Men': men_avg,
        'Average Household Size': total_size,
    })

def average_product_expenditure(product):
    """Return the mean of the ``product`` column of the expenditure frame ``x``.

    ``product`` is converted with ``str`` before it is used as the column label.
    """
    column_label = str(product)
    return x[column_label].mean()

def average_product_expenditure_per_person(product):
    """Return mean expenditure on ``product`` divided by the average household size.

    Parameters
    ----------
    product : object
        Product column label; converted with ``str`` before the lookup in ``x``.

    Returns
    -------
    The mean of the product column divided by the 'Average Household Size'
    value computed by ``household_mean_df()``.
    """
    product_average = x[str(product)].mean()
    # Use the computed household size directly. The earlier code truncated it
    # with int() and added a hard-coded 0.72, which is only right if the
    # fractional part of the mean happens to be exactly .72.
    # household_mean_df() builds its columns from one-element arrays, so the
    # first row holds the single value.
    household_average_size = float(
        household_mean_df()['Average Household Size'].iloc[0]
    )
    return product_average / household_average_size

In [6]:
# Spreadsheet key for each workbook read by this notebook, keyed by a short label.
HH_and_Expenditure_Sheet = {
    'P3 HH/E Characteristics': '1RpMK9oo6knSG_eJMTdUJltq937WlIqAK0IE5inTLvfQ',
}
# Second name bound to the same dict object.
Sheets = HH_and_Expenditure_Sheet

def dataframe_from_gspreadsheet(sheet_name, key):
    """Load one worksheet of a Google Sheets document as a DataFrame.

    Parameters
    ----------
    sheet_name : str
        Name of the worksheet. It is percent-encoded before being placed in
        the URL, so names containing spaces, '&', '#', '+', etc. are safe.
    key : str
        The document key that appears in the spreadsheet URL.

    Returns
    -------
    pandas.DataFrame
        The CSV export of the worksheet, with every column whose name starts
        with 'Unnamed' removed.
    """
    from urllib.parse import quote  # stdlib; imported here so the cell stands alone

    # The earlier code only replaced spaces with %20, so any other reserved
    # character in the sheet name (e.g. '&') corrupted the query string.
    url = 'https://docs.google.com/spreadsheets/d/{key}/gviz/tq?tqx=out:csv&sheet={sheet_name}&headers=1'.format(
        key=key, sheet_name=quote(sheet_name, safe=''))
    df = pd.read_csv(url)
    unnamed_columns = [col for col in df.columns if str(col).startswith('Unnamed')]
    return df.drop(columns=unnamed_columns)

# Load both worksheets from the same spreadsheet and add a constant 'm' column
# (set to 1 for every row) to each.
x = dataframe_from_gspreadsheet(
    "Nigeria Expenditures",
    HH_and_Expenditure_Sheet['P3 HH/E Characteristics'],
).assign(m=1)

z = dataframe_from_gspreadsheet(
    "Nigeria HH Characteristics",
    HH_and_Expenditure_Sheet['P3 HH/E Characteristics'],
).assign(m=1)

# Take logs of expenditures; call this y. Zeros become NaN first so the log
# never sees a zero.
y = np.log(x.replace(0, np.nan).set_index(['j', 't', 'm']))

# Index the characteristics on the same (j, t, m) keys as y.
z = z.set_index(['j', 't', 'm'])
x

Unnamed: 0,t,j,"(Cocoyam, Spinach, etc)",Agricultural eggs,Animal fat,Apples,Avocado pear,Baby milk powder,Bananas,Beef,...,Tea,Tomato puree(canned),Tomatoes,Watermelon,Wheat flour,White beans,Wild game meat,Yam flour,Yam-roots,m
0,2010,10001,,280.0,,,,,200.0,500.0,...,,150.0,150.0,,,600.0,,,1500.0,1
1,2010,10002,,280.0,,,,,180.0,1200.0,...,140.0,240.0,120.0,,,400.0,,,1200.0,1
2,2010,10003,,180.0,,,,,100.0,500.0,...,60.0,90.0,100.0,,,100.0,,,400.0,1
3,2010,10004,,180.0,,,,,100.0,500.0,...,30.0,60.0,100.0,,,100.0,,,400.0,1
4,2010,10006,,,,,,,300.0,300.0,...,650.0,,400.0,,,,,,400.0,1
5,2010,10008,,360.0,,,90.0,,300.0,,...,,350.0,100.0,,,400.0,,,400.0,1
6,2010,10009,,,,,,600.0,100.0,300.0,...,60.0,120.0,200.0,,,270.0,,,400.0,1
7,2010,10010,,,,,,,150.0,500.0,...,30.0,120.0,200.0,,,300.0,,,400.0,1
8,2010,10011,,,,,,,,500.0,...,,60.0,100.0,,,200.0,,,400.0,1
9,2010,10012,,,,,,1200.0,,500.0,...,,120.0,150.0,,,300.0,,,600.0,1


In [7]:
# Lists all products, splits the data into one frame per survey year, and
# defines the multiplication factors offered by the estimate graph.

# 'j', 't' and 'm' are the index columns used for y and z, not products, so
# they are kept out of the product list that feeds the dropdowns.
Products = [column for column in x.columns if column not in ('j', 't', 'm')]

# Select each survey year by its 't' value instead of hard-coded row
# positions. The earlier slices (0:4826, 4827:9585, ...) silently dropped the
# row at each boundary because a slice's end is exclusive, and they would
# break if the sheet's row order or length ever changed.
# NOTE(review): assumes 't' holds exactly the years 2010/2012/2015/2018 --
# confirm against the sheet.
x_2010 = x[x['t'] == 2010]
x_2012 = x[x['t'] == 2012]
x_2015 = x[x['t'] == 2015]
x_2018 = x[x['t'] == 2018]

# y is indexed by (j, t, m), so the year is read from the 't' index level.
_y_years = y.index.get_level_values('t')
y_2010 = y[_y_years == 2010]
y_2012 = y[_y_years == 2012]
y_2015 = y[_y_years == 2015]
y_2018 = y[_y_years == 2018]

Factor = [0.5, 0.75, 1 , 1.5, 2 , 3, 4, 32]

In [9]:
# Interactive widget: average household expenditure for the selected product.
interact(
    lambda Products: print(
        f"Average Household Expenditure: {average_product_expenditure(product=Products)!s}"
    ),
    Products=Dropdown(options=np.unique(Products)),
);
        

interactive(children=(Dropdown(description='Products', options=('(Cocoyam, Spinach, etc)', 'Agricultural eggs'…

In [10]:
# Interactive widget: total expenditure for the selected product.
interact(
    lambda Products: print(
        f"Total Expenditure: ${product_total_expenditure(product=Products)!s}"
    ),
    Products=Dropdown(options=np.unique(Products)),
);

interactive(children=(Dropdown(description='Products', options=('(Cocoyam, Spinach, etc)', 'Agricultural eggs'…

In [11]:
# Interactive widget: percent change in expenditure on the selected product
# between the 2010 and 2018 data.
interact(
    lambda Products: print(
        f"Percent Change: {expenditure_percent_change(product=Products)!s}"
    ),
    Products=Dropdown(options=np.unique(Products)),
);

interactive(children=(Dropdown(description='Products', options=('(Cocoyam, Spinach, etc)', 'Agricultural eggs'…

In [14]:
# Build the cfe.Result object from the log expenditures (y) and the household
# characteristics (z), call get_reduced_form() on it, then display result.delta
# as a table with the 'k' level unstacked into columns.
# NOTE(review): get_reduced_form() presumably runs the estimation that populates
# result.delta -- confirm against the cfe documentation.
result = cfe.Result(y=y,z=z)
result.get_reduced_form()
result.delta.to_dataframe().unstack('k')

In [15]:
# Display the values returned by result.get_beta() as a table (one 'beta' value per item i).
result.get_beta().to_dataframe()

Unnamed: 0_level_0,beta
i,Unnamed: 1_level_1
"(Cocoyam, Spinach, etc)",0.304406
Agricultural eggs,0.562895
Bananas,0.40131
Beef,0.35605
Beer (local and imported),0.375364
Bread,0.393981
Brown beans,0.369553
Chicken,0.343113
Chocolate drinks,0.624608
Cocoyam,0.308647


In [16]:
# Interactive estimate graph: the selected factor multiplies the actual
# expenditures before they are logged and plotted against the predictions.
def _show_estimate_graph(Factor):
    """Draw the estimate graph for the chosen factor.

    Returns None on purpose: printing or returning the matplotlib objects
    only adds their text repr to the output; the figure itself is rendered
    by the widget either way.
    """
    interactable_estimate_graph(estimate_rate=Factor)


interact(_show_estimate_graph, Factor=Dropdown(options=np.unique(Factor)));
        

interactive(children=(Dropdown(description='Factor', options=(0.5, 0.75, 1.0, 1.5, 2.0, 3.0, 4.0, 32.0), value…

In [19]:
# Interactive graph: natural log of total expenditure per survey year for the
# selected product.
def _show_ln_graph(Products):
    """Draw the log graph for the chosen product.

    Returns None on purpose: printing or returning the matplotlib Axes only
    adds its text repr to the output; the figure itself is rendered by the
    widget either way.
    """
    ln_graph(product=Products)


interact(_show_ln_graph, Products=Dropdown(options=np.unique(Products)));

interactive(children=(Dropdown(description='Products', options=('(Cocoyam, Spinach, etc)', 'Agricultural eggs'…