In [2]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
import scipy.stats as scs

!whoami
!date

ndbs
Sat Jun 27 00:14:41 PDT 2020


## Create a dataframe with flour fortification coverage data

In [6]:
# Row labels of the table: every (coverage level, statistic) pair, level-major.
coverage_levels = ['eats_fortified', 'eats_fortifiable', 'eats_vehicle']
coverage_stats = ['mean', 'lower', 'upper']

# Each location is written as one row of nine values, ordered
# (mean, lower, upper) for eats_fortified, then eats_fortifiable, then
# eats_vehicle -- the same layout as the transposed view displayed below.
# The trailing .T stores the frame with locations as columns, which is the
# orientation the rest of the notebook works with.
flour_df = pd.DataFrame.from_dict(
    {
        'Ethiopia':          [1.0, 0.0, 10.0, 15.0, 10.0, 20.0, 28.0, 23.0, 33.0],
        'India (Rajasthan)': [6.3, 4.8, 7.9, 7.1, 5.6, 9.1, 83.2, 79.5, 86.5],
        'Nigeria (Kano)':    [22.7, 20.0, 25.5, 83.8, 81.4, 86.2, 83.9, 81.5, 86.3],
        'Nigeria (Lagos)':   [5.4, 3.8, 6.9, 13.8, 11.5, 16.1, 14.2, 11.8, 16.5],
    },
    orient='index',
    columns=pd.MultiIndex.from_product([coverage_levels, coverage_stats]),
).T
# Display with one row per location, which is easier to read.
flour_df.T

Unnamed: 0_level_0,eats_fortified,eats_fortified,eats_fortified,eats_fortifiable,eats_fortifiable,eats_fortifiable,eats_vehicle,eats_vehicle,eats_vehicle
Unnamed: 0_level_1,mean,lower,upper,mean,lower,upper,mean,lower,upper
Ethiopia,1.0,0.0,10.0,15.0,10.0,20.0,28.0,23.0,33.0
India (Rajasthan),6.3,4.8,7.9,7.1,5.6,9.1,83.2,79.5,86.5
Nigeria (Kano),22.7,20.0,25.5,83.8,81.4,86.2,83.9,81.5,86.3
Nigeria (Lagos),5.4,3.8,6.9,13.8,11.5,16.1,14.2,11.8,16.5


### Create columns for India and Nigeria based on subnational estimates

This is easier to do with countries as columns, but then we transpose the dataframe to view it.

In [7]:
# Weights used to combine subnational estimates into national columns.
# Rajasthan is the only Indian location in the table, so it is copied as-is.
# NOTE(review): the 4/25 vs 21/25 Kano/Lagos split is presumably a
# population-share weighting -- TODO confirm and cite the source.
# NOTE(review): "rajastan" is a misspelling of Rajasthan; the name is kept
# because other cells may refer to it.
rajastan_weight = 1
kano_weight = 4/25
lagos_weight = 21/25

# National estimate = weighted sum of the subnational columns, applied to
# every row (mean, lower and upper alike).
flour_df['India'] = flour_df['India (Rajasthan)'] * rajastan_weight
flour_df['Nigeria'] = (
    flour_df['Nigeria (Kano)'] * kano_weight
    + flour_df['Nigeria (Lagos)'] * lagos_weight
)
# Transposed view: one row per location, including the two new ones.
flour_df.T

Unnamed: 0_level_0,eats_fortified,eats_fortified,eats_fortified,eats_fortifiable,eats_fortifiable,eats_fortifiable,eats_vehicle,eats_vehicle,eats_vehicle
Unnamed: 0_level_1,mean,lower,upper,mean,lower,upper,mean,lower,upper
Ethiopia,1.0,0.0,10.0,15.0,10.0,20.0,28.0,23.0,33.0
India (Rajasthan),6.3,4.8,7.9,7.1,5.6,9.1,83.2,79.5,86.5
Nigeria (Kano),22.7,20.0,25.5,83.8,81.4,86.2,83.9,81.5,86.3
Nigeria (Lagos),5.4,3.8,6.9,13.8,11.5,16.1,14.2,11.8,16.5
India,6.3,4.8,7.9,7.1,5.6,9.1,83.2,79.5,86.5
Nigeria,8.168,6.392,9.876,25.0,22.684,27.316,25.352,22.952,27.668


In [36]:
# Show the frame in its stored orientation: (coverage level, statistic) rows
# and one column per location, including the derived India and Nigeria columns.
flour_df

Unnamed: 0,Unnamed: 1,Ethiopia,India (Rajasthan),Nigeria (Kano),Nigeria (Lagos),India,Nigeria
eats_fortified,mean,1.0,6.3,22.7,5.4,6.3,8.168
eats_fortified,lower,0.0,4.8,20.0,3.8,4.8,6.392
eats_fortified,upper,10.0,7.9,25.5,6.9,7.9,9.876
eats_fortifiable,mean,15.0,7.1,83.8,13.8,7.1,25.0
eats_fortifiable,lower,10.0,5.6,81.4,11.5,5.6,22.684
eats_fortifiable,upper,20.0,9.1,86.2,16.1,9.1,27.316
eats_vehicle,mean,28.0,83.2,83.9,14.2,83.2,25.352
eats_vehicle,lower,23.0,79.5,81.5,11.8,79.5,22.952
eats_vehicle,upper,33.0,86.5,86.3,16.5,86.5,27.668


## Define and test the coverage function

In [23]:
t_start = 1 # coverage starts one year after start of sim
r = 0.1 # unfortified food decreases by 10%/year after initial jump

def coverage(t, a, b, c, t_start=t_start, r=r):
    """Return fortification coverage at time(s) ``t`` (years since sim start).

    Coverage is ``a`` before ``t_start``. At ``t_start`` it jumps to ``b``
    and from then on approaches ``c``, with the remaining gap to ``c``
    shrinking by a factor ``(1 - r)`` per year::

        coverage(t) = a                                           if t < t_start
                    = b + (c - b) * (1 - (1 - r)**(t - t_start))  otherwise

    Parameters
    ----------
    t : scalar or ndarray
        Time(s) at which to evaluate coverage.
    a, b, c : float
        Coverage before the start, coverage immediately at ``t_start``, and
        the limiting coverage. In this notebook they are the eats_fortified,
        eats_fortifiable and eats_vehicle values respectively.
    t_start : float, optional
        Time at which coverage jumps from ``a`` to ``b``.
    r : float, optional
        Yearly fractional reduction of the gap between coverage and ``c``.
        The defaults for ``t_start`` and ``r`` are captured from the
        module-level values when the function is defined; reassigning those
        globals later does not change the defaults.

    Returns
    -------
    ndarray or numpy scalar
        An array shaped like ``t`` for array input, or a scalar for scalar
        input (matching what the scalar ``a if t < t_start else ...`` form
        would give).
    """
    # np.where evaluates both branches for every element and then selects,
    # so the growth term is also computed (and discarded) where t < t_start.
    values = np.where(t < t_start, a, b + (c-b)*(1-(1-r)**(t-t_start)))
    # For scalar t, np.where returns a 0-d array; indexing with an empty
    # tuple unwraps it to a scalar and leaves real arrays unchanged.
    return values[()]

In [25]:
# Check that we can exponentiate an array... yep!
# The times are drawn from a seeded generator so that this cell, and the later
# cell that reuses `t`, give the same numbers on every Restart & Run All.
# RandomState is the same legacy generator family that np.random.normal uses.
t = np.random.RandomState(0).normal(2, size=10)
2**t

array([4.0907594 , 6.04900731, 3.14331687, 2.07916684, 5.36444357,
       3.65382522, 3.89639433, 6.93108424, 2.87927119, 3.78635889])

In [16]:
# Use an index slice to easily access just the means - these are a,b,c
# idx[:, 'mean'] keeps every coverage level (first index level) but only the
# 'mean' statistic (second index level); the result is a 3-element Series in
# the order eats_fortified, eats_fortifiable, eats_vehicle.
idx = pd.IndexSlice
flour_df.loc[idx[:,'mean'], 'India']

eats_fortified    mean     6.3
eats_fortifiable  mean     7.1
eats_vehicle      mean    83.2
Name: India, dtype: float64

In [20]:
# Use argument unpacking to assign a, b, and c in the function call
# The three 'mean' values for India are passed positionally after t=2, so
# they land in a, b, c in index order.
coverage(2, *flour_df.loc[idx[:,'mean'], 'India'])

14.709999999999999

In [26]:
# Apply the coverage function to an array of times
# `t` is the array of 10 random times created in the exponentiation check above.
coverage(t, *flour_df.loc[idx[:,'mean'], 'India'])

array([14.94317919, 18.88330128, 12.15433106,  7.54772647, 17.69836523,
       13.76111387, 14.43624901, 20.20040356, 11.20045106, 14.13617163])

In [28]:
# Compute the coverage at t = [0,1,2,3,4] to see if the values look right
# Should have coverage(0) = 6.3, coverage(1) = 7.1 - yes, looks good
# (t=0 is before t_start so it returns a; at t=t_start the growth term is zero so it returns b)
coverage(np.arange(5), *flour_df.loc[idx[:,'mean'], 'India'])

array([ 6.3   ,  7.1   , 14.71  , 21.559 , 27.7231])

In [35]:
# See what we get if we plug in the 'lower' values instead
# Same call as for the means, but a, b, c are taken from the 'lower' rows for India.
coverage(np.arange(5), *flour_df.loc[idx[:,'lower'], 'India'])

array([ 4.8   ,  5.6   , 12.99  , 19.641 , 25.6269])

## Write a function to plot the coverage function