In [274]:
import json
import pandas as pd
import numpy as np

Edit this next cell to choose a different country / year report:

In [296]:
# Read the cached JSON report for the chosen country / year into `d`.
with open("../jsoncache/BGR_3_2001.json", "r") as f:
    d = json.load(f)

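The next few conversions do not reconcile: converting with the `ppp` input field fails to reproduce the given LCU poverty line and the given PPP sample mean, which suggests the PPP data field is wrong.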

In [297]:
# Sanity-check the poverty line: scale the daily PPP line to a monthly one,
# then convert it to local currency (LCU) with the supplied PPP factor.
DAYS_PER_MONTH = 30.4167  # ~365 / 12
report_inputs = d['inputs']

line_month_ppp_calc = DAYS_PER_MONTH * report_inputs['line_day_ppp']
line_month_ppp_given = report_inputs['line_month_ppp']
print("Poverty line (PPP):", line_month_ppp_calc, "=", line_month_ppp_given)

# PPP conversion factor exactly as given in the report inputs.
ppp = report_inputs['ppp']

line_month_lcu_calc = ppp * line_month_ppp_calc
line_month_lcu_given = report_inputs['line_month_lcu']
print("Poverty line (LCU):", line_month_lcu_calc, "=", line_month_lcu_given)

Poverty line (PPP): 57.791729999999994 = 57.7917
Poverty line (LCU): 44.214603287639996 = 25.5526


In [298]:
# Cross-check the sample mean: the LCU mean divided by the supplied PPP factor
# is compared against the PPP mean stated in the report.
sample_stats = d['sample']
sample_mean_ppp_calc = sample_stats['mean_month_lcu'] / ppp
sample_mean_ppp_given = sample_stats['mean_month_ppp']
print("Data mean (PPP):", sample_mean_ppp_calc, "=", sample_mean_ppp_given)

# Back out the PPP factor implied by the two given means, for comparison
# with the supplied one.
implied_ppp = sample_stats['mean_month_lcu'] / sample_mean_ppp_given
print("Implied PPP:", implied_ppp, "cf.", ppp)

Data mean (PPP): 174.94784777300842 = 302.719
Implied PPP: 0.44214931999643237 cf. 0.765068


The total-wealth check below, however, only matches when the sample mean in LCU is converted using the given `ppp` field (rather than the implied PPP), which is odd.

In [299]:
# Check total wealth: effective population size times the calculated PPP mean
# (the mean obtained from the LCU mean and the supplied PPP factor).
sample_block = d['sample']
pop_N = sample_block['effective_pop_N']
total_wealth_calc = sample_mean_ppp_calc * pop_N
total_wealth_given = sample_block['effective_pop_wealth']
print("Total wealth:", total_wealth_calc, '=', total_wealth_given)

Total wealth: 1304935.9965388698 = 1304940.0


Minimum and maximum can't be checked, so proceed to the poverty statistics. Gini is calculated directly from $L(p)$.

In [303]:
# Load the Lorenz curve as a table of points with columns p and L
# (presumably cumulative population share and cumulative income share).
lorenz = pd.DataFrame(d['lorenz'])
# Keyword form: the positional `axis` argument to drop() is rejected by pandas >= 2.0.
lorenz = lorenz.drop(columns="index")
# Add the origin (p=0, L=0). DataFrame.append() no longer exists in
# pandas >= 2.0, so use pd.concat.
origin = pd.DataFrame({"L": [0.0], "p": [0.0]})
lorenz = pd.concat([lorenz, origin]).sort_values("p")
# Give the sorted table a clean 0..n-1 index so row labels and row positions
# coincide; label-based and position-based lookups on it then agree.
lorenz = lorenz.reset_index(drop=True)

# Forward differences between consecutive points. The last row has no
# successor, so its dp / dL (and every column derived from them) is NaN.
lorenz['dp'] = lorenz.p.shift(-1) - lorenz.p
lorenz['dL'] = lorenz.L.shift(-1) - lorenz.L
lorenz['dLdp'] = lorenz.dL / lorenz.dp

# Now, F(y) = inverse of Q(p): the income level on each segment is the
# slope of the Lorenz curve scaled by the mean.
lorenz['y'] = lorenz.dLdp * sample_mean_ppp_given

# Calc and compare Ginis: G = 1 - (area under L(p)) / 0.5, with the area
# taken by the trapezoid rule over all segments (last NaN row excluded).
L_next = lorenz.L.shift(-1)
area_under_lorenz = sum(0.5 * lorenz.dp[:-1] * (L_next[:-1] + lorenz.L[:-1]))
G_calc = 1 - area_under_lorenz / 0.5
G_given = d['dist']['Gini']
print("Gini:",G_calc, "=", G_given)

Gini: 0.326692529746 = 0.326822


In [304]:
# Headcount - this is very approx with no interpolation: take p at the Lorenz
# point whose income level y is closest to the poverty line.
# idxmin() returns the row *label* and .loc looks that label up, so the right
# row is selected whatever the index looks like. (Series.argmin() returns an
# integer *position* in pandas >= 1.0, which is wrong for a label lookup
# whenever the index is not exactly 0..n-1.)
nearest_line_row = (lorenz.y - line_month_ppp_given).abs().idxmin()
HC_calc = lorenz.loc[nearest_line_row, 'p']
HC_given = d['dist']['HC']
print("HC:",HC_calc,"=",HC_given)

HC: 0.02011 = 0.0199759


In [305]:
# Poverty gap: relative shortfall of y below the poverty line, floored at 0
# for points above the line. clip() is used instead of the chained
# `lorenz.PG[mask] = 0`, which is not guaranteed to write back to the frame
# (and does not under pandas copy-on-write). NaN rows stay NaN either way.
lorenz['PG'] = ((line_month_ppp_given - lorenz.y) / line_month_ppp_given).clip(lower=0)

# Weight each segment's gap by its population share dp; the last row is
# dropped because its dp / y are NaN.
PG_calc = sum(lorenz.PG[:-1] * lorenz.dp[:-1])
PG_given = d['dist']['PG']
print("PG:",PG_calc,"=",PG_given)

# Poverty gap squared (FGT2). Assign with [] so this becomes a real column;
# `lorenz.FGT2 = ...` would only set a Python attribute on the DataFrame.
lorenz['FGT2'] = lorenz.PG * lorenz.PG
FGT2_calc = sum(lorenz.FGT2[:-1] * lorenz.dp[:-1])
FGT2_given = d['dist']['FGT2']
print("FGT2:",FGT2_calc,"=",FGT2_given)

PG: 0.00418877947006 = 0.00418911
FGT2: 0.0012046641454 = 0.00136586


In [306]:
# Median: income level y at the Lorenz point whose p is closest to 0.5
# (no interpolation). idxmin() + .loc selects the row by label, which stays
# correct for any index; argmin() returns a position in pandas >= 1.0 and
# would pick the wrong row when labels and positions differ.
median_row = (lorenz.p - 0.5).abs().idxmin()
median_calc = lorenz.loc[median_row, 'y']
median_given = d['dist']['median_ppp']
print("Median:",median_calc,"=",median_given)

Median: 261.794761431 = 259.852


In [307]:
# Mean log deviation (MLD): dp-weighted sum of log(mean) - log(y).
# Assign with [] so LD is a real column; `lorenz.LD = ...` would only set a
# Python attribute on the DataFrame.
lorenz['LD'] = np.log(sample_mean_ppp_given) - np.log(lorenz.y)
# The last row is dropped because its dp / y are NaN.
MLD_calc = sum(lorenz.LD[:-1] * lorenz.dp[:-1])
MLD_given = d['dist']['MLD']
print("MLD",MLD_calc, "=",MLD_given)

# Watts index: dp-weighted sum of log(line) - log(y), floored at 0 for
# points above the poverty line. clip() replaces the masked in-place zeroing
# and keeps NaN rows as NaN.
lorenz['Watts'] = (np.log(line_month_ppp_given) - np.log(lorenz.y)).clip(lower=0)
Watts_calc = sum(lorenz.Watts[:-1] * lorenz.dp[:-1])
Watts_given = d['dist']['Watt']
print("Watts",Watts_calc,"=",Watts_given)

MLD 0.188236966696 = 0.188992
Watts 0.00496315954549 = 0.00514352
