In [1]:
import pandas as pd
from statsmodels.stats.weightstats import ttest_ind

df = pd.read_parquet('data/hh.parquet')
services = pd.read_parquet('data/services.parquet')
food = pd.read_parquet('data/food.parquet')
health = pd.read_parquet('data/health.parquet')
edu = pd.read_parquet('data/education.parquet')

---

### Elegibility - households with LPG; treatment - got LPG subsidy

In [2]:
services['received_subsidy_lpg']

hhid
22300101    1
22300201    1
22300202    1
22300203    2
22300204    1
           ..
49999314    2
49999315    2
49999316    2
49999317    1
49999318    2
Name: received_subsidy_lpg, Length: 261953, dtype: int64

In [3]:
df = df[df['energy_source_cooking'] == 'LPG']
services = services.loc[df.index]

df['treat'] = services['received_subsidy_lpg'] == 1

ix = df.index
for i in [food, health, edu]:
    ix = i.index.intersection(ix)
df = df.loc[ix]

df['FOOD'] = food.loc[df.index].sum(axis=1)
df['EDUCATION'] = edu.loc[df.index].sum(axis=1)
df['HEALTH'] = health.loc[df.index].sum(axis=1)
df.head()

Unnamed: 0_level_0,sector,nss_region,multiplier,family_size,n_children,n_schoolchildren,n_elderly,n_school_meals,employed_annual,nco_2015,...,hoh_religion,caste,energy_source_cooking,energy_source_lighting,rationcard_type,has_benefited_from_pmgky,treat,FOOD,EDUCATION,HEALTH
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
22300302,urban,341,28599,5,0,0,2,0.0,yes,112.0,...,Hinduism,other backward class (OBC),LPG,electricity (incl. generated by solar or wind ...,Above Poverty Line (APL),no,True,19071.666667,550.0,133.333333
22300305,urban,341,28599,8,2,2,2,0.0,yes,112.0,...,Jainism,other backward class (OBC),LPG,electricity (incl. generated by solar or wind ...,Above Poverty Line (APL),no,True,27221.0,175.0,241.666667
22300312,urban,341,28599,6,2,0,2,0.0,yes,112.0,...,Hinduism,other backward class (OBC),LPG,electricity (incl. generated by solar or wind ...,Above Poverty Line (APL),no,True,21395.0,316.666667,141.666667
22300313,urban,341,28599,3,1,1,0,0.0,yes,143.0,...,Hinduism,other backward class (OBC),LPG,electricity (incl. generated by solar or wind ...,Above Poverty Line (APL),no,True,15562.666667,233.333333,4000.0
22301301,urban,341,16099,4,1,2,0,22.0,yes,112.0,...,Hinduism,other backward class (OBC),LPG,electricity (incl. generated by solar or wind ...,Below Poverty Line (BPL),no,True,9921.0,225.0,70.833333


In [4]:
deltas = df.groupby(['sector', 'treat']).apply(lambda x: (x[['FOOD', 'EDUCATION', 'HEALTH']] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())
deltas

Unnamed: 0_level_0,Unnamed: 1_level_0,FOOD,EDUCATION,HEALTH
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rural,False,10783.316782,1000.952985,433.425004
rural,True,10304.244202,1077.460301,388.674376
urban,False,12302.77465,1214.521744,434.652115
urban,True,11456.505997,1308.326017,402.74417


In [5]:
def ttest(x, cols, alternative='two-sided'):
    trix = x[x['treat']].index
    crix = x[~x['treat']].index
    pvals = {}
    for col in cols:
        _, p, _ = ttest_ind(x.loc[crix, col], x.loc[trix, col], weights=(x.loc[crix, 'multiplier'], x.loc[trix, 'multiplier']), alternative=alternative)
        pvals[col] = p
    return pd.Series(pvals)
df.groupby('sector').apply(lambda x: ttest(x, ['FOOD', 'EDUCATION', 'HEALTH']))

Unnamed: 0_level_0,FOOD,EDUCATION,HEALTH
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
rural,0.0,0.0,0.0
urban,0.0,0.0,0.0


In [6]:
edf = edu.loc[df.index]
edf['sector'] = df['sector']
edf['multiplier'] = df['multiplier']
edf['treat'] = df['treat']
edf.head()

Unnamed: 0_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
22300302,375.0,,66.666667,,,108.333333,urban,28599,True
22300305,,,150.0,25.0,,,urban,28599,True
22300312,208.333333,,41.666667,,,66.666667,urban,28599,True
22300313,125.0,,41.666667,,,66.666667,urban,28599,True
22301301,,,41.666667,183.333333,,,urban,16099,True


In [7]:
cols = edf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
edf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,Unnamed: 1_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
rural,False,181.454195,6.45499,107.155802,556.564321,117.206238,32.117439
rural,True,195.943441,6.303216,120.719899,546.368484,184.091256,24.034005
urban,False,233.017976,8.945897,116.784912,618.747775,190.698963,46.326221
urban,True,251.554819,7.474849,126.16581,607.593996,277.255615,38.280928


In [8]:
edf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
fdf = food.loc[df.index]
fdf['sector'] = df['sector']
fdf['multiplier'] = df['multiplier']
fdf['treat'] = df['treat']
fdf.head()

Unnamed: 0_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
22300302,2100.0,921.0,156.0,2643.333333,1486.333333,2816.666667,2513.333333,975.0,1200.333333,1573.0,1473.333333,1213.333333,urban,28599,True
22300305,2335.0,1075.0,320.0,3640.0,2747.333333,1885.0,4983.333333,3033.333333,2357.333333,901.333333,1646.666667,2296.666667,urban,28599,True
22300312,2235.0,970.0,172.0,2556.666667,2201.333333,1841.666667,5070.0,1343.333333,1178.666667,1529.666667,1126.666667,1170.0,urban,28599,True
22300313,1455.0,689.0,163.0,1906.666667,1495.0,1863.333333,2808.0,1161.333333,897.0,936.0,1083.333333,1105.0,urban,28599,True
22301301,1515.0,458.0,122.0,888.333333,1105.0,1040.0,1863.333333,693.333333,641.333333,641.333333,173.333333,780.0,urban,16099,True


In [10]:
cols = fdf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
fdf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,Unnamed: 1_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
rural,False,1126.300339,464.270172,202.767808,2146.789258,1273.40712,703.852694,1114.01337,608.877947,722.715537,619.949302,995.402019,804.971216
rural,True,1132.638433,459.172234,181.156115,1920.768315,1282.015771,654.377691,1121.928621,586.176392,671.945959,579.407206,960.481669,754.175795
urban,False,1225.452929,454.647028,195.051739,2586.013514,1291.629828,861.914723,1052.081047,587.536204,733.406543,859.044144,1355.982866,1100.014083
urban,True,1257.822365,444.87161,167.132154,2118.6345,1317.788106,786.109385,1306.384896,573.068969,680.462212,709.153508,1175.963021,919.115272


In [13]:
fdf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
hdf = health.loc[df.index]
hdf['sector'] = df['sector']
hdf['multiplier'] = df['multiplier']
hdf['treat'] = df['treat']
hdf.head()

item_code,410,411,412,413,414,420,421,422,423,424,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
22300302,,,,,,100.0,,33.333333,,,urban,28599,True
22300305,,,,,,208.333333,,33.333333,,,urban,28599,True
22300312,,,,,,108.333333,,33.333333,,,urban,28599,True
22300313,1250.0,375.0,2083.333333,208.333333,,66.666667,,16.666667,,,urban,28599,True
22301301,,,,,,54.166667,,16.666667,,,urban,16099,True


In [15]:
cols = hdf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
hdf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,item_code,410,411,412,413,414,420,421,422,423,424
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
rural,False,117.677013,61.300292,62.800372,76.706319,27.605683,65.220341,7.138125,12.094292,0.431874,2.450693
rural,True,103.219823,51.106231,58.067028,73.482343,20.881345,60.979399,8.134439,10.296485,0.429572,2.077709
urban,False,106.330091,61.136936,66.344246,80.709912,25.319098,68.689864,7.986618,14.730733,0.532906,2.871711
urban,True,94.095501,56.769302,55.715626,80.119535,22.501003,70.648463,7.909721,12.710707,0.633854,1.640457


In [16]:
hdf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,410,411,412,413,414,420,421,422,423,424
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.83099e-137,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


---

### Treatment = free electricity; eligibility, households with electricity

In [17]:
services['received_free_electricity'].unique()

array([2, 1])

In [18]:
df = pd.read_parquet('data/hh.parquet')
services = pd.read_parquet('data/services.parquet')
food = pd.read_parquet('data/food.parquet')
health = pd.read_parquet('data/health.parquet')
edu = pd.read_parquet('data/education.parquet')

df = df[df['energy_source_lighting'].str.startswith('electricity')]

ix = df.index.intersection(services.index)
df, services = df.loc[ix], services.loc[ix]

df['treat'] = services['received_free_electricity'] == 1

ix = df.index
for i in [food, health, edu]:
    ix = i.index.intersection(ix)
df = df.loc[ix]

df['FOOD'] = food.loc[df.index].sum(axis=1)
df['EDUCATION'] = edu.loc[df.index].sum(axis=1)
df['HEALTH'] = health.loc[df.index].sum(axis=1)

In [19]:
deltas_ration = df.groupby(['sector', 'treat']).apply(lambda x: (x[['FOOD', 'EDUCATION', 'HEALTH']] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())
deltas_ration

Unnamed: 0_level_0,Unnamed: 1_level_0,FOOD,EDUCATION,HEALTH
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rural,False,9668.830559,847.809294,347.000672
rural,True,10441.379775,866.006397,370.846148
urban,False,11647.460165,1231.538564,410.731982
urban,True,12166.370237,1158.257402,440.722818


In [20]:
df.groupby('sector').apply(lambda x: ttest(x, ['FOOD', 'EDUCATION', 'HEALTH']))

Unnamed: 0_level_0,FOOD,EDUCATION,HEALTH
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
rural,0.0,0.0,0.0
urban,0.0,0.0,0.0


In [21]:
edf = edu.loc[df.index]
edf['sector'] = df['sector']
edf['multiplier'] = df['multiplier']
edf['treat'] = df['treat']
edf.head()

Unnamed: 0_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
22300302,375.0,,66.666667,,,108.333333,urban,28599,False
22300305,,,150.0,25.0,,,urban,28599,False
22300312,208.333333,,41.666667,,,66.666667,urban,28599,False
22300313,125.0,,41.666667,,,66.666667,urban,28599,False
22301301,,,41.666667,183.333333,,,urban,16099,False


In [22]:
cols = edf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
edf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,Unnamed: 1_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
rural,False,150.983145,5.156507,94.354093,387.353915,191.023861,18.937772
rural,True,148.190821,6.250337,118.417354,494.751959,66.305731,32.090195
urban,False,236.466095,7.3535,121.143798,577.807462,243.000408,45.767301
urban,True,244.744621,10.72522,124.894999,598.243544,132.709299,46.93972


In [23]:
edf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
fdf = food.loc[df.index]
fdf['sector'] = df['sector']
fdf['multiplier'] = df['multiplier']
fdf['treat'] = df['treat']
fdf.head()

Unnamed: 0_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
22300302,2100.0,921.0,156.0,2643.333333,1486.333333,2816.666667,2513.333333,975.0,1200.333333,1573.0,1473.333333,1213.333333,urban,28599,False
22300305,2335.0,1075.0,320.0,3640.0,2747.333333,1885.0,4983.333333,3033.333333,2357.333333,901.333333,1646.666667,2296.666667,urban,28599,False
22300312,2235.0,970.0,172.0,2556.666667,2201.333333,1841.666667,5070.0,1343.333333,1178.666667,1529.666667,1126.666667,1170.0,urban,28599,False
22300313,1455.0,689.0,163.0,1906.666667,1495.0,1863.333333,2808.0,1161.333333,897.0,936.0,1083.333333,1105.0,urban,28599,False
22301301,1515.0,458.0,122.0,888.333333,1105.0,1040.0,1863.333333,693.333333,641.333333,641.333333,173.333333,780.0,urban,16099,False


In [25]:
cols = fdf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
fdf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,Unnamed: 1_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
rural,False,1063.722725,428.408157,184.256231,1763.322041,1277.150314,571.151379,1068.980119,605.280203,665.307665,475.841094,872.355116,693.055514
rural,True,1160.043186,417.91872,201.612692,2154.728579,1163.217224,625.80238,904.814725,536.818336,682.632355,612.427229,1256.094884,725.269466
urban,False,1160.750198,423.727923,178.400241,2309.127075,1244.823945,798.825029,1042.88895,574.467099,676.488997,779.503672,1434.989964,1023.46707
urban,True,1224.754043,445.65089,175.3518,2412.241672,1232.639493,869.31094,1144.353905,515.162987,698.789893,878.118563,1581.174468,988.821583


In [26]:
fdf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
hdf = health.loc[df.index]
hdf['sector'] = df['sector']
hdf['multiplier'] = df['multiplier']
hdf['treat'] = df['treat']
hdf.head()

item_code,410,411,412,413,414,420,421,422,423,424,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
22300302,,,,,,100.0,,33.333333,,,urban,28599,False
22300305,,,,,,208.333333,,33.333333,,,urban,28599,False
22300312,,,,,,108.333333,,33.333333,,,urban,28599,False
22300313,1250.0,375.0,2083.333333,208.333333,,66.666667,,16.666667,,,urban,28599,False
22301301,,,,,,54.166667,,16.666667,,,urban,16099,False


In [28]:
cols = hdf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
hdf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,item_code,410,411,412,413,414,420,421,422,423,424
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
rural,False,94.873296,46.993553,45.352016,62.362947,20.484061,58.747835,6.512813,9.172506,0.411877,2.089771
rural,True,96.580406,53.393059,53.95812,65.384734,23.521128,57.919529,7.375221,10.828622,0.339232,1.546097
urban,False,95.800224,58.118197,60.955615,79.440296,22.728177,69.217542,7.884871,13.534963,0.566695,2.485403
urban,True,114.902247,60.809009,64.169723,83.498673,26.836601,65.517427,7.74482,14.75762,0.466093,2.020606


In [29]:
hdf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,410,411,412,413,414,420,421,422,423,424
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
