In [1]:
import pandas as pd
from statsmodels.stats.weightstats import ttest_ind

df = pd.read_parquet('data/hh.parquet')
services = pd.read_parquet('data/services.parquet')
food = pd.read_parquet('data/food.parquet')
health = pd.read_parquet('data/health.parquet')
edu = pd.read_parquet('data/education.parquet')

# eligibility = hh with enrolments, children of schoolgoing age
services = services[services['is_hhmem_edu'] == 1]
df = df[df['n_schoolchildren'] > 0]
ix = df.index.intersection(services.index)
df, services = df.loc[ix], services.loc[ix]
df['treat'] = services['received_fee_waiver'] == 1

ix = df.index
for i in [food, health, edu]:
    ix = i.index.intersection(ix)
df = df.loc[ix]

df['FOOD'] = food.loc[df.index].sum(axis=1)
df['EDUCATION'] = edu.loc[df.index].sum(axis=1)
df['HEALTH'] = health.loc[df.index].sum(axis=1)
df.head()

Unnamed: 0_level_0,sector,nss_region,multiplier,family_size,n_children,n_schoolchildren,n_elderly,n_school_meals,employed_annual,nco_2015,...,hoh_religion,caste,energy_source_cooking,energy_source_lighting,rationcard_type,has_benefited_from_pmgky,treat,FOOD,EDUCATION,HEALTH
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
22300305,urban,341,28599,8,2,2,2,0.0,yes,112.0,...,Jainism,other backward class (OBC),LPG,electricity (incl. generated by solar or wind ...,Above Poverty Line (APL),no,False,27221.0,175.0,241.666667
22300313,urban,341,28599,3,1,1,0,0.0,yes,143.0,...,Hinduism,other backward class (OBC),LPG,electricity (incl. generated by solar or wind ...,Above Poverty Line (APL),no,False,15562.666667,233.333333,4000.0
22301301,urban,341,16099,4,1,2,0,22.0,yes,112.0,...,Hinduism,other backward class (OBC),LPG,electricity (incl. generated by solar or wind ...,Below Poverty Line (BPL),no,False,9921.0,225.0,70.833333
22301304,urban,341,16099,4,1,1,0,0.0,yes,932.0,...,Hinduism,other backward class (OBC),LPG,electricity (incl. generated by solar or wind ...,Below Poverty Line (BPL),no,False,15111.333333,166.666667,166.666667
22301307,urban,341,16099,6,1,1,1,0.0,yes,931.0,...,Hinduism,other backward class (OBC),LPG,electricity (incl. generated by solar or wind ...,Below Poverty Line (BPL),yes,False,14048.666667,258.333333,112.5


In [2]:
df['treat'] = services.loc[df.index, 'received_fee_waiver'] == 1
d_fee = df.groupby(['sector', 'treat']).apply(lambda x: (x[['FOOD', 'EDUCATION', 'HEALTH']] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())
d_fee

Unnamed: 0_level_0,Unnamed: 1_level_0,FOOD,EDUCATION,HEALTH
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rural,False,10075.087852,881.368804,336.768279
rural,True,9675.666303,645.049532,305.76744
urban,False,12342.987679,1309.737457,411.746614
urban,True,10934.694227,982.678976,365.345256


In [3]:
def ttest(x, cols, alternative='two-sided'):
    trix = x[x['treat']].index
    crix = x[~x['treat']].index
    pvals = {}
    for col in cols:
        _, p, _ = ttest_ind(x.loc[crix, col], x.loc[trix, col], weights=(x.loc[crix, 'multiplier'], x.loc[trix, 'multiplier']), alternative=alternative)
        pvals[col] = p
    return pd.Series(pvals)
             
df.groupby('sector').apply(lambda x: ttest(x, ['FOOD', 'EDUCATION', 'HEALTH']))

Unnamed: 0_level_0,FOOD,EDUCATION,HEALTH
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
rural,0.0,0.0,0.0
urban,0.0,0.0,0.0


In [4]:
df['treat'] = df['n_school_meals'] > 0
d_meals = df.groupby(['sector', 'treat']).apply(lambda x: (x[['FOOD', 'EDUCATION', 'HEALTH']] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())
d_meals

Unnamed: 0_level_0,Unnamed: 1_level_0,FOOD,EDUCATION,HEALTH
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rural,False,10209.720504,1143.067863,357.716601
rural,True,9810.489783,539.58604,305.54885
urban,False,12502.633228,1413.04373,423.075536
urban,True,11019.912911,697.48801,340.207103


In [5]:
df.groupby('sector').apply(lambda x: ttest(x, ['FOOD', 'EDUCATION', 'HEALTH']))

Unnamed: 0_level_0,FOOD,EDUCATION,HEALTH
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
rural,0.0,0.0,0.0
urban,0.0,0.0,0.0


In [6]:
edf = edu.loc[df.index]

In [7]:
edf['sector'] = df['sector']
edf['multiplier'] = df['multiplier']
edf['treat'] = services.loc[edf.index, 'received_fee_waiver'] == 1
edf.head()

Unnamed: 0_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
22300305,,,150.0,25.0,,,urban,28599,False
22300313,125.0,,41.666667,,,66.666667,urban,28599,False
22301301,,,41.666667,183.333333,,,urban,16099,False
22301304,,,41.666667,125.0,,,urban,16099,False
22301307,,,133.333333,125.0,,,urban,16099,False


In [8]:
cols = edf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
edf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,Unnamed: 1_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
rural,False,160.186284,5.139489,104.646124,421.80683,174.792242,14.797835
rural,True,111.194684,4.702096,103.025044,239.758329,170.737919,15.63146
urban,False,261.407854,7.589263,130.15338,624.436244,255.419029,30.731687
urban,True,166.702851,5.823154,119.800581,416.546826,250.740906,23.064658


In [9]:
edf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
fdf = food.loc[df.index]
fdf['sector'] = df['sector']
fdf['multiplier'] = df['multiplier']
fdf['treat'] = services.loc[fdf.index, 'received_fee_waiver'] == 1
fdf.head()

Unnamed: 0_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
22300305,2335.0,1075.0,320.0,3640.0,2747.333333,1885.0,4983.333333,3033.333333,2357.333333,901.333333,1646.666667,2296.666667,urban,28599,False
22300313,1455.0,689.0,163.0,1906.666667,1495.0,1863.333333,2808.0,1161.333333,897.0,936.0,1083.333333,1105.0,urban,28599,False
22301301,1515.0,458.0,122.0,888.333333,1105.0,1040.0,1863.333333,693.333333,641.333333,641.333333,173.333333,780.0,urban,16099,False
22301304,1323.0,473.0,103.0,1516.666667,1538.333333,2210.0,2513.333333,780.0,832.0,658.666667,1300.0,1863.333333,urban,16099,False
22301307,2316.0,830.0,130.0,1646.666667,1239.333333,736.666667,2080.0,1040.0,858.0,875.333333,1126.666667,1170.0,urban,16099,False


In [11]:
cols = fdf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
fdf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,Unnamed: 1_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
rural,False,1118.777011,441.376599,189.69101,1846.26173,1287.2015,591.999022,1071.392982,605.631206,677.888661,508.040247,1021.266984,715.560899
rural,True,1094.388616,388.881301,203.435254,1816.5432,1217.759447,504.691574,938.545433,602.3521,679.734884,467.763055,1095.444121,666.127316
urban,False,1273.099912,463.919554,191.773704,2476.517071,1329.61673,845.930402,1154.270304,602.116793,728.461825,828.258144,1394.21034,1054.8129
urban,True,1167.923986,421.407509,194.757671,2036.261587,1272.884743,668.856466,1061.728289,632.480192,696.126429,640.068887,1258.482574,883.715894


In [12]:
fdf.fillna(value=0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
hdf = health.loc[df.index]
hdf['sector'] = df['sector']
hdf['multiplier'] = df['multiplier']
hdf['treat'] = services.loc[hdf.index, 'received_fee_waiver'] == 1
hdf.head()

item_code,410,411,412,413,414,420,421,422,423,424,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
22300305,,,,,,208.333333,,33.333333,,,urban,28599,False
22300313,1250.0,375.0,2083.333333,208.333333,,66.666667,,16.666667,,,urban,28599,False
22301301,,,,,,54.166667,,16.666667,,,urban,16099,False
22301304,,,,,,150.0,,16.666667,,,urban,16099,False
22301307,,,,,,91.666667,,20.833333,,,urban,16099,False


In [14]:
cols = hdf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
hdf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,item_code,410,411,412,413,414,420,421,422,423,424
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
rural,False,92.132199,45.121226,44.596676,60.092015,19.617541,57.525076,6.322216,9.214368,0.368441,1.778523
rural,True,84.176089,41.89368,40.146079,50.93432,18.454223,53.515537,5.924399,8.682221,0.50775,1.533143
urban,False,99.501777,56.278132,59.366645,78.492185,24.045225,69.262107,8.093865,13.96061,0.590204,2.155862
urban,True,94.253443,51.537699,46.926758,64.292561,22.214667,65.307881,6.363039,12.278787,0.512178,1.658243


In [15]:
hdf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,410,411,412,413,414,420,421,422,423,424
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


---

In [16]:
df['treat'] = df['n_school_meals'] > 0
d_meals = df.groupby(['sector', 'treat']).apply(lambda x: (x[['FOOD', 'EDUCATION', 'HEALTH']] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())
d_meals

Unnamed: 0_level_0,Unnamed: 1_level_0,FOOD,EDUCATION,HEALTH
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rural,False,10209.720504,1143.067863,357.716601
rural,True,9810.489783,539.58604,305.54885
urban,False,12502.633228,1413.04373,423.075536
urban,True,11019.912911,697.48801,340.207103


In [17]:
df.groupby('sector').apply(lambda x: ttest(x, ['FOOD', 'EDUCATION', 'HEALTH']))

Unnamed: 0_level_0,FOOD,EDUCATION,HEALTH
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
rural,0.0,0.0,0.0
urban,0.0,0.0,0.0


In [18]:
edf['sector'] = df['sector']
edf['multiplier'] = df['multiplier']
edf['treat'] = df['treat']
edf.head()

Unnamed: 0_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
22300305,,,150.0,25.0,,,urban,28599,False
22300313,125.0,,41.666667,,,66.666667,urban,28599,False
22301301,,,41.666667,183.333333,,,urban,16099,True
22301304,,,41.666667,125.0,,,urban,16099,False
22301307,,,133.333333,125.0,,,urban,16099,False


In [19]:
cols = edf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
edf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,Unnamed: 1_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
rural,False,215.221532,6.319366,122.363473,620.515805,159.500881,19.146807
rural,True,88.535096,3.799205,86.100487,161.456034,189.056219,10.638999
urban,False,285.952726,8.485909,136.98427,687.826838,260.79714,32.996846
urban,True,108.84519,2.723029,94.376097,245.742913,228.700621,17.100159


In [20]:
edf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0


In [21]:
fdf = food.loc[df.index]
fdf['sector'] = df['sector']
fdf['multiplier'] = df['multiplier']
fdf['treat'] = df['treat']
fdf.head()

Unnamed: 0_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
22300305,2335.0,1075.0,320.0,3640.0,2747.333333,1885.0,4983.333333,3033.333333,2357.333333,901.333333,1646.666667,2296.666667,urban,28599,False
22300313,1455.0,689.0,163.0,1906.666667,1495.0,1863.333333,2808.0,1161.333333,897.0,936.0,1083.333333,1105.0,urban,28599,False
22301301,1515.0,458.0,122.0,888.333333,1105.0,1040.0,1863.333333,693.333333,641.333333,641.333333,173.333333,780.0,urban,16099,True
22301304,1323.0,473.0,103.0,1516.666667,1538.333333,2210.0,2513.333333,780.0,832.0,658.666667,1300.0,1863.333333,urban,16099,False
22301307,2316.0,830.0,130.0,1646.666667,1239.333333,736.666667,2080.0,1040.0,858.0,875.333333,1126.666667,1170.0,urban,16099,False


In [22]:
cols = fdf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
fdf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,Unnamed: 1_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
rural,False,1118.598726,452.446879,207.032319,2192.071188,1305.87898,629.967101,982.571931,611.098073,694.828789,549.981619,712.302752,752.942148
rural,True,1111.166839,413.341234,176.436901,1484.899667,1246.011532,525.472888,1119.328061,599.020961,661.241593,452.496395,1359.341932,661.731781
urban,False,1299.190184,474.130329,196.924653,2619.47745,1351.818399,869.147192,1104.046063,613.699715,738.225091,845.163762,1304.469881,1086.340509
urban,True,1109.049537,399.152643,169.300102,1634.409217,1203.697046,665.096168,1346.911153,561.45522,670.004146,671.845509,1750.788342,838.203828


In [24]:
fdf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
hdf = health.loc[df.index]
hdf['sector'] = df['sector']
hdf['multiplier'] = df['multiplier']
hdf['treat'] = df['treat']
hdf.head()

item_code,410,411,412,413,414,420,421,422,423,424,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
22300305,,,,,,208.333333,,33.333333,,,urban,28599,False
22300313,1250.0,375.0,2083.333333,208.333333,,66.666667,,16.666667,,,urban,28599,False
22301301,,,,,,54.166667,,16.666667,,,urban,16099,True
22301304,,,,,,150.0,,16.666667,,,urban,16099,False
22301307,,,,,,91.666667,,20.833333,,,urban,16099,False


In [26]:
cols = hdf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
hdf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,item_code,410,411,412,413,414,420,421,422,423,424
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
rural,False,95.792518,47.951241,46.695065,65.310805,21.998652,60.610728,7.083264,9.728186,0.453388,2.092752
rural,True,85.865944,41.210498,41.039653,51.856106,16.82306,53.104403,5.420741,8.521538,0.326512,1.380395
urban,False,100.791054,58.364128,60.838777,80.828293,24.902215,71.845674,8.281219,14.364464,0.647429,2.212284
urban,True,91.373787,44.693409,47.394327,61.806736,19.334701,55.714371,6.508549,11.398799,0.294117,1.688306


In [27]:
hdf.fillna(value=0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,410,411,412,413,414,420,421,422,423,424
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
