In [1]:
import pandas as pd
from statsmodels.stats.weightstats import ttest_ind

df = pd.read_parquet('data/hh.parquet')
services = pd.read_parquet('data/services.parquet')
food = pd.read_parquet('data/food.parquet')
health = pd.read_parquet('data/health.parquet')
edu = pd.read_parquet('data/education.parquet')

---

### Hospitalized family member, received health benefits vs did not

In [2]:
services['is_hospitalization'].unique()

array([4, 2, 3, 1])

In [3]:
services = services[services['is_hospitalization'] != 4]

ix = df.index.intersection(services.index)
df, services = df.loc[ix], services.loc[ix]

df['treat'] = services['is_benefit_healthscheme'] == 1

ix = df.index
for i in [food, health, edu]:
    ix = i.index.intersection(ix)
df = df.loc[ix]

df['FOOD'] = food.loc[df.index].sum(axis=1)
df['EDUCATION'] = edu.loc[df.index].sum(axis=1)
df['HEALTH'] = health.loc[df.index].sum(axis=1)
df.head()

Unnamed: 0_level_0,sector,nss_region,multiplier,family_size,n_children,n_schoolchildren,n_elderly,n_school_meals,employed_annual,nco_2015,...,hoh_religion,caste,energy_source_cooking,energy_source_lighting,rationcard_type,has_benefited_from_pmgky,treat,FOOD,EDUCATION,HEALTH
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
22300313,urban,341,28599,3,1,1,0,0.0,yes,143.0,...,Hinduism,other backward class (OBC),LPG,electricity (incl. generated by solar or wind ...,Above Poverty Line (APL),no,False,15562.666667,233.333333,4000.0
22301313,urban,341,16099,6,1,2,1,24.0,yes,411.0,...,Hinduism,other backward class (OBC),LPG,electricity (incl. generated by solar or wind ...,Below Poverty Line (BPL),no,False,13724.333333,266.666667,150.0
22302306,urban,341,14732,4,2,0,0,0.0,yes,833.0,...,Hinduism,other backward class (OBC),LPG,electricity (incl. generated by solar or wind ...,Below Poverty Line (BPL),no,False,11470.333333,625.0,791.666667
22302310,urban,341,14732,4,2,2,0,0.0,yes,112.0,...,Hinduism,other backward class (OBC),LPG,electricity (incl. generated by solar or wind ...,Above Poverty Line (APL),no,False,17289.0,1375.0,2900.0
22302311,urban,341,14732,5,2,1,1,0.0,yes,522.0,...,Hinduism,other backward class (OBC),LPG,electricity (incl. generated by solar or wind ...,Below Poverty Line (BPL),yes,False,14668.0,3375.0,116.666667


In [4]:
d_hb = df.groupby(['sector', 'treat']).apply(lambda x: (x[['FOOD', 'EDUCATION', 'HEALTH']] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())
d_hb

Unnamed: 0_level_0,Unnamed: 1_level_0,FOOD,EDUCATION,HEALTH
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rural,False,11096.314746,958.725754,1686.011275
rural,True,11020.543687,805.226476,1780.221989
urban,False,13016.688002,1258.968944,2075.206731
urban,True,12393.71952,1110.914989,2255.705955


In [6]:
def ttest(x, cols, alternative='two-sided'):
    trix = x[x['treat']].index
    crix = x[~x['treat']].index
    pvals = {}
    for col in cols:
        _, p, _ = ttest_ind(x.loc[crix, col], x.loc[trix, col], weights=(x.loc[crix, 'multiplier'], x.loc[trix, 'multiplier']), alternative=alternative)
        pvals[col] = p
    return pd.Series(pvals)

df.groupby('sector').apply(lambda x: ttest(x, ['FOOD', 'EDUCATION', 'HEALTH']))

Unnamed: 0_level_0,FOOD,EDUCATION,HEALTH
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
rural,0.0,0.0,0.0
urban,0.0,0.0,0.0


In [7]:
edf = edu.loc[df.index]

In [8]:
edf['sector'] = df['sector']
edf['multiplier'] = df['multiplier']
edf['treat'] = services.loc[edf.index, 'is_benefit_healthscheme'] == 1
edf.head()

Unnamed: 0_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
22300313,125.0,,41.666667,,,66.666667,urban,28599,False
22301313,,,116.666667,150.0,,,urban,16099,False
22302306,208.333333,,158.333333,8.333333,250.0,,urban,14732,False
22302310,750.0,,208.333333,,416.666667,,urban,14732,False
22302311,791.666667,,208.333333,2083.333333,291.666667,,urban,14732,False


In [9]:
cols = edf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
edf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,Unnamed: 1_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
rural,False,170.684661,6.168564,108.433806,454.291664,192.92268,26.22438
rural,True,138.396824,6.076897,106.986442,406.781623,115.838931,31.145759
urban,False,250.270835,7.125506,121.456373,605.381124,229.31935,45.415757
urban,True,190.097173,8.143542,118.427223,570.729124,186.630111,36.887815


In [10]:
edf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
rural,0.0,1.738439e-248,0.0,0.0,0.0,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
fdf = food.loc[df.index]
fdf['sector'] = df['sector']
fdf['multiplier'] = df['multiplier']
fdf['treat'] = services.loc[fdf.index, 'is_benefit_healthscheme'] == 1
fdf.head()

Unnamed: 0_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
22300313,1455.0,689.0,163.0,1906.666667,1495.0,1863.333333,2808.0,1161.333333,897.0,936.0,1083.333333,1105.0,urban,28599,False
22301313,2405.0,603.0,130.0,888.333333,1469.0,736.666667,2080.0,1213.333333,879.666667,762.666667,2383.333333,173.333333,urban,16099,False
22302306,875.0,545.0,140.0,1755.0,1148.333333,1170.0,1022.666667,346.666667,546.0,910.0,2210.0,801.666667,urban,14732,False
22302310,975.0,595.0,145.0,2513.333333,1711.666667,866.666667,2665.0,780.0,862.333333,1105.0,3380.0,1690.0,urban,14732,False
22302311,1140.0,605.0,170.0,2361.666667,1473.333333,1191.666667,2296.666667,693.333333,879.666667,845.0,1603.333333,1408.333333,urban,14732,False


In [12]:
cols = fdf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
fdf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,Unnamed: 1_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
rural,False,1230.514669,478.393297,209.908625,2090.554226,1356.36979,693.134735,1197.467085,643.407942,732.724499,582.055256,1056.983408,824.801215
rural,True,1174.068897,419.244392,217.499245,2358.619693,1315.023928,664.002103,1020.525208,593.844548,715.238807,617.230442,1124.873462,800.372962
urban,False,1299.940318,491.847987,200.333292,2553.203868,1364.103267,942.800265,1246.456925,624.967318,757.33511,909.755252,1482.149277,1143.795125
urban,True,1270.789207,453.529323,225.281749,2668.000897,1414.267846,835.159269,1223.1929,609.771199,734.052408,758.698628,1177.663588,1023.312506


In [13]:
fdf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
hdf = health.loc[df.index]
hdf['sector'] = df['sector']
hdf['multiplier'] = df['multiplier']
hdf['treat'] = services.loc[hdf.index, 'is_benefit_healthscheme'] == 1
hdf.head()

item_code,410,411,412,413,414,420,421,422,423,424,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
22300313,1250.0,375.0,2083.333333,208.333333,,66.666667,,16.666667,,,urban,28599,False
22301313,12.5,,,,,116.666667,,20.833333,,,urban,16099,False
22302306,166.666667,333.333333,125.0,,41.666667,100.0,,25.0,,,urban,14732,False
22302310,,1375.0,666.666667,666.666667,83.333333,83.333333,,25.0,,,urban,14732,False
22302311,50.0,66.666667,,,,,,,,,urban,14732,False


In [15]:
cols = hdf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
hdf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,item_code,410,411,412,413,414,420,421,422,423,424
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
rural,False,550.59507,272.500138,270.54281,365.772587,116.049852,83.967395,9.777869,13.307619,0.507103,2.990831
rural,True,539.057831,323.730289,284.178249,343.658285,163.542515,96.405434,12.613428,12.914444,0.523185,3.59833
urban,False,605.173245,346.196034,372.691747,485.417372,134.180968,98.656766,11.444102,17.109675,1.229554,3.107267
urban,True,593.530861,433.179944,371.539407,481.873093,223.038436,114.031318,16.660682,18.821495,0.34846,2.682259


In [16]:
hdf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,410,411,412,413,414,420,421,422,423,424
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
urban,0.0,0.0,1.67535e-74,0.0,0.0,0.0,0.0,0.0,0.0,0.0


---

### Nobody hospitalized, PMJAY benefecirary vs not

In [17]:
df = pd.read_parquet('data/hh.parquet')
services = pd.read_parquet('data/services.parquet')
food = pd.read_parquet('data/food.parquet')
health = pd.read_parquet('data/health.parquet')
edu = pd.read_parquet('data/education.parquet')

services = services[services['is_hospitalization'] == 4]

ix = df.index.intersection(services.index)
df, services = df.loc[ix], services.loc[ix]

df['treat'] = services['is_hhmem_pmjay'] == 1

ix = df.index
for i in [food, health, edu]:
    ix = i.index.intersection(ix)
df = df.loc[ix]

df['FOOD'] = food.loc[df.index].sum(axis=1)
df['EDUCATION'] = edu.loc[df.index].sum(axis=1)
df['HEALTH'] = health.loc[df.index].sum(axis=1)

In [18]:
d_ben = df.groupby(['sector', 'treat']).apply(lambda x: (x[['FOOD', 'EDUCATION', 'HEALTH']] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())
d_ben

Unnamed: 0_level_0,Unnamed: 1_level_0,FOOD,EDUCATION,HEALTH
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
rural,False,9753.667989,873.851048,70.809653
rural,True,9264.862887,761.465109,67.865275
urban,False,11800.429563,1218.747178,84.993028
urban,True,10533.165765,1173.27021,84.439953


In [19]:
df.groupby('sector').apply(lambda x: ttest(x, ['FOOD', 'EDUCATION', 'HEALTH']))

Unnamed: 0_level_0,FOOD,EDUCATION,HEALTH
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
rural,0.0,0.0,0.0
urban,0.0,0.0,0.0


In [20]:
edf = edu.loc[df.index]
edf['sector'] = df['sector']
edf['multiplier'] = df['multiplier']
edf['treat'] = df['treat']
edf.head()

Unnamed: 0_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
22300302,375.0,,66.666667,,,108.333333,urban,28599,False
22300305,,,150.0,25.0,,,urban,28599,False
22300312,208.333333,,41.666667,,,66.666667,urban,28599,False
22301301,,,41.666667,183.333333,,,urban,16099,False
22301303,166.666667,,83.333333,1000.0,,,urban,16099,False


In [21]:
cols = edf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
edf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,Unnamed: 1_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
rural,False,161.684166,5.792447,99.286453,441.416451,143.596286,22.075245
rural,True,122.036762,4.313266,95.325019,338.52502,182.559959,18.705082
urban,False,249.198132,8.850112,124.847954,583.060083,203.74263,49.048267
urban,True,194.036941,6.149658,112.214172,557.941883,266.31177,36.615785


In [22]:
edf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,books_1st,books_2nd,stationery,fees,coaching,edu_other
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
fdf = food.loc[df.index]
fdf['sector'] = df['sector']
fdf['multiplier'] = df['multiplier']
fdf['treat'] = df['treat']
fdf.head()

Unnamed: 0_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
22300302,2100.0,921.0,156.0,2643.333333,1486.333333,2816.666667,2513.333333,975.0,1200.333333,1573.0,1473.333333,1213.333333,urban,28599,False
22300305,2335.0,1075.0,320.0,3640.0,2747.333333,1885.0,4983.333333,3033.333333,2357.333333,901.333333,1646.666667,2296.666667,urban,28599,False
22300312,2235.0,970.0,172.0,2556.666667,2201.333333,1841.666667,5070.0,1343.333333,1178.666667,1529.666667,1126.666667,1170.0,urban,28599,False
22301301,1515.0,458.0,122.0,888.333333,1105.0,1040.0,1863.333333,693.333333,641.333333,641.333333,173.333333,780.0,urban,16099,False
22301303,994.0,345.0,108.0,1568.666667,780.0,437.666667,632.666667,541.666667,1070.333333,121.333333,216.666667,238.333333,urban,16099,False


In [24]:
cols = fdf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
fdf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,Unnamed: 1_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
rural,False,1075.525293,435.443303,190.465197,1927.246957,1236.873464,572.336139,946.278105,596.519451,662.196648,499.275997,912.438756,699.068679
rural,True,1022.485544,385.103925,171.528108,1566.376327,1218.200427,539.098481,1089.418374,552.395155,645.186339,471.188868,970.935247,632.94609
urban,False,1158.618606,424.620487,175.22742,2392.986783,1212.096967,816.706102,972.003455,552.513191,667.4839,822.929673,1557.931251,1047.311729
urban,True,1115.08441,386.748284,163.491711,1902.527097,1227.221359,697.889332,1219.840091,535.839845,660.01912,640.214844,1180.889051,803.400619


In [25]:
fdf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

Unnamed: 0_level_0,cereals,pulses,sugar_salt,dairy,veg,fruits,meat,oil,spice,bev,cooked,processed
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
rural,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
urban,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
hdf = health.loc[df.index]
hdf['sector'] = df['sector']
hdf['multiplier'] = df['multiplier']
hdf['treat'] = df['treat']
hdf.head()

item_code,410,411,412,413,414,420,421,422,423,424,sector,multiplier,treat
hhid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
22300302,,,,,,100.0,,33.333333,,,urban,28599,False
22300305,,,,,,208.333333,,33.333333,,,urban,28599,False
22300312,,,,,,108.333333,,33.333333,,,urban,28599,False
22301301,,,,,,54.166667,,16.666667,,,urban,16099,False
22301303,,,,,,29.166667,,8.333333,,,urban,16099,False


In [27]:
cols = hdf.drop(['treat', 'sector', 'multiplier'], axis=1).columns
hdf.groupby(['sector', 'treat']).apply(lambda x: (x[cols] * x['multiplier'].values.reshape(-1, 1)).sum(axis=0) / x['multiplier'].sum())

Unnamed: 0_level_0,item_code,410,411,412,413,414,420,421,422,423,424
sector,treat,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
rural,False,0.0,0.0,0.0,0.0,0.0,53.507225,5.799735,9.121388,0.360572,2.020733
rural,True,0.0,0.0,0.0,0.0,0.0,51.797205,6.247396,8.146979,0.396042,1.277653
urban,False,0.0,0.0,0.0,0.0,0.0,61.716947,6.659787,13.672841,0.456075,2.487378
urban,True,0.0,0.0,0.0,0.0,0.0,63.251844,8.318756,11.110323,0.318311,1.440718


In [28]:
hdf.fillna(0).groupby('sector').apply(lambda x: ttest(x, cols))

  tstat = (value1 - value2 - diff) / std_diff


Unnamed: 0_level_0,410,411,412,413,414,420,421,422,423,424
sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
rural,,,,,,0.0,0.0,0.0,0.0,0.0
urban,,,,,,0.0,0.0,0.0,0.0,0.0
