# Summary stats

This notebook creates `sumstats_demo.tex`, the Appendix table of summary statistics for each of the 8 demographic groups.

In [1]:
import pandas as pd 
import numpy as np 

In [2]:
# Load the demographic data and keep only the years before 2009
# (2009 is not used).
demo = (
    pd.read_excel('8x4/demo.xlsx')
      .loc[lambda raw: raw['year'] < 2009]
)

In [3]:
# Source column -> label used in the table, for the statistics that are reported.
# The columns are selected and ordered by label (not by position), so the table
# stays correct even if the column order of the Excel file changes.
stat_labels = {
    'real_inc': 'Income',
    'single': '1(Single)',
    'wd': 'Work distance',
    'age': 'Age',
    'bigcity1': '1(Urban)',
    'nkids': 'No. kids',
}


def wm(x):
    """Return the mean of `x` weighted by the `count` column of `demo`.

    `x` is the per-group column slice handed over by `groupby(...).agg`; its
    index is used to look up the matching weights in `demo`.
    """
    return np.average(x, weights=demo.loc[x.index, 'count'])


# Weighted mean of every reported statistic within each `tau` group.
demo_g = (
    demo.groupby('tau')[list(stat_labels)]
        .agg(wm)
        .rename(columns=stat_labels)
)
demo_g['Income'] = demo_g['Income'] / 1000        # report income divided by 1000
demo_g['N'] = demo.groupby('tau')['count'].sum()  # sum of `count` within each group

# Column order of the final table.
demo_g = demo_g[['N', 'Income', '1(Single)', 'Work distance', 'Age', '1(Urban)', 'No. kids']]

In [4]:
# Build a readable label for each of the 8 household types. The nesting order
# (work distance, then couple/single, then poor/rich) determines which label
# goes with tau = 1, ..., 8.
tau_names = [
    f'{wd} WD, {c}, {y}'
    for wd in ['Low', 'High']
    for c in ['Couple', 'Single']
    for y in ['Poor', 'Rich']
]

# Prefix each label with its tau number and use it as the row label.
demo_g = demo_g.rename(
    index={tau: f'{tau}: {name}' for tau, name in enumerate(tau_names, start=1)}
)

# Round to two decimals for display.
# NOTE(review): no 'tw' column appears in the table rendered below, so the
# rename to 'Share' looks like a no-op -- confirm before removing it.
tab = demo_g.round(2)
tab = tab.rename(columns={'tw':'Share'})
tab

Unnamed: 0_level_0,N,Income,1(Single),Work distance,Age,1(Urban),No. kids
tau,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"1: Low WD, Couple, Poor",6667634,311.7,0.0,0.0,55.04,0.22,0.48
"2: Low WD, Couple, Rich",6538448,777.44,0.0,0.0,46.38,0.21,1.03
"3: Low WD, Single, Poor",7969411,109.92,1.0,0.0,54.21,0.35,0.11
"4: Low WD, Single, Rich",7815269,301.23,1.0,0.0,48.22,0.33,0.2
"5: High WD, Couple, Poor",4147914,494.67,0.0,34.63,40.59,0.12,0.99
"6: High WD, Couple, Rich",3952150,862.57,0.0,42.14,43.57,0.12,1.21
"7: High WD, Single, Poor",1228816,215.04,1.0,26.71,33.85,0.25,0.22
"8: High WD, Single, Rich",1194766,413.36,1.0,32.98,41.15,0.22,0.24


In [5]:
# Write the summary-statistics table to a LaTeX file.
tab.to_latex(buf='sumstats_demo.tex')

  tab.to_latex('sumstats_demo.tex')
