Housekeeping

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.optimize import fsolve
import itertools

from f_detrend import detrend_fuction

sn.set_style('whitegrid')
%matplotlib inline
%load_ext autoreload
%autoreload 2

Auxiliary functions

In [364]:
# Expanded and modified version (first column now string)
def read_table(filename,keyword='ciclo',rescale=1,ncols=5,start=2,end_row='Total',
               endword=None,omit_list=['t20_21'],index_to_int=True,debug=False):
    """Parse a whitespace-delimited results table from a text log.

    The file is scanned line by line; each line is split on whitespace.

    * A multi-token line whose first token is ``keyword`` is the header. Column
      names are tokens ``start`` to ``start+ncols`` or, when ``endword`` is given,
      the tokens from ``start`` up to (not including) ``endword`` minus the last
      one; the numeric window is narrowed to match.
    * A multi-token line whose first token is ``end_row`` stops the scan.
    * A two-token line ``name |`` opens an interaction group. Rows that follow are
      labelled ``name_<level>``, where ``<level>`` is the first token if the second
      token is ``|`` and the second token otherwise. If that label is already in
      use, ``<level>+1`` is used instead. A single-token line closes the group.
    * Any other multi-token line after the header is a data row. Thousands
      separators (",") are stripped and the values are divided by ``rescale``.
      If the third token is ``|`` the numeric window is shifted right by one.

    Parameters
    ----------
    filename : str
        Path of the text file to read.
    keyword : str
        First token of the header line.
    rescale : number
        Divisor applied to every parsed value.
    ncols : int
        Number of numeric columns (ignored when ``endword`` is given).
    start : int
        Index of the first numeric token on a row.
    end_row : str
        First token of the line that terminates the table.
    endword : str or None
        Optional token that delimits the header columns (see above).
    omit_list : list
        Accepted for backward compatibility; not used by the parser.
    index_to_int : bool
        If True the row labels are converted to integers.
    debug : bool
        If True print the tokens of each line, the table shape and the columns.

    Returns
    -------
    pandas.DataFrame
        One row per data line, indexed by the row labels. A header with no data
        rows yields an empty frame with the header's columns.

    Raises
    ------
    ValueError
        If no header line starting with ``keyword`` is found, or if a value
        cannot be converted to float.
    """
    t0 = start
    t1 = ncols+start
    header_seen = False
    in_interaction = False
    colnames = None
    row_names = []
    rows = []
    # The context manager guarantees the handle is released even if parsing fails.
    with open(filename,'r') as f:
        for line in f:
            words = line.split()
            if debug:
                print(words)
            if len(words)>1 and words[0] == keyword:
                header_seen = True
                if endword is None:
                    colnames = words[t0:t1]
                else:
                    buff_v = []
                    for w in words[t0:]:
                        if w==endword:
                            break
                        buff_v.append(w)
                    colnames = buff_v[:-1]
                    t1 = len(buff_v)+t0-1
                # A (repeated) header restarts the numeric table.
                rows = []
            elif len(words)>1 and words[0] == end_row:
                break
            elif len(words) == 1:
                in_interaction = False
            elif len(words)>1 and header_seen:
                if len(words)==2 and words[1]=="|":
                    # "name |" announces a block of interaction levels.
                    var_name = words[0]
                    in_interaction = True
                    continue

                if not in_interaction:
                    row_names.append(words[0])
                else:
                    level = words[0] if words[1]=='|' else words[1]
                    new_name = var_name+"_"+level
                    if new_name in row_names:
                        # Same level printed twice: label the repeat as level+1.
                        new_name = var_name+"_"+str(int(level)+1)
                    row_names.append(new_name)

                # An extra "|" in third position pushes the numbers one token right.
                shift = 1 if words[2]=='|' else 0
                values = [w.replace(",","") for w in words[t0+shift:t1+shift]]
                rows.append(np.array(values,dtype=float)/rescale)

    if colnames is None:
        raise ValueError("header keyword {!r} not found in {}".format(keyword, filename))

    table_raw = np.vstack(rows) if rows else np.empty((0,len(colnames)))
    if debug:
        print(table_raw.shape)
        print(colnames)
    if index_to_int:
        return pd.DataFrame(table_raw, columns=colnames,index=np.array(row_names,dtype=int))
    else:
        return pd.DataFrame(table_raw, columns=colnames,index=np.array(row_names))
    
    
def read_descriptive_stats(filename,keyword_data,keyword_col,ncols,rownames,start=2,debug=False):
    """Collect repeated statistic lines from a text log into a DataFrame.

    Every multi-token line whose first token is ``keyword_data`` contributes one
    row, built from tokens ``start`` to ``start+ncols`` with "," removed and
    converted to float. A multi-token line whose first token is ``keyword_col``
    supplies the column names from the same token window (the last such line
    wins).

    Parameters
    ----------
    filename : str
        Path of the text file to read.
    keyword_data : str
        First token of the lines that hold the numbers.
    keyword_col : str
        First token of the line that holds the column names.
    ncols : int
        Number of columns to read.
    rownames : sequence
        Index for the resulting frame; must have one entry per data line found.
    start : int
        Index of the first token to read on each line.
    debug : bool
        If True print the tokens of every line.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If no line starting with ``keyword_col`` is found, or a value cannot be
        converted to float.
    """
    t0 = start
    t1 = ncols+start
    colnames = None
    rows = []
    # Context manager so the handle is closed even when a conversion fails.
    with open(filename,'r') as f:
        for line in f:
            words = line.split()
            if debug:
                print(words)
            if len(words)>1 and words[0] == keyword_data:
                values = [w.replace(",","") for w in words[t0:t1]]
                rows.append(np.array(values,dtype=float))
            elif len(words)>1 and words[0] == keyword_col:
                colnames = words[t0:t1]

    if colnames is None:
        raise ValueError("column keyword {!r} not found in {}".format(keyword_col, filename))

    table_raw = np.vstack(rows) if rows else np.empty((0,len(colnames)))
    return pd.DataFrame(table_raw, columns=colnames, index=rownames)
 
    
def file_split(filename,path,startwords=['Summary',],endword="Total",header=None,Noobs=False):
    """Split a log file into numbered section files and collect summary tokens.

    The file ``path+filename`` is read line by line. A line whose first token is
    in ``startwords`` opens ``path+<stem>_<k>.log`` (``<stem>`` is ``filename`` up
    to its first "."; ``k`` starts at 1). Every line from that start line up to
    and including the next line whose first token equals ``endword`` is copied
    into it; the section file is then closed and ``k`` is incremented.

    ``header`` (if given) is written verbatim before the start line. No newline
    is added, so it ends up on the same line as the start line.

    While scanning, the last token of every line containing the token ``Number``
    is remembered (the last such line wins), and the last token of every line
    containing the token ``R2`` is appended to a list.

    Parameters
    ----------
    filename : str
        Name of the log file, relative to ``path``.
    path : str
        Directory prefix; concatenated directly, so it must end with a separator.
    startwords : list of str
        First tokens that open a section.
    endword : str
        First token that closes a section.
    header : str or None
        Text written at the top of each section file.
    Noobs : bool
        If True return the collected summary tokens.

    Returns
    -------
    tuple or None
        ``(noobs, R2s)`` when ``Noobs`` is True (``noobs`` is None if no
        ``Number`` line was seen); otherwise None.
    """
    parent_file = filename[:filename.find('.')]
    di = 1
    R2s = []
    noobs = None
    sf = None  # currently open section file, or None when outside a section
    try:
        with open(path+filename,'r') as f:
            for line in f:
                words = line.split()
                if 'Number' in words:
                    noobs = words[-1]
                if 'R2' in words:
                    R2s.append(words[-1])
                if words and words[0] in startwords:
                    # A start word inside an unterminated section reopens the same
                    # numbered file; close the old handle first so nothing leaks.
                    if sf is not None:
                        sf.close()
                    sf = open(path+parent_file+'_{}.log'.format(di), "w")
                    if header is not None:
                        sf.write(header)
                    sf.write(line)
                elif words and words[0]==endword and sf is not None:
                    sf.write(line)
                    sf.close()
                    sf = None
                    di+=1
                elif sf is not None:
                    sf.write(line)
    finally:
        # A section without its end word would otherwise leave the file open.
        if sf is not None:
            sf.close()
    if Noobs==True:
        return (noobs,R2s)
            
def normalise_table(table):
    """Turn every row of ``table`` into shares of its row total, in place.

    A ``'Total'`` column holding the row sums is added, then each column that
    existed on entry is divided by it. The same (mutated) frame is returned.
    """
    value_columns = table.columns
    row_totals = table.sum(axis=1)
    table['Total'] = row_totals.copy()
    for name in value_columns:
        table[name] = table[name].div(table['Total'])
    return table

In [3]:
# Quarter labels "YYYYQn" for 1987-2021; the first one (1987Q1) is dropped.
dates = []
for year, quarter in itertools.product(range(1987, 2022), range(1, 5)):
    dates.append("{}Q{}".format(year, quarter))
del dates[0]

t0 = 59   # integer code paired with the first label in `dates`
t05 = 71  # NOTE(review): not used in this section -- confirm what it marks
T = len(dates)

# Lookups between integer period codes and quarter labels.
date_dict = {t0 + k: label for k, label in enumerate(dates)}
date_dict_reverse = {label: k for k, label in enumerate(dates)}
date_dict_reverse_ciclo = {label: t0 + k for k, label in enumerate(dates)}

# Calendar years 2006-2021 as strings.
time_periods_thin = []
for y in range(2006, 2022):
    time_periods_thin.append("{}".format(y))

time_periods = ["t08_11", "t11_14", "t14_17", "t17_20", "t20", "t21"]
period_labels = ['2008-2011', '2011-2014', '2014-2017', '2017-2020', '2020', '2021']
    
def rename_interaction(tab,int_subs,n_rows=7):
    """Rename index labels in the leading rows of ``tab`` only.

    Parameters
    ----------
    tab : pandas.DataFrame
        Table whose index is to be partially relabelled; it is not modified.
    int_subs : dict
        Mapping ``old label -> new label`` applied to the first ``n_rows`` rows.
    n_rows : int
        Number of leading rows the mapping applies to (default 7).

    Returns
    -------
    pandas.DataFrame
        New frame with the same rows in the same order; labels beyond
        ``n_rows`` are left unchanged even if they appear in ``int_subs``.
    """
    head = tab.iloc[:n_rows].rename(index=int_subs)
    tail = tab.iloc[n_rows:].copy()
    return pd.concat((head,tail))

In [401]:
# Split each Stata log into one file per quantile (q25/q50/q75) and parse them.
path = './results/'
omits = ['sexo1#c.ttrend','sexo1#c.ttrend2',"covid#sexo1"]

# Sample with children ("moms").
name = "sqtreg_3035_hemp_time_int_diff_simple"
N, r2s = file_split('{}.log'.format(name),path,startwords=['q25','q50','q75'],endword="_cons",
                    header="ten_y | Coefficient std t p_stat lower_95 upper_95",Noobs=True)
tab_moms_25, tab_moms_50, tab_moms_75 = [
    read_table(path+'{}_{}.log'.format(name,part),
               keyword='ten_y',rescale=1,ncols=6,start=2,omit_list=omits,index_to_int=False)
    for part in (1,2,3)
]

# Sample without children ("0k").
name = "sqtreg_3035_hemp_time_int_diff_simple_0k"
N_0k, r2s_0k = file_split('{}.log'.format(name),path,startwords=['q25','q50','q75'],endword="_cons",
                          header="ten_y | Coefficient std t p_stat lower_95 upper_95",Noobs=True)
tab_moms_25_0k, tab_moms_50_0k, tab_moms_75_0k = [
    read_table(path+'{}_{}.log'.format(name,part),
               keyword='ten_y',rescale=1,ncols=6,start=2,omit_list=omits,index_to_int=False)
    for part in (1,2,3)
]


In [402]:
# Stata coefficient names as they appear in the parsed tables ...
old_names_var = ['other_ten_y', 'ttrend', 'ttrend2', 'sexo1#c.ttrend_1','sexo1#c.ttrend2_1',
                 'covid_1', 'covid_2', '1.sexo1', 'covid#sexo1_1','covid#sexo1_2', '_cons']
# ... their LaTeX labels, in the same order ...
new_names_var = [r'partner tenure (years)',r'$t$',r'$t^{2}$',r'$t$ x female',r'$t^2$ x female',
                     r'$\delta_{2020}$',r'$\delta_{2021}$','female',r'$\delta_{2020}$ x female',
                     r'$\delta_{2021}$ x female',r'$\beta_0$']
# ... and the order in which the labels are reported.
reorder_cols = [r'$\beta_0$','female',r'partner tenure (years)',r'$t$',r'$t^{2}$',r'$t$ x female',r'$t^2$ x female',
                     r'$\delta_{2020}$',r'$\delta_{2021}$',r'$\delta_{2020}$ x female',
                     r'$\delta_{2021}$ x female']

def rename_results(df,old_names=old_names_var,new_names=new_names_var,order=reorder_cols):
    """Relabel and reorder the columns of a results table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one column per coefficient; it is left unmodified.
    old_names, new_names : sequence of str
        Parallel sequences giving the current and the replacement column labels.
    order : sequence of str
        Column labels of the result, in output order. Labels absent from the
        renamed table produce all-NaN columns; columns not listed are dropped.

    Returns
    -------
    pandas.DataFrame
        A new frame with renamed, reordered columns.
    """
    renamed = df.rename(columns=dict(zip(old_names,new_names)))
    return renamed.reindex(columns=order)

def pass_regression_to_latex_column(df):
    """Format one regression as a single text column (coefficient over std. error).

    For every row of ``df`` two entries are produced, in row order: the
    ``"Coefficient"`` value formatted with three decimals, labelled with the row
    label, followed by the ``"std"`` value in parentheses with four decimals,
    labelled with an empty string.

    The entries are interleaved positionally, so integer or repeated row labels
    are handled correctly.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``"Coefficient"`` and ``"std"``.

    Returns
    -------
    pandas.Series
        Unnamed series of strings with ``2 * len(df)`` entries.
    """
    labels = list(df["Coefficient"].index)
    coef_text = ["{:,.3f}".format(float(v)) for v in df["Coefficient"]]
    std_text = ["({:,.4f})".format(float(v)) for v in df["std"]]

    cells = []
    index = []
    for label, coef, std in zip(labels, coef_text, std_text):
        cells.extend((coef, std))
        index.extend((label, ""))

    # The series is left unnamed so callers can wrap it in a DataFrame with their
    # own column label.
    return pd.Series(cells, index=pd.Index(index, dtype=object), dtype=object)

In [404]:
# Assemble the LaTeX table: one column per (sample, quantile) regression.
res_names = ['Parents, 25th pctl','No children, 25th pctl','Parents, 50th pctl',
            'No children, 50th pctl','Parents, 75th pctl','No Children, 75th pctl']

all_results = [tab_moms_25,tab_moms_25_0k,tab_moms_50,
               tab_moms_50_0k,tab_moms_75,tab_moms_75_0k]

# After this step each entry has statistics as rows and labelled coefficients as columns.
all_results = [rename_results(res.T) for res in all_results]

table = pd.DataFrame(pass_regression_to_latex_column(all_results[0].T),columns=[res_names[0],])
for reg, res in zip(res_names[1:], all_results[1:]):
    table[reg] = pass_regression_to_latex_column(res.T).values

# Wrap each coefficient in math mode and add significance stars:
# *** p <= 0.01, ** p <= 0.05, * p <= 0.1 (boundaries inclusive; a NaN p-value gets none).
for reg, res in zip(res_names, all_results):
    p_values = res.loc['p_stat']
    for var in reorder_cols:
        p = p_values[var]
        if p <= 0.01:
            template = r"${}^{{***}}$"
        elif p <= 0.05:
            template = r"${}^{{**}}$"
        elif p <= 0.1:
            template = r"${}^*$"
        else:
            template = r"${}$"
        # Single .loc call: chained `table[reg].loc[var] = ...` may write to a copy.
        table.loc[var, reg] = template.format(table.loc[var, reg])

N_row = pd.DataFrame(np.tile([N, N_0k],3), index=res_names, columns=['N',]).T
R2_row = pd.DataFrame(np.vstack((r2s,r2s_0k)).T.ravel(), index=res_names, columns=[r'Pseudo $R^2$',]).T
table = pd.concat([table,N_row,R2_row])

print(table.to_latex(escape=False))

\begin{tabular}{lllllll}
\toprule
{} & Parents, 25th pctl & No children, 25th pctl & Parents, 50th pctl & No children, 50th pctl & Parents, 75th pctl & No Children, 75th pctl \\
\midrule
$\beta_0$                &      $0.858^{***}$ &          $1.221^{***}$ &      $3.323^{***}$ &          $3.571^{***}$ &      $7.722^{***}$ &          $7.029^{***}$ \\
                         &           (0.0578) &               (0.0550) &           (0.0852) &               (0.0516) &           (0.1003) &               (0.1210) \\
female                   &     $-0.829^{***}$ &         $-0.528^{***}$ &     $-1.379^{***}$ &         $-0.982^{***}$ &     $-1.298^{***}$ &         $-1.381^{***}$ \\
                         &           (0.0870) &               (0.0577) &           (0.1384) &               (0.0997) &           (0.1757) &               (0.1369) \\
partner tenure (years)   &      $0.103^{***}$ &          $0.070^{***}$ &      $0.209^{***}$ &          $0.144^{***}$ &      $0.170^{***}$ &          

In [376]:
# Preview the first rows of the formatted results table.
table.head()

Unnamed: 0,"Parents, 25th pctl","No children, 25th pctl","Parents, 50th pctl","No children, 50th pctl","Parents, 75th pctl","No Children, 75th pctl"
$\beta_0$,$0.858^{***}$,$1.221^{***}$,$3.323^{***}$,$3.571^{***}$,$7.722^{***}$,$7.029^{***}$
,(0.0578),(0.0550),(0.0852),(0.0516),(0.1003),(0.1210)
female,$-0.829^{***}$,$-0.528^{***}$,$-1.379^{***}$,$-0.982^{***}$,$-1.298^{***}$,$-1.381^{***}$
,(0.0870),(0.0577),(0.1384),(0.0997),(0.1757),(0.1369)
partner tenure (years),$0.103^{***}$,$0.070^{***}$,$0.209^{***}$,$0.144^{***}$,$0.170^{***}$,$0.161^{***}$


In [397]:
# Split each Stata log into one file per quantile (q25/q50/q75) and parse them.
path = './results/'
omits = ['sexo1#c.ttrend','sexo1#c.ttrend2',"covid#sexo1"]

# Sample with children ("moms").
name = "sqtreg_3035_hemp_time_int_diff"
N, r2s = file_split('{}.log'.format(name),path,startwords=['q25','q50','q75'],endword="_cons",
                    header="ten_y | Coefficient std t p_stat lower_95 upper_95",Noobs=True)
tab_moms_25, tab_moms_50, tab_moms_75 = [
    read_table(path+'{}_{}.log'.format(name,part),
               keyword='ten_y',rescale=1,ncols=6,start=2,omit_list=omits,index_to_int=False)
    for part in (1,2,3)
]

# Sample without children ("0k").
name = "sqtreg_3035_hemp_time_int_diff_0k"
N_0k, r2s_0k = file_split('{}.log'.format(name),path,startwords=['q25','q50','q75'],endword="_cons",
                          header="ten_y | Coefficient std t p_stat lower_95 upper_95",Noobs=True)
tab_moms_25_0k, tab_moms_50_0k, tab_moms_75_0k = [
    read_table(path+'{}_{}.log'.format(name,part),
               keyword='ten_y',rescale=1,ncols=6,start=2,omit_list=omits,index_to_int=False)
    for part in (1,2,3)
]

In [398]:
# Stata coefficient names as they appear in the parsed tables ...
old_names_var = ['other_ten_y', 'ttrend', 'ttrend2', 'sexo1#c.ttrend_1',
       'sexo1#c.ttrend2_1', 'covid_1', 'covid_2', '1.sexo1', 'covid#sexo1_1',
       'covid#sexo1_2', 'part_time#sexo1_0', 'part_time#sexo1_1',
       'college#sexo1_0', 'college#sexo1_1', 'less_hs#sexo1_0',
       'less_hs#sexo1_1', 'other_se#sexo1_0', 'other_se#sexo1_1',
       'other_college#sexo1_0', 'other_college#sexo1_1',
        'other_less_hs#sexo1_0', 'other_less_hs#sexo1_1','_cons']
# ... their LaTeX labels, in the same order (no bare "_": the table is printed with escape=False) ...
new_names_var = [r'partner tenure (years)',r'$t$',r'$t^{2}$',r'$t$ x female',r'$t^2$ x female',
                     r'$\delta_{2020}$',r'$\delta_{2021}$','female',r'$\delta_{2020}$ x female',
                     r'$\delta_{2021}$ x female','part-time','part-time x female',
       'college', 'college x female', 'less than HS',
       'less than HS x female', 'partner is SE', 'partner is SE x female',
       'partner college', 'partner college x female',
       'partner less than HS', 'partner less than HS x female',r'$\beta_0$']
# ... and the order in which the labels are reported.
reorder_cols = [r'$\beta_0$','female',r'partner tenure (years)',r'$t$',r'$t^{2}$',r'$t$ x female',r'$t^2$ x female',
                r'$\delta_{2020}$',r'$\delta_{2021}$',r'$\delta_{2020}$ x female',r'$\delta_{2021}$ x female',
                'part-time','part-time x female','college', 'college x female', 'less than HS',
                'less than HS x female', 'partner is SE', 'partner is SE x female','partner college', 'partner college x female',
       'partner less than HS', 'partner less than HS x female']

def rename_results(df,old_names=old_names_var,new_names=new_names_var,order=reorder_cols):
    """Relabel and reorder the columns of a results table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one column per coefficient; it is left unmodified.
    old_names, new_names : sequence of str
        Parallel sequences giving the current and the replacement column labels.
    order : sequence of str
        Column labels of the result, in output order. Labels absent from the
        renamed table produce all-NaN columns; columns not listed are dropped.

    Returns
    -------
    pandas.DataFrame
        A new frame with renamed, reordered columns.
    """
    renamed = df.rename(columns=dict(zip(old_names,new_names)))
    return renamed.reindex(columns=order)

# Try the renaming on the 25th-percentile table; `table` is reassigned in the next cell.
table = rename_results(tab_moms_25.T)

In [400]:
# Assemble the LaTeX table: one column per (sample, quantile) regression.
res_names = ['Parents, 25th pctl','No children, 25th pctl','Parents, 50th pctl',
            'No children, 50th pctl','Parents, 75th pctl','No Children, 75th pctl']

all_results = [tab_moms_25,tab_moms_25_0k,tab_moms_50,
               tab_moms_50_0k,tab_moms_75,tab_moms_75_0k]

# After this step each entry has statistics as rows and labelled coefficients as columns.
all_results = [rename_results(res.T) for res in all_results]

table = pd.DataFrame(pass_regression_to_latex_column(all_results[0].T),columns=[res_names[0],])
for reg, res in zip(res_names[1:], all_results[1:]):
    table[reg] = pass_regression_to_latex_column(res.T).values

# Wrap each coefficient in math mode and add significance stars:
# *** p <= 0.01, ** p <= 0.05, * p <= 0.1 (a NaN p-value gets none).
for reg, res in zip(res_names, all_results):
    p_values = res.loc['p_stat']
    for var in reorder_cols:
        p = p_values[var]
        if p <= 0.01:
            template = r"${}^{{***}}$"
        elif p <= 0.05:
            template = r"${}^{{**}}$"
        elif p <= 0.1:
            template = r"${}^*$"
        else:
            template = r"${}$"
        # Single .loc call: chained `table[reg].loc[var] = ...` may write to a copy.
        table.loc[var, reg] = template.format(table.loc[var, reg])

N_row = pd.DataFrame(np.tile([N, N_0k],3), index=res_names, columns=['N',]).T
R2_row = pd.DataFrame(np.vstack((r2s,r2s_0k)).T.ravel(), index=res_names, columns=[r'Pseudo $R^2$',]).T
table = pd.concat([table,N_row,R2_row])

print(table.to_latex(escape=False))

\begin{tabular}{lllllll}
\toprule
{} & Parents, 25th pctl & No children, 25th pctl & Parents, 50th pctl & No children, 50th pctl & Parents, 75th pctl & No Children, 75th pctl \\
\midrule
$\beta_0$                     &      $0.744^{***}$ &          $0.961^{***}$ &      $3.116^{***}$ &          $3.540^{***}$ &      $8.427^{***}$ &          $7.939^{***}$ \\
                              &           (0.0845) &               (0.0521) &           (0.1081) &               (0.0939) &           (0.1157) &               (0.1480) \\
female                        &     $-0.630^{***}$ &               $-0.128$ &     $-0.974^{***}$ &         $-0.803^{***}$ &     $-1.597^{***}$ &         $-1.073^{***}$ \\
                              &           (0.0921) &               (0.0804) &           (0.1280) &               (0.1079) &           (0.1574) &               (0.1736) \\
partner tenure (years)        &      $0.080^{***}$ &          $0.061^{***}$ &      $0.180^{***}$ &          $0.136^{***}$ &      

In [405]:
# Load the quantile regressions estimated separately for each tenure bin (1-3).
# Results are stored as tables[bin][quantile] for both samples.
path = './results/'
omits = ['sexo1#c.ttrend','sexo1#c.ttrend2',"covid#sexo1"]
tenure_bins = np.arange(1,4)
tables_cond_ten = {}
tables_cond_ten_0k = {}
Ns, r2s, Ns_0k, r2s_0k = {}, {}, {}, {}

for t in tenure_bins:
    # Sample with children.
    name = "sqtreg_3035_hemp_time_int_diff_t{}".format(t)
    Ns[t], r2s[t] = file_split('{}.log'.format(name),path,startwords=['q25','q50','q75'],endword="_cons",
                               header="ten_y | Coefficient std t p_stat lower_95 upper_95",Noobs=True)
    tables_cond_ten[t] = {
        q: read_table(path+'{}_{}.log'.format(name,part),
                      keyword='ten_y',rescale=1,ncols=6,start=2,omit_list=omits,index_to_int=False)
        for part, q in enumerate([25,50,75], start=1)
    }

    # Sample without children.
    name = "sqtreg_3035_hemp_time_int_diff_t{}_0k".format(t)
    Ns_0k[t], r2s_0k[t] = file_split('{}.log'.format(name),path,startwords=['q25','q50','q75'],endword="_cons",
                                     header="ten_y | Coefficient std t p_stat lower_95 upper_95",Noobs=True)
    tables_cond_ten_0k[t] = {
        q: read_table(path+'{}_{}.log'.format(name,part),
                      keyword='ten_y',rescale=1,ncols=6,start=2,omit_list=omits,index_to_int=False)
        for part, q in enumerate([25,50,75], start=1)
    }

In [406]:
# Stata coefficient names as they appear in the parsed tables ...
old_names_var = ['ttrend', 'ttrend2', 'sexo1#c.ttrend_1',
       'sexo1#c.ttrend2_1', 'covid_1', 'covid_2', '1.sexo1', 'covid#sexo1_1',
       'covid#sexo1_2', 'part_time#sexo1_0', 'part_time#sexo1_1',
       'college#sexo1_0', 'college#sexo1_1', 'less_hs#sexo1_0',
       'less_hs#sexo1_1', 'other_se#sexo1_0', 'other_se#sexo1_1',
       'other_college#sexo1_0', 'other_college#sexo1_1',
        'other_less_hs#sexo1_0', 'other_less_hs#sexo1_1','_cons']
# ... their LaTeX labels, in the same order (no bare "_": the table is printed with escape=False) ...
new_names_var = [r'$t$',r'$t^{2}$',r'$t$ x female',r'$t^2$ x female',
                     r'$\delta_{2020}$',r'$\delta_{2021}$','female',r'$\delta_{2020}$ x female',
                     r'$\delta_{2021}$ x female','part-time','part-time x female',
       'college', 'college x female', 'less than HS',
       'less than HS x female', 'partner is SE', 'partner is SE x female',
       'partner college', 'partner college x female',
       'partner less than HS', 'partner less than HS x female',r'$\beta_0$']
# ... and the order in which the labels are reported.
reorder_cols = [r'$\beta_0$','female',r'$t$',r'$t^{2}$',r'$t$ x female',r'$t^2$ x female',
                r'$\delta_{2020}$',r'$\delta_{2021}$',r'$\delta_{2020}$ x female',r'$\delta_{2021}$ x female',
                'part-time','part-time x female','college', 'college x female', 'less than HS',
                'less than HS x female', 'partner is SE', 'partner is SE x female','partner college', 'partner college x female',
       'partner less than HS', 'partner less than HS x female']

def rename_results(df,old_names=old_names_var,new_names=new_names_var,order=reorder_cols):
    """Relabel and reorder the columns of a results table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one column per coefficient; it is left unmodified.
    old_names, new_names : sequence of str
        Parallel sequences giving the current and the replacement column labels.
    order : sequence of str
        Column labels of the result, in output order. Labels absent from the
        renamed table produce all-NaN columns; columns not listed are dropped.

    Returns
    -------
    pandas.DataFrame
        A new frame with renamed, reordered columns.
    """
    renamed = df.rename(columns=dict(zip(old_names,new_names)))
    return renamed.reindex(columns=order)


In [411]:
# Print one LaTeX table of the COVID-year effects per tenure bin.
res_names = ['Parents, 25th pctl','No children, 25th pctl','Parents, 50th pctl',
            'No children, 50th pctl','Parents, 75th pctl','No Children, 75th pctl']
for ten_level in range(1,4):
    # Interleave the two samples by quantile, matching the order of res_names.
    # Each entry has statistics as rows and labelled coefficients as columns.
    all_results = []
    for q in [25,50,75]:
        all_results.append(rename_results(tables_cond_ten[ten_level][q].T))
        all_results.append(rename_results(tables_cond_ten_0k[ten_level][q].T))

    table = pd.DataFrame(pass_regression_to_latex_column(all_results[0].T),columns=[res_names[0],])
    for reg, res in zip(res_names[1:], all_results[1:]):
        table[reg] = pass_regression_to_latex_column(res.T).values

    # Wrap each coefficient in math mode and add significance stars:
    # *** p <= 0.01, ** p <= 0.05, * p <= 0.1 (a NaN p-value gets none).
    for reg, res in zip(res_names, all_results):
        p_values = res.loc['p_stat']
        for var in reorder_cols:
            p = p_values[var]
            if p <= 0.01:
                template = r"${}^{{***}}$"
            elif p <= 0.05:
                template = r"${}^{{**}}$"
            elif p <= 0.1:
                template = r"${}^*$"
            else:
                template = r"${}$"
            # Single .loc call: chained `table[reg].loc[var] = ...` may write to a copy.
            table.loc[var, reg] = template.format(table.loc[var, reg])

    N_row = pd.DataFrame(np.tile([Ns[ten_level], Ns_0k[ten_level]],3), index=res_names, columns=['N',]).T
    R2_row = pd.DataFrame(np.vstack((r2s[ten_level],r2s_0k[ten_level])).T.ravel(), index=res_names, columns=[r'Pseudo $R^2$',]).T
    table = pd.concat([table,N_row,R2_row])

    # Only the COVID dummies, their interactions and the fit statistics are reported.
    print(table.loc[[r'$\delta_{2020}$',r'$\delta_{2021}$',r'$\delta_{2020}$ x female',
                         r'$\delta_{2021}$ x female','N',r'Pseudo $R^2$']].to_latex(escape=False))

\begin{tabular}{lllllll}
\toprule
{} & Parents, 25th pctl & No children, 25th pctl & Parents, 50th pctl & No children, 50th pctl & Parents, 75th pctl & No Children, 75th pctl \\
\midrule
$\delta_{2020}$          &            $0.111$ &                $0.065$ &           $-0.174$ &               $-0.415$ &            $0.036$ &         $-1.850^{***}$ \\
$\delta_{2021}$          &            $0.046$ &                $0.078$ &            $0.536$ &               $-0.281$ &            $0.359$ &               $-1.303$ \\
$\delta_{2020}$ x female &           $-0.044$ &               $-0.042$ &           $-0.185$ &               $-0.069$ &           $-1.382$ &               $-0.181$ \\
$\delta_{2021}$ x female &            $0.347$ &                $0.174$ &            $0.277$ &                $0.668$ &            $0.729$ &                $0.311$ \\
N                        &             24,058 &                 17,955 &             24,058 &                 17,955 &             24,058 &          

In [412]:
# Load the "simple" quantile regressions estimated separately for each tenure bin (1-3).
# Results are stored as tables[bin][quantile] for both samples.
path = './results/'
omits = ['sexo1#c.ttrend','sexo1#c.ttrend2',"covid#sexo1"]
tenure_bins = np.arange(1,4)
tables_cond_ten = {}
tables_cond_ten_0k = {}
Ns, r2s, Ns_0k, r2s_0k = {}, {}, {}, {}

for t in tenure_bins:
    # Sample with children.
    name = "sqtreg_3035_hemp_time_int_diff_simple_t{}".format(t)
    Ns[t], r2s[t] = file_split('{}.log'.format(name),path,startwords=['q25','q50','q75'],endword="_cons",
                               header="ten_y | Coefficient std t p_stat lower_95 upper_95",Noobs=True)
    tables_cond_ten[t] = {
        q: read_table(path+'{}_{}.log'.format(name,part),
                      keyword='ten_y',rescale=1,ncols=6,start=2,omit_list=omits,index_to_int=False)
        for part, q in enumerate([25,50,75], start=1)
    }

    # Sample without children.
    name = "sqtreg_3035_hemp_time_int_diff_simple_t{}_0k".format(t)
    Ns_0k[t], r2s_0k[t] = file_split('{}.log'.format(name),path,startwords=['q25','q50','q75'],endword="_cons",
                                     header="ten_y | Coefficient std t p_stat lower_95 upper_95",Noobs=True)
    tables_cond_ten_0k[t] = {
        q: read_table(path+'{}_{}.log'.format(name,part),
                      keyword='ten_y',rescale=1,ncols=6,start=2,omit_list=omits,index_to_int=False)
        for part, q in enumerate([25,50,75], start=1)
    }

In [413]:
# Stata coefficient names as they appear in the parsed tables ...
old_names_var = ['ttrend', 'ttrend2', 'sexo1#c.ttrend_1',
       'sexo1#c.ttrend2_1', 'covid_1', 'covid_2', '1.sexo1', 'covid#sexo1_1',
       'covid#sexo1_2', 'part_time#sexo1_0', 'part_time#sexo1_1',
       'college#sexo1_0', 'college#sexo1_1', 'less_hs#sexo1_0',
       'less_hs#sexo1_1', 'other_se#sexo1_0', 'other_se#sexo1_1',
       'other_college#sexo1_0', 'other_college#sexo1_1',
        'other_less_hs#sexo1_0', 'other_less_hs#sexo1_1','_cons']
# ... their LaTeX labels, in the same order (no bare "_": the table is printed with escape=False) ...
new_names_var = [r'$t$',r'$t^{2}$',r'$t$ x female',r'$t^2$ x female',
                     r'$\delta_{2020}$',r'$\delta_{2021}$','female',r'$\delta_{2020}$ x female',
                     r'$\delta_{2021}$ x female','part-time','part-time x female',
       'college', 'college x female', 'less than HS',
       'less than HS x female', 'partner is SE', 'partner is SE x female',
       'partner college', 'partner college x female',
       'partner less than HS', 'partner less than HS x female',r'$\beta_0$']
# ... and the order in which the labels are reported.
reorder_cols = [r'$\beta_0$','female',r'$t$',r'$t^{2}$',r'$t$ x female',r'$t^2$ x female',
                r'$\delta_{2020}$',r'$\delta_{2021}$',r'$\delta_{2020}$ x female',r'$\delta_{2021}$ x female',
                'part-time','part-time x female','college', 'college x female', 'less than HS',
                'less than HS x female', 'partner is SE', 'partner is SE x female','partner college', 'partner college x female',
       'partner less than HS', 'partner less than HS x female']

def rename_results(df,old_names=old_names_var,new_names=new_names_var,order=reorder_cols):
    """Relabel and reorder the columns of a results table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one column per coefficient; it is left unmodified.
    old_names, new_names : sequence of str
        Parallel sequences giving the current and the replacement column labels.
    order : sequence of str
        Column labels of the result, in output order. Labels absent from the
        renamed table produce all-NaN columns; columns not listed are dropped.

    Returns
    -------
    pandas.DataFrame
        A new frame with renamed, reordered columns.
    """
    renamed = df.rename(columns=dict(zip(old_names,new_names)))
    return renamed.reindex(columns=order)


In [415]:
# Print one LaTeX table of the COVID-year effects per tenure bin.
res_names = ['Parents, 25th pctl','No children, 25th pctl','Parents, 50th pctl',
            'No children, 50th pctl','Parents, 75th pctl','No Children, 75th pctl']
for ten_level in range(1,4):
    # Interleave the two samples by quantile, matching the order of res_names.
    # Each entry has statistics as rows and labelled coefficients as columns.
    all_results = []
    for q in [25,50,75]:
        all_results.append(rename_results(tables_cond_ten[ten_level][q].T))
        all_results.append(rename_results(tables_cond_ten_0k[ten_level][q].T))

    table = pd.DataFrame(pass_regression_to_latex_column(all_results[0].T),columns=[res_names[0],])
    for reg, res in zip(res_names[1:], all_results[1:]):
        table[reg] = pass_regression_to_latex_column(res.T).values

    # Wrap each coefficient in math mode and add significance stars:
    # *** p <= 0.01, ** p <= 0.05, * p <= 0.1 (a NaN p-value gets none).
    for reg, res in zip(res_names, all_results):
        p_values = res.loc['p_stat']
        for var in reorder_cols:
            p = p_values[var]
            if p <= 0.01:
                template = r"${}^{{***}}$"
            elif p <= 0.05:
                template = r"${}^{{**}}$"
            elif p <= 0.1:
                template = r"${}^*$"
            else:
                template = r"${}$"
            # Single .loc call: chained `table[reg].loc[var] = ...` may write to a copy.
            table.loc[var, reg] = template.format(table.loc[var, reg])

    N_row = pd.DataFrame(np.tile([Ns[ten_level], Ns_0k[ten_level]],3), index=res_names, columns=['N',]).T
    R2_row = pd.DataFrame(np.vstack((r2s[ten_level],r2s_0k[ten_level])).T.ravel(), index=res_names, columns=[r'Pseudo $R^2$',]).T
    table = pd.concat([table,N_row,R2_row])

    # Only the COVID dummies, their interactions and the fit statistics are reported.
    print(table.loc[[r'$\delta_{2020}$',r'$\delta_{2021}$',r'$\delta_{2020}$ x female',
                         r'$\delta_{2021}$ x female','N',r'Pseudo $R^2$']].to_latex(escape=False))

\begin{tabular}{lllllll}
\toprule
{} & Parents, 25th pctl & No children, 25th pctl & Parents, 50th pctl & No children, 50th pctl & Parents, 75th pctl & No Children, 75th pctl \\
\midrule
$\delta_{2020}$          &          $0.224^*$ &                $0.087$ &            $0.240$ &               $-0.016$ &            $0.046$ &         $-2.362^{***}$ \\
$\delta_{2021}$          &            $0.189$ &               $-0.155$ &            $0.669$ &                $0.027$ &            $0.386$ &          $-1.697^{**}$ \\
$\delta_{2020}$ x female &            $0.009$ &               $-0.026$ &            $0.031$ &               $-0.415$ &      $-1.799^{**}$ &                $0.283$ \\
$\delta_{2021}$ x female &            $0.289$ &          $0.499^{***}$ &            $0.921$ &                $0.532$ &           $-0.064$ &                $0.923$ \\
N                        &             24,058 &                 17,955 &             24,058 &                 17,955 &             24,058 &          

In [342]:
# Show only the COVID-year dummies and their interactions with the female indicator.
# Raw strings throughout: "\d" is not a valid escape sequence in a normal literal.
table.loc[[r'$\delta_{2020}$',r'$\delta_{2021}$',r'$\delta_{2020}$ x female',
                         r'$\delta_{2021}$ x female']]

Unnamed: 0,"Parents, 25th pctl","No children, 25th pctl","Parents, 50th pctl","No children, 50th pctl","Parents, 75th pctl","No Children, 75th pctl"
$\delta_{2020}$,$0.935^{***}$,$0.935^{***}$,$0.338$,$0.338$,$-0.388^*$,$-0.388^*$
$\delta_{2021}$,$1.199^{***}$,$1.199^{***}$,$0.704^{**}$,$0.704^{**}$,$-0.532^{**}$,$-0.532^{**}$
$\delta_{2020}$ x female,$-0.642^{**}$,$-0.642^{**}$,$-1.381^{***}$,$-1.381^{***}$,$0.025$,$0.025$
$\delta_{2021}$ x female,$-0.823^{***}$,$-0.823^{***}$,$-2.490^{***}$,$-2.490^{***}$,$-0.969^{***}$,$-0.969^{***}$
