Housekeeping

In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.optimize import fsolve
import itertools
from astropy.table import Table

from f_detrend import detrend_fuction
pd.set_option('display.max_rows', None)

sn.set_style('whitegrid')
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Auxiliary functions

In [5]:
def read_table(filename,keyword='ciclo',rescale=1,ncols=5,start=2,end_row='Total',
               endword=None,omit_list=['t20_21'],index_to_int=True,debug=False):
    """Parse a whitespace-delimited ``label | v1 v2 ...`` table from a text log.

    The file is scanned line by line. Nothing is collected until a line whose
    first token equals ``keyword`` is met; that line provides the column names.
    Each later multi-token line adds one row, until a line whose first token
    equals ``end_row`` stops the scan (that line is not read as data).

    Row labels
        * A line of the form ``name |`` (the ``|`` being the 2nd or 3rd and
          last token) opens a group: following rows are labelled
          ``name_<tokens before the "|">``.
        * A single-token line closes the group; rows are then labelled with
          their first token again.

    Parameters
    ----------
    filename : str
        Path of the log to read.
    keyword : str
        First token of the header line.
    rescale : number
        Every parsed value is divided by this.
    ncols : int
        Number of value columns per row (superseded when ``endword`` is set).
    start : int
        Token position of the first value on a ``label | values`` line. When
        the ``|`` sits at token position 2 or 3 instead of 1, the value window
        is shifted right by one or two tokens.
    end_row : str
        First token of the line that ends the table.
    endword : str or None
        If given, header tokens from ``start`` up to (excluding) ``endword``
        are collected, the last collected token is dropped, and the remaining
        count replaces ``ncols``.
    omit_list : list
        Accepted for backward compatibility; the parser does not use it.
    index_to_int : bool
        Convert the row labels to integers.
    debug : bool
        Print the tokens of every line plus the final shape and column names.

    Returns
    -------
    pandas.DataFrame
        One row per parsed line. A header that is followed by no data rows
        yields an empty frame.

    Raises
    ------
    ValueError
        If no header line is found, or a row does not hold the expected
        number of values.
    """
    first_col = start
    last_col = ncols + start
    header_seen = False
    colnames = None
    row_names = []
    rows = []
    in_interaction = False
    var_name = None

    # The context manager closes the log even when a line fails to parse.
    with open(filename, 'r') as handle:
        for line in handle:
            words = line.split()
            if debug:
                print(words)

            if len(words) < 2:
                # A single-token line ends the current interaction group;
                # blank lines are ignored.
                if len(words) == 1:
                    in_interaction = False
                continue

            if words[0] == keyword:
                header_seen = True
                if endword is None:
                    colnames = words[first_col:last_col]
                else:
                    collected = []
                    for token in words[first_col:]:
                        if token == endword:
                            break
                        collected.append(token)
                    colnames = collected[:-1]
                    last_col = len(collected) + first_col - 1
                # A header restarts the table, so labels and values stay paired.
                row_names = []
                rows = []
                continue

            if words[0] == end_row:
                break
            if not header_seen:
                continue

            if (len(words) == 2 and words[1] == "|") or (len(words) == 3 and words[2] == "|"):
                # "name |" with no values: opens an interaction group.
                var_name = words[0]
                in_interaction = True
                continue

            if in_interaction:
                pipe_at = words.index("|")
                row_names.append("_".join([var_name] + words[:pipe_at]))
            else:
                row_names.append(words[0])

            # Length guards avoid an IndexError on short lines.
            if len(words) > 2 and words[2] == '|':
                shift = 1
            elif len(words) > 3 and words[3] == '|':
                shift = 2
            else:
                shift = 0

            tokens = [w.replace(",", "") for w in words[first_col + shift:last_col + shift]]
            values = np.array(tokens, dtype=float) / rescale
            if values.size != last_col - first_col:
                raise ValueError("row {!r} in {} holds {} values, expected {}".format(
                    row_names[-1], filename, values.size, last_col - first_col))
            rows.append(values)

    if not header_seen:
        raise ValueError("no header line starting with {!r} found in {}".format(keyword, filename))

    # Stack once at the end instead of re-allocating the array for every row.
    table_raw = np.vstack(rows) if rows else np.empty((0, last_col - first_col))
    if debug:
        print(table_raw.shape)
        print(colnames)
    if index_to_int:
        index = np.array(row_names, dtype=int)
    else:
        index = np.array(row_names)
    return pd.DataFrame(table_raw, columns=colnames, index=index)
    
    
def read_descriptive_stats(filename,keyword_data,keyword_col,ncols,rownames,start=2,debug=False):
    """Collect every ``keyword_data`` line of a text log into one DataFrame.

    Parameters
    ----------
    filename : str
        Path of the log to read.
    keyword_data : str
        First token of the lines that hold values; each such line becomes one
        row, in file order.
    keyword_col : str
        First token of the line that holds the column names. If several lines
        match, the last one wins.
    ncols : int
        Number of values / column names taken from a line.
    rownames : sequence
        Index of the returned frame; must have one entry per data line.
    start : int
        Token position of the first value / column name on a line.
    debug : bool
        Print the tokens of every line.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If no ``keyword_col`` line exists or a data line does not hold
        ``ncols`` values.
    """
    first_col = start
    last_col = ncols + start
    colnames = None
    rows = []

    # The context manager closes the log even when a line fails to parse.
    with open(filename, 'r') as handle:
        for line in handle:
            words = line.split()
            if debug:
                print(words)
            if len(words) < 2:
                continue
            if words[0] == keyword_data:
                tokens = [w.replace(",", "") for w in words[first_col:last_col]]
                values = np.array(tokens, dtype=float)
                if values.size != ncols:
                    raise ValueError("data line in {} holds {} values, expected {}".format(
                        filename, values.size, ncols))
                rows.append(values)
            elif words[0] == keyword_col:
                colnames = words[first_col:last_col]

    if colnames is None:
        raise ValueError("no line starting with {!r} found in {}".format(keyword_col, filename))

    # Stack once at the end instead of re-allocating the array for every row.
    table_raw = np.vstack(rows) if rows else np.empty((0, ncols))
    return pd.DataFrame(table_raw, columns=colnames, index=rownames)
 
    
def file_split(filename,path,startwords=['Summary',],endword="Total",header=None,Noobs=False):
    """Cut a text log into numbered pieces ``<stem>_1.log``, ``<stem>_2.log``, ...

    A piece starts at a line whose first token is in ``startwords`` and ends
    with the next line whose first token equals ``endword`` (both lines are
    included). Pieces are written next to the input, into ``path``.

    Parameters
    ----------
    filename : str
        Name of the log inside ``path``. The text before its first ``.`` is
        the stem of the output names.
    path : str
        Directory prefix, concatenated as-is (it must end with a separator).
    startwords : list of str
        First tokens that open a piece.
    endword : str
        First token that closes a piece.
    header : str or None
        Text written at the very start of every piece. No newline is added,
        so the opening line of the piece continues on the header's line.
    Noobs : bool
        When ``True`` return ``(noobs, R2s)``.

    Returns
    -------
    tuple or None
        ``(noobs, R2s)`` if ``Noobs`` is ``True``: ``noobs`` is the last token
        of the last line containing the token ``Number`` (``None`` if there is
        no such line) and ``R2s`` lists the last token of every line
        containing the token ``R2``. Otherwise ``None``.
    """
    # partition() keeps the whole name when it has no dot; find() returned -1
    # there and silently chopped the last character off the stem.
    parent_file = filename.partition('.')[0]
    part = 1
    noobs = None
    R2s = []
    piece = None  # output file of the piece currently being written, if any

    with open(path + filename, 'r') as source:
        try:
            for line in source:
                words = line.split()
                if 'Number' in words:
                    noobs = words[-1]
                if 'R2' in words:
                    R2s.append(words[-1])

                if words and words[0] in startwords:
                    if piece is not None:
                        # A start word inside an open piece re-opens the same
                        # numbered file; release the old handle first.
                        piece.close()
                    piece = open(path + parent_file + '_{}.log'.format(part), "w")
                    if header is not None:
                        piece.write(header)
                elif words and words[0] == endword and piece is not None:
                    piece.write(line)
                    piece.close()
                    piece = None
                    part += 1

                if piece is not None:
                    piece.write(line)
        finally:
            # The log may end before the closing line of the last piece.
            if piece is not None:
                piece.close()

    if Noobs == True:  # equality (not truthiness) kept from the original contract
        return (noobs, R2s)
            
def normalise_table(table):
    """Turn every column of ``table`` into a share of its row total.

    A ``'Total'`` column holding the row sums is added, then each column that
    existed on entry is divided by it. ``table`` is modified in place and the
    same object is returned.
    """
    original_columns = list(table.columns)
    table['Total'] = table.sum(axis=1).copy()
    for column in original_columns:
        table[column] = table[column].div(table['Total'])
    return table

In [3]:
# Quarter labels "1987Q2", "1987Q3", ..., "2021Q4" (1987Q1 is dropped).
dates = []
for year, quarter in itertools.product(range(1987, 2022), range(1, 5)):
    dates.append("{}Q{}".format(year, quarter))
del dates[0]

# t0 is the integer code paired with the first label in the lookups below
# (presumably the survey "ciclo" number - confirm); t05 is not used in this cell.
t0 = 59
t05 = 71
T = len(dates)

# code -> label, label -> 0-based position, label -> code
date_dict = {t0 + position: label for position, label in enumerate(dates)}
date_dict_reverse = {label: position for position, label in enumerate(dates)}
date_dict_reverse_ciclo = {label: t0 + position for position, label in enumerate(dates)}

# Calendar years 2006..2021 as strings.
time_periods_thin = []
for y in range(2006, 2022):
    time_periods_thin += [str(y)]

# Period dummy names and the labels shown for them.
time_periods = ["t08_11", "t11_14", "t14_17", "t17_20", "t20","t21"]
period_labels = ['2008-2011','2011-2014','2014-2017','2017-2020','2020','2021']
    
def rename_interaction(tab,int_subs,n_rows=7):
    """Relabel index entries in the leading rows of ``tab`` only.

    Parameters
    ----------
    tab : pandas.DataFrame
        Table whose index is to be partly relabelled; it is not modified.
    int_subs : dict or callable
        Mapping passed to ``DataFrame.rename(index=...)``.
    n_rows : int
        Number of leading rows the mapping is applied to. Rows after that keep
        their labels even when they match a key of ``int_subs``. Defaults to
        7, the previously hard-coded value.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows in the same order.
    """
    head = tab.iloc[:n_rows].rename(index=int_subs)
    tail = tab.iloc[n_rows:].copy()
    return pd.concat((head, tail))

In [7]:
path = './tables/'
omits = ['sexo1#c.ttrend','sexo1#c.ttrend2',"covid#sexo1","sexo1#occgroup","covid#sexo1#occgroup","sexo1#c.urate","occgroup","covid#occgroup"]
name = "sqtreg_table_advanced_simple_age3040__10_agefix"

# Cut the log into one file per q25 / q50 / q75 section (each section runs up
# to its "_cons" line) and keep the reported sample size and R2 values.
N, r2s = file_split(
    '{}.log'.format(name), path,
    startwords=['q25','q50','q75'], endword="_cons",
    header="ten_y | Coefficient std t p_stat lower_95 upper_95", Noobs=True,
)

# Parse the three pieces, in the order they were written.
tab_moms_25, tab_moms_50, tab_moms_75 = (
    read_table(path+piece.format(name),
               keyword='ten_y', rescale=1, ncols=6, start=2,
               omit_list=omits, index_to_int=False)
    for piece in ('{}_1.log', '{}_2.log', '{}_3.log')
)


In [27]:
# Display labels for the regression rows, in the order the rows appear in the
# parsed tables (rename_results pairs them up by position).
# Raw strings keep the LaTeX backslashes literal; '\d' in a normal string is an
# invalid escape sequence.
new_names_var = (
    ['$t$', '$t$ x female', 'unemployment rate x male',
     'unemployment rate x female', 'female', 'age 35-40', 'age 35-40 x female']
    # occupation-group main effects (group 1 has no row of its own)
    + ['occ group {}'.format(group) for group in range(2, 6)]
    # year dummy x occupation group
    + [r'$\delta_{{{}}}$ x occ group {}'.format(year, group)
       for year in (2020, 2021, 2022) for group in range(1, 6)]
    # occupation group x female
    + ['occ group {} x female'.format(group) for group in range(2, 6)]
    # year dummy x occupation group x female
    + [r'$\delta_{{{}}}$ x occ group {} x female'.format(year, group)
       for year in (2020, 2021, 2022) for group in range(1, 6)]
    + [r'$\beta_0$']
)

# reorder_cols = [r'$\beta_0$','female',r'partner tenure (years)',r'$t$',r'$t^{2}$',r'$t$ x female',r'$t^2$ x female',
#                      r'$\delta_{2020}$','$\delta_{2021}$',r'$\delta_{2020}$ x female',
#                      r'$\delta_{2021}$ x female']

def rename_results(df,old_names,new_names,order):
    """Rename the columns of ``df`` and return them in the requested order.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to relabel. Its columns are renamed in place.
    old_names, new_names : sequences
        Paired by position: ``old_names[k]`` becomes ``new_names[k]``.
    order : sequence
        Column labels (after renaming) of the returned frame, in order.
        Labels that do not exist give all-NaN columns, as ``reindex`` does.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the columns listed in ``order``.
    """
    # ``inplace`` takes a boolean; the string 'True' only worked because any
    # non-empty string is truthy.
    df.rename(columns=dict(zip(old_names, new_names)), inplace=True)
    return df.reindex(columns=order)

def pass_regression_to_latex_column(df):
    """Format one regression as a single column of display strings.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per regressor, with at least the columns ``"Coefficient"`` and
        ``"std"``.

    Returns
    -------
    pandas.Series
        Unnamed series of strings with two entries per regressor, in the row
        order of ``df``: the coefficient with three decimals, labelled with
        the regressor's row label, followed by the standard error in
        parentheses with four decimals, labelled ``""``.
    """
    # Build the interleaved column directly. The earlier version relabelled the
    # rows with integer codes, sorted and mapped the codes back, which mixed up
    # rows when labels were duplicated or were themselves integers.
    labels = []
    cells = []
    for label, coef, std in zip(df.index, df["Coefficient"], df["std"]):
        labels.extend((label, ""))
        cells.append("{:,.3f}".format(float(coef)))
        cells.append("({:,.4f})".format(float(std)))

    # Leave the series unnamed: callers wrap it in pd.DataFrame(..., columns=[...]).
    return pd.Series(cells, index=labels, dtype=object)

In [28]:
res_names = ['25th pctl','50th pctl','75th pctl']

all_results = [tab_moms_25,tab_moms_50,tab_moms_75]
reorder_cols = new_names_var

# Relabel the regressors. Afterwards every entry of all_results has the
# statistics (Coefficient, std, p_stat, ...) as rows and the regressors as columns.
for i in range(len(all_results)):
    all_results[i] = rename_results(all_results[i].T,all_results[i].index,new_names_var,new_names_var)

# One column of "coefficient / (std)" strings per quantile.
table = pd.DataFrame(pass_regression_to_latex_column(all_results[0].T),columns=[res_names[0],])
for ir,res in enumerate(all_results[1:]):
    col = pass_regression_to_latex_column(res.T)
    table[res_names[ir+1]] = col.values

# Significance stars: *** p < 0.01, ** p < 0.05, * p < 0.1.
# The thresholds cascade, so p-values exactly equal to 0.05 or 0.01 (common
# when the log reports three decimals) are classified too. Cells are written
# with a single .loc call, because chained assignment is not guaranteed to
# reach `table`.
for i,reg in enumerate(res_names):
    p_values = all_results[i].loc['p_stat']
    for var in reorder_cols:
        p_value = p_values[var]
        if p_value < 0.01:
            stars = "^{***}"
        elif p_value < 0.05:
            stars = "^{**}"
        elif p_value < 0.1:
            stars = "^*"
        else:
            stars = ""
        table.loc[var, reg] = "${}{}$".format(table.loc[var, reg], stars)

# Sample size and pseudo R^2 go in as the last two rows.
N_row = pd.DataFrame([N]*3, index=res_names, columns=['N',]).T
R2_row = pd.DataFrame(r2s, index=res_names, columns=[r'Pseudo $R^2$',]).T
table = pd.concat([table,N_row,R2_row])

print(table)
table.to_latex('./tables/ssqtreg_figure5_allwomen_age3040_10_agefix.tex',escape=False)

                                             25th pctl       50th pctl  \
$t$                                     $-0.011^{***}$  $-0.007^{***}$   
                                              (0.0007)        (0.0014)   
$t$ x female                             $0.012^{***}$   $0.038^{***}$   
                                              (0.0009)        (0.0019)   
unemployment rate x male                 $0.034^{***}$   $0.046^{***}$   
                                              (0.0029)        (0.0040)   
unemployment rate x female               $0.055^{***}$   $0.019^{***}$   
                                              (0.0020)        (0.0034)   
female                                        $-0.063$    $0.191^{**}$   
                                              (0.0884)        (0.0829)   
age 35-40                                $0.781^{***}$   $2.353^{***}$   
                                              (0.0306)        (0.0405)   
age 35-40 x female                    

  table.to_latex('./tables/qtregs_occ_3040_full.tex',escape=False)


In [30]:
# Pick the "delta_year x occ group k x female" rows of `table`. The standard
# error of a coefficient sits in the row directly below it, hence i + 1.
IoI_20, IoI_21, IoI_22 = [], [], []
std_idx_20, std_idx_21, std_idx_22 = [], [], []
# Raw strings: '\d' is not a valid escape sequence in a normal string literal.
year_buckets = (
    (r"$\delta_{2020", IoI_20, std_idx_20),
    (r"$\delta_{2021", IoI_21, std_idx_21),
    (r"$\delta_{2022", IoI_22, std_idx_22),
)
for i, idx in enumerate(table.index):
    for prefix, bucket_labels, bucket_std_rows in year_buckets:
        if str(idx)[:13] == prefix:
            if idx.split(' x ')[-1] == 'female':
                bucket_labels.append(idx)
                bucket_std_rows.append(i + 1)
            break

table_dummies_f_20 = table.loc[IoI_20, :].copy()
table_dummies_f_21 = table.loc[IoI_21, :].copy()
table_dummies_f_22 = table.loc[IoI_22, :].copy()

# The std rows carry an empty label in `table`; give them the label of the
# coefficient they belong to.
std_blocks = []
for bucket_labels, bucket_std_rows in ((IoI_20, std_idx_20), (IoI_21, std_idx_21), (IoI_22, std_idx_22)):
    std_block = table.iloc[bucket_std_rows, :].copy()
    std_block['coefs'] = bucket_labels
    std_blocks.append(std_block.set_index('coefs'))
table_dummies_std_f_20, table_dummies_std_f_21, table_dummies_std_f_22 = std_blocks

all_data = [table_dummies_f_20.T,table_dummies_f_21.T,table_dummies_f_22.T]
all_stds = [table_dummies_std_f_20.T,table_dummies_std_f_21.T,table_dummies_std_f_22.T]

# Per year: rows "<group label>", "<group label>_std", ... and one column per
# quantile. label[18:] removes the leading "$\delta_{year}$ x " (18 characters).
One_table = {}
for i in range(3):
    coefs, stds = all_data[i], all_stds[i]
    pieces = []
    for label in coefs.columns:
        short = label[18:]
        pieces.append(coefs[label].rename(short))
        pieces.append(stds[label].rename(short + '_std'))
    One_table[2020+i] = pd.concat(pieces, axis=1).T.copy()

# Per quantile: one column per year, std rows shown with an empty label and
# missing cells shown as '--'.
per_quantile = []
for pctl in ('25th pctl', '50th pctl', '75th pctl'):
    by_year = pd.concat([One_table[yr][pctl].rename(str(yr)) for yr in (2020, 2021, 2022)], axis=1)
    by_year = by_year.rename(index={std_label: "" for std_label in by_year.index[1::2]})
    per_quantile.append(by_year.replace({np.nan: '--'}))
Final_table, Final_table2, Final_table3 = per_quantile

Final_final_table = pd.concat((Final_table,Final_table2,Final_table3),axis=1,keys=['25th percentile','50th percentile','75th percentile'])
# .iloc[0]: integer keys on a label-indexed Series are deprecated as positions.
Final_final_table.loc["N",:] = table.loc['N'].iloc[0]
print(Final_final_table)
# Final_final_table.to_latex('./tables/qtregs_ind_3040_text.tex',escape=False)

                     25th percentile                                  \
                                2020            2021            2022   
occ group 1 x female        $-0.182$        $-0.260$  $-0.935^{***}$   
                            (0.2070)        (0.2759)        (0.1739)   
occ group 2 x female      $-0.418^*$  $-0.708^{***}$  $-1.096^{***}$   
                            (0.2360)        (0.2595)        (0.2716)   
occ group 3 x female  $-0.615^{***}$  $-0.783^{***}$         $0.314$   
                            (0.2279)        (0.2414)        (0.2723)   
occ group 4 x female        $-0.151$        $-0.298$  $-0.840^{***}$   
                            (0.2250)        (0.2295)        (0.2013)   
occ group 5 x female  $-0.466^{***}$  $-0.775^{***}$  $-0.820^{***}$   
                            (0.0811)        (0.1025)        (0.1952)   
N                            409,631         409,631         409,631   

                     50th percentile                           

In [31]:
# Pick the "delta_year x occ group k" rows of `table` that do NOT end in
# "x female". The standard error of a coefficient sits in the row directly
# below it, hence i + 1.
IoI_20, IoI_21, IoI_22 = [], [], []
std_idx_20, std_idx_21, std_idx_22 = [], [], []
# Raw strings: '\d' is not a valid escape sequence in a normal string literal.
year_buckets = (
    (r"$\delta_{2020", IoI_20, std_idx_20),
    (r"$\delta_{2021", IoI_21, std_idx_21),
    (r"$\delta_{2022", IoI_22, std_idx_22),
)
for i, idx in enumerate(table.index):
    for prefix, bucket_labels, bucket_std_rows in year_buckets:
        if str(idx)[:13] == prefix:
            if idx.split(' x ')[-1] != 'female':
                bucket_labels.append(idx)
                bucket_std_rows.append(i + 1)
            break

table_dummies_f_20 = table.loc[IoI_20, :].copy()
table_dummies_f_21 = table.loc[IoI_21, :].copy()
table_dummies_f_22 = table.loc[IoI_22, :].copy()

# The std rows carry an empty label in `table`; give them the label of the
# coefficient they belong to.
std_blocks = []
for bucket_labels, bucket_std_rows in ((IoI_20, std_idx_20), (IoI_21, std_idx_21), (IoI_22, std_idx_22)):
    std_block = table.iloc[bucket_std_rows, :].copy()
    std_block['coefs'] = bucket_labels
    std_blocks.append(std_block.set_index('coefs'))
table_dummies_std_f_20, table_dummies_std_f_21, table_dummies_std_f_22 = std_blocks

all_data = [table_dummies_f_20.T,table_dummies_f_21.T,table_dummies_f_22.T]
all_stds = [table_dummies_std_f_20.T,table_dummies_std_f_21.T,table_dummies_std_f_22.T]

# Per year: rows "<group label>", "<group label>_std", ... and one column per
# quantile. label[18:] removes the leading "$\delta_{year}$ x " (18 characters).
One_table = {}
for i in range(3):
    coefs, stds = all_data[i], all_stds[i]
    pieces = []
    for label in coefs.columns:
        short = label[18:]
        pieces.append(coefs[label].rename(short))
        pieces.append(stds[label].rename(short + '_std'))
    One_table[2020+i] = pd.concat(pieces, axis=1).T.copy()

# Per quantile: one column per year, std rows shown with an empty label and
# missing cells shown as '--'.
per_quantile = []
for pctl in ('25th pctl', '50th pctl', '75th pctl'):
    by_year = pd.concat([One_table[yr][pctl].rename(str(yr)) for yr in (2020, 2021, 2022)], axis=1)
    by_year = by_year.rename(index={std_label: "" for std_label in by_year.index[1::2]})
    per_quantile.append(by_year.replace({np.nan: '--'}))
Final_table, Final_table2, Final_table3 = per_quantile

Final_final_table_b = pd.concat((Final_table,Final_table2,Final_table3),axis=1,keys=['25th percentile','50th percentile','75th percentile'])

# Stack this block on top of Final_final_table, which the female-interaction
# cell must have defined beforehand.
Final_final_final_table = pd.concat((Final_final_table_b,Final_final_table))
Final_final_final_table.to_latex('./tables/sqtreg_advanced_age3040_10_agefix_format.tex',escape=False,column_format='l|ccc|ccc|ccc|')
print(Final_final_final_table)

                     25th percentile                                  \
                                2020            2021            2022   
occ group 1           $-0.741^{***}$  $-0.809^{***}$        $-0.050$   
                            (0.1535)        (0.1630)        (0.2040)   
occ group 2                  $0.000$         $0.236$       $0.424^*$   
                            (0.1239)        (0.2035)        (0.2321)   
occ group 3            $0.825^{***}$   $0.807^{***}$        $-0.022$   
                            (0.2226)        (0.2196)        (0.2381)   
occ group 4            $0.436^{***}$   $0.663^{***}$   $0.603^{***}$   
                            (0.0932)        (0.0943)        (0.1329)   
occ group 5            $0.472^{***}$   $0.695^{***}$   $0.777^{***}$   
                            (0.0780)        (0.0829)        (0.1763)   
occ group 1 x female        $-0.182$        $-0.260$  $-0.935^{***}$   
                            (0.2070)        (0.2759)        (0.1

  Final_final_final_table.to_latex('./tables/sqtreg_advanced_age3040_10_agefix_format.tex',escape=False,column_format='l|ccc|ccc|ccc|')


In [34]:
# Everything in `table` that is not a year-dummy row: the remaining
# coefficients (each with its std row directly below, hence i + 1) plus the
# two summary rows.
summary_rows = ('N', "Pseudo $R^2$")
IoI_rest = []
coef_labels = []
std_idx_rest = []
for i, idx in enumerate(table.index):
    row_label = str(idx)
    # Raw string: '\d' is not a valid escape sequence in a normal literal.
    if row_label[:12] == r"$\delta_{202" or row_label == '':
        continue
    IoI_rest.append(idx)
    if row_label not in summary_rows:
        # Track coefficient labels explicitly instead of assuming the summary
        # rows are the last two entries of IoI_rest.
        coef_labels.append(idx)
        std_idx_rest.append(i + 1)

table_rest_f_20 = table.loc[IoI_rest, :].copy().T

# The std rows carry an empty label in `table`; give them the label of the
# coefficient they belong to.
table_rest_std_f_20 = table.iloc[std_idx_rest, :].copy()
table_rest_std_f_20['coefs'] = coef_labels
table_rest_std_f_20 = table_rest_std_f_20.set_index('coefs').T

# Interleave coefficient and std columns, then flip back to one row per entry.
cols = coef_labels
pieces = []
for label in cols:
    pieces.append(table_rest_f_20[label].rename(label))
    pieces.append(table_rest_std_f_20[label].rename(label + '_std'))
New_table = pd.concat(pieces, axis=1)

Final_table = New_table.T.copy()
# std rows are shown with an empty label; missing cells as '--'.
Final_table = Final_table.rename(index={std_label: "" for std_label in Final_table.index[1::2]})
Final_table = Final_table.replace({np.nan: '--'})
Final_table.loc['N',:] = table.loc['N',:]
Final_table.loc["Pseudo $R^2$",:] = table.loc["Pseudo $R^2$",:]
Final_table.to_latex('./tables/sqtreg_advanced_age3040_10_agefix_part1.tex',escape=False,column_format='l|ccc|')
print(Final_table)

                                 25th pctl       50th pctl       75th pctl
$t$                         $-0.011^{***}$  $-0.007^{***}$   $-0.004^{**}$
                                  (0.0007)        (0.0014)        (0.0019)
$t$ x female                 $0.012^{***}$   $0.038^{***}$   $0.026^{***}$
                                  (0.0009)        (0.0019)        (0.0023)
unemployment rate x male     $0.034^{***}$   $0.046^{***}$         $0.004$
                                  (0.0029)        (0.0040)        (0.0038)
unemployment rate x female   $0.055^{***}$   $0.019^{***}$  $-0.028^{***}$
                                  (0.0020)        (0.0034)        (0.0030)
female                            $-0.063$    $0.191^{**}$   $0.538^{***}$
                                  (0.0884)        (0.0829)        (0.0763)
age 35-40                    $0.781^{***}$   $2.353^{***}$   $3.481^{***}$
                                  (0.0306)        (0.0405)        (0.0526)
age 35-40 x female       

  Final_table.to_latex('./tables/sqtreg_advanced_age3040_10_agefix_part1.tex',escape=False,column_format='l|ccc|')


## Robustness

Different parent and child ages.

Reorganize variables

In [90]:
# No age fixed effects necessary: drop the two age terms by label rather than
# by position, so the list stays correct if new_names_var is reordered.
new_names_var2 = new_names_var.copy()
for age_term in ('age 35-40', 'age 35-40 x female'):
    new_names_var2.remove(age_term)  # raises ValueError if the label is missing

# For splitting the tables: every label takes two table rows (coefficient and
# std), so row offsets are label positions times two.
cut1 = (new_names_var2.index('occ group 5')*2)+2
cut2 = (new_names_var2.index('occ group 2 x female')*2)
cut3 = (new_names_var2.index('occ group 5 x female')*2)+2
cut4 = len(new_names_var2)*2

In [93]:
path = './tables/'
omits = ['sexo1#c.ttrend','sexo1#c.ttrend2',"covid#sexo1","sexo1#occgroup","covid#sexo1#occgroup","sexo1#c.urate","occgroup","covid#occgroup"]


All_results= []

all_names = ['5','10','15']
noobs, r2ss = [], []
formal_names = [ "Children younger than 5","Children younger than 10","Children younger than 15"]

# Split and parse the log of every sample; keep each sample's size and R2 values.
for age in all_names:
    name = "sqtreg_table_advanced_simple_age3035__{}".format(age)
    N, r2s = file_split('{}.log'.format(name),path,startwords=['q25','q50','q75'],endword="_cons",
               header="ten_y | Coefficient std t p_stat lower_95 upper_95",Noobs=True)
    noobs.append(N)
    r2ss.append(r2s)

    tab_moms_25 = read_table(path+'{}_1.log'.format(name),
                             keyword='ten_y',rescale=1,ncols=6,start=2,omit_list=omits,index_to_int=False)
    tab_moms_50 = read_table(path+'{}_2.log'.format(name),
                             keyword='ten_y',rescale=1,ncols=6,start=2,omit_list=omits,index_to_int=False)
    tab_moms_75 = read_table(path+'{}_3.log'.format(name),
                             keyword='ten_y',rescale=1,ncols=6,start=2,omit_list=omits,index_to_int=False)

    All_results.append([tab_moms_25,tab_moms_50,tab_moms_75])


res_names = ['25th pctl','50th pctl','75th pctl']
reorder_cols = new_names_var2
all_tabs = []

for j in range(len(all_names)):
    all_results = All_results[j]
    for i in range(len(all_results)):
        all_results[i] = rename_results(all_results[i].T,all_results[i].index,new_names_var2,new_names_var2)
    table = pd.DataFrame(pass_regression_to_latex_column(all_results[0].T),columns=[res_names[0],])
    for ir,res in enumerate(all_results[1:]):
        col = pass_regression_to_latex_column(res.T)
        table[res_names[ir+1]] = col.values

    # Significance stars: *** p < 0.01, ** p < 0.05, * p < 0.1.
    # The thresholds cascade, so p-values exactly equal to 0.05 or 0.01 are
    # classified too. Cells are written with a single .loc call, because
    # chained assignment is not guaranteed to reach `table`.
    for i,reg in enumerate(res_names):
        p_values = all_results[i].loc['p_stat']
        for var in reorder_cols:
            p_value = p_values[var]
            if p_value < 0.01:
                stars = "^{***}"
            elif p_value < 0.05:
                stars = "^{**}"
            elif p_value < 0.1:
                stars = "^*"
            else:
                stars = ""
            table.loc[var, reg] = "${}{}$".format(table.loc[var, reg], stars)

    # Report the sample size and pseudo R^2 of THIS sample. The leftover
    # N / r2s of the loading loop belong to the last sample only.
    N_row = pd.DataFrame([noobs[j]]*3, index=res_names, columns=['N',]).T
    R2_row = pd.DataFrame(r2ss[j], index=res_names, columns=[r'Pseudo $R^2$',]).T
    table = pd.concat([table,N_row,R2_row])

    all_tabs.append(table)

# Side-by-side table, then split into two printable parts. Both parts end with
# the N and Pseudo R^2 rows; part 1 also carries the intercept.
Big_tab = pd.concat(all_tabs,axis=1,keys=(formal_names))
Big_tab_1 = pd.concat((Big_tab.iloc[:cut1,:],Big_tab.iloc[cut2:cut3,:],Big_tab.iloc[cut4-2:,:]))
Big_tab_2 = pd.concat((Big_tab.iloc[cut1:cut2,:],Big_tab.iloc[cut3:cut4-2,:],Big_tab.iloc[cut4:,:]))

Big_tab_1.to_latex('./tables/sqtreg_advanced_age3035_10_agefix_part1.tex',escape=False,column_format='l|ccc|ccc|ccc|')
Big_tab_2.to_latex('./tables/sqtreg_advanced_age3035_10_agefix_part2.tex',escape=False,column_format='l|ccc|ccc|ccc|')
print(Big_tab)

                                       Children younger than 5  \
                                                     25th pctl   
$t$                                             $-0.008^{***}$   
                                                      (0.0012)   
$t$ x female                                     $0.005^{***}$   
                                                      (0.0018)   
unemployment rate x male                         $0.034^{***}$   
                                                      (0.0031)   
unemployment rate x female                       $0.062^{***}$   
                                                      (0.0037)   
female                                           $-0.228^{**}$   
                                                      (0.1094)   
occ group 2                                     $-0.665^{***}$   
                                                      (0.0744)   
occ group 3                                     $-1.401^{***}$   
          

  Big_tab_1.to_latex('./tables/sqtreg_advanced_age3035_10_agefix_part1.tex',escape=False,column_format='l|ccc|ccc|ccc|')
  Big_tab_2.to_latex('./tables/sqtreg_advanced_age3035_10_agefix_part2.tex',escape=False,column_format='l|ccc|ccc|ccc|')


In [94]:
path = './tables/'
omits = ['sexo1#c.ttrend','sexo1#c.ttrend2',"covid#sexo1","sexo1#occgroup","covid#sexo1#occgroup","sexo1#c.urate","occgroup","covid#occgroup"]


All_results= []

all_names = ['5','10','15']
noobs, r2ss = [], []
formal_names = [ "Children younger than 5","Children younger than 10","Children younger than 15"]

# Split and parse the log of every sample; keep each sample's size and R2 values.
for age in all_names:
    name = "sqtreg_table_advanced_simple_age3540__{}".format(age)
    N, r2s = file_split('{}.log'.format(name),path,startwords=['q25','q50','q75'],endword="_cons",
               header="ten_y | Coefficient std t p_stat lower_95 upper_95",Noobs=True)
    noobs.append(N)
    r2ss.append(r2s)

    tab_moms_25 = read_table(path+'{}_1.log'.format(name),
                             keyword='ten_y',rescale=1,ncols=6,start=2,omit_list=omits,index_to_int=False)
    tab_moms_50 = read_table(path+'{}_2.log'.format(name),
                             keyword='ten_y',rescale=1,ncols=6,start=2,omit_list=omits,index_to_int=False)
    tab_moms_75 = read_table(path+'{}_3.log'.format(name),
                             keyword='ten_y',rescale=1,ncols=6,start=2,omit_list=omits,index_to_int=False)

    All_results.append([tab_moms_25,tab_moms_50,tab_moms_75])


res_names = ['25th pctl','50th pctl','75th pctl']
reorder_cols = new_names_var2
all_tabs = []

for j in range(len(all_names)):
    all_results = All_results[j]
    for i in range(len(all_results)):
        all_results[i] = rename_results(all_results[i].T,all_results[i].index,new_names_var2,new_names_var2)
    table = pd.DataFrame(pass_regression_to_latex_column(all_results[0].T),columns=[res_names[0],])
    for ir,res in enumerate(all_results[1:]):
        col = pass_regression_to_latex_column(res.T)
        table[res_names[ir+1]] = col.values

    # Significance stars: *** p < 0.01, ** p < 0.05, * p < 0.1.
    # The thresholds cascade, so p-values exactly equal to 0.05 or 0.01 are
    # classified too. Cells are written with a single .loc call, because
    # chained assignment is not guaranteed to reach `table`.
    for i,reg in enumerate(res_names):
        p_values = all_results[i].loc['p_stat']
        for var in reorder_cols:
            p_value = p_values[var]
            if p_value < 0.01:
                stars = "^{***}"
            elif p_value < 0.05:
                stars = "^{**}"
            elif p_value < 0.1:
                stars = "^*"
            else:
                stars = ""
            table.loc[var, reg] = "${}{}$".format(table.loc[var, reg], stars)

    # Report the sample size and pseudo R^2 of THIS sample. The leftover
    # N / r2s of the loading loop belong to the last sample only.
    N_row = pd.DataFrame([noobs[j]]*3, index=res_names, columns=['N',]).T
    R2_row = pd.DataFrame(r2ss[j], index=res_names, columns=[r'Pseudo $R^2$',]).T
    table = pd.concat([table,N_row,R2_row])

    all_tabs.append(table)

# Side-by-side table, then split into two printable parts. Both parts end with
# the N and Pseudo R^2 rows; part 1 also carries the intercept.
Big_tab = pd.concat(all_tabs,axis=1,keys=(formal_names))
Big_tab_1 = pd.concat((Big_tab.iloc[:cut1,:],Big_tab.iloc[cut2:cut3,:],Big_tab.iloc[cut4-2:,:]))
Big_tab_2 = pd.concat((Big_tab.iloc[cut1:cut2,:],Big_tab.iloc[cut3:cut4-2,:],Big_tab.iloc[cut4:,:]))

Big_tab_1.to_latex('./tables/sqtreg_advanced_age3540_part1.tex',escape=False,column_format='l|ccc|ccc|ccc|')
Big_tab_2.to_latex('./tables/sqtreg_advanced_age3540_part2.tex',escape=False,column_format='l|ccc|ccc|ccc|')
print(Big_tab)

                                       Children younger than 5  \
                                                     25th pctl   
$t$                                             $-0.013^{***}$   
                                                      (0.0013)   
$t$ x female                                     $0.019^{***}$   
                                                      (0.0019)   
unemployment rate x male                         $0.035^{***}$   
                                                      (0.0037)   
unemployment rate x female                       $0.046^{***}$   
                                                      (0.0039)   
female                                          $-0.622^{***}$   
                                                      (0.1248)   
occ group 2                                     $-0.336^{***}$   
                                                      (0.1017)   
occ group 3                                     $-2.467^{***}$   
          

  Big_tab_1.to_latex('./tables/sqtreg_advanced_age3540_part1.tex',escape=False,column_format='l|ccc|ccc|ccc|')
  Big_tab_2.to_latex('./tables/sqtreg_advanced_age3540_part2.tex',escape=False,column_format='l|ccc|ccc|ccc|')
