In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.optimize import fsolve
import itertools

sn.set_style('whitegrid')
%matplotlib inline
#%load_ext autoreload
#%autoreload 2

In [10]:
# Expanded and modified version (first column now string)
def read_table(filename,keyword='ciclo',rescale=1,ncols=5,start=2,end_row='Total',
               endword=None,omit_list=['t20_21'],index_to_int=True,debug=False):
    """Parse a whitespace-separated text table from a log file into a DataFrame.

    Lines are split on whitespace. Parsing starts at the first line whose first
    token equals ``keyword`` (the header line) and stops at the first line whose
    first token equals ``end_row`` (that line is not included).

    Parameters
    ----------
    filename : str
        Path of the text file to read.
    keyword : str
        First token of the header line; the column names are taken from it.
    rescale : number
        Every parsed value is divided by this.
    ncols : int
        Number of value columns (ignored for the column count when ``endword``
        is given, see below).
    start : int
        Token position of the first value column.
    end_row : str
        First token of the line that terminates the table.
    endword : str or None
        When given, header tokens are collected from ``start`` up to (not
        including) ``endword``; the last collected token is dropped from the
        column names and the value slice is shortened accordingly.
    omit_list : list
        Currently unused by the parser; kept for call compatibility.
    index_to_int : bool
        Convert the row labels to integers when True, keep strings otherwise.
    debug : bool
        Print the tokens of every line plus the final shape and column names.

    Returns
    -------
    pandas.DataFrame
        One row per parsed data line, indexed by the row labels.

    Raises
    ------
    ValueError
        If no header line starting with ``keyword`` is found, or if a value
        cannot be converted to float.
    """
    t0 = start
    t1 = ncols+start
    results_follow = 0
    row_names = []
    rows = []
    colnames = None
    interaction = 0
    # The context manager guarantees the file is closed, including on errors.
    with open(filename,'r') as f:
        for line in f:
            words = line.split()
            if debug:
                print(words)
            if len(words)>1 and words[0] == keyword:
                results_follow=1
                if endword is None:
                    colnames = words[t0:t1]
                else:
                    buff_v = []
                    for w in words[t0:]:
                        if w==endword:
                            break
                        buff_v.append(w)
                    colnames = buff_v[:-1]
                    t1 = len(buff_v)+t0-1
                # A header line (re)starts the value table.
                rows = []
            elif len(words)>1 and words[0] == end_row:
                break
            elif len(words) == 1:
                # A single-token line closes the current interaction group.
                interaction = 0
            elif len(words)>1 and results_follow==1:
                if len(words)==2 and words[1]=="|":
                    # "<name> |" with no values opens an interaction group:
                    # the following rows are labelled "<name>_<level>".
                    var_name = words[0]
                    interaction = 1
                else:
                    if interaction==0:
                        row_names.append(words[0])
                    elif interaction==1:
                        if words[1]=='|':
                            # Single level token before the separator.
                            if var_name+"_"+words[0] in row_names:
                                new_name = var_name+"_"+str(int(words[0])+int(words[-1]))
                            else:
                                new_name = var_name+"_"+words[0]
                            row_names.append(new_name)
                        else:
                            # Two level tokens: prefer the second one and fall
                            # back to the first when that label is already taken.
                            if var_name+"_"+words[1] in row_names:
                                new_name = var_name+"_"+str(int(words[0]))
                            else:
                                new_name = var_name+"_"+words[1]
                            row_names.append(new_name)

                    # When the separator sits in third position the values
                    # start one token later.
                    if words[2]=='|':
                        t00 = t0+1
                        t11 = t1+1
                    else:
                        t00 = t0
                        t11 = t1

                    # Strip thousands separators before the float conversion.
                    buff_v = [w.replace(",","") for w in words[t00:t11]]
                    rows.append(np.array(buff_v,dtype=float)/rescale)

    if colnames is None:
        raise ValueError("header keyword {!r} not found in {}".format(keyword, filename))

    if rows:
        table_raw = np.vstack(rows)
    else:
        table_raw = np.empty((0,len(colnames)))
    if debug:
        print(table_raw.shape)
        print(colnames)
    if index_to_int:
        return pd.DataFrame(table_raw, columns=colnames,index=np.array(row_names,dtype=int))
    else:
        return pd.DataFrame(table_raw, columns=colnames,index=np.array(row_names))
    
    
def read_descriptive_stats(filename,keyword_data,keyword_col,ncols,rownames,start=2,debug=False):
    """Collect every data line with a given first token from a text file.

    Lines are split on whitespace. Each line whose first token equals
    ``keyword_data`` contributes one row made of the tokens in positions
    ``start`` .. ``start + ncols - 1`` (commas removed, converted to float).
    A line whose first token equals ``keyword_col`` supplies the column names
    from the same token positions; if several such lines exist the last wins.

    Parameters
    ----------
    filename : str
        Path of the text file to read.
    keyword_data : str
        First token identifying the data lines.
    keyword_col : str
        First token identifying the line holding the column names.
    ncols : int
        Number of value columns.
    rownames : sequence
        Index labels for the resulting rows, in file order.
    start : int
        Token position of the first value column.
    debug : bool
        Print the tokens of every line.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If no line starting with ``keyword_col`` is found, or a value cannot
        be converted to float.
    """
    t0 = start
    t1 = ncols+start
    rows = []
    colnames = None
    # The context manager guarantees the file is closed, including on errors.
    with open(filename,'r') as f:
        for line in f:
            words = line.split()
            if debug:
                print(words)
            if len(words) <= 1:
                continue
            if words[0] == keyword_data:
                # Strip thousands separators before the float conversion.
                buff_v = [w.replace(",","") for w in words[t0:t1]]
                rows.append(np.array(buff_v,dtype=float))
            elif words[0] == keyword_col:
                colnames = words[t0:t1]

    if colnames is None:
        raise ValueError("column keyword {!r} not found in {}".format(keyword_col, filename))

    if rows:
        table_raw = np.vstack(rows)
    else:
        table_raw = np.empty((0,len(colnames)))
    return pd.DataFrame(table_raw, columns=colnames, index=rownames)
 
    
def file_split(filename,path,startwords=['Summary',],endword="Total",header=None,Noobs=False):
    """Split a log file into numbered section files.

    The file ``path + filename`` is read line by line (lines are split on
    whitespace). A line whose first token is in ``startwords`` opens the output
    file ``path + <stem>_<k>.log`` (k starts at 1), where ``<stem>`` is
    ``filename`` up to its first dot. Every line from the start line up to and
    including the next line whose first token equals ``endword`` is copied to
    that file, after which k is incremented.

    Independently of the sections, the last token of every line containing the
    token ``'Number'`` and of every line containing the token ``'R2'`` is
    collected.

    Parameters
    ----------
    filename : str
        Name of the log file, relative to ``path``.
    path : str
        Directory prefix; concatenated as-is, so it must end with a separator.
    startwords : list of str
        First tokens that open a new section.
    endword : str
        First token of the line that closes the current section.
    header : str or None
        Text written at the top of every section file. It is written verbatim
        with no newline added, so unless it ends with one the start line
        continues on the same line of the output.
    Noobs : bool
        When True, return the collected tokens.

    Returns
    -------
    tuple of (list, list) or None
        ``(noobs, R2s)`` as lists of strings when ``Noobs`` is True, else None.
    """
    # Everything before the first dot; the whole name when there is no dot.
    parent_file = filename.split('.', 1)[0]
    di = 1
    noobs = []
    R2s = []
    sf = None  # currently open section file, if any
    with open(path+filename,'r') as f:
        try:
            for line in f:
                words = line.split()
                if 'Number' in words:
                    noobs.append(words[-1])
                if 'R2' in words:
                    R2s.append(words[-1])
                if words and words[0] in startwords:
                    # A start line while a section is still open restarts the
                    # same numbered file; close the old handle first.
                    if sf is not None:
                        sf.close()
                    sf = open(path+parent_file+'_{}.log'.format(di), "w")
                    if header is not None:
                        sf.write(header)
                elif words and words[0]==endword and sf is not None:
                    sf.write(line)
                    sf.close()
                    sf = None
                    di += 1
                if sf is not None:
                    sf.write(line)
        finally:
            # A section left open at end of file (or on error) is still
            # flushed and closed.
            if sf is not None:
                sf.close()
    if Noobs==True:
        return (noobs,R2s)
            
def normalise_table(table):
    """Convert each row of ``table`` into shares of its row total.

    A ``'Total'`` column holding the row sums is added, then every column that
    existed before the call is divided by it. The frame is modified in place
    and also returned.
    """
    value_columns = table.columns  # captured before 'Total' is added
    row_totals = table.sum(axis=1)
    table['Total'] = row_totals
    for name in value_columns:
        table[name] = table[name].div(table['Total'])
    return table

# Parents Age 30-40

## Children Age <10

In [20]:
# Split the combined regression log into one file per quantile section and
# parse each section into its own coefficient table.
path = './tables/'
omits = ['sexo1#c.ttrend','sexo1#c.ttrend2',"covid#sexo1"]
name = "sqtreg_table_simple_age3040__10_agefix"

# N and r2s hold the tokens file_split collected from the 'Number' and 'R2'
# lines of the log; the section files are written next to the log itself.
N, r2s = file_split(
    '{}.log'.format(name),
    path,
    startwords=['q25','q50','q75'],
    endword="_cons",
    header="ten_y | Coefficient std t p_stat lower_95 upper_95",
    Noobs=True,
)

# Section files are numbered in the order of appearance: 1 -> q25, 2 -> q50,
# 3 -> q75.
tab1_25, tab1_50, tab1_75 = (
    read_table(
        path+section_file.format(name),
        keyword='ten_y',
        rescale=1,
        ncols=6,
        start=2,
        omit_list=omits,
        index_to_int=False,
    )
    for section_file in ('{}_1.log', '{}_2.log', '{}_3.log')
)

In [21]:
# Row labels as produced by the table parser, their display names for the
# LaTeX table, and the display order of the variables.
old_names_var = ['ttrend','sexo1#c.ttrend_1','sexo1#c.urate_0','sexo1#c.urate_1','1.sexo1','35.age','sexo1#age_35',
                 'covid_1','covid_2','covid_3', 'covid#sexo1_1', 'covid#sexo1_2','covid#sexo1_3', '_cons']
new_names_var = [r'$t$',r'$t$ x female',r'unemployment rate',r'unemployment rate x female',
                 'female','Age 35-40',r'Age 35-40 x female',
                 r'$\delta_{2020}$',r'$\delta_{2021}$',r'$\delta_{2022}$',r'$\delta_{2020}$ x female',r'$\delta_{2021}$ x female',
                 r'$\delta_{2022}$ x female',r'$\beta_0$']
reorder_cols = [r'$\beta_0$','female',r'$t$',r'$t$ x female',r'unemployment rate',r'unemployment rate x female',
                'Age 35-40',r'Age 35-40 x female',
                 r'$\delta_{2020}$',r'$\delta_{2020}$ x female',r'$\delta_{2021}$',r'$\delta_{2021}$ x female',r'$\delta_{2022}$',r'$\delta_{2022}$ x female']

def rename_results(df,old_names=old_names_var,new_names=new_names_var,order=reorder_cols):
    """Return a copy of ``df`` with renamed and reordered columns.

    Columns listed in ``old_names`` are renamed to the entry at the same
    position in ``new_names``; the result is then reindexed to ``order``, so
    columns absent from ``order`` are dropped and entries of ``order`` that
    are missing from the frame become all-NaN columns.

    The input frame is left untouched; only the returned frame carries the
    new labels.
    """
    name_map = dict(zip(old_names,new_names))
    return df.rename(columns=name_map).reindex(columns=order)

def pass_regression_to_latex_column(df):
    """Format one regression as a single text column for a LaTeX table.

    ``df`` must have one row per variable and the columns ``'Coefficient'``
    and ``'std'``. For each variable, in row order, two cells are produced:
    the coefficient formatted as ``{:,.3f}`` labelled with the variable's row
    label, followed by the standard error formatted as ``({:,.4f})`` labelled
    with the empty string.

    Returns
    -------
    pandas.Series
        Unnamed Series of strings with ``2 * len(df)`` entries.
    """
    coef_cells = ["{:,.3f}".format(float(x)) for x in df["Coefficient"]]
    std_cells = ["({:,.4f})".format(float(x)) for x in df['std']]

    # Interleave the cells directly so that row labels of any type (including
    # integers) cannot collide with temporary positional labels.
    cells = []
    labels = []
    for label, coef, std in zip(df.index, coef_cells, std_cells):
        cells.extend((coef, std))
        labels.extend((label, ""))

    index = pd.Index(labels, dtype=object, name=df.index.name)
    return pd.Series(cells, index=index, dtype=object)

In [22]:
# Assemble the three quantile regressions into one LaTeX table: one column per
# regression, two rows per variable (coefficient, then standard error).
res_names = ['Parents, 25th pctl','Parents, 50th pctl','Parents, 75th pctl']

# Transposed so that variables are columns (what rename_results relabels).
all_results = [rename_results(tab.T) for tab in (tab1_25,tab1_50,tab1_75)]

table = pd.DataFrame(pass_regression_to_latex_column(all_results[0].T),columns=[res_names[0],])
for ir,res in enumerate(all_results[1:]):
    col = pass_regression_to_latex_column(res.T)
    table[res_names[ir+1]] = col.values

# Wrap every coefficient in math mode and append significance stars:
# p < 0.01 -> ***, p < 0.05 -> **, p < 0.1 -> *. Testing the thresholds in
# ascending order means a p-value exactly on a boundary (e.g. a rounded 0.050)
# still receives the stars of the next weaker level instead of none.
for i,reg in enumerate(res_names):
    p_values = all_results[i].loc['p_stat']
    for var in reorder_cols:
        p = p_values[var]
        if p < 0.01:
            stars = "^{***}"
        elif p < 0.05:
            stars = "^{**}"
        elif p < 0.1:
            stars = "^*"
        else:
            stars = ""
        # Single .loc call: chained `table[reg].loc[var] = ...` may write to a
        # temporary copy and leave `table` unchanged.
        table.loc[var, reg] = "${}{}$".format(table.loc[var, reg], stars)

# The first collected observation count is repeated for all three columns.
N_row = pd.DataFrame(np.tile([N[0]],3), index=res_names, columns=['N',]).T
R2_row = pd.DataFrame(np.vstack(r2s[0:3]).T.ravel(), index=res_names, columns=[r'Pseudo $R^2$',]).T
table = pd.concat([table,N_row,R2_row])

print(table.to_latex(escape=False))
# table.to_latex('./tables/tex/sqtreg_simple_age3040_10_agefix.tex',escape=False)

\begin{tabular}{llll}
\toprule
{} & Parents, 25th pctl & Parents, 50th pctl & Parents, 75th pctl \\
\midrule
$\beta_0$                  &      $0.943^{***}$ &      $4.243^{***}$ &      $9.473^{***}$ \\
                           &           (0.0444) &           (0.0598) &           (0.0662) \\
female                     &     $-0.930^{***}$ &     $-1.087^{***}$ &     $-1.038^{***}$ \\
                           &           (0.0551) &           (0.0890) &           (0.0780) \\
$t$                        &     $-0.013^{***}$ &           $-0.000$ &            $0.001$ \\
                           &           (0.0007) &           (0.0022) &           (0.0017) \\
$t$ x female               &      $0.018^{***}$ &      $0.045^{***}$ &      $0.023^{***}$ \\
                           &           (0.0013) &           (0.0025) &           (0.0020) \\
unemployment rate          &      $0.061^{***}$ &      $0.053^{***}$ &      $-0.012^{**}$ \\
                           &           (0.0030) &     

  print(table.to_latex(escape=False))


In [8]:
# NOTE(review): this cell repeats the table-building cell found earlier in the
# notebook (it carries a lower execution count); consider keeping only one.
#
# Assemble the three quantile regressions into one LaTeX table: one column per
# regression, two rows per variable (coefficient, then standard error).
res_names = ['Parents, 25th pctl','Parents, 50th pctl','Parents, 75th pctl']

# Transposed so that variables are columns (what rename_results relabels).
all_results = [rename_results(tab.T) for tab in (tab1_25,tab1_50,tab1_75)]

table = pd.DataFrame(pass_regression_to_latex_column(all_results[0].T),columns=[res_names[0],])
for ir,res in enumerate(all_results[1:]):
    col = pass_regression_to_latex_column(res.T)
    table[res_names[ir+1]] = col.values

# Wrap every coefficient in math mode and append significance stars:
# p < 0.01 -> ***, p < 0.05 -> **, p < 0.1 -> *. Testing the thresholds in
# ascending order means a p-value exactly on a boundary (e.g. a rounded 0.050)
# still receives the stars of the next weaker level instead of none.
for i,reg in enumerate(res_names):
    p_values = all_results[i].loc['p_stat']
    for var in reorder_cols:
        p = p_values[var]
        if p < 0.01:
            stars = "^{***}"
        elif p < 0.05:
            stars = "^{**}"
        elif p < 0.1:
            stars = "^*"
        else:
            stars = ""
        # Single .loc call: chained `table[reg].loc[var] = ...` may write to a
        # temporary copy and leave `table` unchanged.
        table.loc[var, reg] = "${}{}$".format(table.loc[var, reg], stars)

# The first collected observation count is repeated for all three columns.
N_row = pd.DataFrame(np.tile([N[0]],3), index=res_names, columns=['N',]).T
R2_row = pd.DataFrame(np.vstack(r2s[0:3]).T.ravel(), index=res_names, columns=[r'Pseudo $R^2$',]).T
table = pd.concat([table,N_row,R2_row])

print(table.to_latex(escape=False))
# table.to_latex('./tables/tex/sqtreg_simple_age3040_10_agefix.tex',escape=False)

\begin{tabular}{llll}
\toprule
{} & Parents, 25th pctl & Parents, 50th pctl & Parents, 75th pctl \\
\midrule
$\beta_0$                  &      $0.926^{***}$ &      $4.241^{***}$ &      $9.460^{***}$ \\
                           &           (0.0498) &           (0.0824) &           (0.0974) \\
female                     &     $-0.912^{***}$ &     $-1.096^{***}$ &     $-1.032^{***}$ \\
                           &           (0.0583) &           (0.1038) &           (0.0848) \\
$t$                        &     $-0.014^{***}$ &           $-0.000$ &            $0.001$ \\
                           &           (0.0009) &           (0.0021) &           (0.0017) \\
$t$ x female               &      $0.018^{***}$ &      $0.045^{***}$ &      $0.023^{***}$ \\
                           &           (0.0018) &           (0.0030) &           (0.0017) \\
unemployment rate          &      $0.062^{***}$ &      $0.053^{***}$ &      $-0.012^{**}$ \\
                           &           (0.0028) &     

  print(table.to_latex(escape=False))
