In [2]:
# importing libraries
from __future__ import division
import numpy as np#
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_style("white")
from scipy import stats
import pandas as pd
import itertools

%load_ext autoreload
%autoreload 2

Auxiliary functions

In [21]:
def intercala_listas(a, b):
    """Interleave two sequences element by element.

    Returns ``[a[0], b[0], a[1], b[1], ...]``. Pairs are formed with ``zip``,
    so the result stops at the end of the shorter input.
    """
    merged = []
    for left, right in zip(a, b):
        merged.append(left)
        merged.append(right)
    return merged

def format_names(names):
    """Convert raw parameter labels into LaTeX-flavoured display labels.

    Processing of each label, in order:

    1. The second occurrence of ``'lsig_c'`` in ``names`` gets a ``'2'`` appended.
    2. A label ending in ``'2'`` has every ``'2'`` replaced by ``'^x'``.
    3. The first matching prefix rule (``s``, ``ls``, ``ps``, ``pl``) rewrites the
       ``sig``-style stem and appends the closing suffix; ``'mu_y'`` becomes
       ``'$\\mu_y$'``; anything else is treated as an ``lpsig`` label.

    Returns a new list; ``names`` is not modified.
    """
    # (prefix to test, stem to replace, replacement, closing suffix) - order matters.
    prefix_rules = (
        ('s', 'sig', r'$\sigma', '$'),
        ('ls', 'lsig', r'$log(\sigma', ')$'),
        ('ps', 'psig', r'$\sigma', '$%'),
        ('pl', 'plsig', r'$log(\sigma', ')$%'),
    )
    formatted = []
    lsig_c_seen = 0
    for label in names:
        if label == 'lsig_c':
            lsig_c_seen += 1
            if lsig_c_seen == 2:
                label = label + '2'
        if label[-1] == '2':
            label = label.replace('2', '^x')
        for prefix, stem, replacement, suffix in prefix_rules:
            if label.startswith(prefix):
                label = label.replace(stem, replacement) + suffix
                break
        else:
            # No prefix rule matched.
            if label == "mu_y":
                label = label.replace('mu', r'$\mu') + '$'
            else:
                label = label.replace('lpsig', r'$log(\sigma') + '^x)$%'
        formatted.append(label)
    return formatted

def format_short(AllTab):
    """Build a 9-row summary from the last eight rows of ``AllTab``.

    The last eight rows are relabelled with descriptive names, a
    ``constant_percentage`` row equal to ``1 - (sum of the last three rows)``
    is added, and the rows are returned in a fixed order.

    Parameters
    ----------
    AllTab : pandas.DataFrame
        Table whose last eight rows carry the labels listed in ``row_labels``
        below (rows with other labels are dropped by the final ``reindex``,
        and missing ones come back as NaN).

    Returns
    -------
    pandas.DataFrame
        New frame with the same columns as ``AllTab``; ``AllTab`` itself is
        left untouched.
    """
    row_labels = {
        'lsig_y2': 'log_variance',
        'sig_x': 'spell_number',
        'lsig_c': 'constant',
        'lsig_e2': 'duration_dependence',
        'lsig_b2': 'heterogeneity',
        'lpsig_x': 'spell_number_percentage',
        'lpsig_e': 'duration_dependence_percentage',
        'lpsig_b': 'heterogeneity_percentage',
    }
    # rename() returns a new frame; the former inplace='True' (a string) is
    # rejected by pandas' boolean validation of ``inplace``.
    tab_short = AllTab.iloc[-8:].rename(index=row_labels)

    # Residual share: whatever the last three (percentage) rows do not explain.
    constant_row = pd.DataFrame(
        [(1 - AllTab.iloc[-3:].sum()).values],
        index=['constant_percentage'],
        columns=AllTab.columns,
    )
    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    tab_short = pd.concat([tab_short, constant_row])

    return tab_short.reindex([
        'log_variance', 'spell_number', 'constant', 'duration_dependence',
        'heterogeneity', 'constant_percentage', 'spell_number_percentage',
        'duration_dependence_percentage', 'heterogeneity_percentage',
    ])

def format_very_short(AllTab):
    """Reduce ``AllTab`` to one row per column with the share decomposition.

    The last three rows of ``AllTab`` (expected to be labelled ``lpsig_x``,
    ``lpsig_e`` and ``lpsig_b``) are transposed so that every column of
    ``AllTab`` becomes one output row. Two columns are added positionally:
    ``shareC`` = ``1 - (sum of those three rows)`` and ``var`` = the 8th row
    from the bottom of ``AllTab``.

    Parameters
    ----------
    AllTab : pandas.DataFrame
        Table with at least eight rows; column labels become the ``cat``
        column of the result.

    Returns
    -------
    pandas.DataFrame
        Columns ``shareC, shareDD, shareH, shareS, cat, var`` (in that order)
        on a fresh 0..n-1 index. ``AllTab`` is not modified.

    Raises
    ------
    KeyError
        If the last three rows do not carry the expected labels, so that the
        final column selection cannot find the renamed share columns.
    """
    shares = AllTab.iloc[-3:].T.reset_index(level=0)
    shares['shareC'] = 1 - AllTab.iloc[-3:].sum().values
    shares['var'] = AllTab.iloc[-8].values
    # rename() returns a new frame; the former inplace='True' (a string) is
    # rejected by pandas' boolean validation of ``inplace``.
    shares = shares.rename(columns={
        'lpsig_x': 'shareS',
        'lpsig_e': 'shareDD',
        'lpsig_b': 'shareH',
        'index': 'cat',
    })
    return shares[['shareC', 'shareDD', 'shareH', 'shareS', 'cat', 'var']]

def format_column_names(column_names):
    """Split comma-separated column headers into a name and its extra tags.

    For each header ``'name,tag1,tag2'`` the part before the first comma goes
    into the first returned list and the remaining parts (possibly none) go,
    as a list, into the second.

    Returns
    -------
    (list of str, list of list of str)
    """
    pieces = [header.split(',') for header in column_names]
    var_name = [parts[0] for parts in pieces]
    type1 = [parts[1:] for parts in pieces]
    return var_name, type1

def read_results(inputfile):
    """Parse an estimation log into a table of estimates.

    The log is scanned line by line (whitespace-split):

    * a line whose second word is ``******`` starts a results block; the words
      between the second and the last one, joined without spaces, become the
      block's column name;
    * inside a block, every 7-word line contributes its 4th word (as float) as
      one estimate; the first word of such lines is recorded as the row name
      for the first block only;
    * the ``lpsig_b`` line closes the block, and its 3rd word (thousands
      separators removed) is stored as the number of observations.

    Parameters
    ----------
    inputfile : str
        Path of the log file.

    Returns
    -------
    table_raw : numpy.ndarray, shape (1 + n_blocks, n_rows)
        Row 0 is a zero-filled placeholder kept for backward compatibility:
        callers slice it off with ``table_raw[1:, :]``. Each following row
        holds the estimates of one block.
    names : list of str
        Row names taken from the first block.
    column_names : list of str
        One name per block header encountered.
    noobs : list of int
        Number of observations per completed block.

    Raises
    ------
    ValueError
        If a value cannot be parsed as a number, or if blocks contain
        different numbers of rows.
    """
    default_width = 23  # width of the placeholder row when no block was found
    names = []
    column_names = []
    noobs = []
    blocks = []   # one list of estimates per completed block
    sigmas = []   # estimates of the block currently being read
    in_block = False
    first_block = True

    # The context manager closes the file even if parsing raises.
    with open(inputfile, 'r') as f:
        for line in f:
            words = line.split()
            if len(words) > 1 and words[1] == "******":
                column_names.append("".join(words[2:-1]))
                in_block = True
            elif in_block and len(words) == 7:
                if first_block:
                    names.append(words[0])
                sigmas.append(float(words[3]))
                if words[0] == 'lpsig_b':
                    blocks.append(sigmas)
                    noobs.append(int(words[2].replace(',', "")))
                    sigmas = []
                    in_block = False
                    first_block = False

    # Stack once at the end; the block length is taken from the data instead
    # of being fixed, and the placeholder row is deterministic (zeros).
    width = len(blocks[0]) if blocks else default_width
    table_raw = np.vstack([np.zeros(width)] + [np.array(block) for block in blocks])
    return table_raw, names, column_names, noobs

def read_descriptive_stats(inputfile):
    """Parse a descriptive-statistics log holding seven summary tables.

    A line starting with ``Variable`` opens a table. Every following line with
    more than one word is a data row: its words from the third one onward
    (thousands separators removed) are stored as the statistics of that row.
    The ``before92`` row closes the table. Values are stored in an array of
    shape ``(5, 11, 7)`` = (statistic, row, table); rows or tables beyond
    those sizes raise ``IndexError``, and absent ones stay at zero.

    Parameters
    ----------
    inputfile : str
        Path of the log file.

    Returns
    -------
    final_table : pandas.DataFrame
        Statistic 1 (labelled ``'mean'``) and statistic 2 (labelled ``'std'``)
        of every table, interleaved table by table, with ``version`` and
        ``stat`` as the leading columns.
    means_table : pandas.DataFrame
        Statistic 1 only, one row per table, plus a ``version`` column.

    Raises
    ------
    ValueError
        If a statistic cannot be parsed as a number.
    """
    table_raw = np.zeros((5, 11, 7))
    row_n = 0
    table_n = 0
    results_follow = False

    # The context manager closes the file even if parsing raises.
    with open(inputfile, 'r') as f:
        for line in f:
            words = line.split()
            if len(words) <= 1:
                continue
            if words[0] == "Variable":
                results_follow = True
            elif results_follow:
                for idx, w in enumerate(words[2:]):
                    table_raw[idx, row_n, table_n] = float(w.replace(",", ""))
                if words[0] == 'before92':
                    # Last row of the table: move on to the next table.
                    results_follow = False
                    table_n += 1
                    row_n = 0
                else:
                    row_n += 1

    column_names_format = ['duration', 'duration_last', 'age', 'female', 'college',
                           'year_of_spell', 'last_T', 'last_P', 'last_A', 'before_84', 'before_92']
    table_names_format = ['Raw', 'LTU', 'STU_noAjd', 'STU', 'STU_Recalls', 'NE_noAjd', 'NE']

    means_table = pd.DataFrame(table_raw[1, :, :].T, columns=column_names_format)
    means_table['version'] = table_names_format
    stds_table = pd.DataFrame(table_raw[2, :, :].T, columns=column_names_format)
    stds_table['version'] = table_names_format

    # A stable sort on the duplicated 0..6 index keeps each 'mean' row directly
    # before the matching 'std' row.
    final_table = pd.concat([means_table, stds_table]).sort_index(kind='mergesort')
    final_table['stat'] = ['mean', 'std'] * len(table_names_format)
    # Move the two label columns ('version', 'stat') to the front.
    final_table = final_table[list(final_table.columns[-2:]) + list(final_table.columns[:-2])]

    return final_table, means_table

## Reading the data
### New version

In [74]:
# Parse the log, keep the share decomposition per specification, attach the
# number of observations and export the table (rounded to 3 decimals) to Stata.
table_raw, names, column_names, noobs = read_results('results/all_results96noA.log')

# Raw parameter names are used as row labels; the LaTeX labels from
# format_names() were computed and immediately discarded, so that call is gone.
names_latex = names
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=column_names)
short_tab96_noA = format_very_short(AllTab)
short_tab96_noA['N'] = np.array(noobs)
short_tab96_noA.round(3).to_stata('results/all_results96_noA.dta')
short_tab96_noA

Unnamed: 0,shareC,shareDD,shareH,shareS,cat,var,N
0,1.290154,-0.507585,0.206385,0.011046,Raw,1.27499,430272
1,1.123429,-0.367165,0.237267,0.006469,LTU,1.464209,430272
2,0.723769,0.089989,0.185434,0.000809,STU,2.272735,656600
3,0.577915,0.242369,0.174557,0.005159,STU+SpellAdj,2.846325,656600
4,0.56952,0.181201,0.249761,-0.000483,NE(NOSpellAdj),2.888282,844674
5,0.499273,0.266956,0.230164,0.003607,NE+SpellAdj,3.29466,844674
6,0.500506,0.275227,0.220459,0.003809,STU+Recalls+SpellAdj,3.286546,794578


In [32]:
# Parse the log, keep the share decomposition per specification, attach the
# number of observations and export the table (rounded to 3 decimals) to Stata.
table_raw, names, column_names, noobs = read_results('results/all_results96noA_x.log')

# Raw parameter names are used as row labels; the LaTeX labels from
# format_names() were computed and immediately discarded, so that call is gone.
names_latex = names
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=column_names)
short_tab96_noA_x = format_very_short(AllTab)
short_tab96_noA_x['N'] = np.array(noobs)
short_tab96_noA_x.round(3).to_stata('results/all_results96_noA_x.dta')
short_tab96_noA_x

Unnamed: 0,shareC,shareDD,shareH,shareS,cat,var,N
0,1.290154,-0.521582,0.184311,0.047117,Raw,1.27499,430272
1,1.123429,-0.379343,0.214822,0.041092,LTU,1.464209,430272
2,0.723768,0.064751,0.155446,0.056035,STU,2.272735,656600
3,0.577915,0.220057,0.139812,0.062216,STU+SpellAdj,2.846325,656600
4,0.56952,0.15964,0.219797,0.051042,NE(NOSpellAdj),2.888282,844674
5,0.499273,0.249339,0.202666,0.048723,NE+SpellAdj,3.29466,844674
6,0.500506,0.257077,0.195543,0.046875,STU+Recalls+SpellAdj,3.286546,794578


In [4]:
# Parse the log, keep the share decomposition per specification, attach the
# number of observations and export the table (rounded to 3 decimals) to Stata.
table_raw, names, column_names, noobs = read_results('results/all_results.log')

# Raw parameter names are used as row labels; the LaTeX labels from
# format_names() were computed and immediately discarded, so that call is gone.
names_latex = names
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=column_names)
short_tab = format_very_short(AllTab)
short_tab['N'] = np.array(noobs)
short_tab.round(3).to_stata('results/all_results.dta')

In [37]:
# Parse the log, keep the share decomposition per specification, attach the
# number of observations and export the table (rounded to 3 decimals) to Stata.
table_raw, names, column_names, noobs = read_results('results/all_results96.log')

# Raw parameter names are used as row labels (format_names() output was unused).
names_latex = names
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=column_names)
short_tab96 = format_very_short(AllTab)
short_tab96['N'] = np.array(noobs)
# Export the table built in this cell. The previous code wrote `short_tab`,
# i.e. whatever table another cell had left in the kernel, to this file.
short_tab96.round(3).to_stata('results/all_results96.dta')

In [5]:
# Parse the log, keep the share decomposition per specification, attach the
# number of observations and export the table (rounded to 3 decimals) to Stata.
table_raw, names, column_names, noobs = read_results('results/all_results84.log')

# Raw parameter names are used as row labels (format_names() output was unused).
names_latex = names
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=column_names)
short_tab84 = format_very_short(AllTab)
short_tab84['N'] = np.array(noobs)
# Export the table built in this cell. The previous code wrote `short_tab`,
# i.e. whatever table another cell had left in the kernel, to this file.
short_tab84.round(3).to_stata('results/all_results84.dta')

In [10]:
# Parse the log, keep the share decomposition per specification, attach the
# number of observations and export the table (rounded to 3 decimals) to Stata.
table_raw, names, column_names, noobs = read_results('results/all_results_noA.log')

# Raw parameter names are used as row labels; the LaTeX labels from
# format_names() were computed and immediately discarded, so that call is gone.
names_latex = names
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=column_names)
short_tab_noA = format_very_short(AllTab)
short_tab_noA['N'] = np.array(noobs)
short_tab_noA.round(3).to_stata('results/all_results_noA.dta')

Business cycles

In [60]:
# Parse the log, build the share table and export it (rounded to 3 decimals)
# as a Stata file.
table_raw, names, column_names, noobs = read_results('results/BC_all_noA.log')

names_latex = names
# Each header is split on commas: the first part labels the column, the
# remaining part(s) are kept per header (stored below as 'period').
var_names, year_names = format_column_names(column_names)
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=var_names)
short_tab = format_very_short(AllTab)
# Flatten the per-header tag lists; this assumes exactly one tag per header so
# the result has one entry per row - TODO confirm against the log.
short_tab['period'] = list(itertools.chain.from_iterable(year_names))
short_tab['N'] = np.array(noobs)
short_tab.round(3).to_stata('results/BC_noA_results.dta')

In [61]:
# Parse the log, build the share table and export it (rounded to 3 decimals)
# as a Stata file.
table_raw, names, column_names, noobs = read_results('results/BC_all_noA_x.log')

names_latex = names
# Each header is split on commas: the first part labels the column, the
# remaining part(s) are kept per header (stored below as 'period').
var_names, year_names = format_column_names(column_names)
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=var_names)
short_tab = format_very_short(AllTab)
# Flatten the per-header tag lists; this assumes exactly one tag per header so
# the result has one entry per row - TODO confirm against the log.
short_tab['period'] = list(itertools.chain.from_iterable(year_names))
# The plain list of observation counts is assigned directly here.
short_tab['N'] = noobs
short_tab.round(3).to_stata('results/BC_noA_results_x.dta')

In [58]:
# Parse the log, build the share table and export it (rounded to 3 decimals)
# as a Stata file.
table_raw, names, column_names, noobs = read_results('results/BC_all.log')

names_latex = names
# Each header is split on commas: the first part labels the column, the
# remaining part(s) are kept per header (stored below as 'period').
var_names, year_names = format_column_names(column_names)
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=var_names)
short_tab = format_very_short(AllTab)
# Flatten the per-header tag lists; this assumes exactly one tag per header so
# the result has one entry per row - TODO confirm against the log.
short_tab['period'] = list(itertools.chain.from_iterable(year_names))
short_tab['N'] = np.array(noobs)
short_tab.round(3).to_stata('results/BC_results.dta')

In [13]:
# Parse the log, build the share table and export it (rounded to 3 decimals)
# as a Stata file.
table_raw, names, column_names, noobs = read_results('results/BC_educ.log')

names_latex = names
# Each header is split on commas: the first part labels the column, the
# remaining parts are kept per header.
var_names, educ_names = format_column_names(column_names)
# 2-D array of tags; the column indexing below requires every header to carry
# the same number (at least two) of tags.
data_names = np.array(educ_names)
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=var_names)
short_tab = format_very_short(AllTab)
# Second tag -> 'educ', first tag -> 'period'.
short_tab['educ'] = data_names[:,1]
short_tab['period'] = data_names[:,0]
# NOTE(review): `noobs` is parsed but no 'N' column is attached in this cell,
# unlike the other export cells - confirm this is intentional.
short_tab.round(3).to_stata('results/results_BC_educ.dta')

In [14]:
# Parse the log, build the share table and export it (rounded to 3 decimals)
# as a Stata file.
table_raw, names, column_names, noobs = read_results('results/BC_educ_noA.log')

names_latex = names
# Each header is split on commas: the first part labels the column, the
# remaining parts are kept per header.
var_names, educ_names = format_column_names(column_names)
# 2-D array of tags; the column indexing below requires every header to carry
# the same number (at least two) of tags.
data_names = np.array(educ_names)
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=var_names)
short_tab = format_very_short(AllTab)
# Second tag -> 'educ', first tag -> 'period'.
short_tab['educ'] = data_names[:,1]
short_tab['period'] = data_names[:,0]
# NOTE(review): `noobs` is parsed but no 'N' column is attached in this cell,
# unlike the other export cells - confirm this is intentional.
short_tab.round(3).to_stata('results/results_BC_educ_noA.dta')

Characteristics breakdown - sex

In [79]:
# Parse the log, build the share table, export it (rounded to 3 decimals) as a
# Stata file and display it.
table_raw, names, column_names, noobs = read_results('results/all_results_sex_96_noA.log')

names_latex = names
# Each header is split on commas: the first part labels the column, the
# remaining part(s) are kept per header (stored below as 'gender').
var_names, sex_names = format_column_names(column_names)
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=var_names)
short_tab = format_very_short(AllTab)
# Flatten the per-header tag lists; this assumes exactly one tag per header so
# the result has one entry per row - TODO confirm against the log.
short_tab['gender'] = list(itertools.chain.from_iterable(sex_names))
short_tab['N'] = np.array(noobs)
short_tab.round(3).to_stata('results/results_sex_96_noA.dta')
short_tab

Unnamed: 0,shareC,shareDD,shareH,shareS,cat,var,gender,N
0,1.347545,-0.535927,0.181236,0.007146,Raw,1.22069,women,237036
1,1.235186,-0.48209,0.230087,0.016816,Raw,1.33173,men,193236
2,1.219648,-0.438044,0.21458,0.003816,LTU,1.348696,women,237036
3,1.037621,-0.300214,0.25215,0.010443,LTU,1.585293,men,193236
4,0.753076,0.062248,0.18457,0.000106,STU,2.184288,women,357730
5,0.704904,0.121107,0.171528,0.002461,STU,2.333557,men,298870
6,0.598924,0.227794,0.169362,0.003921,STU+SpellAdj,2.746483,women,357730
7,0.575372,0.267643,0.149934,0.00705,STU+SpellAdj,2.858903,men,298870
8,0.533866,0.240055,0.223386,0.002694,STU+Recalls+SpellAdj,3.081174,women,423884
9,0.476811,0.316245,0.201614,0.00533,STU+Recalls+SpellAdj,3.449867,men,370694


In [78]:
# Parse the log, build the share table, export it (rounded to 3 decimals) as a
# Stata file and display it.
table_raw, names, column_names, noobs = read_results('results/all_results_sex_96_noA_x.log')

names_latex = names
# Each header is split on commas: the first part labels the column, the
# remaining part(s) are kept per header (stored below as 'gender').
var_names, sex_names = format_column_names(column_names)
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=var_names)
short_tab = format_very_short(AllTab)
# Flatten the per-header tag lists; this assumes exactly one tag per header so
# the result has one entry per row - TODO confirm against the log.
short_tab['gender'] = list(itertools.chain.from_iterable(sex_names))
short_tab['N'] = np.array(noobs)
short_tab.round(3).to_stata('results/results_sex_96_noA_x.dta')
short_tab

Unnamed: 0,shareC,shareDD,shareH,shareS,cat,var,gender,N
0,1.347545,-0.555567,0.167465,0.040557,Raw,1.22069,women,237036
1,1.235186,-0.491821,0.201818,0.054817,Raw,1.33173,men,193236
2,1.219648,-0.456199,0.202911,0.033641,LTU,1.348696,women,237036
3,1.037621,-0.309131,0.226827,0.044682,LTU,1.585293,men,193236
4,0.753076,0.030056,0.171553,0.045315,STU,2.184288,women,357730
5,0.704904,0.101569,0.139607,0.053919,STU,2.333557,men,298870
6,0.598924,0.197226,0.160052,0.043798,STU+SpellAdj,2.746483,women,357730
7,0.575372,0.253335,0.116275,0.055018,STU+SpellAdj,2.858903,men,298870
8,0.533866,0.214964,0.21513,0.036039,STU+Recalls+SpellAdj,3.081174,women,423884
9,0.476811,0.304645,0.174309,0.044235,STU+Recalls+SpellAdj,3.449867,men,370694


In [40]:
# Parse the log, build the share table and export it (rounded to 3 decimals)
# as a Stata file.
table_raw, names, column_names, noobs = read_results('results/all_results_sex.log')

names_latex = names
# Each header is split on commas: the first part labels the column, the
# remaining part(s) are kept per header (stored below as 'gender').
var_names, sex_names = format_column_names(column_names)
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=var_names)
short_tab = format_very_short(AllTab)
# Flatten the per-header tag lists; this assumes exactly one tag per header so
# the result has one entry per row - TODO confirm against the log.
short_tab['gender'] = list(itertools.chain.from_iterable(sex_names))
short_tab['N'] = np.array(noobs)
short_tab.round(3).to_stata('results/results_sex.dta')

In [47]:
# Parse the log, build the share table and export it (rounded to 3 decimals)
# as a Stata file.
table_raw, names, column_names, noobs = read_results('results/all_results_sex_96.log')

names_latex = names
# Each header is split on commas: the first part labels the column, the
# remaining part(s) are kept per header (stored below as 'gender').
var_names, sex_names = format_column_names(column_names)
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=var_names)
short_tab = format_very_short(AllTab)
# Flatten the per-header tag lists; this assumes exactly one tag per header so
# the result has one entry per row - TODO confirm against the log.
short_tab['gender'] = list(itertools.chain.from_iterable(sex_names))
short_tab['N'] = np.array(noobs)
short_tab.round(3).to_stata('results/results_sex_96.dta')

In [42]:
# Parse the log, build the share table and export it (rounded to 3 decimals)
# as a Stata file.
table_raw, names, column_names, noobs = read_results('results/all_results_sex_noA.log')

names_latex = names
# Each header is split on commas: the first part labels the column, the
# remaining part(s) are kept per header (stored below as 'gender').
var_names, sex_names = format_column_names(column_names)
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=var_names)
short_tab = format_very_short(AllTab)
# Flatten the per-header tag lists; this assumes exactly one tag per header so
# the result has one entry per row - TODO confirm against the log.
short_tab['gender'] = list(itertools.chain.from_iterable(sex_names))
short_tab['N'] = np.array(noobs)
short_tab.round(3).to_stata('results/results_sex_noA.dta')

Characteristics breakdown - education level

In [43]:
# Parse the log, build the share table and export it (rounded to 3 decimals)
# as a Stata file.
table_raw, names, column_names, noobs = read_results('results/all_results_educ.log')

names_latex = names
# Each header is split on commas: the first part labels the column, the
# remaining part(s) are kept per header (stored below as 'educ').
var_names, educ_names = format_column_names(column_names)
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=var_names)
short_tab = format_very_short(AllTab)
# Flatten the per-header tag lists; this assumes exactly one tag per header so
# the result has one entry per row - TODO confirm against the log.
short_tab['educ'] = list(itertools.chain.from_iterable(educ_names))
short_tab['N'] = np.array(noobs)
short_tab.round(3).to_stata('results/results_educ.dta')

In [73]:
# Parse the log, build the share table, export it (rounded to 3 decimals) as a
# Stata file and display it.
table_raw, names, column_names, noobs = read_results('results/all_results_educ_96_noA.log')

names_latex = names
# Each header is split on commas: the first part labels the column, the
# remaining part(s) are kept per header (stored below as 'educ').
var_names, educ_names = format_column_names(column_names)
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=var_names)
short_tab = format_very_short(AllTab)
# Flatten the per-header tag lists; this assumes exactly one tag per header so
# the result has one entry per row - TODO confirm against the log.
short_tab['educ'] = list(itertools.chain.from_iterable(educ_names))
short_tab['N'] = np.array(noobs)
short_tab.round(3).to_stata('results/results_educ_96_noA.dta')
short_tab

Unnamed: 0,shareC,shareDD,shareH,shareS,cat,var,educ,N
0,1.350152,-0.574152,0.210954,0.013047,Raw,1.218333,college,61742
1,1.287753,-0.500536,0.202135,0.010648,Raw,1.277368,nocollege,377158
2,1.150727,-0.407687,0.250758,0.006202,LTU,1.429474,college,61742
3,1.12317,-0.362564,0.233058,0.006336,LTU,1.464545,nocollege,377158
4,0.665265,0.163744,0.170458,0.000533,STU,2.4726,college,118202
5,0.741332,0.073562,0.184347,0.000759,STU,2.218889,nocollege,557084
6,0.566626,0.279353,0.148818,0.005204,STU+SpellAdj,2.903035,college,118202
7,0.583011,0.232864,0.179193,0.004932,STU+SpellAdj,2.821445,nocollege,557084
8,0.508567,0.284105,0.203346,0.003981,STU+Recalls+SpellAdj,3.234449,college,149260
9,0.501574,0.270684,0.224066,0.003676,STU+Recalls+SpellAdj,3.279546,nocollege,667402


In [72]:
# Parse the log, build the share table, export it (rounded to 3 decimals) as a
# Stata file and display it.
table_raw, names, column_names, noobs = read_results('results/all_results_educ_96_noA_x.log')

names_latex = names
# Each header is split on commas: the first part labels the column, the
# remaining part(s) are kept per header (stored below as 'educ').
var_names, educ_names = format_column_names(column_names)
# Row 0 of table_raw is read_results' placeholder row and is dropped here.
AllTab = pd.DataFrame(table_raw[1:,:].T, index=names_latex, columns=var_names)
short_tab = format_very_short(AllTab)
# Flatten the per-header tag lists; this assumes exactly one tag per header so
# the result has one entry per row - TODO confirm against the log.
short_tab['educ'] = list(itertools.chain.from_iterable(educ_names))
short_tab['N'] = np.array(noobs)
short_tab.round(3).to_stata('results/results_educ_96_noA_x.dta')
short_tab

Unnamed: 0,shareC,shareDD,shareH,shareS,cat,var,educ,N
0,1.350152,-0.685207,0.168341,0.166714,Raw,1.218333,college,61742
1,1.287753,-0.51668,0.181959,0.046969,Raw,1.277368,nocollege,377158
2,1.150727,-0.50619,0.205068,0.150394,LTU,1.429474,college,61742
3,1.12317,-0.377529,0.210282,0.044076,LTU,1.464545,nocollege,377158
4,0.665265,0.038632,0.127746,0.168357,STU,2.4726,college,118202
5,0.741332,0.043732,0.156217,0.058719,STU,2.218889,nocollege,557084
6,0.566626,0.161738,0.110247,0.161389,STU+SpellAdj,2.903035,college,118202
7,0.583011,0.2066,0.140842,0.069547,STU+SpellAdj,2.821445,nocollege,557084
8,0.508567,0.186568,0.160974,0.143891,STU+Recalls+SpellAdj,3.234449,college,149260
9,0.501574,0.248905,0.196271,0.05325,STU+Recalls+SpellAdj,3.279546,nocollege,667402


## Descriptive stats

In [23]:
# Parse the descriptive-statistics log, export the interleaved mean/std table
# to Stata (unrounded) and display the means-only table.
final_table, means_table = read_descriptive_stats('results/descriptive_stats_part2.log')
final_table.to_stata('results/descriptive_stats.dta')
means_table

Unnamed: 0,duration,duration_last,age,female,college,year_of_spell,last_T,last_P,last_A,before_84,before_92,version
0,248.2181,453.7352,32.30829,0.413735,0.108958,1998.404,0.682072,0.311945,0.005592,0.112318,0.269028,Raw
1,353.977,453.7352,32.30829,0.413735,0.108958,1998.404,0.682072,0.311945,0.005592,0.112318,0.269028,LTU
2,386.1211,495.1598,32.13768,0.417592,0.132442,1997.895,0.634614,0.319946,0.043952,0.133043,0.274082,STU_noAjd
3,354.1303,481.6906,30.71043,0.417592,0.132442,1996.467,0.630408,0.306978,0.061842,0.151393,0.305498,STU
4,290.4843,501.4177,30.35788,0.426858,0.140676,1995.792,0.409996,0.536245,0.050294,0.1661,0.327789,STU_Recalls
5,363.1301,539.248,31.97252,0.426718,0.141452,1997.265,0.423268,0.537649,0.034178,0.149958,0.293736,NE_noAjd
6,295.4518,517.6939,30.18505,0.426718,0.141452,1995.478,0.404219,0.546139,0.045808,0.175789,0.33854,NE


In [24]:
# Parse the descriptive-statistics log, export the interleaved mean/std table
# to Stata (unrounded) and display the means-only table.
final_table, means_table = read_descriptive_stats('results/descriptive_stats96.log')
final_table.to_stata('results/descriptive_stats96.dta')
means_table

Unnamed: 0,duration,duration_last,age,female,college,year_of_spell,last_T,last_P,last_A,before_84,before_92,version
0,190.7582,363.0519,33.70517,0.449102,0.123443,2004.283,0.708587,0.284964,0.005903,0.0,0.0,Raw
1,245.1106,363.0519,33.70517,0.449102,0.123443,2004.283,0.708587,0.284964,0.005903,0.0,0.0,LTU
2,254.9513,386.4403,33.31425,0.452068,0.150467,2004.082,0.656779,0.298585,0.042615,0.0003,0.002766,STU_noAjd
3,224.1607,376.5824,32.14592,0.452068,0.150467,2002.914,0.63983,0.300376,0.05874,0.000352,0.003824,STU
4,181.1696,377.611,31.85762,0.463655,0.158915,2002.364,0.652266,0.297862,0.047878,0.000574,0.005573,STU_Recalls
5,237.4543,408.9748,33.22313,0.462918,0.159494,2003.68,0.651427,0.310651,0.034013,0.000623,0.005306,NE_noAjd
6,182.1737,392.524,31.73462,0.462918,0.159494,2002.192,0.644419,0.308775,0.04443,0.000673,0.006565,NE


In [25]:
# Parse the descriptive-statistics log and export the interleaved mean/std
# table to Stata (unrounded); means_table is kept in the kernel but not shown.
final_table, means_table = read_descriptive_stats('results/descriptive_stats_part2_noA.log')
final_table.to_stata('results/descriptive_stats_noA.dta')

In [26]:
# Parse the descriptive-statistics log and export the interleaved mean/std
# table to Stata (unrounded); means_table is kept in the kernel but not shown.
final_table, means_table = read_descriptive_stats('results/descriptive_stats96_noA.log')
final_table.to_stata('results/descriptive_stats96_noA.dta')

In [27]:
# Parse the descriptive-statistics log and export the interleaved mean/std
# table to Stata (unrounded); means_table is kept in the kernel but not shown.
final_table, means_table = read_descriptive_stats('results/descriptive_stats84_noA.log')
final_table.to_stata('results/descriptive_stats84_noA.dta')

In [10]:
# Descriptive statistics from the BC log: 14 summary tables are parsed into an
# array of shape (statistic, row, table) = (5, 11, 14). A line starting with
# "Variable" opens a table, every following multi-word line is a data row, and
# the 'before92' row closes the table.
table_raw = np.zeros((5,11,14))
var_names = []
row_n = 0
table_n = 0
column_names = []
results_follow = 0
names_done = 0
# The context manager closes the file even if a value fails to parse.
with open('results/descriptive_stats_part2_BC.log','r') as f:
    for line in f:
        words = line.split()
        if len(words)>1 and words[0] == "Variable":
            results_follow=1
            if names_done == 0:
                stats_names = words[2:]
        elif len(words)>1 and results_follow==1:
            # Row labels are collected for the first table only.
            if names_done == 0:
                column_names.append(words[0])
            # Statistics start at the third word; drop thousands separators.
            for idx, w in enumerate(words[2:]):
                table_raw[idx,row_n,table_n] = float(w.replace(",",""))
            if words[0]=='before92':
                # Last row of the table: move on to the next table.
                names_done = 1
                results_follow=0
                table_n +=1
                row_n = 0
            else:
                row_n += 1

column_names_format =  ['duration','duration_last','age','female','college',
              'year_of_spell','last_T','last_P','last_A','before_84','before_92']
# NOTE(review): versions cycle with length 7 while periods alternate with
# length 2, so table k is labelled (version k % 7, period k % 2). That matches
# neither a version-major nor a period-major ordering of the 14 tables in the
# log - confirm the labels against the log's actual table order.
table_names_format = ['Raw','LTU', 'STU_noAjd', 'STU','STU_Recalls', 'NE_noAjd', 'NE']*2
year_names = ['2002-2007','2008-2013']*7

# Statistic 1 is exported as the mean, statistic 2 as the standard deviation.
means_table = pd.DataFrame(table_raw[1,:,:].T, columns=column_names_format)
means_table['version'] = table_names_format
means_table['period'] = year_names
means_table.round(3).to_stata('results/descriptive_stats_means_BC.dta')
stds_table = pd.DataFrame(table_raw[2,:,:].T, columns=column_names_format)
stds_table['version'] = table_names_format
stds_table['period'] = year_names

# A stable sort on the duplicated 0..13 index keeps each 'mean' row directly
# before the matching 'std' row.
final_table_BC = pd.concat([means_table, stds_table]).sort_index(kind='mergesort')
final_table_BC['stat'] = ['mean','std']*len(table_names_format)
# Move the three label columns ('version', 'period', 'stat') to the front.
final_table_BC = final_table_BC[list(final_table_BC.columns[-3:])+list(final_table_BC.columns[:-3])]
final_table_BC.to_stata('results/descriptive_stats_BC.dta')
final_table_BC