In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.optimize import fsolve

#from f_detrend import detrend_fuction

# Use seaborn's 'whitegrid' style for the figures drawn in this notebook.
sn.set_style('whitegrid')
# IPython magics: draw matplotlib figures inline in the notebook, then load the
# autoreload extension in mode 2 so edited modules are picked up without
# restarting the kernel.
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
# Expanded and modified version (first column now string)
def read_table(filename,keyword='ciclo',rescale=1,ncols=5,start=2,end_row='Total',
               endword=None,omit_list=['t20_21'],index_to_int=True,debug=False):
    """Parse one whitespace-delimited table from a text log into a DataFrame.

    The file is scanned line by line; every line is split on whitespace and
    lines with fewer than two words are ignored.

    * A line whose first word is ``keyword`` is the header. Column names are
      ``words[start:start+ncols]`` when ``endword`` is None. Otherwise they are
      the words from position ``start`` up to (not including) ``endword``,
      minus the last one collected, and the number of data columns read per
      row is adjusted to match.
    * A line whose first word is ``end_row`` stops the scan (even if it comes
      before the header).
    * Any other line after the header whose first word is not in
      ``omit_list`` is a data row: the first word is the row label and the
      words in the column window are the values. Thousands separators (",")
      are stripped before conversion to float, and values are divided by
      ``rescale``.

    If the header line occurs more than once, only the rows following the
    last occurrence are kept.

    Parameters
    ----------
    filename : str
        Path of the text file to read.
    keyword : str
        First word of the header line.
    rescale : number
        Divisor applied to every parsed value.
    ncols : int
        Number of value columns (used when ``endword`` is None).
    start : int
        Position of the first value column within the split line.
    end_row : str
        First word of the line that terminates the table.
    endword : str or None
        Optional header word marking the end of the column names.
    omit_list : list of str
        Row labels to skip. The default list is never mutated here.
    index_to_int : bool
        Convert row labels to integers when True, keep them as strings
        otherwise.
    debug : bool
        Print every split line, plus the final shape and column names.

    Returns
    -------
    pandas.DataFrame
        One row per data line, indexed by the row labels. Empty (zero rows)
        when the header is present but no data rows follow it.

    Raises
    ------
    ValueError
        If no header line starting with ``keyword`` is reached, or if a value
        cannot be converted to float.
    """
    t0 = start
    t1 = ncols+start
    header_found = False
    colnames = []
    row_names = []
    rows = []
    # The context manager guarantees the file is closed, also on errors.
    with open(filename,'r') as f:
        for line in f:
            words = line.split()
            if debug:
                print(words)
            if len(words) < 2:
                continue
            label = words[0]
            if label == keyword:
                header_found = True
                if endword is None:
                    colnames = words[t0:t1]
                else:
                    buff_v = []
                    for w in words[t0:]:
                        if w == endword:
                            break
                        buff_v.append(w)
                    # The last word collected before `endword` is dropped, and
                    # the data window shrinks by one column accordingly.
                    colnames = buff_v[:-1]
                    t1 = len(buff_v)+t0-1
                # A new header starts a new table: forget labels and values
                # together so they can never get out of step.
                row_names = []
                rows = []
            elif label == end_row:
                break
            elif header_found and label not in omit_list:
                row_names.append(label)
                rows.append([w.replace(",","") for w in words[t0:t1]])
    if not header_found:
        raise ValueError(
            "no header line starting with {!r} found in {!r}".format(keyword, filename))
    # Build the array once instead of stacking it row by row.
    if rows:
        table_raw = np.array(rows,dtype=float)/rescale
    else:
        table_raw = np.empty((0,len(colnames)))
    if debug:
        print(table_raw.shape)
        print(colnames)
    if index_to_int:
        return pd.DataFrame(table_raw, columns=colnames,index=np.array(row_names,dtype=int))
    else:
        return pd.DataFrame(table_raw, columns=colnames,index=np.array(row_names))
    
    
def file_split(filename,path,startwords=['Summary',],endword="Total",header=None):
    """Split a text log into numbered section files.

    Reads ``path + filename`` line by line. A line whose first word is in
    ``startwords`` opens a new output file named
    ``path + <stem> + '_<n>.log'``, where ``<stem>`` is the part of
    ``filename`` before its first '.' (the whole name when it has no dot) and
    ``<n>`` starts at 1. The start line and every following line are copied
    into that file, up to and including the next line whose first word is
    ``endword``; the counter then advances. Lines outside a section are
    discarded.

    Parameters
    ----------
    filename : str
        Name of the input file inside ``path``.
    path : str
        Directory prefix; it is concatenated as-is, so it must end with a
        path separator.
    startwords : sequence of str
        First words that open a section. The default list is never mutated.
    endword : str
        First word of the line that closes a section.
    header : str or None
        Optional text written at the top of every output file.

    Returns
    -------
    None
    """
    # str.partition keeps the full name when there is no '.', unlike slicing
    # with str.find, which would cut off the last character in that case.
    parent_file = filename.partition('.')[0]
    di = 1
    sf = None  # currently open section file, or None outside a section
    try:
        with open(path+filename,'r') as f:
            for line in f:
                words = line.split()
                first = words[0] if words else None
                if first is not None and first in startwords:
                    # A start line inside an unterminated section targets the
                    # same numbered file again; release the old handle first.
                    if sf is not None:
                        sf.close()
                    sf = open(path+parent_file+'_{}.log'.format(di), "w")
                    if header is not None:
                        sf.write(header)
                    sf.write(line)
                elif first is not None and first == endword and sf is not None:
                    sf.write(line)
                    sf.close()
                    sf = None
                    di += 1
                elif sf is not None:
                    sf.write(line)
    finally:
        # A section with no end line must still be flushed and closed.
        if sf is not None:
            sf.close()
            
def normalise_table(table):
    """Turn each row of ``table`` into shares of that row's sum.

    The frame is modified in place: a 'Total' column holding the row sums is
    added, then every column that existed on entry is divided by 'Total'.
    The same (mutated) frame is returned.
    """
    # Snapshot the column labels before 'Total' is attached.
    original_columns = list(table.columns)
    row_sums = table.sum(axis=1)
    table['Total'] = row_sums
    for name in original_columns:
        table[name] = table[name].div(table['Total'])
    return table

In [3]:
# Folder holding the raw log; the split parts are written next to it.
path = "./descriptive_stats/"

# Cut the log into numbered files (rznotb_w_age3040_w_1.log, _2.log, ...):
# each part runs from a line starting with 'ciclo' to the next 'Total' line.
file_split(
    'rznotb_w_age3040_w.log',
    path,
    startwords=['ciclo'],
    endword="Total",
)

In [12]:
# Read the three split parts; they carry 7, 7 and 2 value columns respectively.
tab1, tab2, tab3 = [
    read_table(path + log_name, ncols=n_cols)
    for log_name, n_cols in (
        ('rznotb_w_age3040_w_1.log', 7),
        ('rznotb_w_age3040_w_2.log', 7),
        ('rznotb_w_age3040_w_3.log', 2),
    )
]

# Put the parts side by side (aligned on the row index) into one wide table.
razones_tab = pd.concat([tab1, tab2, tab3], axis=1)
razones_tab.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
130,0.0,10284250.0,3103128.0,2502995.0,6669225.0,216749.0,0.0,350367.0,327764.0,19762.0,124154.0,0.0,0.0,0.0,217063.0,597388.0
131,0.0,3307175.0,3448442.0,2568949.0,7186888.0,207146.0,0.0,171103.0,431069.0,0.0,44958.0,0.0,0.0,0.0,333021.0,397188.0
132,0.0,38420894.0,4543038.0,2401617.0,6581356.0,76908.0,0.0,99793.0,451928.0,0.0,106603.0,3708.0,7422.0,0.0,242939.0,393749.0
133,8811.0,8544532.0,4693800.0,3016258.0,6981493.0,343105.0,0.0,183598.0,579282.0,19292.0,95760.0,0.0,55349.0,41780.0,118828.0,395698.0
134,0.0,5194325.0,4118224.0,2582654.0,8783282.0,122650.0,0.0,208094.0,770363.0,16070.0,95490.0,0.0,14307.0,0.0,341156.0,290743.0


In [18]:
# Maps the column codes of razones_tab (strings "0".."15") to their Spanish
# labels. Presumably these are the survey's reasons for not having worked --
# TODO confirm against the questionnaire.
reason_dict = {
    "1": "Vacaciones o dias de permiso",
    "2": "Permiso por nacimiento de un hijo",
    "3": "Excedencia por nacimiento de un hijo",
    "4": "Enfermedad, accidente o incapacidad temporal del encuestado",
    "5": "Jornada de verano, horario variable, flexible o similar",
    "6": "Actividades de representación sindical",
    "7": "Nuevo empleo en el que aún no había empezado a trabajar",
    "8": "Fijo discontinuo o trabajador estacional en la época de menor actividad",
    "9": "Mal tiempo",
    "10": "Paro parcial por razones técnicas o económicas",
    "11": "Se encuentra en expediente de regulación de empleo",
    "12": "Huelga o conflicto laboral",
    "13": "Haber recibido enseñanza o formación fuera del establecimiento",
    "14": "Razones personales o responsabilidades familiares",
    "15": "Otras razones",
    "0": "No sabe",
}


In [19]:
# Display the table with readable column labels. rename() returns a new frame,
# so razones_tab itself keeps its numeric code columns.
razones_tab.rename(reason_dict, axis="columns")

Unnamed: 0,No sabe,Vacaciones o dias de permiso,Permiso por nacimiento de un hijo,Excedencia por nacimiento de un hijo,"Enfermedad, accidente o incapacidad temporal del encuestado","Jornada de verano, horario variable, flexible o similar",Actividades de representación sindical,Nuevo empleo en el que aún no había empezado a trabajar,Fijo discontinuo o trabajador estacional en la época de menor actividad,Mal tiempo,Paro parcial por razones técnicas o económicas,Se encuentra en expediente de regulación de empleo,Huelga o conflicto laboral,Haber recibido enseñanza o formación fuera del establecimiento,Razones personales o responsabilidades familiares,Otras razones
130,0.0,10284250.0,3103128.0,2502995.0,6669225.0,216749.0,0.0,350367.0,327764.0,19762.0,124154.0,0.0,0.0,0.0,217063.0,597388.0
131,0.0,3307175.0,3448442.0,2568949.0,7186888.0,207146.0,0.0,171103.0,431069.0,0.0,44958.0,0.0,0.0,0.0,333021.0,397188.0
132,0.0,38420894.0,4543038.0,2401617.0,6581356.0,76908.0,0.0,99793.0,451928.0,0.0,106603.0,3708.0,7422.0,0.0,242939.0,393749.0
133,8811.0,8544532.0,4693800.0,3016258.0,6981493.0,343105.0,0.0,183598.0,579282.0,19292.0,95760.0,0.0,55349.0,41780.0,118828.0,395698.0
134,0.0,5194325.0,4118224.0,2582654.0,8783282.0,122650.0,0.0,208094.0,770363.0,16070.0,95490.0,0.0,14307.0,0.0,341156.0,290743.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
197,0.0,10171757.0,4874013.0,1296773.0,10164554.0,110818.0,0.0,0.0,439518.0,56365.0,225095.0,1412194.0,0.0,34811.0,352463.0,240331.0
198,0.0,5962781.0,4469208.0,1129296.0,10960711.0,46414.0,0.0,321735.0,916240.0,0.0,209865.0,958616.0,0.0,0.0,341380.0,406092.0
199,0.0,7000605.0,3584324.0,1279472.0,9167567.0,265974.0,0.0,33449.0,385592.0,0.0,86900.0,561791.0,0.0,0.0,240669.0,308279.0
200,0.0,33927828.0,3887585.0,1610042.0,7783679.0,256862.0,0.0,321471.0,1255860.0,0.0,196272.0,354223.0,0.0,40540.0,505024.0,697303.0
