In [1]:
import pandas as pd
import os

In [2]:
from matplotlib import pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
%matplotlib inline
%config Inlinebackend.figure_format = 'retina'

import seaborn as sns
sns.set_context('poster')
sns.set(rc={'figure.figsize': (16., 9.)})
sns.set_style('whitegrid')
import plotly.express as px
import plotly.graph_objects as go


In [3]:
at_prim = pd.read_csv('../data/psico_atprimaria.csv')

In [5]:
at_prim.sample(3)

Unnamed: 0,Año,CCAA,Sexo,Nivel_Renta,Edad,Casos,CodCA,cod_salud,problema_salud,cei10
79331,2015,Cantabria,Hombre,- 18000,00-14,0,6,P99,Otros problemas psicológicos/mentales,F99
54491,2014,Castilla-Leon,Hombre,desconocida,35-64,0,7,P20,Trastornos de la memoria,F00-F09
88727,2015,Galicia,Hombre,+ 100000,00-14,0,12,P27,Miedo a una enfermedad mental,F99


In [13]:
# 'Trastornos de la ansiedad/estado de ansiedad' -> P74

In [14]:
at_prim.Edad.unique()

array([ 0, 15, 35, 65])

In [15]:
edad = {'00-14':0, '15-34':15, '35-64':35, '65 +':65}

In [16]:
at_prim.Edad = at_prim.Edad.map(edad)

In [17]:
ag_cei10 = at_prim.groupby(['Año','cei10']).agg({"Casos":"sum"}).reset_index()

In [18]:
fig = px.line(ag_cei10, x="Año", y="Casos",color = 'cei10')
fig.show()

In [19]:
at_prim.sample()

Unnamed: 0,Año,CCAA,Sexo,Nivel_Renta,Edad,Casos,CodCA,cod_salud,problema_salud,cei10
60781,2014,Extremadura,Hombre,muy_baja,,0,11,P18,Abuso de fármacos,F10-F19


In [20]:
casos = at_prim.groupby(['Año','CCAA','Sexo','Nivel_Renta','cei10']).agg({'Casos':'sum'}).reset_index()

In [21]:
casos_renta = casos.groupby(['Año','Nivel_Renta']).agg({'Casos':'sum'}).reset_index()

In [22]:
casos_renta.sample(3)

Unnamed: 0,Año,Nivel_Renta,Casos
0,2011,desconocida,4231605
40,2019,muy_baja,1327292
14,2014,desconocida,9842


In [23]:
casos_renta.Nivel_Renta.value_counts()

desconocida    9
+ 100000       8
- 18000        8
18000-99999    8
muy_baja       8
Name: Nivel_Renta, dtype: int64

In [24]:
pob_act = pd.read_csv('../data/pob_activa_acum.csv')
pob_act.sample(3)

Unnamed: 0,sexo,cod_ca,edad,year,miles
2231,Mujeres,19,De 16 a 19 años,2012,0.15
39,Hombres,1,De 25 a 34 años,2020,317.425
765,Hombres,13,De 35 a 44 años,2016,472.85


In [25]:
pob_act['tot'] = pob_act.miles * 1000 

In [26]:
activos = pob_act.groupby(['year']).agg({'tot':'sum'}).astype('int64').reset_index()

In [27]:
activos.sample(3)

Unnamed: 0,year,tot
8,2019,19779375
0,2011,18421900
3,2014,17344075


In [28]:
dic_activos={}
for i,r in activos.iterrows():
    dic_activos[int(r.year)] = int(r.tot)
dic_activos

{2011: 18421900,
 2012: 17632725,
 2013: 17138875,
 2014: 17344075,
 2015: 17866225,
 2016: 18341600,
 2017: 18824775,
 2018: 19327125,
 2019: 19779375,
 2020: 19202525}

In [29]:
fig1 = px.line(casos_renta, x="Año", y="Casos",color = 'Nivel_Renta')
fig1.show()

In [30]:
nivel_renta = at_prim.groupby(['Nivel_Renta','cei10']).agg({'Casos':'sum'}).reset_index()

In [31]:
rent_level = {'+ 100000':4, '- 18000':2, '18000-99999':3, 'desconocida':0, 'muy_baja':1}

In [32]:
# nivel_renta.Nivel_Renta.unique()
# renta_dic = {'+ 100000':5, '- 18000':2, '18000-99999':3, 'desconocida':0, 'muy_baja':1}
# nivel_renta.Nivel_Renta = nivel_renta.Nivel_Renta.map(renta_dic)

In [33]:
nivel_renta["renta"] = nivel_renta.Nivel_Renta.map(rent_level)

In [34]:
fig2 = px.histogram(nivel_renta, x="Nivel_Renta", y="Casos",color = 'cei10') 
fig2.show()

In [35]:
nivel_renta_sex = at_prim.groupby(['Nivel_Renta','Sexo']).agg({'Casos':'sum'}).reset_index()
fig3 = px.histogram(nivel_renta_sex, x="Nivel_Renta", y="Casos",color = 'Sexo') 
fig3.show()