# ANEXO 1: CÓDIGO EN PYTHON PARA REPLICAR, CORREGIR, ADAPTAR...

## 1) Instalaciones e Importaciones de Librerías Previas:

- Instalamos el módulo necesario para leer la base de datos, que se proporcionan en SPSS

In [1]:
!pip install pyreadstat



- Instalamos el módulo necesario para hacer el análisis factorial

In [2]:
!pip install factor-analyzer==0.3.2



In [3]:
!pip install semopy



In [4]:
pip install graphviz

Note: you may need to restart the kernel to use updated packages.


- Importamos las librerías necesarias:

In [106]:
import pandas as pd
import numpy as np
import scipy as sp
import sklearn
import matplotlib.pyplot as plt
import sklearn.preprocessing
from sklearn.linear_model import LinearRegression
from typing import List
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity
from factor_analyzer.factor_analyzer import FactorAnalyzer
from factor_analyzer.factor_analyzer import calculate_kmo
from factor_analyzer import (ConfirmatoryFactorAnalyzer, ModelSpecificationParser)
from scipy.stats import chi2, pearsonr
from scipy.optimize import minimize
from sklearn.base import BaseEstimator, TransformerMixin
from factor_analyzer.rotator import Rotator
from factor_analyzer.rotator import POSSIBLE_ROTATIONS, OBLIQUE_ROTATIONS
from sklearn.utils.extmath import randomized_svd
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted
from typing import List
from semopy import Model
from semopy import Optimizer
from semopy.inspector import inspect
import statsmodels.api as sm           
import statsmodels.formula.api as smf
from matplotlib import pyplot             # Permite la generación de gráficos
from mpl_toolkits.mplot3d import Axes3D

In [6]:
import graphviz as gv

In [7]:
import semopy.visualization as sv

## 2) Cargamos bases de datos de la ola 6 y la ola 8 de ELSA

- Ola 6

In [8]:
w6_df: pd.core.frame.DataFrame = pd.read_spss('data/wave_6_elsa_data_v2_mediumfortest.sav', convert_categoricals = False)

- Ola 8

In [9]:
w8_df: pd.core.frame.DataFrame = pd.read_spss('data/wave_8_elsa_data_eul_v2_mediumfortest.sav', convert_categoricals = False)

## 3) Construcción teórica de Cuestionarios

Estructura y columnas de la base de datos general que los componen

### 3.1) Ola 6

- Casp-19 para la ola 6 -> casp19_w6

In [10]:
casp19_w6_control = ['scqola', 'scqolb', 'scqolc', 'scqold']
casp19_w6_autonomy = ['scqole', 'scqolf', 'scqolg', 'scqolh', 'scqoli']
casp19_w6_pleasure = [ 'scqolj', 'scqolk', 'scqoll', 'scqolm', 'scqoln']
casp19_w6_self_realization = ['scqolo', 'scqolp', 'scqolq', 'scqolr', 'scqols']

casp19_w6_scales = [casp19_w6_control, casp19_w6_autonomy, casp19_w6_pleasure, casp19_w6_self_realization]

casp19_original_values = [1, 2, 3, 4]
casp19_reverse_items_values = [0, 1, 2, 3]
casp19_direct_items_values = [3, 2, 1, 0]

* Insatisfacción sexual para la ola 6 (mujeres) -> sexdissa_w6_f

In [11]:
sexdissa_w6_f_cols = ['q3', 'q9', 'q10', 'q14f', 'q15f', 'q19m19f', 'q28m25f', 'q32m26f']
sexdissa_w6_q10_original_values = [1, 2, 3]
sexdissa_w6_q10_transformed_values = [5, 0, 5]

* Insatisfacción sexual para la ola 6 (hombres) -> sexdissa_w6_m

In [12]:
sexdissa_w6_m_cols = ['q3', 'q9', 'q10', 'q13m', 'q17m', 'q19m19f', 'q21m', 'q28m25f', 'q32m26f']

- Isolation para la ola 6 -> isolation_w6_df

Nomenclatura: 
>`vip`: persona/s importantes: children, family y friends.

>`has_vip`: tiene a esa persona/personas importantes. Es decir, tiene niños, familia, amigos.

In [13]:
isolation_w6_chd_items = {
    'has_vip': ['scchd'],
    'kind_of_contact': ['scchdg', 'scchdh', 'scchdi', 'scchdt'],
}
isolation_w6_fam_items = {
    'has_vip': ['scfam'],
    'kind_of_contact': ['scfamg', 'scfamh', 'scfami', 'scfamt'],
}
isolation_w6_frd_items = {
    'has_vip': ['scfrd'],
    'kind_of_contact': ['scfrdg', 'scfrdh', 'scfrdi', 'scfrdt'],
}

isolation_w6_org_contact = 'scorg09'
isolation_w6_prt_contact = 'scprtr'

isolation_w6_relatives = [isolation_w6_chd_items, isolation_w6_fam_items, isolation_w6_frd_items]

* multimorbidity

Si la persona confirma que la enfermedad registrada en el pasado está correctamente registrada (hedac-/hedbd- = yes) y que aún la tiene (hedas-/hedbs- = yes) o bien desmiente que le tuviera (hedac-/hedbd- = No) pero dice tenerla ahora ((hedan-/hedbm- = 3) o la reporta ahora (hedia-/hedib- = 1 "mentioned") entonces se imputará un uno sobre esa enfermedad. Si, en suma, hay más de un uno se asignará un uno a la variable de comorbilidad. 

In [14]:
cvd = ['bp', 'an', 'mi', 'hf', 'hm', 'ar', 'di', 'st', 'ch', '95']
chr_ = ['lu', 'as', 'ar', 'os', 'ca', 'pd', 'ps', 'ad', 'de', 'bl'] # ps: psychiatric
psy = ['ha', 'an', 'de', 'em', 'sc', 'ps', 'mo', 'ma']

lex_diagnosed_fedforward_cvd = 'hedaw' 
lex_diagnosed_fedforward_chr = 'hedbw'
# hedaw- y hedbw- puede que no las necesite

lex_what_psy = 'hepsy'

lex_confirm_cvd = 'hedac'
lex_confirm_chr = 'hedbd'

lex_still_has_cvd = 'hedas'
lex_still_has_chr = 'hedbs'

lex_dispute_cvd = 'hedan'
lex_dispute_chr = 'hedbm'
       
lex_new_cvd = 'hedia'
lex_new_chr = 'hedib'

In [15]:
diagnosed_fedforward_cvd = [lex_diagnosed_fedforward_cvd + i for i in cvd]
diagnosed_fedforward_chr = [lex_diagnosed_fedforward_chr + i for i in chr_]

what_psy = [lex_what_psy + i for i in psy]

still_has_cvd = [lex_still_has_cvd + i for i in cvd]
still_has_chr = [lex_still_has_chr + i for i in chr_]


confirm_cvd = [lex_confirm_cvd + i for i in cvd]
confirm_chr = [lex_confirm_chr + i for i in chr_]


dispute_cvd = [lex_dispute_cvd + i for i in cvd]
dispute_chr = [lex_dispute_chr + i for i in chr_]
       
new_cvd = [lex_new_cvd + i for i in cvd]
new_chr = [lex_new_chr + i for i in chr_]

### 3.2) Ola 8

- Casp-19 para la ola 8 -> casp19_w8

In [16]:
casp19_w8_control = ['scqola', 'scqolb', 'scqolc', 'scqold']
casp19_w8_autonomy = ['scqole', 'scqolf', 'scqolg', 'scqolh', 'scqoli']
casp19_w8_pleasure = [ 'scqolj', 'scqolk', 'scqoll', 'scqolm', 'scqoln']
casp19_w8_self_realization = ['scqolo', 'scqolp', 'scqolq', 'scqolr', 'scqols']

casp19_w8_scales = [casp19_w6_control, casp19_w6_autonomy, casp19_w6_pleasure, casp19_w6_self_realization]

casp19_original_values = [1, 2, 3, 4]
casp19_reverse_items_values = [0, 1, 2, 3]
casp19_w8_direct_items_values = [3, 2, 1, 0]

* Insatisfacción sexual para la ola 8 (mujeres) -> sexdissa_w8_f

In [17]:
sexdissa_w8_f_cols = ['sxwdy', 'sxwofy', 'sxhaf', 'sxfpdy', 'sxfway', 'sxwoey', 'sxsosy', 'sxwsty']

* Insatisfacción sexual para la ola 8 (hombres) -> sexdissa_w8_m

In [18]:
sexdissa_w8_m_cols = ['sxwdy', 'sxwofy', 'sxhsaf', 'sxmwey', 'sxmcoy', 'sxwoey', 'sxsosy', 'sxwsty']

* multimorbidity

Las variables se llaman igual que en la ola 6

## 4) Tratamiento de valores perdidos de los primeros cuestionarios:

Lo hacemos de esta manera para teneer la mayor cantidad de sujetos posible a la hora de hacer los análisis factoriales 

### 4.1) Ola 6

- Casp19 e Isolation: **"-2": "schedule not aplicable"** :  *Cuestionarios, compuestos por sus ítems, que van a ser revisados para eliminar sujetos que no formaban parte de la muestra en estos módulos de ELSA. 1604 sujetos no han recibido este módulo del cuestionario* 

#### Columnas, pertenecientes a cuestionarios, que van a ser limpiadas

In [19]:
isolation_w6_cols = ['scchdg', 'scchdh', 'scchdi', 'scchdt', 
                     'scfamg', 'scfamh', 'scfami', 'scfamt', 
                     'scfrdg', 'scfrdh', 'scfrdi', 'scfrdt', 
                     'scorg09', 
                     'scprtr']
casp19_w6_cols = ['scqola', 'scqolb', 'scqolc', 'scqold', 'scqole', 
               'scqolf', 'scqolg', 'scqolh', 'scqoli', 'scqolj', 
               'scqolk', 'scqoll', 'scqolm', 'scqoln', 'scqolo', 
               'scqolp', 'scqolq', 'scqolr', 'scqols']

In [20]:
cols_to_clean_out_of_sample_w6 = isolation_w6_cols + casp19_w6_cols

#### sujetos seleccionados para ser eliminados 

traducción: 
> Constrúyeme una variable (llamada sbj_out_of_sample) que sea igual a: me localizas en el dataframe w6_df `w6_df.loc` lo que te voy a decir: `[` construye un pandas dataframe `pd.DataFrame` con lo siguiente `(`: mira cuales cumplen `==` que son un elemento del w6_df igual a -2 `)` (eso devuelve un True o False) y hazlo para `for` cada elemento `col` en `in` la variable cols_to_clean_out_of_sample (ya definida). Hazlo si en esta lista `[]` lo hay en ALGUNA (de las columnas)`any()`. Bien, cuando lo tengas localizado `]`, coge el índice `.index()`.

In [21]:
sbj_out_of_sample_w6 = w6_df.loc[pd.DataFrame([(w6_df[col] == -2) for col in cols_to_clean_out_of_sample_w6]).any()].index
sbj_out_of_sample_w6 # Resultado: Int64Index([5, 9, 16, 17, 18, 19, 62, 65, 71, 89, 91, 93, 96], dtype='int64')

Int64Index([5, 9, 16, 17, 18, 19, 62, 65, 71, 89, 91, 93, 96], dtype='int64')

#### Construimos el dataframe sin los sujetos
traducción:
>Construye una variable (tipo df por cómo se compone) que sea: el dataframe w6_df quitándole `.drop` los sujetos seleccionados.

In [22]:
w6_without_sbj_out_of_sample_df =  w6_df.drop(sbj_out_of_sample_w6)
w6_without_sbj_out_of_sample_df #del 88 pasa al 90 y luego al 92 porque el 89 y el 91 se eliminaron

Unnamed: 0,idauniq,idahhw6,perid,samptyp,w6indout,W6hhout,couple,cpid,chinhh,chouthh,...,w6nssec8,w6nssec5,w6nssec3,W6SIC,w6soc,W6SEC,w6sic2003,w6soc2000,w6nssec13,finstatw6
0,103890.0,11743.0,1.0,1.0,11.0,120.0,1.0,2.0,2.0,1.0,...,2.0,1.0,1.0,0.0,0.0,0.0,-1.0,35.0,4.3,1.0
1,103895.0,16260.0,1.0,1.0,11.0,110.0,1.0,2.0,1.0,1.0,...,5.0,4.0,3.0,0.0,0.0,0.0,-1.0,92.0,10.0,1.0
2,103960.0,14747.0,1.0,1.0,11.0,110.0,1.0,2.0,2.0,1.0,...,7.0,5.0,3.0,0.0,0.0,0.0,-1.0,62.0,13.3,1.0
3,104192.0,15705.0,1.0,1.0,11.0,110.0,1.0,2.0,2.0,1.0,...,4.0,3.0,2.0,0.0,0.0,0.0,-1.0,51.0,9.2,1.0
4,104194.0,16507.0,1.0,1.0,11.0,110.0,1.0,2.0,2.0,2.0,...,6.0,5.0,3.0,0.0,0.0,0.0,-1.0,41.0,12.6,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,165305.0,11882.0,1.0,1.0,11.0,110.0,1.0,2.0,2.0,1.0,...,5.0,4.0,3.0,1.0,1.0,1.0,63.0,91.0,10.0,25.0
90,165365.0,14517.0,1.0,1.0,11.0,110.0,3.0,-1.0,1.0,2.0,...,6.0,5.0,3.0,1.0,1.0,1.0,80.0,61.0,12.7,25.0
92,165537.0,11512.0,2.0,1.0,11.0,110.0,1.0,1.0,2.0,1.0,...,1.0,1.0,1.0,0.0,0.0,0.0,-1.0,11.0,2.0,25.0
94,165858.0,10999.0,3.0,-1.0,11.0,110.0,1.0,1.0,2.0,1.0,...,2.0,1.0,1.0,1.0,1.0,1.0,74.0,35.0,4.3,29.0


- Isolation: **kind_of_contact = "-1" "item not applicable" y has_vip = "-9" "not answered"**: *sujetos que van a ser eliminados porque no contestaron a si tenían VIP y el ítem de tipo de contacto no es aplicable*

#### Columnas que se van a inspeccionar para ver qué sujetos cumplen las condiciones para ser eliminados:

In [23]:
cols_to_clean_didnt_awr_hasvip_w6 = ['scchd', 'scfam', 'scfrd']
cols_to_clean_kind_of_contact_not_applicable_w6 = [['scchdg', 'scchdh', 'scchdi', 'scchdt'], 
                                                ['scfamg', 'scfamh', 'scfami', 'scfamt'], 
                                                ['scfrdg', 'scfrdh', 'scfrdi', 'scfrdt']]

#### Función que vamos a utilizar para eliminar sujetos
Traducción: 
> Define `def` la función (llamada *delete_sbj_didnt_awr_kind_of_contact_not_applicable*) que para cada sujeto devuelva un buleano `bool` (True o False). Hazlo (devolver True) si se cumple que en algún `any` caso el sujeto no contesta (-9) a si tiene personas importantes (vip: friends, children, family) `sbj[has_vip] == -9` y a la vez `and` si alguno `any` de los sujetos tiene en la *columna* un valor de -1, lo haces para cada `[i]` lista de columnas de las *cols_to_clean_kind_of_contact_not_applicable_w6*. Y ahora, hazme todo eso que te he dicho con cada elemento `i` de has_vip y con cada elemento de *cols_to_clean_didnt_awr_hasvip_w6* dándome indice y el elemento de la lista (esto es lo que hace *enumerate*: te asocia cada i de has_vip con cada elemento de la lista a la que se refiere enumerate) 

In [24]:
def is_sbj_didnt_awr_kind_of_contact_not_applicable(sbj) -> bool:
    return any([
        sbj[has_vip] == -9 and any([
            sbj[col] == -1 for col in cols_to_clean_kind_of_contact_not_applicable_w6[i]])
        for i, has_vip in enumerate(cols_to_clean_didnt_awr_hasvip_w6)])

reducimos de nuevo el dataframe, sobre el que ya estaba reducido, esta vez para eliminar los nuevos sujetos seleccionados:

#### Construimos El data frame is_sbj_didnt_awr_kind_of_contact_not_applicable_w6_df aplicando 
Traducción: 
> 

In [25]:
is_sbj_didnt_awr_kind_of_contact_not_applicable_w6_df = w6_without_sbj_out_of_sample_df.apply(
                                                            is_sbj_didnt_awr_kind_of_contact_not_applicable
                                                            ,
                                                            axis=1)
w6_without_sbj_didnt_awr_kind_of_contact_not_applicable_df = w6_without_sbj_out_of_sample_df.loc[
    is_sbj_didnt_awr_kind_of_contact_not_applicable_w6_df == False]

In [26]:
# w6_without_sbj_didnt_awr_kind_of_contact_not_applicable_df

- Isolation: **has_vip = "1" "yes" y kind_of_contact = "-9" "not answered"**: *sujetos que tienen vip pero no contestan a alguna de las preguntas. Tenemos que calcular su puntuación basándonos en el resto de respuestas.*

In [27]:
def sustitution_missing_kind_of_contact(sbj,scale: dict) -> [0, 1]: 
    return 0 if (any([
        sbj[scale['kind_of_contact']] == -9 and any([
            sbj[col] == 0 for col in kind_of_contact])])) else 1 if any([
                sbj[scale['kind_of_contact']] == -9 and all([
                    sbj[col] == 1 for col in kind_of_contact])
                ]) else 0 #16

- Isolation: **Has_vip = "2" "No" y kind_of_contact = "-1" "item not applicable"**: *sujetos que no tienen vip y lógicamente no tienen ningún tipo de contacto con ese vip. Debemos imputar directamente un punto de isolation para esta subescala. El tratamiento se hace directamente en la construcción del cuestionario.* 

- Eliminamos a los sujetos menores de 50 años

In [28]:
#18

- En CASP-19 no han contestado a alguna de las preguntas (-9) dentro de las subescalas

In [29]:
#19

#### ( ! )  Base de datos limpiada de valores perdidos de los primeros cuestionarios de ola 6

In [30]:
w6_first_cleaned_df = w6_without_sbj_didnt_awr_kind_of_contact_not_applicable_df  #20

In [31]:
len(w6_first_cleaned_df)

84

### 4.2) Ola 8 

- Casp-19: **"-2": "Self-completion instrument not completed"** :  *Cuestionarios, compuestos por sus ítems, que van a ser revisados para eliminar sujetos que no formaban parte de la muestra en estos módulos de ELSA. 1223 sujetos no han recibido este módulo del cuestionario* 

#### Columnas, pertenecientes al cuestionario, que van a ser limpiadas

In [32]:
casp19_w8_cols = ['scqola', 'scqolb', 'scqolc', 'scqold', 'scqole', 
               'scqolf', 'scqolg', 'scqolh', 'scqoli', 'scqolj', 
               'scqolk', 'scqoll', 'scqolm', 'scqoln', 'scqolo', 
               'scqolp', 'scqolq', 'scqolr', 'scqols']

In [33]:
cols_to_clean_out_of_sample_w8 = casp19_w8_cols

#### sujetos seleccionados para ser eliminados 

traducción: 
> Constrúyeme una variable (llamada sbj_out_of_sample) que sea igual a: me localizas en el dataframe w8_df `w8_df.loc` lo que te voy a decir: `[` construye un pandas dataframe `pd.DataFrame` con lo siguiente `(`: mira cuales cumplen `==` que son un elemento del w8_df igual a -2 `)` (eso devuelve un True o False) y hazlo para `for` cada elemento `col` en `in` la variable cols_to_clean_out_of_sample_w8 (ya definida). Hazlo si en esta lista `[]` lo hay en ALGUNA (de las columnas)`any()`. Bien, cuando lo tengas localizado `]`, coge el índice `.index()`.

In [34]:
sbj_out_of_sample_w8 = w8_df.loc[pd.DataFrame([(w8_df[col] == -2) for col in cols_to_clean_out_of_sample_w8]).any()].index
sbj_out_of_sample_w8 

Int64Index([9, 20, 23, 28, 35, 39, 47, 50, 51, 52, 57, 62, 63, 64, 66, 68, 75,
            76],
           dtype='int64')

#### Construimos el dataframe sin los sujetos
traducción:
>Construye una variable (tipo df por cómo se compone) que sea: el dataframe w8_df quitándole `.drop` los sujetos seleccionados.

In [35]:
w8_without_sbj_out_of_sample_df =  w8_df.drop(sbj_out_of_sample_w8)
w8_without_sbj_out_of_sample_df.head()

Unnamed: 0,idauniq,idahhw8,perid,samptyp,finstat,w8indout,w8hhout,w8scout,w8sscout,w8nurout,...,w8nssec5,w8nssec3,w8soc2000r,w8sic2003r,gor,w8w1lwgt,w8w4lwgt,w8xwgt,w8scwt,w8sscwt
0,117573.0,10116.0,1.0,1.0,1.0,11.0,110.0,1.0,1.0,81.0,...,1.0,1.0,35.0,-1.0,E12000008,1.736654,2.036621,1.362483,1.589579,1.885873
1,161881.0,10118.0,2.0,1.0,14.0,11.0,110.0,1.0,1.0,81.0,...,5.0,3.0,62.0,-1.0,E12000006,,1.274136,6.554409,6.735195,5.763463
2,151416.0,10134.0,1.0,1.0,7.0,11.0,110.0,1.0,1.0,81.0,...,4.0,3.0,62.0,-1.0,E12000007,,1.559035,1.590436,1.631708,1.31657
3,120968.0,10219.0,2.0,1.0,1.0,11.0,110.0,1.0,3.0,81.0,...,5.0,3.0,71.0,-1.0,E12000001,1.03025,0.793539,0.670783,0.627842,
4,160230.0,10220.0,1.0,1.0,14.0,11.0,110.0,1.0,1.0,-1.0,...,1.0,1.0,32.0,-1.0,E12000005,,1.05026,0.436357,0.410924,0.374231


- Eliminamos a los sujetos menores de 50 años

In [36]:
#27

- En CASP-19 no han contestado a alguna de las preguntas (-9) dentro de las subescalas

In [37]:
#28

#### ( ! )  Base de datos limpiada de valores perdidos de los primeros cuestionarios de ola 8

In [39]:
w8_first_cleaned_df = w8_without_sbj_out_of_sample_df  # sustituir por la última reducción del dataframe

In [40]:
len(w8_first_cleaned_df)

59

 ## 5) Construcción funcional de los cuestionarios Casp-19, isolation y multimorbidity.

### 5.1) Ola 6

- Cuestionario CASP-19 en ola 6

#### Especificamos los ítems directos e inversos

In [41]:
casp19_reverse_items = [
    'scqola', 
    'scqolb',
    'scqold', 
    'scqolf',
    'scqolh', 
    'scqoli',
]
casp19_direct_items = [
    'scqolc', 
    'scqole', 
    'scqolg',
    'scqolj',
    'scqolk',
    'scqoll',
    'scqolm',
    'scqoln', 
    'scqolo', 
    'scqolp',
    'scqolq', 
    'scqolr',
    'scqols',
]

#### Construinmos las funciones que aplicaremos a los sujetos para crear el cuestionario

Debe tenerse en cuenta que los valores originales en la base de datos están codificados como 1 = often, 2 = Sometimes, 3 = No often y 4 = Never; y el cuestionario debe ofrecer una puntuación mayor cuanto mayor sea la calidad de vida del sujeto. De tal manera que un ítem inverso como *I feel that what happens to me is out of my control* (calificado como inverso) si es respondido con *often* se le ha asignado un valor de 1 en la base de datos. 

Traducción
> Definimos la función que obtiene las puntuaciones para los ítems inversos cuando ofrecemos un valor `x` que podrá ser un 1, 2, 3 o 4 `:[1, 2, 3, 4]` y obtendremos en su lugar '->' una puntuación para el ítem de 0, 1, 2 o 3 `:`. devolverá `return` ese valor menos uno. 

In [42]:
def get_reverse_item(x: [1, 2, 3, 4]) -> [0, 1, 2, 3]:
    return x - 1

Traducción
> Definimos la función que obtiene las puntuaciones para los ítems directos cuando ofrecemos un valor `x` que podrá ser un 1, 2, 3 o 4 `:[1, 2, 3, 4]` y obtendremos en su lugar '->' una puntuación para el ítem de 0, 1, 2 o 3 `:`. En este caso devolverá un 0 donde había un 4, un 1 donde había un 3, etc. 

In [43]:
def get_direct_item(x: [1, 2, 3, 4]) -> [3, 2, 1, 0]:
    return (x - 4) * (-1)

Traducción
> Definimos la función que calculará la puntuación del cuestionario CASP-19 para la ola 6 que dará como resultado un único valor entero comprendido entre cero y 57. 
Esta función devuelve `return` la suma de dos elementos: El primero se obtiene de aplicar la función get_reverse_ item a cada valor *reverse_item* de cada sujeto para cada valor de item reverso de la lista de items inversos. El segundo elemento de la suma se obtiene de aplicar la función get_direct_item a cada valor *direct_item* de cada sujeto para cada ítem directo de la lista de ítems directos del CASP-19 de la ola 6

Función para obtener los tems transformados del cuestionario CASP-19_w6

In [44]:
def transform_casp19_items(sbj) -> List[int]:  # int -> [0, 1, 2, 3]
    return ([get_reverse_item(sbj[reverse_item]) for reverse_item in casp19_reverse_items] +
     [get_direct_item(sbj[direct_item]) for direct_item in casp19_direct_items])

Función para calcular el valor del cuestionario CASP-19

In [45]:
def calc_casp19(sbj) -> list(range(57)):
    return sum([get_reverse_item(sbj[reverse_item]) for reverse_item in casp19_reverse_items] +
     [get_direct_item(sbj[direct_item]) for direct_item in casp19_direct_items])

#### Obtenemos los items del cuestionario CASP-19

In [46]:
items_casp19_w6_df = w6_first_cleaned_df.apply(transform_casp19_items, axis=1, result_type='expand')

#### Creamos el cuestionario aplicando las funciones para su creación

In [47]:
casp19_w6 = w6_first_cleaned_df.apply(calc_casp19, axis=1)

In [48]:
casp19_w6

0     53.0
1     35.0
2     49.0
3     48.0
4     41.0
      ... 
88    40.0
90    53.0
92    42.0
94    45.0
95    38.0
Length: 84, dtype: float64

- Isolation para la ola 6 -> isolation_w6_df   

In [49]:
#59

#### Función que dicotomiza una respuesta likert 

In [50]:
def dichotomize_item(likert_answer: int) -> [0, 1]: 
       return 0 if likert_answer in range(1, 3 + 1) \
        else 1 if likert_answer in range(4, 6 + 1) else 0 #29

#### Funcion que reduce las subescalas con múltiples items a subescalas con un solo item y además es dicotómica

In [51]:
def reduce_scale_to_one_item(sbj, scale: dict) -> [0, 1]: 
    if sbj[scale['has_vip']][0] == 2: #17
        return 1
    else:
        relative_inters = [dichotomize_item(sbj[item]) for item in scale['kind_of_contact']]  
        relative = 1 if all([relative_inter == 1 for relative_inter in relative_inters]) else 0
    return relative

#### Funcion que dicotomiza una respuesta simple, dicotómica, a una respuesta dicotomica (añadiendo un "y si encuentras otra cosa dame un cero")

In [52]:
def reduce_simple_subscale(sbj, item) -> [0, 1]: 
    return sbj[item] if sbj[item] in [0, 1] else 0 #29

Traducción: 
> Define la función llamada calc_isolation_w6 para cada sujeto `sbj`. El resultado será 0, 1, 2, 3, 4 o 5. 
> Relatives será el resultado de aplicar la función reduce_scale_to_one_item a cada sujeto `sbj` de cada elemento de isolation_w6_relatives `x` en `for` cada 
> elemento `x` de `in` isolation_w6_relatives
> Isolation será la suma de 

#### Función que calcula el valor de isolation

Traducción:
> Definimos `relatives` como _lista que resulta de que apliques esta función_: aplica _reduce_scale_to_one_item_ a cada sujeto `sbj` para cada elemento `x` de _isolation_w6_relatives_ 
> _isolation_w6_relatives_ es la _scale_ que te estoy dando como parámetro de la función _reduce_scale_to_one_item_ (más arriba)
    

In [53]:
def calc_items_isolation_w6(sbj):
    relatives = [reduce_scale_to_one_item(sbj, x) for x in isolation_w6_relatives] 
    return relatives + [reduce_simple_subscale(sbj, isolation_w6_org_contact)] + [reduce_simple_subscale(sbj, isolation_w6_prt_contact)]

In [54]:
def calc_isolation_w6(sbj) -> [0, 1, 2, 3, 4, 5]:
    isolation = sum(calc_items_isolation_w6(sbj))
    return isolation

In [55]:
items_isolation_w6_df = w6_first_cleaned_df.loc[(w6_first_cleaned_df['indager'] >= 50)].apply(calc_items_isolation_w6, axis=1, result_type='expand')

In [56]:
isolation_w6_df = w6_first_cleaned_df.loc[(w6_first_cleaned_df['indager'] >= 50)].apply(calc_isolation_w6, axis=1)

In [57]:
items_isolation_w6_df

Unnamed: 0,0,1,2,3,4
0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,0.0,1.0
4,1.0,1.0,0.0,0.0,1.0
...,...,...,...,...,...
88,0.0,0.0,0.0,1.0,1.0
90,0.0,1.0,0.0,1.0,0.0
92,0.0,0.0,0.0,0.0,1.0
94,0.0,1.0,0.0,1.0,1.0


In [58]:
isolation_w6_df.head()

0    1.0
1    1.0
2    1.0
3    1.0
4    3.0
dtype: float64

* multimorbidity

In [59]:
def calc_morbidity_cvd (sbj) -> [0, 1]:
    return 1 if ((i == 1 for i in enumerate[confirm_cvd]) and (i == 1 for i in enumerate[still_has_cvd])) or ((i == 2 for i in enumerate[confirm_cvd]) and (i == 3 for i in enumerate[dispute_cvd])) or ((i == 2 for i in enumerate[confirm_cvd]) and (i == 2 for i in enumerate[dispute_cvd])) or (i == 1 for i in enumerate[new_cvdelse]) else 0

CVD

In [60]:
def calc_morbidity_cvd_bp (sbj) -> [0, 1]:
    return 1 if (('hedacbp' == 1) and ('hedasbp' == 1)) or (('hedacbp' == 2) and ('hedanbp' == 2)) or (('hedacbp' == 2) and ('hedanbp' == 3)) or ('hediabp' == 1) else 0

In [61]:
def calc_morbidity_cvd_an (sbj) -> [0, 1]:
    return 1 if (('hedacan' == 1) and ('hedasan' == 1)) or (('hedacan' == 2) and ('hedanan' == 2)) or (('hedacan' == 2) and ('hedanan' == 3)) or ('hediaan' == 1) else 0

In [62]:
def calc_morbidity_cvd_mi (sbj) -> [0, 1]:
    return 1 if (('hedacmi' == 1) and ('hedasmi' == 1)) or (('hedacmi' == 2) and ('hedanmi' == 2)) or (('hedacmi' == 2) and ('hedanmi' == 3)) or ('hediami' == 1) else 0

In [63]:
def calc_morbidity_cvd_hf (sbj) -> [0, 1]:
    return 1 if (('hedachf' == 1) and ('hedashf' == 1)) or (('hedachf' == 2) and ('hedanhf' == 2)) or (('hedachf' == 2) and ('hedanhf' == 3)) or ('hediahf' == 1) else 0

In [64]:
def calc_morbidity_cvd_hm (sbj) -> [0, 1]:
    return 1 if (('hedachm' == 1) and ('hedashm' == 1)) or (('hedachm' == 2) and ('hedanhm' == 2)) or (('hedachm' == 2) and ('hedanhm' == 3)) or ('hediahm' == 1) else 0

In [65]:
def calc_morbidity_cvd_ar (sbj) -> [0, 1]:
    return 1 if (('hedacar' == 1) and ('hedasar' == 1)) or (('hedacar' == 2) and ('hedanar' == 2)) or (('hedacar' == 2) and ('hedanar' == 3)) or ('hediaar' == 1) else 0

In [66]:
def calc_morbidity_cvd_di (sbj) -> [0, 1]:
    return 1 if (('hedacdi' == 1) and ('hedasdi' == 1)) or (('hedacdi' == 2) and ('hedandi' == 2)) or (('hedacdi' == 2) and ('hedandi' == 3)) or ('hediadi' == 1) else 0

In [67]:
def calc_morbidity_cvd_st (sbj) -> [0, 1]:
    return 1 if (('hedacst' == 1) and ('hedasst' == 1)) or (('hedacst' == 2) and ('hedanst' == 2)) or (('hedacst' == 2) and ('hedanst' == 3)) or ('hediast' == 1) else 0

In [68]:
def calc_morbidity_cvd_ch (sbj) -> [0, 1]:
    return 1 if (('hedacch' == 1) and ('hedasch' == 1)) or (('hedacch' == 2) and ('hedanch' == 2)) or (('hedacch' == 2) and ('hedanch' == 3)) or ('hediach' == 1) else 0

In [69]:
def calc_morbidity_cvd_95 (sbj) -> [0, 1]:
    return 1 if (('hedac95' == 1) and ('hedas95' == 1)) or (('hedac95' == 2) and ('hedan95' == 2)) or (('hedac95' == 2) and ('hedan95' == 3)) or ('hedia95' == 1) else 0

Chronic

In [70]:
def calc_morbidity_chr_lu (sbj) -> [0, 1]:
    return 1 if (('hedaclu' == 1) and ('hedbslu' == 1)) or (('hedaclu' == 2) and ('hedbmlu' == 2)) or (('hedaclu' == 2) and ('hedbmlu' == 3)) or ('hediblu' == 1) else 0

In [71]:
def calc_morbidity_chr_as (sbj) -> [0, 1]:
    return 1 if (('hedacas' == 1) and ('hedbsas' == 1)) or (('hedacas' == 2) and ('hedbmas' == 2)) or (('hedacas' == 2) and ('hedbmas' == 3)) or ('hedibas' == 1) else 0

In [72]:
def calc_morbidity_chr_ar (sbj) -> [0, 1]:
    return 1 if (('hedacar' == 1) and ('hedbsar' == 1)) or (('hedacar' == 2) and ('hedbmar' == 2)) or (('hedacar' == 2) and ('hedbmar' == 3)) or ('hedibar' == 1) else 0

In [73]:
def calc_morbidity_chr_os (sbj) -> [0, 1]:
    return 1 if (('hedacos' == 1) and ('hedbsos' == 1)) or (('hedacos' == 2) and ('hedbmos' == 2)) or (('hedacos' == 2) and ('hedbmos' == 3)) or ('hedibos' == 1) else 0

In [74]:
def calc_morbidity_chr_ca (sbj) -> [0, 1]:
    return 1 if (('hedacca' == 1) and ('hedbsca' == 1)) or (('hedacca' == 2) and ('hedbmca' == 2)) or (('hedacca' == 2) and ('hedbmca' == 3)) or ('hedibca' == 1) else 0

In [75]:
def calc_morbidity_chr_pd (sbj) -> [0, 1]:
    return 1 if (('hedacpd' == 1) and ('hedbspd' == 1)) or (('hedacpd' == 2) and ('hedbmpd' == 2)) or (('hedacpd' == 2) and ('hedbmpd' == 3)) or ('hedibpd' == 1) else 0

In [76]:
def calc_morbidity_chr_ps (sbj) -> [0, 1]:
    return 1 if (('hedacps' == 1) and ('hedbsps' == 1)) or (('hedacps' == 2) and ('hedbmps' == 2)) or (('hedacps' == 2) and ('hedbmps' == 3)) or ('hedibps' == 1) else 0

In [77]:
def calc_morbidity_chr_ad (sbj) -> [0, 1]:
    return 1 if (('hedacad' == 1) and ('hedbsad' == 1)) or (('hedacad' == 2) and ('hedbmad' == 2)) or (('hedacad' == 2) and ('hedbmad' == 3)) or ('hedibad' == 1) else 0

In [78]:
def calc_morbidity_chr_de (sbj) -> [0, 1]:
    return 1 if (('hedacde' == 1) and ('hedbsde' == 1)) or (('hedacde' == 2) and ('hedbmde' == 2)) or (('hedacde' == 2) and ('hedbmde' == 3)) or ('hedibde' == 1) else 0

In [79]:
def calc_morbidity_chr_bl (sbj) -> [0, 1]:
    return 1 if (('hedacbl' == 1) and ('hedbsbl' == 1)) or (('hedacbl' == 2) and ('hedbmbl' == 2)) or (('hedacbl' == 2) and ('hedbmbl' == 3)) or ('hedibbl' == 1) else 0

In [80]:
# def calc_morbidity_cvd_w6 (sbj, item) -> List[int]
#    calc_morbidity[col] for col[sbj] in 

In [81]:
morbidity_cvd_bp_w6 = w6_first_cleaned_df.apply(calc_morbidity_cvd_bp, axis=1)
morbidity_cvd_an_w6 = w6_first_cleaned_df.apply(calc_morbidity_cvd_an, axis=1)
morbidity_cvd_mi_w6 = w6_first_cleaned_df.apply(calc_morbidity_cvd_mi, axis=1)
morbidity_cvd_hf_w6 = w6_first_cleaned_df.apply(calc_morbidity_cvd_hf, axis=1)
morbidity_cvd_hm_w6 = w6_first_cleaned_df.apply(calc_morbidity_cvd_hm, axis=1)
morbidity_cvd_ar_w6 = w6_first_cleaned_df.apply(calc_morbidity_cvd_ar, axis=1)
morbidity_cvd_di_w6 = w6_first_cleaned_df.apply(calc_morbidity_cvd_di, axis=1)
morbidity_cvd_st_w6 = w6_first_cleaned_df.apply(calc_morbidity_cvd_st, axis=1)
morbidity_cvd_ch_w6 = w6_first_cleaned_df.apply(calc_morbidity_cvd_ch, axis=1)
morbidity_cvd_95_w6 = w6_first_cleaned_df.apply(calc_morbidity_cvd_95, axis=1)

morbidity_cvd_w6_df = pd.DataFrame([morbidity_cvd_bp_w6, 
                             morbidity_cvd_an_w6,
                             morbidity_cvd_mi_w6,
                             morbidity_cvd_hf_w6,
                             morbidity_cvd_hm_w6,
                             morbidity_cvd_ar_w6,
                             morbidity_cvd_di_w6,
                             morbidity_cvd_st_w6,
                             morbidity_cvd_ch_w6,
                             morbidity_cvd_95_w6]).transpose()

morbidity_cvd_w6_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
88,0,0,0,0,0,0,0,0,0,0
90,0,0,0,0,0,0,0,0,0,0
92,0,0,0,0,0,0,0,0,0,0
94,0,0,0,0,0,0,0,0,0,0


In [82]:
morbidity_chr_lu_w6 = w6_first_cleaned_df.apply(calc_morbidity_chr_lu, axis=1)
morbidity_chr_as_w6 = w6_first_cleaned_df.apply(calc_morbidity_chr_as, axis=1)
morbidity_chr_ar_w6 = w6_first_cleaned_df.apply(calc_morbidity_chr_ar, axis=1)
morbidity_chr_os_w6 = w6_first_cleaned_df.apply(calc_morbidity_chr_os, axis=1)
morbidity_chr_ca_w6 = w6_first_cleaned_df.apply(calc_morbidity_chr_ca, axis=1)
morbidity_chr_pd_w6 = w6_first_cleaned_df.apply(calc_morbidity_chr_pd, axis=1)
morbidity_chr_ps_w6 = w6_first_cleaned_df.apply(calc_morbidity_chr_ps, axis=1)
morbidity_chr_ad_w6 = w6_first_cleaned_df.apply(calc_morbidity_chr_ad, axis=1)
morbidity_chr_de_w6 = w6_first_cleaned_df.apply(calc_morbidity_chr_de, axis=1)
morbidity_chr_bl_w6 = w6_first_cleaned_df.apply(calc_morbidity_chr_bl, axis=1)

morbidity_chr_w6_df = pd.DataFrame([morbidity_chr_lu_w6,
                             morbidity_chr_as_w6,
                             morbidity_chr_ar_w6,
                             morbidity_chr_os_w6,
                             morbidity_chr_ca_w6,
                             morbidity_chr_pd_w6,
                             morbidity_chr_ps_w6,
                             morbidity_chr_ad_w6,
                             morbidity_chr_de_w6,       
                             morbidity_chr_bl_w6]).transpose()

morbidity_chr_w6_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
88,0,0,0,0,0,0,0,0,0,0
90,0,0,0,0,0,0,0,0,0,0
92,0,0,0,0,0,0,0,0,0,0
94,0,0,0,0,0,0,0,0,0,0


In [83]:
def calc_multimorbidity (sbj) -> [0, 1]:
    return 1 if sbj.sum() > 1 else 0

In [84]:
multimorbidity_cvd_w6_df = morbidity_cvd_w6_df.apply(calc_multimorbidity, axis=1)

In [85]:
multimorbidity_cvd_w6_df

0     0
1     0
2     0
3     0
4     0
     ..
88    0
90    0
92    0
94    0
95    0
Length: 84, dtype: int64

In [86]:
sbj = morbidity_cvd_w6_df.iloc[0]
sbj
calc_multimorbidity(sbj)

0

In [87]:
multimorbidity_chr_w6_df = morbidity_chr_w6_df.apply(calc_multimorbidity, axis=1)

In [88]:
sbj = morbidity_chr_w6_df.iloc[0]
sbj
calc_multimorbidity(sbj)

0

### 5.2) Ola 8

- Cuestionario CASP-19 en ola 8

In [89]:
items_casp19_w8_df = w8_first_cleaned_df.apply(transform_casp19_items, axis=1, result_type='expand')

In [90]:
casp19_w8 = w8_first_cleaned_df.apply(calc_casp19, axis=1)

In [91]:
casp19_w8

0     31.0
1     28.0
2     44.0
3     41.0
4     54.0
5     48.0
6     49.0
7     40.0
8     48.0
10    44.0
11    39.0
12    36.0
13    42.0
14    44.0
15    38.0
16    45.0
17    46.0
18    67.0
19    54.0
21    36.0
22    45.0
24    36.0
25    44.0
26    33.0
27    33.0
29    26.0
30    47.0
31    55.0
32    48.0
33    47.0
34    37.0
36    37.0
37    34.0
38    44.0
40    38.0
41    20.0
42    14.0
43    45.0
44    43.0
45    29.0
46    54.0
48    20.0
49    56.0
53    46.0
54    44.0
55    52.0
56    54.0
58    50.0
59    49.0
60    44.0
61    43.0
65    48.0
67    26.0
69    55.0
70    52.0
71    33.0
72    51.0
73    49.0
74    55.0
dtype: float64

## 6) Análisis factoriales de los primeros cuestionarios (*Casp-19* e *Isolation*)

### 6.1) Ola 6

- Casp-19 

#### Confirmatorio con Semopy

##### Especificación del modelo 

In [92]:
casp19_mod = """ control ~~ autonomy 
                 control ~~ pleasure
                 control ~~ self_realization
                 autonomy ~~ pleasure
                 autonomy ~~ self_realization 
                 pleasure ~~ self_realization
    
                 control =~ scqola + scqolb + scqolc + scqold 
                 autonomy =~ scqole + scqolf + scqolg + scqolh + scqoli
                 pleasure =~ scqolj + scqolk + scqoll + scqolm + scqoln
                 self_realization =~ scqolo + scqolp + scqolq + scqolr + scqols
             """

In [93]:
casp19_sem_w6 = Model(casp19_mod)

In [94]:
casp19_sem_w6.load_dataset(w6_first_cleaned_df)

Estimar los parámetros

In [95]:
opt_casp19_sem_w6 = Optimizer(casp19_sem_w6)
casp19_sem_w6_objective_function_value = opt_casp19_sem_w6.optimize()
# opt_casp19_sem_w6.optimize(objective='ULS')
# opt_casp19_sem_w6.optimize(objective='GLS')
# opt_casp19_sem_w6.optimize(objective='MLW')

In [96]:
inspect(opt_casp19_sem_w6)

Unnamed: 0,lval,op,rval,Value,SE,Z-score,P-value
3,autonomy,=~,scqolf,-0.252532,0.417626,-0.604686,0.5453878
4,autonomy,=~,scqolg,1.276919,0.981818,1.300566,0.1934071
5,autonomy,=~,scqolh,-2.748239,1.738773,-1.580562,0.1139781
6,autonomy,=~,scqoli,-1.023782,0.762897,-1.341966,0.179607
0,control,=~,scqolb,0.777992,0.311266,2.499441,0.01243892
1,control,=~,scqolc,-1.160867,0.21595,-5.375628,7.631603e-08
2,control,=~,scqold,0.930487,0.199499,4.664116,3.099471e-06
7,pleasure,=~,scqolk,0.973957,0.102083,9.540822,0.0
8,pleasure,=~,scqoll,0.537483,0.074628,7.202153,5.926371e-13
9,pleasure,=~,scqolm,0.36085,0.082767,4.359824,1.301668e-05


In [97]:
#sv.visualize()

CONFIRMATORIO con Factor Analyzer

> El tipo de análisis que viene en el paquete de Factor Analyzer

In [98]:
casp_19_w6_dict = {'control': ['scqola', 'scqolb', 'scqolc', 'scqold'],
                   'autonomy': ['scqole', 'scqolf', 'scqolg', 'scqolh', 'scqoli'],
                   'pleasure': ['scqolj', 'scqolk', 'scqoll', 'scqolm', 'scqoln'],
                   'self_realization': ['scqolo', 'scqolp', 'scqolq', 'scqolr', 'scqols']}

In [99]:
casp_19_w6_spec = ModelSpecificationParser.parse_model_specification_from_dict(items_casp19_w6_df, casp_19_w6_dict)

In [100]:
cfa_casp_19_w6_spec = ConfirmatoryFactorAnalyzer(casp_19_w6_spec, disp=False)

In [101]:
cfa_casp_19_w6_spec.fit(items_casp19_w6_df.values)

  'to converge: {}'.format(str(res.message)))


ConfirmatoryFactorAnalyzer(bounds=None, disp=False, impute='median',
                           is_cov_matrix=False, max_iter=200, n_obs=84,
                           specification=<factor_analyzer.confirmatory_factor_analyzer.ModelSpecification object at 0x7fe31c9dcb10>,
                           tol=None)

In [102]:
cfa_casp_19_w6_spec.loadings_

array([[0.94492227, 0.        , 0.        , 0.        ],
       [1.13459   , 0.        , 0.        , 0.        ],
       [0.92230621, 0.        , 0.        , 0.        ],
       [0.85529699, 0.        , 0.        , 0.        ],
       [0.        , 0.94986215, 0.        , 0.        ],
       [0.        , 0.94297686, 0.        , 0.        ],
       [0.        , 0.94837738, 0.        , 0.        ],
       [0.        , 1.02358932, 0.        , 0.        ],
       [0.        , 0.98726218, 0.        , 0.        ],
       [0.        , 0.        , 0.99849893, 0.        ],
       [0.        , 0.        , 0.99126419, 0.        ],
       [0.        , 0.        , 0.9326037 , 0.        ],
       [0.        , 0.        , 0.91059808, 0.        ],
       [0.        , 0.        , 0.98147499, 0.        ],
       [0.        , 0.        , 0.        , 0.96594766],
       [0.        , 0.        , 0.        , 0.91232989],
       [0.        , 0.        , 0.        , 0.92085726],
       [0.        , 0.        ,

In [103]:
cfa_casp_19_w6_spec.factor_varcovs_

array([[1.        , 0.09173813, 0.07426923, 0.08752234],
       [0.09173813, 1.        , 0.0795434 , 0.10128656],
       [0.07426923, 0.0795434 , 1.        , 0.10329269],
       [0.08752234, 0.10128656, 0.10329269, 1.        ]])

In [104]:
cfa_casp_19_w6_spec.get_standard_errors

<bound method ConfirmatoryFactorAnalyzer.get_standard_errors of ConfirmatoryFactorAnalyzer(bounds=None, disp=False, impute='median',
                           is_cov_matrix=False, max_iter=200, n_obs=84,
                           specification=<factor_analyzer.confirmatory_factor_analyzer.ModelSpecification object at 0x7fe31c9dcb10>,
                           tol=None)>

### 6.2) Ola 8

* Casp-19

In [None]:
#42

In [105]:
fa_casp19_w8 = fa_casp19_w6

NameError: name 'fa_casp19_w6' is not defined

In [None]:
casp19_sem_w8 = Model(casp19_mod)

In [None]:
casp19_sem_w8.load_dataset(w8_first_cleaned_df)

Escimar los parámetros

In [None]:
opt_casp19_sem_w8 = Optimizer(casp19_sem_w8)
casp19_sem_w8_objective_function_value = opt_casp19_sem_w8.optimize()
# opt_casp19_sem_w6.optimize(objective='ULS')
# opt_casp19_sem_w6.optimize(objective='GLS')
# opt_casp19_sem_w6.optimize(objective='MLW')

In [None]:
inspect(opt_casp19_sem_w6)

## 7) Segundo tratamiento de valores perdidos

### 7.1) Ola 6

In [109]:
second_cols_to_clean_out_of_sample_w6 = isolation_w6_cols + casp19_w6_cols + sexdissa_w6_m_cols + sexdissa_w6_f_cols

- Casp-19, Isolation, sexual dissatisfaction for female and male and multimorbidity: **"-2": "schedule not aplicable"** : *Cuestionarios, compuestos por sus ítems, que van a ser revisados para eliminar sujetos que no formaban parte de la muestra en estos módulos de ELSA. 1902 mujeres y 1620 hombres no han recibido módulo de sexualidad del cuestionario* 

In [110]:
second_sbj_out_of_sample_w6 = w6_first_cleaned_df.loc[pd.DataFrame([(w6_first_cleaned_df[col] == -2) for col in second_cols_to_clean_out_of_sample_w6]).any()].index

In [111]:
second_sbj_out_of_sample_w6

Int64Index([0, 35, 38, 39, 45, 52, 56, 61, 66, 70, 86], dtype='int64')

In [112]:
w6_without_second_sbj_out_of_sample_df = w6_first_cleaned_df.drop(second_sbj_out_of_sample_w6)
#w6_without_second_sbj_out_of_sample_df

 * Sexdissa: -9 Sujetos que no contestan a si han mantenido relaciones en el úiltimo año

In [113]:
sbj_didnt_awr_q5 = w6_without_second_sbj_out_of_sample_df.loc[pd.DataFrame([w6_without_second_sbj_out_of_sample_df['q5'] == -9]).any()].index

In [114]:
w6_without_sbj_didnt_awr_q5_df = w6_without_second_sbj_out_of_sample_df.drop(sbj_didnt_awr_q5)

In [115]:
w6_without_sbj_didnt_awr_q5_df

Unnamed: 0,idauniq,idahhw6,perid,samptyp,w6indout,W6hhout,couple,cpid,chinhh,chouthh,...,w6nssec8,w6nssec5,w6nssec3,W6SIC,w6soc,W6SEC,w6sic2003,w6soc2000,w6nssec13,finstatw6
1,103895.0,16260.0,1.0,1.0,11.0,110.0,1.0,2.0,1.0,1.0,...,5.0,4.0,3.0,0.0,0.0,0.0,-1.0,92.0,10.0,1.0
2,103960.0,14747.0,1.0,1.0,11.0,110.0,1.0,2.0,2.0,1.0,...,7.0,5.0,3.0,0.0,0.0,0.0,-1.0,62.0,13.3,1.0
3,104192.0,15705.0,1.0,1.0,11.0,110.0,1.0,2.0,2.0,1.0,...,4.0,3.0,2.0,0.0,0.0,0.0,-1.0,51.0,9.2,1.0
4,104194.0,16507.0,1.0,1.0,11.0,110.0,1.0,2.0,2.0,2.0,...,6.0,5.0,3.0,0.0,0.0,0.0,-1.0,41.0,12.6,1.0
6,104510.0,15552.0,1.0,1.0,11.0,110.0,3.0,-1.0,2.0,1.0,...,5.0,4.0,3.0,0.0,0.0,0.0,-1.0,71.0,10.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,165305.0,11882.0,1.0,1.0,11.0,110.0,1.0,2.0,2.0,1.0,...,5.0,4.0,3.0,1.0,1.0,1.0,63.0,91.0,10.0,25.0
90,165365.0,14517.0,1.0,1.0,11.0,110.0,3.0,-1.0,1.0,2.0,...,6.0,5.0,3.0,1.0,1.0,1.0,80.0,61.0,12.7,25.0
92,165537.0,11512.0,2.0,1.0,11.0,110.0,1.0,1.0,2.0,1.0,...,1.0,1.0,1.0,0.0,0.0,0.0,-1.0,11.0,2.0,25.0
94,165858.0,10999.0,3.0,-1.0,11.0,110.0,1.0,1.0,2.0,1.0,...,2.0,1.0,1.0,1.0,1.0,1.0,74.0,35.0,4.3,29.0


 * -9 Sujetos que no contestan a alguna de las preguntas del cuestionario de satisfacción sexual

## ( ! ) Ayuda:

quiero sustituir por la media de puntuaciones en el cuestionario de satisfacción sexual (de todos los items mayores que cero, es importante que no se cuelen en esa media valores negativos, valores perdidos)

In [None]:
mean_sexdissa_w6 =df[sexdissa_w6_f_cols]

In [None]:
second_sbj_didnt_awr_w6 = w6_without_second_sbj_out_of_sample_df.loc[pd.DataFrame([(w6_without_second_sbj_out_of_sample_df[col] == -9) for col in second_cols_to_clean_out_of_sample_w6]).any()].index

In [None]:
w6_without_second_sbj_didnt_awr_df = w6_without_second_sbj_out_of_sample_df.drop(second_sbj_didnt_awr_w6)
# w6_without_second_sbj_didnt_awr_df

In [None]:
len(w6_without_second_sbj_didnt_awr_df)

* sexdissa:  -1 (item no aplicable) en sexdissa_w6_f y sexdissa_w6_m que no son aplicables porque se requiere ser de un sexo u otro para responder:

Se tratarán en la construcción funcional de los cuestionarios sexdissa_w6_f y sexdissa_w6_m

* sexdissa: q5 = -1 (item no aplicable) por no haber mantenido relaciones sexuales en el último año

## Test cutres

In [None]:
w6_second_cleaned_df.loc[w6_second_cleaned_df['q3'] < 0, ['q3']]

In [None]:
w6_second_cleaned_df.loc[w6_second_cleaned_df['q9'] < 0, ['q9']]

- mortalidad: eliminamos los sujetos no repetidos en las dos olas

In [None]:
indaunic

### ( ! ) Base de datos tras la segunda limpieza valores perdidos

In [117]:
w6_second_cleaned_df = w6_without_sbj_didnt_awr_q5_df #sustituir por el nombre del df más reciente tras la limpieza

## 8) Construcción funcional de los segundos cuestionarios

* Sexual dissatisfaction female

In [None]:
casp19_original_values = [1, 2, 3, 4]
casp19_reverse_items_values = [0, 1, 2, 3]
casp19_direct_items_values = [3, 2, 1, 0]

# eliminar esta celda repetida (está en el apartado "construcción teórica de questionarios")
sexdissa_w6_f_cols = ['q3', 'q9', 'q10', 'q14f', 'q15f', 'q19m19f', 'q28m25f', 'q32m26f']
sexdissa_w6_q10_original_values = [1, 2, 3]
sexdissa_w6_q10_transformed_values = [5, 0, 5]

In [135]:
sexdissa_item_to_transform = ['q10']
sexdissa_direct_items_f = ['q3', 'q9', 'q14f', 'q15f', 'q19m19f', 'q28m25f', 'q32m26f']
sexdissa_direct_items_m =  ['q3', 'q9', 'q13m', 'q17m', 'q19m19f', 'q21m', 'q28m25f', 'q32m26f']

# ( ! ) Ayuda:

#### Función que transforma el valor de q10

Lo que estoy intentando es por un lado cambiar el valor de q10 y luego el resto de items de sexdissa transformarlos de [1, 2, 3, 4, 5] a [0, 1, 2, 3, 4]

Después tengo que crear los items de mujeres y de hombres aplicando las funciones para construir dataframes con las puntuacioens de los items sueltas y luego el dataframe de la puntuación del cuestionario con la suma.
al construir el dataframe se manejan los -1 de los hombres por las preguntas que requieren ser mujer y viceversa. Eso es lo que he intentado.

In [122]:
def transform_q10_sexdissa_w6(x: [1, 2, 3]) -> [5, 0, 5]:
     return 5 if x == 1 else 5 if x == 3 else 0 if x == 2  else None

In [123]:
def get_direct_items_sexdissa_w6(x: [1, 2, 3, 4, 5]) -> [0, 1, 2, 3, 4]:
    return x - 1

In [124]:
def calc_items_sexdissa_w6_f(sbj) -> List[int]:
    return ([transform_q10_sexdissa_w6(sbj[item_to_transform]) for item_to_transform in sexdissa_item_to_transform] + 
              [sexdissa_direct_items_f])

In [125]:
def calc_items_sexdissa_w6_m(sbj) -> List[int]:
    return ([transform_q10_sexdissa_w6(sbj[item_to_transform]) for item_to_transform in sexdissa_item_to_transform] + 
              [sexdissa_direct_items_m])

In [126]:
def calc_sexdissa_w6_f(sbj) -> list(range(40)):
    return sum([transform_q10_sexdissa_w6(sbj[item_to_transform]) for item_to_transform in sexdissa_item_to_transform] + 
              [sexdissa_direct_items_f])

In [127]:
def calc_sexdissa_w6_m(sbj) -> list(range(40)):
    return sum([transform_q10_sexdissa_w6(sbj[item_to_transform]) for item_to_transform in sexdissa_item_to_transform] + 
              [sexdissa_direct_items_m])

Los los valores perdidos "-1 (item not applicable)" debidos al sexo se disipan al construir los dataframes diferenciados en hombres y mujeres

In [139]:
f_w6_df = w6_second_cleaned_df.loc[(w6_second_cleaned_df['indsex'] == 2)].index

In [129]:
m_w6_df = w6_second_cleaned_df.loc[(w6_second_cleaned_df['indsex'] == 1)].index

In [140]:
items_sexdissa_w6_f_df = f_w6_df.apply(calc_items_sexdissa_w6_f, axis=1, result_type='expand')

AttributeError: 'Int64Index' object has no attribute 'apply'

In [138]:
items_sexdissa_w6_f_df

Unnamed: 0,0,1
2,5.0,"[q3, q9, q14f, q15f, q19m19f, q28m25f, q32m26f]"
6,5.0,"[q3, q9, q14f, q15f, q19m19f, q28m25f, q32m26f]"
8,0.0,"[q3, q9, q14f, q15f, q19m19f, q28m25f, q32m26f]"
11,,"[q3, q9, q14f, q15f, q19m19f, q28m25f, q32m26f]"
15,,"[q3, q9, q14f, q15f, q19m19f, q28m25f, q32m26f]"
20,,"[q3, q9, q14f, q15f, q19m19f, q28m25f, q32m26f]"
23,,"[q3, q9, q14f, q15f, q19m19f, q28m25f, q32m26f]"
25,0.0,"[q3, q9, q14f, q15f, q19m19f, q28m25f, q32m26f]"
26,0.0,"[q3, q9, q14f, q15f, q19m19f, q28m25f, q32m26f]"
30,,"[q3, q9, q14f, q15f, q19m19f, q28m25f, q32m26f]"


In [None]:
sexdissa_w6_df = w6_second_cleaned_df.loc[(w6_second_cleaned_df['indsex'] == 1)].apply(calc_sexdissa_w6, axis=1)

* Sexual dissatisfaction male

en cuanto tenga female, sé hacer male

## 9) Análisis factoriales de los segundos cuestionarios

### 9.1) Ola 6

* sexdissa_w6_f

#### Exploratorio

In [None]:
chi_square_value_sexdissa_w6_f, p_value_value_sexdissa_w6_f = calculate_bartlett_sphericity(sexdissa_w6_f)

In [None]:
chi_square_value_value_sexdissa_w6_f, p_value_value_sexdissa_w6_f

In [None]:
kmo_all_value_sexdissa_w6_f, kmo_model_value_sexdissa_w6_f = calculate_kmo(sexdissa_w6_f)

In [None]:
kmo_all_value_sexdissa_w6_f

In [None]:
kmo_model_value_sexdissa_w6_f

In [None]:
fa_sexdissa_w6_f= FactorAnalyzer(bounds=(0.005, 1), impute='drop', is_corr_matrix=False,
               method='minres', n_factors=3, rotation='promax',
               rotation_kwargs={}, use_smc=True)

In [None]:
fa_sexdissa_w6_f.fit(sexdissa_w6_f)

In [None]:
ev_sexdissa_w6_f, v_sexdissa_w6_f = fa_sexdissa_w6_f.get_eigenvalues()

In [None]:
plt.scatter(range(1, sexdissa_w6_f.shape[1]+1), ev_sexdissa_w6_f)
plt.plot(range(1, sexdissa_w6_f.shape[1]+1), ev_sexdissa_w6_f)
plt.title('Scree Plot')
plt.xlabel('Factors')
plt.ylabel('Eigenvalue')
plt.grid()
plt.show()

* sexdissa_w6_m

#### Exploratorio

In [None]:
chi_square_value_sexdissa_w6_m,p_value_sexdissa_w6_m = calculate_bartlett_sphericity(sexdissa_w6_m)

In [None]:
chi_square_value_sexdissa_w6_m,p_value_sexdissa_w6_m

In [None]:
kmo_all_sexdissa_w6_m, Kmo_model_sexdissa_w6_m = calculate_kmo(sexdissa_w6_m)

In [None]:
kmo_all_sexdissa_w6_m

In [None]:
Kmo_model_sexdissa_w6_m

In [None]:
fa_sexdissa_w6_m = FactorAnalyzer(bounds=(0.005, 1), impute='drop', is_corr_matrix=False,
               method='minres', n_factors=3, rotation='promax',
               rotation_kwargs={}, use_smc=True)

In [None]:
fa_sexdissa_w6_m.fit(sexdissa_w6_m)

In [None]:
ev_sexdissa_w6_m, v_sexdissa_w6_m = fa_sexdissa_w6_m.get_eigenvalues()

In [None]:
plt.scatter(range(1, sexdissa_w6_m.shape[1]+1), ev_sexdissa_w6_m)
plt.plot(range(1, sexdissa_w6_m.shape[1]+1), ev_sexdissa_w6_m)
plt.title('Scree Plot')
plt.xlabel('Factors')
plt.ylabel('Eigenvalue')
plt.grid()
plt.show()

## 10) Normalizar

## 11) Regresión lineal múltiple

In [None]:
# https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html

In [119]:
x1f = w6_second_cleaned_df[sexdissa_w6_f]
x2 = w6_second_cleaned_df[casp19_w6]
x3 = w6_second_cleaned_df[isolation_w6_df]
x4 = w6_second_cleaned_df['indager']
yf = w6_second_cleaned_df[casp19_w8]

x1m = w6_second_cleaned_df[sexdissa_w6_m]
ym = w6_second_cleaned_df[casp19_w8]

NameError: name 'sexdissa_w6_f' is not defined

* mujeres

In [None]:
modf = smf.ols('yf ~ x1f +x2 + x3 + x4')

* hombres

In [None]:
modf = smf.ols('yf ~ x1f +x2 + x3 + x4')