# Análisis Factorial Confirmatorio (AFC)

In [1]:
#Basado en el GitHub del Educational Testing Service:
#https://github.com/EducationalTestingService/factor_analyzer
#Los datos se pueden leer directamente de: https://raw.githubusercontent.com/EducationalTestingService/factor_analyzer/main/tests/data/test02.csv

## Importar librerías

In [1]:
# %load ../standard_import.txt
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import scale
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

from scipy.cluster import hierarchy

%matplotlib inline
plt.style.use('seaborn-white')

from factor_analyzer import FactorAnalyzer
from factor_analyzer import (ConfirmatoryFactorAnalyzer,
                             ModelSpecificationParser)

## Importar los datos

In [2]:
# Load the test02 data set directly from the factor_analyzer repository.
TEST02_URL = (
    'https://raw.githubusercontent.com/EducationalTestingService/'
    'factor_analyzer/main/tests/data/test02.csv'
)
# NOTE(review): index_col=0 turns the first CSV column into the index. The
# info() output reports index entries "2 to 1", which looks like a data column
# rather than a row id -- confirm that dropping it from the features is intended.
df_features02 = pd.read_csv(TEST02_URL, index_col=0)
# Summarise column names, non-null counts and dtypes.
df_features02.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1678 entries, 2 to 1
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   zygosity  1678 non-null   int64
 1   moed      1678 non-null   int64
 2   faed      1678 non-null   int64
 3   faminc    1678 non-null   int64
 4   english   1678 non-null   int64
 5   math      1678 non-null   int64
 6   socsci    1678 non-null   int64
 7   natsci    1678 non-null   int64
 8   vocab     1678 non-null   int64
dtypes: int64(9)
memory usage: 131.1 KB


## Análisis Factorial

In [3]:
# Exploratory factor analysis with a varimax rotation. The number of factors
# is spelled out (3 is the library default, matching the three loading columns
# shown below) so the reader does not have to look it up.
fa = FactorAnalyzer(n_factors=3, rotation='varimax')
# fit() is the last expression, so the fitted estimator is echoed as output.
fa.fit(df_features02)

FactorAnalyzer(rotation='varimax', rotation_kwargs={})

In [4]:
# Factor loadings: one row per column of df_features02 (same order as the
# info() listing above), one column per extracted factor.
fa.loadings_

array([[ 0.0130604 ,  0.05302504,  0.05673551],
       [ 0.07284237,  0.70217185,  0.07990228],
       [ 0.12840347,  0.8406644 ,  0.07872461],
       [ 0.1619275 ,  0.54853408,  0.05498724],
       [ 0.72917051,  0.13034093,  0.04584536],
       [ 0.69439413,  0.14104724,  0.33393446],
       [ 0.8400285 ,  0.13482517,  0.0291735 ],
       [ 0.76432041,  0.06320179,  0.35565568],
       [ 0.89395299,  0.21061284, -0.27848264]])

In [5]:
# Communalities, one value per column of df_features02 in the same order as
# the loadings rows.
fa.get_communalities()

array([0.00620115, 0.50473569, 0.72940166, 0.33013375, 0.55078019,
       0.61358976, 0.72467681, 0.71467111, 0.92106231])

## Análisis Confirmatorio

In [10]:
# Load the test11 data set (columns V1..V8) from the factor_analyzer repository.
TEST11_URL = (
    'https://raw.githubusercontent.com/EducationalTestingService/'
    'factor_analyzer/main/tests/data/test11.csv'
)
df_features11 = pd.read_csv(TEST11_URL)

In [11]:
# Summarise column names, non-null counts and dtypes of the test11 data.
df_features11.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   V1      1000 non-null   float64
 1   V2      1000 non-null   float64
 2   V3      1000 non-null   float64
 3   V4      1000 non-null   float64
 4   V5      1000 non-null   float64
 5   V6      1000 non-null   float64
 6   V7      1000 non-null   float64
 7   V8      1000 non-null   float64
dtypes: float64(8)
memory usage: 62.6 KB


In [13]:

# Confirmatory model: each of the two factors is measured by four of the
# eight columns of df_features11.
model_dict = {
    "F1": ["V1", "V2", "V3", "V4"],
    "F2": ["V5", "V6", "V7", "V8"],
}
model_spec = ModelSpecificationParser.parse_model_specification_from_dict(
    df_features11, model_dict
)

# disp=False is forwarded to the analyzer; presumably it silences optimizer
# output -- see the factor_analyzer documentation.
cfa = ConfirmatoryFactorAnalyzer(model_spec, disp=False)

# Fit on the raw NumPy values. As the last expression of the cell, the fitted
# estimator is echoed as output.
cfa.fit(df_features11.values)


ConfirmatoryFactorAnalyzer(disp=False, n_obs=1000,
                           specification=<factor_analyzer.confirmatory_factor_analyzer.ModelSpecification object at 0x000001AE3AC37250>)

In [14]:
# Estimated loadings: rows follow the column order V1..V8, columns are F1, F2.
# Entries not listed for a factor in model_dict show as 0.
cfa.loadings_

array([[0.99131035, 0.        ],
       [0.46075129, 0.        ],
       [0.35022393, 0.        ],
       [0.58331139, 0.        ],
       [0.        , 0.98621366],
       [0.        , 0.73389146],
       [0.        , 0.37602713],
       [0.        , 0.50049193]])

In [15]:
# Factor variance-covariance matrix (2 x 2, one row/column per factor).
cfa.factor_varcovs_

array([[1.        , 0.17385698],
       [0.17385698, 1.        ]])

In [16]:
# Transform the observations with the fitted model; the result has one row per
# observation and one column per factor.
cfa.transform(df_features11.values)

array([[-0.46852279, -1.08707326],
       [ 2.59026044,  1.20227579],
       [-0.4721608 ,  2.65696545],
       ...,
       [-1.59308951, -0.918051  ],
       [ 0.19431265,  0.88174776],
       [-0.2786335 , -0.76951055]])

### Visualización

In [17]:
#El tutorial de funciones completas se puede ver en:
#https://semopy.com/syntax.html
#También hay un ejemplo en:
#https://towardsdatascience.com/structural-equation-modeling-dca298798f4d

In [18]:
#!pip install semopy

In [19]:
#!conda install python-graphviz

## Análisis confirmatorio para datos de test11.csv

In [20]:
import semopy

In [21]:
# semopy ships a "political democracy" example; fetch its data set and the
# matching model description.
political_democracy = semopy.examples.political_democracy
data = political_democracy.get_data()
mod = political_democracy.get_model()

In [22]:
# Build a semopy model from the example description and fit it to the example
# data. The order matters: the model is fitted before it is plotted.
m = semopy.Model(mod)
m.fit(data)
# semplot receives "pd.png" as the output filename. The returned object is only
# assigned to g, so this cell displays nothing.
g = semopy.semplot(m, "pd.png")

In [23]:
# Specify the model relations using the same syntax given before.
# We use the data already loaded in df_features11 instead of downloading the
# data set from the original page.
# NOTE: this rebinds model_spec, which earlier held the parsed
# ModelSpecification object passed to ConfirmatoryFactorAnalyzer.
model_spec ='''
  # measurement model
    Y1 =~ V1 + V2 + V3 + V4
    Y2 =~ V5 + V6 + V7 + V8
'''

In [24]:
# Instantiate the semopy model from the textual specification in model_spec.
model = semopy.Model(model_spec)


In [25]:
# Fit the model using the test11 data.
model.fit(df_features11)

# Draw the path diagram; semplot receives "test11_cfa.png" as output filename.
g = semopy.semplot(model, "test11_cfa.png")

# Show the parameter estimates. A notebook only echoes the value of a cell's
# last expression, so inspect() must come last; when it sat in the middle of
# the cell its result was silently discarded and nothing was displayed.
model.inspect()

In [26]:
# Analysis of the test02 data: the variable-to-factor association is not known
# in advance, so it is derived from the exploratory factor analysis.
# Compare the absolute loading against the 0.4 cut-off so that a strong
# negative loading would also be flagged (a signed comparison would miss it).
# For the loadings shown above the resulting mask is unchanged.
np.abs(fa.loadings_) > 0.4

array([[False, False, False],
       [False,  True, False],
       [False,  True, False],
       [False,  True, False],
       [ True, False, False],
       [ True, False, False],
       [ True, False, False],
       [ True, False, False],
       [ True, False, False]])

### Variables en los datos test02

| # | Columna  | Non-Null Count | Dtype |
|---|----------|----------------|-------|
| 0 | zygosity | 1678 non-null  | int64 |
| 1 | moed     | 1678 non-null  | int64 |
| 2 | faed     | 1678 non-null  | int64 |
| 3 | faminc   | 1678 non-null  | int64 |
| 4 | english  | 1678 non-null  | int64 |
| 5 | math     | 1678 non-null  | int64 |
| 6 | socsci   | 1678 non-null  | int64 |
| 7 | natsci   | 1678 non-null  | int64 |
| 8 | vocab    | 1678 non-null  | int64 |

In [27]:
# Measurement model for the test02 data: two latent factors plus their
# covariance.
# NOTE(review): the exploratory result (fa.loadings_ > .4) puts english, math,
# socsci, natsci and vocab on the first factor and moed, faed and faminc on the
# second. This specification instead places english with moed/faed and leaves
# out natsci and faminc -- confirm whether that is intentional.
model_spec='''
    # measurement model
    F1 =~ math + socsci + vocab
    F2 =~ moed + faed + english
    # covariance
    F1 ~~ F2
'''

In [28]:
# Instantiate the semopy model from the test02 specification in model_spec.
model = semopy.Model(model_spec)

In [29]:
# Fit the model using the test02 data.
model.fit(df_features02)

# Draw the path diagram; semplot receives "test02_cfa2.png" as output filename.
g = semopy.semplot(model, "test02_cfa2.png")

# Show the parameter estimates. A notebook only echoes the value of a cell's
# last expression, so inspect() must come last; when it sat in the middle of
# the cell its result was silently discarded and nothing was displayed.
model.inspect()


In [30]:
# Echo the model object; this only shows its default repr (class and address).
model

<semopy.model.Model at 0x1ae384007c0>

<bound method Model.fit of <semopy.model.Model object at 0x0000027C226EFAF0>>
