## Validation of Constructs

v1_12.04.2024

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from factor_analyzer import FactorAnalyzer, calculate_kmo, calculate_bartlett_sphericity
from semopy import Model
from semopy.stats import calc_stats, calc_gfi
from scipy.stats import chi2
from sklearn.covariance import EmpiricalCovariance
from statsmodels.stats.outliers_influence import variance_inflation_factor
import pingouin as pg
from semopy.inspector import inspect

In [82]:
# Load data
data = pd.read_csv('data_merged_all_validation.csv')

In [83]:
# Define the model using semopy syntax
model_desc = """
NEO_N =~ NEON1 + NEON2R + NEON3 + NEON4
"""

model = Model(model_desc)
result = model.fit(data)
print(result)

Name of objective: MLW
Optimization method: SLSQP
Optimization successful.
Optimization terminated successfully
Objective value: 0.009
Number of iterations: 17
Params: -0.737 1.250 1.227 0.552 0.746 0.603 0.208 0.541


In [None]:
AILiteracy_Use =~ AILiteracyUse1 + AILiteracyUse2 + AILiteracyUse3 + AILiteracyUse4 + AILiteracyUse5 + AILiteracyUse6
AILiteracy_Kno =~ AILiteracyKno1 + AILiteracyKno2 + AILiteracyKno3 + AILiteracyKno4 + AILiteracyKno5 + AILiteracyKno6
AILiteracy_Det =~ AILiteracyDet1 + AILiteracyDet2 + AILiteracyDet3
AILiteracy_Eth =~ AILiteracyEth1 + AILiteracyEth3
PGAT =~ PGATPGAT1 + PGATPGAT2 + PGATPGAT3
NGAT =~ NGATNGAT1 + NGATNGAT2 + NGATNGAT3
CMV =~ CMVSQ001 + CMVSQ002 + CMVSQ003
NEO_E =~ NEOE1R + NEOE2 + NEOE3R + NEOE4
NEO_A =~ NEOV1R + NEOV2 + NEOV3R + NEOV4R
NEO_C =~ NEOG1 + NEOG2R + NEOG3 + NEOG4
NEO_N =~ NEON1 + NEON2R + NEON3 + NEON4
NEO_O =~ NEOO1 + NEOO2 + NEOO3 + NEOO4 + NEOO5R
JC1_IStR =~ JC1IStR1 + JC1IStR2 + JC1IStR3 + JC1IStR4 + JC1IStR5
JC1_HRJD =~ JC1HRJD1 + JC1HRJD2 + JC1HRJD3
JC2_IStR =~ JC22IStR1 + JC22IStR2 + JC22IStR3 + JC22IStR4 + JC22IStR5
JC2_HRJD =~ JC22HRJD1 + JC22HRJD2 + JC22HRJD5 + JC22HRJD6

# 'JC_IStR': ['JC1[IStR1]', 'JC1[IStR2]', 'JC1[IStR3]', 'JC1[IStR4]', 'JC1[IStR5]', 'JC2[2IStR1]', 'JC2[2IStR2]', 'JC2[2IStR3]', 'JC2[2IStR4]', 'JC2[2IStR5]'],
# 'JC_HRJD': ['JC1[HRJD1]', 'JC1[HRJD2]', 'JC1[HRJD3]', 'JC2[2HRJD1]', 'JC2[2HRJD2]', 'JC2[2HRJD5]', 'JC2[2HRJD6]']

In [84]:
# Use calc_stats to get a comprehensive result object including fit indices
stats = calc_stats(model)
print(stats)

       DoF  DoF Baseline      chi2  chi2 p-value  chi2 Baseline       CFI  \
Value    2             6  0.283289       0.86793      43.600805  1.045656   

            GFI      AGFI       NFI       TLI  RMSEA        AIC        BIC  \
Value  0.993503  0.980508  0.993503  1.136969      0  15.981114  27.190693   

         LogLik  
Value  0.009443  


In [86]:
# Evaluate model fit 
print(f"RMSEA: {stats['RMSEA']},\nCFI: {stats['CFI']}, \nTLI: {stats['TLI']}")

RMSEA: Value    0
Name: RMSEA, dtype: int64,
CFI: Value    1.045656
Name: CFI, dtype: float64, 
TLI: Value    1.136969
Name: TLI, dtype: float64


In [87]:
# Compute Average Variance Extracted (AVE) for each factor
stats = calc_stats(model)
print("Average Variance Extracted (AVE):\n", stats.get_ave())

AttributeError: 'DataFrame' object has no attribute 'get_ave'

In [None]:
# Correlating constructs with an external criterion
# Assuming 'job_satisfaction' is the external criterion
# correlations = data[['NEO[E]', 'NEO[A]', 'job_satisfaction']].corr()
# print(correlations)

In [None]:
# Calculate and print the Fornell-Larcker Criterion and HTMT ratio
discriminant_validity = stats.discriminant_validity()
htmt_ratio = stats.htmt()
print("Discriminant Validity (Fornell-Larcker):\n", discriminant_validity)
print("HTMT Ratio:\n", htmt_ratio)

In [None]:
# Calculate Cronbach's Alpha for each construct
# Define constructs with their associated items
constructs = {
    'AILiteracy_Use': ['AILiteracy[Use1]', 'AILiteracy[Use2]', 'AILiteracy[Use3]', 'AILiteracy[Use4]', 'AILiteracy[Use5]', 'AILiteracy[Use6]'],
    'AILiteracy_Kno': ['AILiteracy[Kno1]', 'AILiteracy[Kno2]', 'AILiteracy[Kno3]', 'AILiteracy[Kno4]', 'AILiteracy[Kno5]', 'AILiteracy[Kno6]'],
    'AILiteracy_Det': ['AILiteracy[Det1]', 'AILiteracy[Det2]', 'AILiteracy[Det3]'],
    'AILiteracy_Eth': ['AILiteracy[Eth1]', 'AILiteracy[Eth3]'],
    'PGAT': ['PGAT[PGAT1]', 'PGAT[PGAT2]', 'PGAT[PGAT3]'],
    'NGAT': ['NGAT[NGAT1]', 'NGAT[NGAT2]', 'NGAT[NGAT3]'],
    'CMV': ['CMV[SQ001]', 'CMV[SQ002]', 'CMV[SQ003]'],
    'NEO_E': ['NEO[E1R]', 'NEO[E2]', 'NEO[E3R]', 'NEO[E4]'],
    'NEO_A': ['NEO[V1R]', 'NEO[V2]', 'NEO[V3R]', 'NEO[V4R]'],
    'NEO_C': ['NEO[G1]', 'NEO[G2R]', 'NEO[G3]', 'NEO[G4]'],
    'NEO_N': ['NEO[N1]', 'NEO[N2R]', 'NEO[N3]', 'NEO[N4]'],
    'NEO_O': ['NEO[O1]', 'NEO[O2]', 'NEO[O3]', 'NEO[O4]', 'NEO[O5R]'],
    'JC1_IStR': ['JC1[IStR1]', 'JC1[IStR2]', 'JC1[IStR3]', 'JC1[IStR4]', 'JC1[IStR5]'],
    'JC1_HRJD': ['JC1[HRJD1]', 'JC1[HRJD2]', 'JC1[HRJD3]'],
    'JC2_IStR': ['JC2[2IStR1]', 'JC2[2IStR2]', 'JC2[2IStR3]', 'JC2[2IStR4]', 'JC2[2IStR5]'],
    'JC2_HRJD': ['JC2[2HRJD1]', 'JC2[2HRJD2]', 'JC2[2HRJD5]', 'JC2[2HRJD6]'],
    'JC_IStR': ['JC1[IStR1]', 'JC1[IStR2]', 'JC1[IStR3]', 'JC1[IStR4]', 'JC1[IStR5]', 'JC2[2IStR1]', 'JC2[2IStR2]', 'JC2[2IStR3]', 'JC2[2IStR4]', 'JC2[2IStR5]'],
    'JC_HRJD': ['JC1[HRJD1]', 'JC1[HRJD2]', 'JC1[HRJD3]', 'JC2[2HRJD1]', 'JC2[2HRJD2]', 'JC2[2HRJD5]', 'JC2[2HRJD6]']
}

# Calculate Cronbach's Alpha for each construct
for name, items in constructs.items():
    alpha, _ = pg.cronbach_alpha(data=data[items])
    print(f"Cronbach's Alpha for {name}: {alpha:.3f}")

In [81]:
# Calculate Composite Reliability
composite_reliability = stats.get_crb()
print("Composite Reliability:\n", composite_reliability)

AttributeError: 'DataFrame' object has no attribute 'get_crb'

In [None]:
# Mardia's Test for multivariate normality
mn_kurtosis, mn_skewness = mardia(data)
print(f"Mardia's Multivariate Kurtosis: {mn_kurtosis}, Skewness: {mn_skewness}")

In [None]:
# Mahalanobis distance for detecting outliers
cov = EmpiricalCovariance().fit(data)
mdist = cov.mahalanobis(data)
chi2_thresh = chi2.ppf((1-0.01), df=len(data.columns)) # 99% confidence level
outliers = mdist > chi2_thresh
print(f"Outliers detected: {np.sum(outliers)}")

In [None]:
# Variance Inflation Factor (VIF)
vif_data = pd.DataFrame()
vif_data["feature"] = data.columns
vif_data["VIF"] = [variance_inflation_factor(data.values, i) for i in range(data.shape[1])]
print(vif_data)