### **Library Preparation**

In [1]:
try :
  import factor_analyzer
except :
  !pip install factor_analyzer
  import factor_analyzer

# import required libraries
import pandas as pd
import numpy as np
from factor_analyzer import FactorAnalyzer
import matplotlib.pyplot as plt

## **Dataset import and preprocessing**

In [2]:
! gdown --id 1hH9s_NIGXDZhq9HazkV-Ysv8MNxokmpQ

Downloading...
From: https://drive.google.com/uc?id=1hH9s_NIGXDZhq9HazkV-Ysv8MNxokmpQ
To: /content/data_validasi.xlsx
100% 11.5k/11.5k [00:00<00:00, 14.2MB/s]


In [3]:
# Load the survey responses from the downloaded Excel workbook.
# `np.str` was only an alias of the builtin `str`; it is deprecated since
# NumPy 1.20 and removed in NumPy 1.24, so the builtin is used directly.
# NOTE(review): the displayed frame has no 'no' or 'data' columns, so this
# dtype mapping may be a leftover from another dataset — confirm.
df = pd.read_excel(
    '/content/data_validasi.xlsx',  # path to the Excel file
    dtype={'no': np.int32, 'data': str},
)
df

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  """Entry point for launching an IPython kernel.


Unnamed: 0,Timestamp,Name / Initial,Gender,E1,WOP1,COU1,WOP2,PCQ1,PCQ2,E2,COU2
0,2022-09-22 23:31:12.661,winda,Female,5.0,5.0,5.0,5.0,5.0,3.0,5.0,5.0
1,2022-09-22 23:49:44.716,Nadhira,Female,5.0,4.0,4.0,4.0,5.0,4.0,5.0,5.0
2,2022-09-23 06:57:58.879,Sakinah,Female,4.0,5.0,5.0,4.0,4.0,4.0,3.0,5.0
3,2022-09-23 08:09:11.297,r,Female,4.0,4.0,5.0,3.0,3.0,4.0,5.0,4.0
4,2022-09-23 08:10:56.623,el,Female,4.0,4.0,4.0,4.0,4.0,3.0,4.0,2.0
5,2022-09-23 08:12:55.866,Alya,Female,5.0,4.0,5.0,4.0,4.0,3.0,4.0,5.0
6,2022-09-23 08:13:33.668,Angelica,Female,5.0,5.0,5.0,5.0,4.0,5.0,5.0,4.0
7,2022-09-23 08:27:07.114,HRA,Male,5.0,4.0,4.0,4.0,4.0,4.0,4.0,5.0
8,2022-09-23 08:29:44.656,J,Female,5.0,4.0,5.0,4.0,3.0,4.0,5.0,5.0
9,2022-09-23 08:30:00.820,Micheleasn,Female,4.0,4.0,4.0,3.0,4.0,4.0,4.0,3.0


In [4]:
# Drop identifier columns since we're only doing survey validation.
# Reassigning instead of using inplace=True, together with errors='ignore',
# keeps this cell re-runnable: a second run no longer raises KeyError for
# columns that were already dropped.
df = df.drop(columns=['Timestamp', 'Name / Initial', 'Gender'], errors='ignore')

# Drop rows that contain missing values.
df = df.dropna()

# df.info() prints its report and returns None, so it is called on its own
# rather than passed to display(), which rendered a stray "None".
df.info()
display(df.head(5))

<class 'pandas.core.frame.DataFrame'>
Int64Index: 46 entries, 0 to 45
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   E1      46 non-null     float64
 1   WOP1    46 non-null     float64
 2   COU1    46 non-null     float64
 3   WOP2    46 non-null     float64
 4   PCQ1    46 non-null     float64
 5   PCQ2    46 non-null     float64
 6   E2      46 non-null     float64
 7   COU2    46 non-null     float64
dtypes: float64(8)
memory usage: 3.2 KB


None

Unnamed: 0,E1,WOP1,COU1,WOP2,PCQ1,PCQ2,E2,COU2
0,5.0,5.0,5.0,5.0,5.0,3.0,5.0,5.0
1,5.0,4.0,4.0,4.0,5.0,4.0,5.0,5.0
2,4.0,5.0,5.0,4.0,4.0,4.0,3.0,5.0
3,4.0,4.0,5.0,3.0,3.0,4.0,5.0,4.0
4,4.0,4.0,4.0,4.0,4.0,3.0,4.0,2.0


# **Adequacy Test**

In [5]:
# Bartlett's test of sphericity on the survey items.
# Returns the chi-square statistic and its p-value.
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity

bartlett_result = calculate_bartlett_sphericity(df)
chi_square_value, p_value = bartlett_result
chi_square_value, p_value

(104.60644703351069, 8.959204574466919e-11)

In this Bartlett test, the p-value is 8.959e-11, which is less than 0.05, so the test is statistically significant, indicating that the observed correlation matrix is not an identity matrix.

# **Kaiser-Meyer-Olkin (KMO) Test**

A KMO value of less than 0.6 is considered inadequate.

In [6]:
# Kaiser-Meyer-Olkin measure: unpack the two values returned by calculate_kmo
# and show the second one, which is the overall score for the data set.
from factor_analyzer.factor_analyzer import calculate_kmo

kmo_result = calculate_kmo(df)
kmo_all, kmo_model = kmo_result
display(kmo_model)

0.6791696294104808

The overall KMO for our data is 0.679, which is above the 0.6 threshold and therefore adequate. This value indicates that we can proceed with our planned factor analysis.

# Confirmatory Factor Analysis using the semopy package

In [7]:
try :
  import semopy
except :
  !pip install semopy
  import semopy

In [11]:
from semopy import Model

# Measurement model in semopy syntax: each line names a latent factor
# (left of `=~`) and the two questionnaire items that measure it.
mod="""
E =~ E1+E2
WOP =~ WOP1+WOP2
COU =~ COU1+COU2
PCQ =~ PCQ1+PCQ2
"""

# Build the model from the specification and fit it to the cleaned survey data;
# the solver result of the fit is shown as the cell output.
model = Model(mod)
model.fit(df)

SolverResult(fun=0.5743553591727499, success=True, n_it=36, x=array([0.52518896, 1.15806396, 0.88774391, 0.58647552, 0.48266067,
       0.23243784, 0.5210598 , 0.03308068, 0.59809844, 0.29695637,
       0.19875773, 0.49074975, 0.12654539, 0.20456232, 0.12975728,
       0.22251689, 0.30710992, 0.22875811, 0.1581742 , 0.40395334,
       0.21641582, 0.22461656]), message='Optimization terminated successfully', name_method='SLSQP', name_obj='MLW')

In [12]:
# Tabulate the fitted parameters; std_est=True adds the standardized
# estimates (the "Est. Std" column) next to the raw estimates.
from semopy.inspector import inspect

model_fit_inspection = inspect(model, std_est=True)
model_fit_inspection

Unnamed: 0,lval,op,rval,Estimate,Est. Std,Std. Err,z-value,p-value
0,E1,~,E,1.0,0.961409,-,-,-
1,E2,~,E,0.525189,0.43307,0.247325,2.123478,0.033714
2,WOP1,~,WOP,1.0,0.546633,-,-,-
3,WOP2,~,WOP,1.158064,0.506911,0.344497,3.361604,0.000775
4,COU1,~,COU,1.0,0.728381,-,-,-
5,COU2,~,COU,0.887744,0.503566,0.263663,3.366962,0.00076
6,PCQ1,~,PCQ,1.0,0.754453,-,-,-
7,PCQ2,~,PCQ,0.586476,0.38743,0.268953,2.180588,0.029214
8,WOP,~~,WOP,0.126545,1.0,0.07694,1.644734,0.100025
9,WOP,~~,PCQ,0.204562,1.037661,0.070775,2.890305,0.003849


In [13]:
# We can also look at the fit indices for the CFA (chi-square, RMSEA, CFI,
# GFI, TLI, AIC, BIC, ...) by gathering the statistics of the fitted model.
from semopy import gather_statistics

stats = gather_statistics(model)
print(stats)

SEMStatistics(dof=14, ml=0.5743553591727499, fun=0.5743553591727499, chi2=(26.4203465219465, 0.022876513207578553), dof_baseline=28, chi2_baseline=115.9493190916122, rmsea=0.14040952869773984, cfi=0.8587783663338101, gfi=0.7721388385120947, agfi=0.5442776770241895, nfi=0.7721388385120947, tli=0.7175567326676202, aic=42.8512892816545, bic=83.08140000441459, params=[ParametersStatistics(value=0.525188962867192, se=0.24732489081205455, zscore=2.1234779934226937, pvalue=0.03371381767179482), ParametersStatistics(value=1.158063955907068, se=0.344497409741416, zscore=3.3616042476864005, pvalue=0.0007749109923418374), ParametersStatistics(value=0.8877439055144098, se=0.26366315425855835, zscore=3.366962319810855, pvalue=0.000760010719941473), ParametersStatistics(value=0.5864755157457795, se=0.26895290437184954, zscore=2.1805881483724314, pvalue=0.029213892511096295), ParametersStatistics(value=0.4826606687162596, se=0.10856542435605748, zscore=4.445804652583052, pvalue=8.756349137550856e-06)

**INTERPRETASI :**

Jika p-value < alpha (0,05) maka signifikan, tetapi ini tidak terlalu dilihat; yang dilihat adalah **standard estimate** (kolom `Est. Std`).



Mayoritas **standard estimate** lebih dari 0,5 (bahkan ada yang lebih dari 0,7) sehingga ideal, tetapi ada juga yang kurang dari 0,5 (E2 = 0,43 dan PCQ2 = 0,39) atau tidak berpengaruh.

Dapat diketahui bahwa standard estimate E1 terhadap variabel E adalah 0,96 dan lebih besar daripada 0,5, maka disimpulkan pertanyaan E1 berpengaruh terhadap E,
dan seterusnya sampai baris no. 7 (PCQ2 ke PCQ).

RMSEA adalah 0,14