In [29]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# 假设你有一个DataFrame包含你的测量数据
df = pd.read_excel("data\\evaluation.xlsx").drop("序号", axis=1)
names = df.columns

# 标准化处理
scaler = StandardScaler()
df = pd.DataFrame(scaler.fit_transform(df))
df.columns = names
df.head()

Unnamed: 0,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,X_9,X_10
0,-0.834352,-2.375773,0.989949,-1.414214,1.0,-1.870829,-1.612452,-0.445859,1.082167,1.748347
1,-0.834352,-2.375773,0.989949,-1.414214,1.0,0.534522,0.620174,-0.445859,1.082167,1.748347
2,-0.834352,-0.901155,0.989949,-1.414214,1.0,0.534522,0.620174,-0.445859,1.082167,1.748347
3,-0.834352,-0.901155,0.989949,-1.414214,1.0,0.534522,0.620174,-0.445859,1.082167,-0.710266
4,-0.834352,-0.901155,0.989949,-1.414214,1.0,0.534522,-1.612452,-0.445859,-1.023672,1.748347


In [30]:
from pingouin import reliability

# 计算Cronbach's alpha
reliability.cronbach_alpha(df)

(-0.9104634780438363, array([-2.563,  0.17 ]))

In [31]:
from factor_analyzer import FactorAnalyzer
from factor_analyzer.factor_analyzer import calculate_kmo, calculate_bartlett_sphericity


# 计算KMO值：KMO值用于衡量数据适合进行因子分析的程度：KMO值范围是0到1
# 一般来说，KMO值大于0.6表示数据适合进行因子分析，0.8及以上则表示非常适合
kmo_all, kmo_model = calculate_kmo(df)
print(f"KMO值: {kmo_model}\n")

# 巴特利特球形度检验：巴特利特球形度检验用于确定数据的相关矩阵是否是单位矩阵
# 这个检验的 p 值需要小于0.05，表明相关矩阵显著偏离单位矩阵，数据适合进行因子分析。
chi_square_value, p_value = calculate_bartlett_sphericity(df)
print(f"巴特利特球形度检验结果: \nchi_square_value = {chi_square_value}, p = {p_value}\n")

# 因子分析
fa = FactorAnalyzer(rotation='varimax', n_factors=3)
fa.fit(df)
factor_scores = fa.transform(df)

# 获取因子载荷：因子载荷矩阵显示每个变量在各个因子上的载荷（权重）
# 因子载荷矩阵中大部分变量的载荷值应高于0.5
loadings = fa.loadings_
print("因子载荷矩阵:\n", loadings)

# 获取变量共同度：变量共同度表示变量在所有因子上的载荷值的总和
# 变量共同度越接近1，表示变量在所有因子上的载荷值越接近，即变量在所有因子上的重要性越接近
communalities = fa.get_communalities()
print("变量共同度:\n", communalities)

# 获取因子解释的方差：一般要求因子解释之和达到70%以上，直接看最下面一行最右边的数值就行
variance = fa.get_factor_variance()
print("因子解释的方差:\n", variance)

KMO值: 0.6933381738019934

巴特利特球形度检验结果: 
chi_square_value = 78.47463150281236, p = 0.0014717479314208394

因子载荷矩阵:
 [[-0.03357552  0.16620651  0.98579889]
 [-0.37980484  0.73286905  0.34512948]
 [ 0.02088272 -0.80963548 -0.09766584]
 [-0.47701429  0.6648021   0.42275156]
 [ 0.86523081 -0.29552231 -0.20686079]
 [-0.57607389 -0.12092942 -0.02636854]
 [ 0.21338849  0.41631046 -0.10970494]
 [-0.46013607  0.0388524   0.27237054]
 [ 0.70827677 -0.16823588  0.0186242 ]
 [ 0.4195347  -0.61820561 -0.29837806]]
变量共同度:
 [1.00055138 0.80046312 0.66548432 0.84822335 0.87874917 0.34718036
 0.23088422 0.28742042 0.53030616 0.64721701]
因子解释的方差:
 (array([2.38876845, 2.34945586, 1.4982552 ]), array([0.23887684, 0.23494559, 0.14982552]), array([0.23887684, 0.47382243, 0.62364795]))


In [32]:
from factor_analyzer import ConfirmatoryFactorAnalyzer, ModelSpecificationParser

# 定义模型
model_dict = {
    "F1": ["X2", "X4", "X5", "X6", "X8", "X9"],
    "F2": ["X2", "X3", "X4", "X7", "X10"],
    "F3": ["X1"]
}

# 解析模型规格
model_spec = ModelSpecificationParser.parse_model_specification_from_dict(df, model_dict)

# 创建并拟合CFA模型
cfa = ConfirmatoryFactorAnalyzer(model_spec, disp=False)
cfa.fit(df)

# 输出拟合结果
print("因子载荷矩阵:\n", cfa.loadings_)
print("误差方差矩阵:\n", cfa.error_vars_)
print("因子协方差矩阵:\n", cfa.factor_varcovs_)



因子载荷矩阵:
 [[ 0.95233094  0.82549746  0.        ]
 [ 0.92241516  0.75152979  0.        ]
 [ 0.24118137  0.          0.        ]
 [ 0.8493295   0.          0.        ]
 [-0.14658346  0.          0.        ]
 [ 0.73171495  0.          0.        ]
 [ 0.          0.67175348  0.        ]
 [ 0.          0.47917297  0.        ]
 [ 0.          0.26634027  0.        ]
 [ 0.          0.          0.66516566]]
误差方差矩阵:
 [[0.47270454]
 [0.55851691]
 [1.33205434]
 [0.84125055]
 [1.53264707]
 [0.97486895]
 [0.92499295]
 [1.07651149]
 [1.2241047 ]
 [0.29482588]]
因子协方差矩阵:
 [[ 1.         -0.10245859 -0.28248186]
 [-0.10245859  1.         -0.42069064]
 [-0.28248186 -0.42069064  1.        ]]


