# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from factor_analyzer import FactorAnalyzer
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity, calculate_kmo

# Load the preprocessed hotel dataset that every later analysis step reads.
DATA_PATH = "../preprocessed/preprocessed_hotel.csv"
df = pd.read_csv(DATA_PATH)

# Adequacy checks run before the factor analysis itself.
# Bartlett's test of sphericity: the call yields a test statistic and its
# p-value, both of which are reported.
bartlett_result = calculate_bartlett_sphericity(df)
bartlett_test, p_value = bartlett_result
print(f"Bartlett Test: {bartlett_test}, p-value: {p_value}")

# KMO test: the call yields a pair; only the second element (the overall
# model score) is printed, the first is kept in `kmo_all` but not reported.
kmo_result = calculate_kmo(df)
kmo_all, kmo_model = kmo_result
print(f"KMO Test Score: {kmo_model}")

# Decide how many factors to keep (scree plot).
# This unrotated maximum-likelihood fit exists only to obtain eigenvalues.
fa = FactorAnalyzer(rotation=None, method="ml")
fa.fit(df)

# get_eigenvalues() returns a pair; only its first element is plotted.
ev = fa.get_eigenvalues()[0]

# Factors are numbered from 1 on the x-axis.
factor_numbers = range(1, len(ev) + 1)

fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(factor_numbers, ev, label="Eigenvalues")
ax.plot(factor_numbers, ev, label="Scree Plot")
# Horizontal reference line at eigenvalue 1 (the Kaiser cut-off used below).
ax.axhline(y=1, color="r", linestyle="--", label="Kaiser Criterion (λ=1)")
ax.set_xlabel("Factors")
ax.set_ylabel("Eigenvalue")
ax.set_title("Scree Plot")
ax.legend()
plt.show()

# Run the final MLFA once the number of factors has been chosen.
# Kaiser criterion: keep the factors whose eigenvalue is greater than 1.
# The count is vectorized with NumPy (instead of the builtin `sum`, which
# loops over the array element by element) and cast to a plain Python int.
n_factors = int(np.sum(ev > 1))
if n_factors < 1:
    # A model with zero factors is meaningless; fail with a clear message
    # instead of passing 0 on to the estimator.
    raise ValueError(
        "Kaiser criterion retained no factors (no eigenvalue > 1); "
        "set n_factors manually."
    )

# Refit with the selected number of factors and a varimax rotation.
fa = FactorAnalyzer(n_factors=n_factors, rotation="varimax", method="ml")
fa.fit(df)

# Interpret the MLFA result: loadings table with one row per input column
# and one column per retained factor ("Factor 1", "Factor 2", ...).
factor_labels = [f"Factor {k}" for k in range(1, n_factors + 1)]
loadings = pd.DataFrame(fa.loadings_, index=df.columns, columns=factor_labels)
print(loadings)

# Extract the communalities, again indexed by the input column names.
communalities = pd.DataFrame(
    {"Communalities": fa.get_communalities()},
    index=df.columns,
)
print(communalities)
