In [None]:
import joblib
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.inspection import permutation_importance

# Plot styling: apply the seaborn "whitegrid" theme first, then override
# individual matplotlib rcParams on top of it.
sns.set_style("whitegrid")
plt.rcParams.update({
    'axes.unicode_minus': False,
    # NOTE(review): AppleGothic is presumably a macOS font picked so Korean
    # labels render; confirm a fallback exists on other platforms.
    "font.family": "AppleGothic",
})

# Load the pickled model and the X_val / y_val artifacts, in that order.
# NOTE(review): joblib.load unpickles arbitrary objects; only load trusted files.
rf_model, X_val, y_val = (
    joblib.load(path)
    for path in (
        "../output/rf_model.pkl",
        "../output/X_val.pkl",
        "../output/y_val.pkl",
    )
)

In [None]:
# Select/reorder X_val's columns so they match the feature names the model
# recorded when it was fitted.
expected_features = rf_model.feature_names_in_
X_val = X_val[expected_features]

# Plot the model's built-in feature importances, largest first.
features = X_val.columns
importances = rf_model.feature_importances_

feat_df = pd.DataFrame({'Feature': features, 'Importance': importances})
feat_df = feat_df.sort_values(by='Importance', ascending=False)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=feat_df, x='Importance', y='Feature', ax=ax)
ax.set_title("Random Forest Feature Importance")
fig.tight_layout()
plt.show()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Find the object-dtype columns of X_val and report them.
cat_cols = X_val.select_dtypes(include='object').columns
print("인코딩할 범주형 변수:", cat_cols)

# One LabelEncoder per object column, keyed by column name. Each encoder is
# fitted on the column and the column is overwritten with its integer codes.
# NOTE(review): the encoders are fitted on X_val itself; confirm the resulting
# codes agree with whatever encoding rf_model was trained on.
label_encoders = {col: LabelEncoder() for col in cat_cols}

for col, le in label_encoders.items():
    X_val[col] = le.fit_transform(X_val[col])

In [None]:
import joblib
# Persist the label_encoders dict (column name -> fitted LabelEncoder) to disk.
# NOTE(review): if the encoding cell is re-run after X_val has already been
# encoded, label_encoders will be empty and this overwrites the file with {}.
joblib.dump(label_encoders, "../output/label_encoders.pkl")

In [None]:
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Permutation importance of rf_model on (X_val, y_val): 10 repeats per
# feature with a fixed random seed.
perm = permutation_importance(
    rf_model,
    X_val,
    y_val,
    n_repeats=10,
    random_state=42,
)

# Mean importance per feature, largest first.
perm_df = pd.DataFrame(
    {"Feature": X_val.columns, "Importance": perm.importances_mean}
)
perm_df = perm_df.sort_values(by="Importance", ascending=False)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=perm_df, x="Importance", y="Feature", ax=ax)
ax.set_title("Permutation Importance")
fig.tight_layout()
plt.show()

In [None]:
# shap
import shap
import joblib
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder


In [None]:
# Reload the pickled artifacts from disk, rebinding X_val, y_val and rf_model
# (this discards any in-memory changes made to X_val by earlier cells).
X_val, y_val, rf_model = (
    joblib.load(path)
    for path in (
        "../output/X_val.pkl",
        "../output/y_val.pkl",
        "../output/rf_model.pkl",
    )
)

In [None]:
# Prepare the freshly reloaded X_val for SHAP.

# Drop columns SHAP cannot handle (e.g. the Timestamp-valued base_date).
if 'base_date' in X_val.columns:
    X_val = X_val.drop(columns=['base_date'])

# Align the columns with the features the model was fitted on, mirroring the
# alignment done before the feature-importance plot. SHAP's tree explainer
# consumes the frame positionally, so extra or reordered columns would either
# fail or attribute values to the wrong feature. The alignment is skipped
# (original behaviour kept) when the model exposes no feature names or when
# some expected feature is missing from X_val.
model_features = getattr(rf_model, "feature_names_in_", None)
if model_features is not None and set(model_features) <= set(X_val.columns):
    # .copy() so the column assignments below write to an independent frame.
    X_val = X_val[list(model_features)].copy()

# Identify the object-dtype (categorical) columns.
cat_cols = X_val.select_dtypes(include='object').columns
print("범주형 변수:", list(cat_cols))

# Label-encode each categorical column in place.
# NOTE(review): encoders are re-fitted on X_val here; confirm the codes match
# the encoding rf_model was trained on.
from sklearn.preprocessing import LabelEncoder
for col in cat_cols:
    le = LabelEncoder()
    X_val[col] = le.fit_transform(X_val[col])

In [None]:
import shap

# Explain a reproducible random sample of at most 300 rows.
# DataFrame.sample raises ValueError when n exceeds the number of rows
# (sampling without replacement), so cap n at the size of X_val.
sample_X = X_val.sample(n=min(300, len(X_val)), random_state=42)

# Compute SHAP values for the sampled rows with the tree explainer.
explainer = shap.TreeExplainer(rf_model)
shap_values = explainer.shap_values(sample_X)

# Summary plot of the SHAP values against the sampled feature values.
shap.summary_plot(shap_values, sample_X)