# In [None]:
# 1. 데이터 로딩 및 준비
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the feature-engineered dataset.
file_path = '../data/processed_data.csv'
df = pd.read_csv(file_path)

# Separate the input features (X) from the binary target (y):
# 1 when 'Score_category' is 'High', 0 for anything else.
# NOTE(review): 'Score' is used as a feature while the target comes from
# 'Score_category'. If that category was binned from 'Score', this is target
# leakage and the metrics below are inflated -- confirm, and drop 'Score'
# from feature_columns if so.
feature_columns = ['Age', 'Score', 'Name_encoded']
X = df[feature_columns]
# Vectorized comparison instead of a per-row lambda; values that are not
# exactly 'High' (including missing ones) compare unequal and become 0.
y = (df['Score_category'] == 'High').astype(int)

# 2. Train / test split (70% / 30%).
# Stratify on the target so both splits keep the class ratio. Stratification
# needs at least two rows per class, so fall back to a plain random split
# when the data is too small for that.
class_counts = y.value_counts()
stratify_on = y if class_counts.min() >= 2 else None
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=stratify_on
)

# 3. Train the model (fixed seed for reproducible results).
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# 4. Predict on the held-out test set.
y_pred = model.predict(X_test)

# 5. Evaluate the predictions.
print("\n✅ 모델 평가 결과:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))

# 6. Report the importance the fitted forest assigns to each feature.
feature_importances = model.feature_importances_
feature_names = X.columns

print("\n📊 중요 피처:")
for name, importance in zip(feature_names, feature_importances):
    print(f"{name}: {importance:.2f}")
