In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('pokemon_preprocessed.csv')

import preparation as prep
from imblearn.over_sampling import SMOTE

X, y, _, _ = prep.get_dataset(df, 'Legendary')

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.3, random_state=42, stratify=y_resampled)

In [6]:
## 모델 성능 비교
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
import warnings

warnings.filterwarnings('ignore')

# 모델 리스트
models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=200),
    "Support Vector Classifier": SVC(),
    "Decision Tree": DecisionTreeClassifier(random_state=42)
}

# 모델 성능 평가
results = {}
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    results[model_name] = accuracy

# 성능 출력
for model_name, accuracy in results.items():
    print(f"{model_name}: {accuracy:.2f}") # 앙상블 모델인 Random Forest가 가장 좋은 성능을 보이는 것을 알 수 있다.

Random Forest: 0.97
Logistic Regression: 0.96
Support Vector Classifier: 0.95
Decision Tree: 0.96
