In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv('pokemon_preprocessed.csv')

In [2]:
import preparation as prep

X, y, _, _ = prep.get_dataset(df, 'Legendary')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
import warnings

warnings.filterwarnings('ignore')

models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=200),
    "Support Vector Classifier": SVC(),
    "Decision Tree": DecisionTreeClassifier(random_state=42)
}

results = {}
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    results[model_name] = accuracy

# 성능 출력
for model_name, accuracy in results.items():
    print(f"{model_name}: {accuracy:.2f}") 
    
# Decision Tree가 가장 좋은 성능을 보이는 것을 알 수 있다.
# 하지만, 앙상블 모델인 랜덤 포레스트가 향후 추가적인 모델링 기법에 따라 개선될 가능성이 더 크다고 생각한다.

Random Forest: 0.95
Logistic Regression: 0.95
Support Vector Classifier: 0.93
Decision Tree: 0.96
