# Project Notebook
Generated from script.

In [None]:
import sys
import os
from pathlib import Path

# Put the parent of the current working directory on the import path (once)
# so that the project package imported below can be resolved.
project_root = Path.cwd().parent
if sys.path.count(str(project_root)) == 0:
    sys.path.append(str(project_root))

from pokemon_predictor import config
from pokemon_predictor.data_utils import load_data
import pandas as pd
import numpy as np
import joblib

# Qualitative Scenario Testing

In [None]:
from pokemon_predictor.predict import PokemonPredictor
from pokemon_predictor import config
import pandas as pd

predictor = PokemonPredictor()

# Hand-picked Pokemon, grouped by the kind of case each one exercises.
scenarios = {
    "Mono-Type": ["Charmander", "Squirtle"],
    "Dual-Type": ["Charizard", "Gengar"],
    "Impostors": ["Sudowoodo", "Groudon"]
}

# Size the separator from the header so the two always line up
# (the header is wider than a fixed 80-character rule).
header = f"{'Pokemon':<15} | {'True Type':<20} | {'XGBoost':<20} | {'MLP':<20}"
print(header)
print("-" * len(header))

df_meta = pd.read_csv(config.PROCESSED_DATA_DIR / "pokemon_metadata.csv")
# Lower-case the name column once instead of on every lookup.
names_lower = df_meta['name'].str.lower()

for cat, names in scenarios.items():
    print(f"--- {cat} ---")
    for name in names:
        row = df_meta[names_lower == name.lower()]
        if row.empty:
            # Surface the miss instead of dropping the scenario silently.
            print(f"{name:<15} | (not found in metadata)")
            continue

        record = row.iloc[0]
        # The image file is named after the metadata spelling of the name.
        img_path = config.RAW_DATA_DIR / f"{record['name']}.png"
        pred = predictor.predict(str(img_path))

        t1 = record['type1']
        t2 = record['type2']
        # Only append the second type when the metadata actually has one.
        true_t = f"{t1}" + (f", {t2}" if pd.notna(t2) else "")

        if not pred:
            # A falsy result means there is nothing to tabulate; say so
            # rather than leaving a gap in the report.
            print(f"{name:<15} | {true_t:<20} | (no prediction for {img_path})")
            continue

        xgb = ", ".join(pred['xgboost'])
        mlp = ", ".join(pred['mlp'])
        print(f"{name:<15} | {true_t:<20} | {xgb:<20} | {mlp:<20}")

## XGBoost Penalization Grid Search
Testing different `colsample_bytree` values (the fraction of columns sampled when building each tree) to find the optimal regularization against Biological Ratios.

In [None]:
from xgboost import XGBClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import f1_score, precision_score

# Load the 'hybrid' data already split into train/test parts; the fifth
# returned value is not needed in this cell.
X_train, X_test, y_train, y_test, _ = load_data('hybrid', split_data=True)

# colsample_bytree values to sweep.
penalties = [1.0, 0.75, 0.50, 0.25]
results = []

for penalty in penalties:
    # One XGBoost classifier per output label; only colsample_bytree
    # changes between iterations.
    base_estimator = XGBClassifier(
        n_estimators=100,
        max_depth=5,
        learning_rate=0.1,
        colsample_bytree=penalty,
        n_jobs=-1,
        random_state=config.RANDOM_SEED,
    )
    model = MultiOutputClassifier(base_estimator)
    model.fit(X_train, y_train)

    # predict_proba returns one array per output; take column 1 of each
    # (presumably the positive-class probability), arrange the outputs as
    # columns, and threshold at 0.5.
    pred_probs = model.predict_proba(X_test)
    per_label_scores = np.array([p[:, 1] for p in pred_probs])
    preds = per_label_scores.T > 0.5

    f1 = f1_score(y_test, preds, average='micro')
    prec = precision_score(y_test, preds, average='micro')
    results.append({
        'Penalty': penalty,
        'F1 Micro': f1,
        'Precision': prec,
    })

# Best micro-F1 first.
df_res = pd.DataFrame(results)
df_res = df_res.sort_values(by='F1 Micro', ascending=False)
print("\n=== Scenario Ranking ===")
display(df_res)