In [5]:
# Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [6]:
# Load Data
pokemon_df = pd.read_csv("pokemon_data.csv")

pokemon_df = pokemon_df[
    [
        "name",
        "type1",
        "type2",
        "hp",
        "attack",
        "defense",
        "sp_attack",
        "sp_defense",
        "speed",
        "is_legendary",
        "against_bug",
        "against_dark",
        "against_dragon",
        "against_electric",
        "against_fairy",
        "against_fight",
        "against_fire",
        "against_flying",
        "against_ghost",
        "against_grass",
        "against_ground",
        "against_ice",
        "against_normal",
        "against_poison",
        "against_psychic",
        "against_rock",
        "against_steel",
        "against_water"
    ]
]

In [7]:
# Get Type Multiplier
def get_type_multiplier(attacker_type, defender):
    column_name = f"against_{attacker_type}"
    if column_name in defender:
        return defender[column_name]
    return 1.0

In [8]:
# Generate Battle Score
def battle_score(attacker, defender):
    attack_power = attacker["attack"] + attacker["sp_attack"]
    defense_power = attacker["defense"] + attacker["sp_defense"]
    mult1 = get_type_multiplier(attacker["type1"], defender)
    mult2 = get_type_multiplier(attacker["type2"], defender) if pd.notna(defender["type2"]) else 1.0
    total_multiplier = mult1 * mult2
    score = (attack_power * total_multiplier + defense_power + attacker["speed"] * 0.5)
    return score

In [9]:
# Generate Features for Model
def create_features(p1, p2):
    return {
        "attack_diff": p1["attack"] - p2["attack"],
        "defense_diff": p1["defense"] - p2["defense"],
        "spatk_diff": p1["sp_attack"] - p2["sp_attack"],
        "spdef_diff": p1["sp_defense"] - p2["sp_defense"],
        "speed_diff": p1["speed"] - p2["speed"],
        "legendary_diff": int(p1["is_legendary"]) - int(p2["is_legendary"]),
    }

In [10]:
# Generate Battle Data
battle_rows = []
NUM_BATTLES = 3000

for _ in range(NUM_BATTLES):
    p1, p2 = pokemon_df.sample(2).to_dict("records")
    score1 = battle_score(p1, p2)
    score2 = battle_score(p2, p1)
    winner = 1 if score1 > score2 else 0
    features = create_features(p1, p2)
    features["winner"] = winner
    battle_rows.append(features)

battle_df = pd.DataFrame(battle_rows)

In [None]:
# Test/Train Split
X = battle_df.drop("winner", axis=1)
y = battle_df["winner"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

# Train Model
model = LogisticRegression(max_iter = 1000)
model.fit(X_train, y_train)

# Evaluate Model
preds = model.predict(X_test)
accuracy = accuracy_score(y_test, preds)
formatted_accuracy = f"{(accuracy * 100):.2f}%"

print("Model Accuracy:", formatted_accuracy)

Model Accuracy: 80.33%


In [12]:
# Predict Custom Battles
def predict_battle(pokemon1_name, pokemon2_name):
    p1 = pokemon_df[pokemon_df["name"] == pokemon1_name].iloc[0]
    p2 = pokemon_df[pokemon_df["name"] == pokemon2_name].iloc[0]

    features = create_features(p1, p2)
    X_input = pd.DataFrame([features])

    win_prob = model.predict_proba(X_input)[0][1]
    formatted_win_prob = f"{(win_prob * 100):.2f}%"

    print(f"Probability of {pokemon1_name} winning against {pokemon2_name}: {formatted_win_prob}")
    
# Example Prediction
predict_battle("Charizard", "Venusaur")

Probability of Charizard winning against Venusaur: 44.25%
