### Imports

In [None]:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import plotly.graph_objs as go
from plotly.offline import iplot
import plotly.figure_factory as ff

### Dataset Preprocessing

In [106]:
stats_df = pd.read_csv("../data/pokemon.csv")

In [107]:
stats_df.head()
stats_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 13 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   #           800 non-null    int64 
 1   Name        800 non-null    object
 2   Type 1      800 non-null    object
 3   Type 2      414 non-null    object
 4   Total       800 non-null    int64 
 5   HP          800 non-null    int64 
 6   Attack      800 non-null    int64 
 7   Defense     800 non-null    int64 
 8   Sp. Atk     800 non-null    int64 
 9   Sp. Def     800 non-null    int64 
 10  Speed       800 non-null    int64 
 11  Generation  800 non-null    int64 
 12  Legendary   800 non-null    bool  
dtypes: bool(1), int64(9), object(3)
memory usage: 75.9+ KB


#### Handle missing values

In [109]:
stats_df.isna().sum()

#               0
Name            0
Type 1          0
Type 2        386
Total           0
HP              0
Attack          0
Defense         0
Sp. Atk         0
Sp. Def         0
Speed           0
Generation      0
Legendary       0
dtype: int64

In [102]:
stats_df['Type 2'] = stats_df['Type 2'].fillna("None")
stats_df.head()

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


In [103]:
stats_df.head()

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


In [104]:
types = pd.unique(stats_df[['Type 1', 'Type 2']].values.ravel())
print(types)

['Grass' 'Poison' 'Fire' 'None' 'Flying' 'Dragon' 'Water' 'Bug' 'Normal'
 'Electric' 'Ground' 'Fairy' 'Fighting' 'Psychic' 'Rock' 'Steel' 'Ice'
 'Ghost' 'Dark']


### Feature Engineering

In [None]:
# 2 = super effective, 0.5 = not very effective, 0 = immune, 1 = neutral
type_chart = {
    'Normal': {'Rock': 0.5, 'Ghost': 0, 'Steel': 0.5},
    'Fire': {'Grass': 2, 'Ice': 2, 'Bug': 2, 'Steel': 2,
             'Fire': 0.5, 'Water': 0.5, 'Rock': 0.5, 'Dragon': 0.5},
    'Water': {'Fire': 2, 'Ground': 2, 'Rock': 2,
              'Water': 0.5, 'Grass': 0.5, 'Dragon': 0.5},
    'Electric': {'Water': 2, 'Flying': 2,
                 'Electric': 0.5, 'Ground': 0, 'Dragon': 0.5},
    'Grass': {'Water': 2, 'Ground': 2, 'Rock': 2,
              'Fire': 0.5, 'Grass': 0.5, 'Poison': 0.5, 'Flying': 0.5,
              'Bug': 0.5, 'Dragon': 0.5, 'Steel': 0.5},
    'Ice': {'Grass': 2, 'Ground': 2, 'Flying': 2, 'Dragon': 2,
            'Fire': 0.5, 'Water': 0.5, 'Ice': 0.5, 'Steel': 0.5},
    'Fighting': {'Normal': 2, 'Ice': 2, 'Rock': 2, 'Dark': 2, 'Steel': 2,
                 'Poison': 0.5, 'Flying': 0.5, 'Psychic': 0.5, 'Bug': 0.5, 'Fairy': 0.5},
    'Poison': {'Grass': 2, 'Fairy': 2,
               'Poison': 0.5, 'Ground': 0.5, 'Rock': 0.5, 'Ghost': 0.5, 'Steel': 0},
    'Ground': {'Fire': 2, 'Electric': 2, 'Poison': 2, 'Rock': 2, 'Steel': 2,
               'Grass': 0.5, 'Bug': 0.5, 'Flying': 0},
    'Flying': {'Grass': 2, 'Fighting': 2, 'Bug': 2,
               'Electric': 0.5, 'Rock': 0.5, 'Steel': 0.5},
    'Psychic': {'Fighting': 2, 'Poison': 2,
                'Psychic': 0.5, 'Steel': 0.5, 'Dark': 0},
    'Bug': {'Grass': 2, 'Psychic': 2, 'Dark': 2,
            'Fire': 0.5, 'Fighting': 0.5, 'Poison': 0.5, 'Flying': 0.5,
            'Ghost': 0.5, 'Steel': 0.5, 'Fairy': 0.5},
    'Rock': {'Fire': 2, 'Ice': 2, 'Flying': 2, 'Bug': 2,
             'Fighting': 0.5, 'Ground': 0.5, 'Steel': 0.5},
    'Ghost': {'Psychic': 2, 'Ghost': 2,
              'Dark': 0.5, 'Normal': 0},
    'Dragon': {'Dragon': 2,
               'Steel': 0.5, 'Fairy': 0},
    'Dark': {'Psychic': 2, 'Ghost': 2,
             'Fighting': 0.5, 'Dark': 0.5, 'Fairy': 0.5},
    'Steel': {'Ice': 2, 'Rock': 2, 'Fairy': 2,
              'Fire': 0.5, 'Water': 0.5, 'Electric': 0.5, 'Steel': 0.5},
    'Fairy': {'Fighting': 2, 'Dragon': 2, 'Dark': 2,
              'Fire': 0.5, 'Poison': 0.5, 'Steel': 0.5}
}

In [120]:
types = list(type_chart.keys())
matrix = np.ones((len(types), len(types)))

for i, atk in enumerate(types):
    for j, defn in enumerate(types):
        matrix[i, j] = type_chart[atk].get(defn, 1)

fig = go.Figure(go.Heatmap(
    z=matrix,
    x=types,
    y=types,
    colorscale='Viridis'
))
fig.update_layout(
    title="Pokémon Type Effectiveness",
    xaxis_title="Defending Type",
    yaxis_title="Attacking Type"
)
fig.show()

#### Fuctions to calculate scores

In [None]:
def type_score(attacker, defender):
    atk_types = [attacker['Type 1'], attacker['Type 2']]
    def_types = [defender['Type 1'], defender['Type 2']]

    multiplier = 1.0
    for atk_type in atk_types:
        if atk_type is None: 
            continue
        for def_type in def_types:
            if def_type is None: 
                continue
            multiplier *= type_chart.get(atk_type, {}).get(def_type, 1)
    return multiplier


def battle_score(p1, p2):
    # Offensive power
    p1_attack = (p1['Attack'] + p1['Sp. Atk']) * type_score(p1, p2)
    p2_attack = (p2['Attack'] + p2['Sp. Atk']) * type_score(p2, p1)

    # Defensive resistance
    p1_defense = p1['Defense'] + p1['Sp. Def']
    p2_defense = p2['Defense'] + p2['Sp. Def']

    # Speed bonus)
    p1_speed = p1['Speed'] * 0.2
    p2_speed = p2['Speed'] * 0.2

    # Final scores
    p1_score = (p1_attack - p2_defense) + p1_speed
    p2_score = (p2_attack - p1_defense) + p2_speed

    return p1_score, p2_score


#### Battle Simulation

In [None]:
np.random.seed(42)
random.seed(42)

battles = []
for _ in range(10000):
    sampled = stats_df.sample(2)
    p1 = sampled.iloc[0]
    p2 = sampled.iloc[1]
    p1_score, p2_score = battle_score(p1, p2)
    battles.append({
        'p1_name': p1['Name'],
        'p2_name': p2['Name'],
        'p1_total': p1['Total'],
        'p2_total': p2['Total'],
        'p1_attack_vs_def': p1['Attack'] - p2['Defense'],
        'p2_attack_vs_def': p2['Attack'] - p1['Defense'],
        'p1_type_score': type_score(p1, p2),
        'p2_type_score': type_score(p2, p1),
        'p1_speed': p1['Speed'],
        'p2_speed': p2['Speed'],
        'winner': 1 if p1_score > p2_score else 0
    })

battle_df = pd.DataFrame(battles)

In [66]:
battle_df.head()

Unnamed: 0,p1_name,p2_name,p1_total,p2_total,p1_attack_vs_def,p2_attack_vs_def,p1_type_score,p2_type_score,p1_speed,p2_speed,winner
0,Hydreigon,Beheeyem,600,485,30,-15,2.0,0.0,98,40,1
1,Magmar,LopunnyMega Lopunny,495,580,1,79,1.0,1.0,93,135,0
2,Ledyba,Volcarona,265,550,-45,30,1.0,1.0,55,100,0
3,Shiftry,GengarMega Gengar,480,600,20,5,1.0,1.0,80,130,0
4,Starly,Miltank,245,490,-50,50,1.0,1.0,60,100,0


In [67]:
print(battle_df['winner'].value_counts())

winner
0    5055
1    4945
Name: count, dtype: int64


### Model Training

#### Train-Test Split

In [68]:
X = battle_df[['p1_total', 'p2_total', 'p1_attack_vs_def', 'p2_attack_vs_def',
        'p1_type_score', 'p2_type_score', 'p1_speed', 'p2_speed']]

y = battle_df['winner']   

In [69]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)

Train shape: (8000, 8)
Test shape: (2000, 8)


####  Train Decision Tree

In [70]:
model = LogisticRegression(random_state=42, max_iter=1000)  
model.fit(X_train, y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,42
,solver,'lbfgs'
,max_iter,1000


#### Model Evaluation

In [110]:
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.936

Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.93      0.94      1033
           1       0.93      0.94      0.93       967

    accuracy                           0.94      2000
   macro avg       0.94      0.94      0.94      2000
weighted avg       0.94      0.94      0.94      2000


Confusion Matrix:
 [[962  71]
 [ 57 910]]


In [None]:
cm = confusion_matrix(y_test, y_pred)
labels = np.unique(y_test)

fig = ff.create_annotated_heatmap(
    z=cm,
    x=labels.tolist(),
    y=labels.tolist(),
    colorscale="Viridis",
    showscale=True
)

fig.update_layout(title="Confusion Matrix", xaxis_title="Predicted", yaxis_title="Actual")
fig.show()


### Game

In [None]:
def get_pokemon_stats(pokemon_name, df):
    return df[df['Name'] == pokemon_name].iloc[0]

def build_features(p1, p2):
    feature_row = pd.DataFrame([{
        'p1_total': p1['Total'],
        'p2_total': p2['Total'],
        'p1_attack_vs_def': p1['Attack'] - p2['Defense'],
        'p2_attack_vs_def': p2['Attack'] - p1['Defense'],
        'p1_type_score': type_score(p1, p2),
        'p2_type_score': type_score(p2, p1),
        'p1_speed': p1['Speed'],
        'p2_speed': p2['Speed']
    }])
    return feature_row

def predict_win_probability(p1, p2, df, model):
    stats1 = get_pokemon_stats(p1, df)
    stats2 = get_pokemon_stats(p2, df)
    features = build_features(stats1, stats2)
    prob_p1_wins = model.predict_proba(features)[0][1]  # class 1 = p1 wins
    prob_p2_wins = model.predict_proba(features)[0][0]  # class 0 = p1 loses

    return prob_p1_wins, prob_p2_wins

def compare2pokemon(poke1, poke2, stats_df):
    x = stats_df[stats_df["Name"] == poke1].iloc[0]
    y = stats_df[stats_df["Name"] == poke2].iloc[0]
    
    categories = ['HP','Attack','Defense','Sp. Atk','Sp. Def','Speed']

    trace0 = go.Scatterpolar(
        r = [x[cat] for cat in categories] + [x['HP']],
        theta = categories + ['HP'],
        fill = 'toself',
        name = poke1
    )

    trace1 = go.Scatterpolar(
        r = [y[cat] for cat in categories] + [y['HP']],
        theta = categories + ['HP'],
        fill = 'toself',
        name = poke2
    )

    layout = go.Layout(
        polar=dict(
            radialaxis=dict(visible=True, range=[0, 200])
        ),
        showlegend=True,
        title=f"{poke1} vs {poke2}",
        width=700, 
        height=600    )

    fig = go.Figure(data=[trace0, trace1], layout=layout)
    iplot(fig, filename="Pokemon Comparison")


In [None]:
def plot_win_probs(team_probs, opponent_name):
    fig = go.Figure(go.Bar(
        x=list(team_probs.keys()),
        y=list(team_probs.values()),
        text=[f"{p:.2%}" for p in team_probs.values()],
        textposition="auto",
        marker_color="orange"
    ))

    fig.update_layout(
        title=f"Win Probabilities Against {opponent_name}",
        yaxis_title="Probability",
        xaxis_title="Pokémon"
    )
    fig.show()


In [None]:
def play_game(stats_df, model):
    print("Choose your team of 5 Pokémon:")
    team = []
    for i in range(5):
        poke = input(f"Enter Pokémon {i+1}: ")
        team.append(poke)
    
    all_pokemon = stats_df['Name'].tolist()
    opponent = random.choice(all_pokemon)
    print(f"\nOpponent Pokémon: {opponent}")
    
    fighter = input("Choose one Pokémon from your team to battle: ")
    
    probs = {}
    for poke in team:
        p1_prob, _ = predict_win_probability(poke, opponent, stats_df, model)
        probs[poke] = p1_prob
        
    chosen_prob = probs[fighter]
    print(f"\nChosen Pokémon: {fighter}")
    if chosen_prob >= 0.90:
        print("\nGreat choice! That’s an excellent counter.")
    elif chosen_prob >= 0.70:
        print("\nDecent choice! This Pokémon has good chances.")
    elif chosen_prob >= 0.40:
        print("\nRisky choice. Could go either way.")
    else:
        print("\nThat’s a really bad matchup.")
    
    ranked = sorted(probs.items(), key=lambda x: x[1], reverse=True)
    
    print("\n---------- Battle Results ----------")
    for rank, (poke, prob) in enumerate(ranked, 1):
        
        if prob >= 0.90:
            tier = "✅ Excellent pick"
        elif prob >= 0.70:
            tier = "⚡ Good pick"
        elif prob >= 0.40:
            tier = "🤔 Risky pick"
        else:
            tier = "❌ Bad pick"
        
        print(f"{rank}. {poke}: {prob:.2%}  → {tier}")
    
    print(f"\nComparing stats: {fighter} vs {opponent}")
    compare2pokemon(fighter, opponent, stats_df)
    plot_win_probs(probs, opponent)

In [118]:
play_game(stats_df,model)

Choose your team of 5 Pokémon:

Opponent Pokémon: Marshtomp

Chosen Pokémon: Bulbasaur

Great choice! That’s an excellent counter.

---------- Battle Results ----------
1. Bulbasaur: 99.82%  → ✅ Excellent pick
2. Pidgey: 46.57%  → 🤔 Risky pick
3. Squirtle: 38.74%  → ❌ Bad pick
4. Pikachu: 0.00%  → ❌ Bad pick
5. Charmander: 0.00%  → ❌ Bad pick

Comparing stats: Bulbasaur vs Marshtomp
