In [16]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import plotly.express as px
import itertools
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import roc_curve, roc_auc_score
import plotly.graph_objects as go
import plotly.figure_factory as ff



In [None]:
# Load the Pokémon dataset (presumably already cleaned, judging by the file name).
data = pd.read_csv('../data/pokemon_data_cleaned.csv')

# Work on a copy so `data` keeps the values exactly as loaded.
# `data.copy()` already returns a DataFrame, so no extra pd.DataFrame(...) wrap is needed.
df = data.copy()

df.head()

In [18]:
# pikachu = df[(df['name'] == 'Pikachu')]


In [19]:
def get_first_attacker(p1, p2):
    """Decide which combatant attacks first.

    The combatant with the strictly higher ``'speed'`` goes first. On a speed
    tie the order is chosen by a fair coin flip.

    Parameters
    ----------
    p1, p2 : mapping-like
        Combatant records that expose a ``'speed'`` entry via ``[...]``.

    Returns
    -------
    tuple
        ``(attacker, defender)`` -- always the two input objects themselves,
        one in each role.
    """
    if p1['speed'] > p2['speed']:
        return p1, p2
    if p2['speed'] > p1['speed']:
        return p2, p1

    # Speed tie: flip a coin on an index instead of sampling the records
    # themselves. Picking the roles directly avoids comparing the records by
    # value, which could hand back the same object as both attacker and
    # defender when the two records have equal contents.
    if np.random.randint(2) == 0:
        return p1, p2
    return p2, p1

# The damage of one attack is computed as:
#   (attack / defense) * effectiveness * random_factor
def calculate_attack_damage(attacker, defender, effectiveness):
    """Return the damage dealt by a single attack.

    Parameters
    ----------
    attacker : mapping-like
        Must provide an ``'attack'`` value.
    defender : mapping-like
        Must provide a ``'defense'`` value.
    effectiveness : number
        Multiplier applied to the attack/defense ratio.

    Returns
    -------
    number
        ``(attack / defense) * effectiveness`` scaled by a random factor
        drawn from ``np.random.uniform(0.85, 1.0)``.
    """
    # The random roll is drawn before the stats are read.
    roll = np.random.uniform(0.85, 1.0)
    stat_ratio = attacker['attack'] / defender['defense']
    return stat_ratio * effectiveness * roll

# Computes the damage of one attack and applies it to the defender
def perform_attack(attacker, defender):
    """Let ``attacker`` hit ``defender`` once.

    The effectiveness multiplier is read from the defender under the key
    ``'<attacker primary type, lower-cased>_attack_effectiveness'`` and falls
    back to 1 when the defender has no such entry. The defender's ``'hp'`` is
    reduced in place by the damage dealt.

    Returns
    -------
    tuple
        ``(fainted, damage)`` where ``fainted`` tells whether the defender's
        ``'hp'`` is now at or below zero.
    """
    lookup_key = f"{attacker['primary_type'].lower()}_attack_effectiveness"
    multiplier = defender.get(lookup_key, 1)

    dealt = calculate_attack_damage(attacker, defender, multiplier)
    defender['hp'] -= dealt

    fainted = defender['hp'] <= 0
    return fainted, dealt

def handle_round(attacker, defender):
    """Play one round: the attacker strikes, then the defender strikes back.

    Parameters
    ----------
    attacker, defender : mapping-like
        Combatant records accepted by ``perform_attack``.

    Returns
    -------
    tuple
        ``(winner, loser)`` as soon as one side faints during the round,
        otherwise ``(None, None)``.
    """
    # perform_attack returns a (fainted, damage) tuple. A non-empty tuple is
    # always truthy, so the flag has to be unpacked before it is tested;
    # testing the tuple itself would end every battle after the first hit.
    defender_fainted, _ = perform_attack(attacker, defender)
    if defender_fainted:
        return attacker, defender

    attacker_fainted, _ = perform_attack(defender, attacker)
    if attacker_fainted:
        return defender, attacker

    return None, None

def simulate_battle(p1, p2, max_rounds=10_000):
    """Simulate a full battle between two combatants.

    Both records are mutated: their ``'hp'`` entry is reset to
    ``'original_hp'`` before the fight and then reduced as rounds are played.

    Parameters
    ----------
    p1, p2 : mapping-like
        Combatant records with at least ``'original_hp'`` plus whatever
        ``get_first_attacker`` and ``handle_round`` read.
    max_rounds : int, optional
        Upper bound on the number of rounds. Guards against a battle that can
        never end (for example when neither side deals any damage).

    Returns
    -------
    tuple
        ``(winner, loser)``.

    Raises
    ------
    RuntimeError
        If nobody has fainted after ``max_rounds`` rounds.
    """
    # Start every battle from full health, whatever earlier battles left behind.
    p1['hp'] = p1['original_hp']
    p2['hp'] = p2['original_hp']

    attacker, defender = get_first_attacker(p1, p2)
    for _ in range(max_rounds):
        winner, loser = handle_round(attacker, defender)

        # handle_round signals "no result yet" with (None, None); test for
        # None explicitly instead of relying on the record's truthiness.
        if winner is not None:
            return winner, loser

    raise RuntimeError(f'Battle did not finish within {max_rounds} rounds')


In [20]:
n_per_type = 7

# Randomly draw up to n_per_type Pokémon from every primary type.
# - min(...) keeps the draw valid for types with fewer than n_per_type rows
#   (DataFrame.sample raises when asked for more rows than exist).
# - Iterating over the groups (instead of groupby.apply) keeps the
#   'primary_type' column in the result regardless of how the installed pandas
#   version treats grouping columns inside apply.
sample_pokemon = pd.concat(
    [
        type_group.sample(n=min(len(type_group), n_per_type), random_state=42)
        for _, type_group in df.groupby('primary_type')
    ],
    ignore_index=True,
)

# Build every possible pair of the sampled Pokémon
pokemon_pairs = list(itertools.combinations(sample_pokemon.to_dict(orient='records'), 2))

In [None]:
# Report how many pairings (one battle each) were generated.
n_battles = len(pokemon_pairs)
print(f'Number of Battles: {n_battles}')

In [22]:
def _battle_row(battle_id, pokemon, opponent, effectiveness, is_winner):
    """Build the result record for one side of a battle.

    ``pokemon`` is the side the row describes and ``opponent`` the other side.
    The key order is kept stable because it defines the column order of the
    DataFrame built from these records.
    """
    return {
        'battle_id': battle_id,
        'name': pokemon['name'],
        'pokemon_id': pokemon['pokedex_number'],
        'type': pokemon['primary_type'],
        'primary_type': pokemon['primary_type'],
        'pokemon_class': pokemon['pokemon_class'],
        'attack': pokemon['attack'],
        'defense': pokemon['defense'],
        'speed': pokemon['speed'],
        'hp': pokemon['original_hp'],
        'effectiveness': effectiveness,
        'opponent_id': opponent['pokedex_number'],
        'opponent_name': opponent['name'],
        'opponent_primary_type': opponent['primary_type'],
        # NOTE(review): on a speed tie both sides get False here, although
        # get_first_attacker still picks one of them to go first -- confirm
        # whether that is intended.
        'attack_first': pokemon['speed'] > opponent['speed'],
        'is_winner': is_winner,
    }


battle_results = []

# battle_id is the 1-based position of the pair in pokemon_pairs.
for battle_id, (p1, p2) in enumerate(pokemon_pairs, start=1):
    # Errors are handled per battle: a single failing pair is reported and
    # skipped instead of silently ending the whole simulation.
    try:
        winner, loser = simulate_battle(p1, p2)

        # NOTE(review): each value is looked up on the row's own Pokémon using
        # the opponent's type, i.e. it is the multiplier perform_attack applies
        # when the opponent attacks this Pokémon. Confirm this matches the
        # definition the models were trained with.
        winner_effectiveness = winner.get(f'{loser["primary_type"].lower()}_attack_effectiveness', 1)
        loser_effectiveness = loser.get(f'{winner["primary_type"].lower()}_attack_effectiveness', 1)

        winner_row = _battle_row(battle_id, winner, loser, winner_effectiveness, True)
        loser_row = _battle_row(battle_id, loser, winner, loser_effectiveness, False)
    except Exception as e:
        print(f"An error occurred during battle {battle_id} between {p1['name']} and {p2['name']}: {e}")
        continue

    # Append both rows together so a battle is never recorded half-way.
    battle_results.append(winner_row)
    battle_results.append(loser_row)


In [23]:
# One row per (battle, Pokémon): every battle contributes a winner and a loser row.
df_battles = pd.DataFrame(data=battle_results)
# df_battles.dtypes

In [24]:
# One-hot encode the categorical columns and swap them for the indicator columns.
categorical_cols = ['type', 'pokemon_class']

encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
one_hot_values = encoder.fit_transform(df_battles[categorical_cols])

one_hot_frame = pd.DataFrame(
    one_hot_values,
    columns=encoder.get_feature_names_out(categorical_cols),
)

# Both frames are aligned on their index when concatenated column-wise.
df_encoded = pd.concat(
    [df_battles.drop(columns=categorical_cols), one_hot_frame],
    axis=1,
)

In [None]:
# Number of result rows per Pokémon name.
df_encoded.loc[:, 'name'].value_counts()

## XGBoost

In [None]:
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load model files from a trusted source.
xgboost_model = joblib.load('../models/xgboost_model.pkl')
# NOTE(review): presumably the columns (and order) the model was trained with -- confirm.
cols_to_use = ['attack', 'defense', 'speed', 'hp', 'effectiveness','attack_first', 'pokemon_class_Legendary', 'pokemon_class_Mythical', 'pokemon_class_Normal']

# The one-hot encoder only creates a column for classes that occur in the
# sampled battles. A class that was never sampled is "absent everywhere", so
# add its indicator column as all zeros instead of failing with a KeyError.
# Only the pokemon_class_* indicators are filled; any other missing feature
# still raises.
missing_class_cols = [
    col for col in cols_to_use
    if col.startswith('pokemon_class_') and col not in df_encoded.columns
]
X = df_encoded.assign(**{col: 0.0 for col in missing_class_cols})[cols_to_use]

prediction = xgboost_model.predict(X)

prediction

In [None]:
# Ground-truth labels and the second column of predict_proba
# (the probability of the second class in the model's class order).
y_true = df_encoded['is_winner']
class_probabilities = xgboost_model.predict_proba(X)
y_prob = class_probabilities[:, 1]

# ROC curve points and the area under that curve.
roc_points = roc_curve(y_true, y_prob)
fpr, tpr, thresholds = roc_points
auc_score = roc_auc_score(y_true, y_prob)

print(f'AUC Score: {auc_score}')


In [None]:
# Compare the density of the true labels with the predicted labels.
# The boolean labels are cast to int so both series are numeric 0/1 values
# for the density estimate.
fig = ff.create_distplot(
    [y_true.astype(int), prediction],
    group_labels=['True Values', 'Predicted Values'],
    colors=['green', 'red'],
    show_hist=False,
)

fig.update_layout(
    title='Distribution av True och Predicted Values',
    # is_winner is a boolean, so False maps to 0 and True maps to 1.
    xaxis_title='False (0) - True (1)',
    yaxis_title='Density',
)


fig.show()

## Random Forest

In [None]:
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load model files from a trusted source.
rf_model = joblib.load('../models/rf_best_model.pkl')
# NOTE(review): presumably the columns (and order) the model was trained with -- confirm.
cols_to_use = ['attack', 'defense', 'speed', 'hp', 'effectiveness','attack_first']

X = df_encoded[cols_to_use]
# A bare `rf_pred` expression in the middle of a cell displays nothing
# (only a cell's last expression is rendered), so it has been dropped.
rf_pred = rf_model.predict(X)

# Ground-truth labels and the second column of predict_proba.
y_true = df_encoded['is_winner']
y_prob = rf_model.predict_proba(X)[:, 1]

fpr, tpr, thresholds = roc_curve(y_true, y_prob)
auc_score = roc_auc_score(y_true, y_prob)

print(f'AUC Score: {auc_score}')