In [240]:
import pandas as pd
import numpy as np

In [241]:
df_pokemon = pd.read_csv('./pokemon.csv')

In [242]:
df_pokemon.columns

Index(['id', 'Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk',
       'Sp. Def', 'Speed', 'Generation', 'Legendary'],
      dtype='object')

In [243]:
df_pokemon = df_pokemon.rename(columns={
    'Name': 'name', 'Type 1': 'type_1', 'Type 2': 'type_2', 'HP': 'hp',
                           'Attack': 'attack', 'Defense': 'defense', 'Sp. Atk': 'sp_atk',
                           'Sp. Def': 'sp_def', 'Speed': 'speed',
                           'Generation': 'generation', 'Legendary': 'legendary'})

In [244]:
df_pokemon.columns

Index(['id', 'name', 'type_1', 'type_2', 'hp', 'attack', 'defense', 'sp_atk',
       'sp_def', 'speed', 'generation', 'legendary'],
      dtype='object')

In [245]:
to_remove = ['generation', 'legendary']
df_pokemon = df_pokemon.drop(columns=to_remove)

In [246]:
df_pokemon.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 10 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   id       800 non-null    int64 
 1   name     799 non-null    object
 2   type_1   800 non-null    object
 3   type_2   414 non-null    object
 4   hp       800 non-null    int64 
 5   attack   800 non-null    int64 
 6   defense  800 non-null    int64 
 7   sp_atk   800 non-null    int64 
 8   sp_def   800 non-null    int64 
 9   speed    800 non-null    int64 
dtypes: int64(7), object(3)
memory usage: 62.6+ KB


In [247]:
# %pip install plotly
# %pip install nbformat

In [248]:
def get_pokemon_stats(name):
    pokemon = df_pokemon[df_pokemon['name'] == name]
    return [pokemon['hp'].sum(), 
            pokemon['attack'].sum(), 
            pokemon['defense'].sum(), 
            pokemon['sp_atk'].sum(), 
            pokemon['sp_def'].sum(), 
            pokemon['speed'].sum()]

get_pokemon_stats('Pikachu')

[np.int64(35),
 np.int64(55),
 np.int64(40),
 np.int64(50),
 np.int64(50),
 np.int64(90)]

In [249]:
import plotly.express as px

def showStatsRadar(name):
    df = pd.DataFrame(dict(
    r=get_pokemon_stats(name),
    theta=['hp','attack','defense',
           'sp_atk', 'sp_def', 'speed']))
    fig = px.line_polar(df, r='r', theta='theta', line_close=True)
    fig.update_traces(fill='toself')
    fig.show()

In [250]:
showStatsRadar('Tepig')
showStatsRadar('Squirtle')


In [251]:
combats = pd.read_csv('combats.csv')
combats.head(10)

Unnamed: 0,First_pokemon,Second_pokemon,Winner
0,266,298,298
1,702,701,701
2,191,668,668
3,237,683,683
4,151,231,151
5,657,752,657
6,192,134,134
7,73,545,545
8,220,763,763
9,302,31,31


In [252]:
combats = combats.merge(df_pokemon, how='left', left_on='First_pokemon', right_on='id', suffixes=('', '_first'))

combats = combats.merge(df_pokemon, left_on='Second_pokemon', right_on='id', how='left', suffixes=('_first', '_second'))


for feature in ['hp', 'attack', 'defense', 'sp_atk', 'sp_def', 'speed']:
    combats[f'{feature}_diff'] = combats[f'{feature}_first'] - combats[f'{feature}_second']


In [253]:
combats.columns

Index(['First_pokemon', 'Second_pokemon', 'Winner', 'id_first', 'name_first',
       'type_1_first', 'type_2_first', 'hp_first', 'attack_first',
       'defense_first', 'sp_atk_first', 'sp_def_first', 'speed_first',
       'id_second', 'name_second', 'type_1_second', 'type_2_second',
       'hp_second', 'attack_second', 'defense_second', 'sp_atk_second',
       'sp_def_second', 'speed_second', 'hp_diff', 'attack_diff',
       'defense_diff', 'sp_atk_diff', 'sp_def_diff', 'speed_diff'],
      dtype='object')

In [254]:
combats.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 29 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   First_pokemon   50000 non-null  int64 
 1   Second_pokemon  50000 non-null  int64 
 2   Winner          50000 non-null  int64 
 3   id_first        50000 non-null  int64 
 4   name_first      49944 non-null  object
 5   type_1_first    50000 non-null  object
 6   type_2_first    25969 non-null  object
 7   hp_first        50000 non-null  int64 
 8   attack_first    50000 non-null  int64 
 9   defense_first   50000 non-null  int64 
 10  sp_atk_first    50000 non-null  int64 
 11  sp_def_first    50000 non-null  int64 
 12  speed_first     50000 non-null  int64 
 13  id_second       50000 non-null  int64 
 14  name_second     49948 non-null  object
 15  type_1_second   50000 non-null  object
 16  type_2_second   26015 non-null  object
 17  hp_second       50000 non-null  int64 
 18  attack

In [255]:
def standartScaler(x: np.ndarray) -> np.ndarray:
    
    mean = np.mean(x, axis=0)
    deviation = np.std(x, axis=0)
    
    res = (x - mean) / deviation

    return res

In [256]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import StandardScaler
from collections import Counter

X = combats[['hp_diff', 'attack_diff', 'defense_diff', 'sp_atk_diff', 'sp_def_diff', 'speed_diff']]
y = combats['Winner']

y = combats.apply(lambda row: 1 if row['Winner'] == row['First_pokemon'] else 0, axis=1)

print("Class distribution in target variable:", Counter(y))

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# X_scaled = standartScaler(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators = 100, random_state = 42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy: float = accuracy_score(y_test, y_pred)
precision: float = precision_score(y_test, y_pred, zero_division=1)
recall: float = recall_score(y_test, y_pred, zero_division=1)
f1: float = f1_score(y_test, y_pred, zero_division=1)
roc_auc: float = roc_auc_score(y_test, y_pred)

print(f'Accuracy: {accuracy * 100}')
print(f'Precision: {precision * 100}')
print(f'Recall: {recall * 100}')
print(f'F1 Score: {f1 * 100}')
print(f'ROC AUC Score: {roc_auc * 100}')

Class distribution in target variable: Counter({0: 26399, 1: 23601})
Accuracy: 95.0
Precision: 94.10910599460692
Recall: 95.4555017883442
F1 Score: 94.77752245665344
ROC AUC Score: 95.02144252748637


In [257]:
test = pd.read_csv('tests.csv')

In [258]:
test = test.merge(df_pokemon, how='left', left_on='First_pokemon', right_on='id', suffixes=('', '_first'))

test = test.merge(df_pokemon, left_on='Second_pokemon', right_on='id', how='left', suffixes=('_first', '_second'))


for feature in ['hp', 'attack', 'defense', 'sp_atk', 'sp_def', 'speed']:
    test[f'{feature}_diff'] = test[f'{feature}_first'] - test[f'{feature}_second']


In [259]:
X_test_final = scaler.transform(test[['hp_diff', 'attack_diff', 'defense_diff', 'sp_atk_diff', 'sp_def_diff', 'speed_diff']])

test['Prediction'] = model.predict(X_test_final)

test['Winning_Pokemon'] = test.apply(lambda row: row['First_pokemon'] if row['Prediction'] == 1 else row['Second_pokemon'], axis=1)

test[['First_pokemon', 'Second_pokemon', 'Winning_Pokemon']].head(10)

Unnamed: 0,First_pokemon,Second_pokemon,Winning_Pokemon
0,656,392,392
1,129,117,129
2,660,211,211
3,706,115,706
4,195,618,618
5,27,656,27
6,126,222,126
7,436,207,436
8,121,36,36
9,169,636,169
