In [6]:
import numpy as np
import pandas as pd
import tensorflow as tf 

from sklearn.ensemble import GradientBoostingClassifier

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import os
print(os.listdir("input/"))

['.DS_Store', 'battles.csv', 'test.csv', 'pokemon.csv', 'typetable.csv']


In [7]:
import re
to_underscore = lambda x: re.sub("[^0-9a-zA-Z#]+", "_", x.lower())

In [8]:
# Retrieve the data
pokemon = pd.read_csv('input/pokemon.csv').rename(to_underscore, axis='columns').fillna("None")
battles = pd.read_csv('input/battles.csv').rename(to_underscore, axis='columns').fillna("None")
test = pd.read_csv('input/test.csv').rename(to_underscore, axis='columns').fillna("None")
typetable = pd.read_csv('input/typetable.csv')

# Show data structure
print("pokemon", pokemon.shape)
print("battles", battles.shape)
print("test", test.shape)
print("typetable", typetable.shape)

pokemon (800, 12)
battles (50000, 4)
test (10000, 3)
typetable (18, 19)


In [9]:
# Show pokemon file content
pokemon = pokemon.drop(labels=["name"], axis=1)
pokemon = pokemon.fillna({'Type 2': 'None'})
pokemon.head()

Unnamed: 0,#,type_1,type_2,hp,attack,defense,sp_atk,sp_def,speed,generation,legendary
0,1,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Grass,Poison,80,82,83,100,100,80,1,False
3,4,Grass,Poison,80,100,123,122,120,80,1,False
4,5,Fire,,39,52,43,60,50,65,1,False


In [10]:
# Normalize columns with continious values
columns = ['attack', 'defense', 'hp', 'speed', 'sp_atk', 'sp_def']
pokemon[columns] = MinMaxScaler().fit_transform(pokemon[columns])
pokemon.head()

Unnamed: 0,#,type_1,type_2,hp,attack,defense,sp_atk,sp_def,speed,generation,legendary
0,1,Grass,Poison,0.173228,0.237838,0.195556,0.298913,0.214286,0.228571,1,False
1,2,Grass,Poison,0.232283,0.308108,0.257778,0.380435,0.285714,0.314286,1,False
2,3,Grass,Poison,0.311024,0.416216,0.346667,0.48913,0.380952,0.428571,1,False
3,4,Grass,Poison,0.311024,0.513514,0.524444,0.608696,0.47619,0.428571,1,False
4,5,Fire,,0.149606,0.254054,0.168889,0.271739,0.142857,0.342857,1,False


In [11]:
# Change boolean type to 0 & 1 values
pokemon["legendary"] = pokemon["legendary"].astype(int)
pokemon.head()

Unnamed: 0,#,type_1,type_2,hp,attack,defense,sp_atk,sp_def,speed,generation,legendary
0,1,Grass,Poison,0.173228,0.237838,0.195556,0.298913,0.214286,0.228571,1,0
1,2,Grass,Poison,0.232283,0.308108,0.257778,0.380435,0.285714,0.314286,1,0
2,3,Grass,Poison,0.311024,0.416216,0.346667,0.48913,0.380952,0.428571,1,0
3,4,Grass,Poison,0.311024,0.513514,0.524444,0.608696,0.47619,0.428571,1,0
4,5,Fire,,0.149606,0.254054,0.168889,0.271739,0.142857,0.342857,1,0


In [12]:
# Create new multiplier to get the type effectivity between pokemons
vals = []

for c1 in typetable.columns[1:]:
    
    # Values when the defensor just have one type
    vals.append(pd.DataFrame({
        "idx": typetable["atck"].map(lambda x: "%s-vs-%s-None" % (x, c1)), #.rename(None)
        "mul": typetable[c1],
    }))
    
    # Values when the defensor have two types
    for c2 in typetable.columns[1:]:
        vals.append(pd.DataFrame({
            "idx": typetable["atck"].map(lambda x: "%s-vs-%s-%s" % (x, c1, c2)), #.rename(None)
            "mul": typetable[c1] * typetable[c2],
        }))
        
# Transform dataset to dicctionary 
mult = pd.concat(vals).reset_index().drop(["index"], axis=1)
mult = dict(zip(mult.values[:,0], mult.values[:,1]))

def multiplier(cat):
    return mult.get(cat, 0)

mult

{'Normal-vs-Normal-None': 1.0,
 'Fighting-vs-Normal-None': 2.0,
 'Flying-vs-Normal-None': 1.0,
 'Poison-vs-Normal-None': 1.0,
 'Ground-vs-Normal-None': 1.0,
 'Rock-vs-Normal-None': 1.0,
 'Bug-vs-Normal-None': 1.0,
 'Ghost-vs-Normal-None': 0.0,
 'Steel-vs-Normal-None': 1.0,
 'Fire-vs-Normal-None': 1.0,
 'Water-vs-Normal-None': 1.0,
 'Grass-vs-Normal-None': 1.0,
 'Electric-vs-Normal-None': 1.0,
 'Psychic-vs-Normal-None': 1.0,
 'Ice-vs-Normal-None': 1.0,
 'Dragon-vs-Normal-None': 1.0,
 'Dark-vs-Normal-None': 1.0,
 'Fairy-vs-Normal-None': 1.0,
 'Normal-vs-Normal-Normal': 1.0,
 'Fighting-vs-Normal-Normal': 4.0,
 'Flying-vs-Normal-Normal': 1.0,
 'Poison-vs-Normal-Normal': 1.0,
 'Ground-vs-Normal-Normal': 1.0,
 'Rock-vs-Normal-Normal': 1.0,
 'Bug-vs-Normal-Normal': 1.0,
 'Ghost-vs-Normal-Normal': 0.0,
 'Steel-vs-Normal-Normal': 1.0,
 'Fire-vs-Normal-Normal': 1.0,
 'Water-vs-Normal-Normal': 1.0,
 'Grass-vs-Normal-Normal': 1.0,
 'Electric-vs-Normal-Normal': 1.0,
 'Psychic-vs-Normal-Normal': 1.0

In [13]:
# Merge between battles & pokemon, also calculate types effectivity values
def merge_data(battles):
    
    battles = battles \
        .merge(pokemon.rename(lambda x: "f_%s" % x, axis="columns"), left_on="first_pokemon", right_on="f_#") \
        .merge(pokemon.rename(lambda x: "s_%s" % x, axis="columns"), left_on="second_pokemon", right_on="s_#") \

    battles["f_t1"] = (battles["f_type_1"] + "-vs-" + battles["s_type_1"] + "-" + battles["s_type_2"]).map(multiplier)
    battles["f_t2"] = (battles["f_type_2"] + "-vs-" + battles["s_type_1"] + "-" + battles["s_type_2"]).map(multiplier)
    battles["s_t1"] = (battles["s_type_1"] + "-vs-" + battles["f_type_1"] + "-" + battles["f_type_2"]).map(multiplier)
    battles["s_t2"] = (battles["s_type_2"] + "-vs-" + battles["f_type_1"] + "-" + battles["f_type_2"]).map(multiplier)
    
    battles["speed"] = (battles["f_speed"] - battles["s_speed"])
    
    # Delete original values
    battles = battles\
        .sort_values(['battle_number']) \
        .reset_index() \
        .drop(["index","battle_number", "first_pokemon", "second_pokemon", "f_#", "s_#"], axis=1)
    return battles

print(battles.head())
train = merge_data(battles)
train.head()

   battle_number  first_pokemon  second_pokemon  winner
0              0            266             298       1
1              1            702             701       1
2              2            191             668       1
3              3            237             683       1
4              4            151             231       0


Unnamed: 0,winner,f_type_1,f_type_2,f_hp,f_attack,f_defense,f_sp_atk,f_sp_def,f_speed,f_generation,...,s_sp_atk,s_sp_def,s_speed,s_generation,s_legendary,f_t1,f_t2,s_t1,s_t2,speed
0,1,Rock,Ground,0.192913,0.318919,0.2,0.190217,0.142857,0.205714,2,...,0.271739,0.095238,0.314286,3,0,1.0,0.5,4.0,1.0,-0.108571
1,1,Grass,Fighting,0.354331,0.459459,0.297778,0.434783,0.519048,0.588571,5,...,0.336957,0.333333,0.588571,5,1,2.0,2.0,0.5,1.0,0.0
2,1,Fairy,Flying,0.212598,0.189189,0.355556,0.380435,0.404762,0.2,2,...,0.625,0.357143,0.2,5,0,1.0,1.0,1.0,0.0,0.0
3,1,Fire,,0.153543,0.189189,0.155556,0.326087,0.095238,0.085714,2,...,0.271739,0.333333,0.245714,5,0,0.5,0.0,1.0,0.0,-0.16
4,0,Rock,Water,0.271654,0.297297,0.533333,0.570652,0.238095,0.285714,1,...,0.0,1.0,0.0,2,0,2.0,2.0,1.0,1.0,0.285714


In [14]:
# Transform pokemon type to dummy values
train = pd.get_dummies(train)

In [15]:
# Transform data to get train set 
Y_train = train["winner"]
X_train = train.drop(labels=["winner"],axis=1)

print(X_train.shape)
print(Y_train.shape)

(50000, 95)
(50000,)


In [16]:
# Separate features and labels on training set
X_train, X_val, y_train, y_val = train_test_split(X_train, Y_train, test_size=0.1, random_state = 3)

In [17]:
model = GradientBoostingClassifier(learning_rate=0.01,
                                   min_samples_split=3,
                                   max_depth=10,
                                   n_estimators=2000,
                                   random_state=2)

In [18]:
model.fit(X_train, y_train)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.01, loss='deviance', max_depth=10,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=3,
              min_weight_fraction_leaf=0.0, n_estimators=2000,
              presort='auto', random_state=2, subsample=1.0, verbose=0,
              warm_start=False)

In [14]:
y_val = np.array(y_val, dtype='int32')
y_val

array([0, 1, 0, ..., 1, 0, 1], dtype=int32)

In [15]:
# Predict values
predictions = model.predict(X_val)
res = model.score(X_val, y_val)
predictions

array([0, 1, 0, ..., 0, 0, 1])

In [16]:
res

0.9814

In [17]:
# Prepare test data
X_test = merge_data(test)
X_test = pd.get_dummies(X_test)
X_test.head()

Unnamed: 0,f_hp,f_attack,f_defense,f_sp_atk,f_sp_def,f_speed,f_generation,f_legendary,s_hp,s_attack,s_defense,s_sp_atk,s_sp_def,s_speed,s_generation,s_legendary,f_t1,f_t2,s_t1,s_t2,speed,f_type_1_Bug,f_type_1_Dark,f_type_1_Dragon,f_type_1_Electric,f_type_1_Fairy,f_type_1_Fighting,f_type_1_Fire,f_type_1_Flying,f_type_1_Ghost,f_type_1_Grass,f_type_1_Ground,f_type_1_Ice,f_type_1_Normal,f_type_1_Poison,f_type_1_Psychic,f_type_1_Rock,f_type_1_Steel,f_type_1_Water,f_type_2_Bug,...,f_type_2_Rock,f_type_2_Steel,f_type_2_Water,s_type_1_Bug,s_type_1_Dark,s_type_1_Dragon,s_type_1_Electric,s_type_1_Fairy,s_type_1_Fighting,s_type_1_Fire,s_type_1_Flying,s_type_1_Ghost,s_type_1_Grass,s_type_1_Ground,s_type_1_Ice,s_type_1_Normal,s_type_1_Poison,s_type_1_Psychic,s_type_1_Rock,s_type_1_Steel,s_type_1_Water,s_type_2_Bug,s_type_2_Dark,s_type_2_Dragon,s_type_2_Electric,s_type_2_Fairy,s_type_2_Fighting,s_type_2_Fire,s_type_2_Flying,s_type_2_Ghost,s_type_2_Grass,s_type_2_Ground,s_type_2_Ice,s_type_2_None,s_type_2_Normal,s_type_2_Poison,s_type_2_Psychic,s_type_2_Rock,s_type_2_Steel,s_type_2_Water
0,0.311024,0.47027,0.266667,0.298913,0.285714,0.36,1,0,0.350394,0.27027,0.311111,0.271739,0.261905,0.142857,1,0,1.0,0.0,1.0,0.0,0.217143,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
1,0.287402,0.481081,0.56,0.23913,0.457143,0.085714,5,0,0.370079,0.432432,0.355556,0.298913,0.214286,0.171429,2,0,4.0,0.5,0.5,1.0,-0.085714,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
2,0.307087,0.540541,0.288889,0.733696,0.285714,0.548571,5,1,0.192913,0.621622,0.213333,0.13587,0.428571,0.468571,1,0,1.0,2.0,0.5,0.0,0.08,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
3,0.271654,0.27027,0.222222,0.380435,0.190476,0.228571,2,0,0.291339,0.437838,0.275556,0.521739,0.22381,0.314286,5,0,0.5,0.0,1.0,0.0,-0.085714,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
4,0.153543,0.297297,0.111111,0.11413,0.052381,0.371429,1,0,0.645669,0.378378,0.333333,0.163043,0.119048,0.342857,5,0,1.0,1.0,1.0,0.0,0.028571,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0


In [18]:
predictions_test = model.predict(X_test)
predictions_test
result = np.squeeze(np.around(predictions_test).astype(int))
result

array([0, 1, 0, ..., 0, 0, 1])

In [19]:
data_to_submit = pd.DataFrame(columns=['Winner'])
data_to_submit['Winner'] = result
data_to_submit.insert(0, 'battle_number', range(0, len(data_to_submit)))
data_to_submit
data_to_submit.to_csv('submission.csv', index = False)
data_to_submit

Unnamed: 0,battle_number,Winner
0,0,0
1,1,1
2,2,0
3,3,1
4,4,0
5,5,0
6,6,0
7,7,1
8,8,0
9,9,0
