In [1]:
import pickle
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split, cross_val_score

from sklearn.pipeline import make_pipeline
from sklearn_pandas import DataFrameMapper
from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import r2_score


from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier, VotingClassifier
from sklearn.tree import DecisionTreeClassifier

In [2]:
poke = pd.read_csv('data/pokemon.csv')
df_train = pd.read_csv('data/train.csv')

df_train.sample()

Unnamed: 0,First_pokemon,Second_pokemon,Winner
423,65,135,135


In [3]:
df_train.columns = map(str.lower, df_train.columns)

In [4]:
#Cleaning Poke to pickle
poke.columns = map(str.lower, poke.columns)

poke = poke.fillna('None')
poke['legendary'].replace([False, True], [0,1], inplace=True)

In [5]:
df_train['winner'] = df_train['second_pokemon'] == df_train['winner']
df_train['winner'].replace([False, True], [0,1], inplace=True)

In [6]:
new_columns = []
for i in poke.columns:
    new_columns.append(i.replace(' ', '_'))
poke.columns = new_columns

In [7]:
poke.sample()

Unnamed: 0,#,name,type_1,type_2,hp,attack,defense,sp._atk,sp._def,speed,generation,legendary
124,136,Magmar,Fire,,65,95,57,100,85,93,1,0


In [8]:
pickle.dump(poke, open('poke.pkl', 'wb'))

In [9]:
#Merging the columns
first_df = df_train.merge(poke, how='left',left_on=['first_pokemon'], right_on = ['#'], suffixes=('_f', '_s'))
final_df = first_df.merge(poke, how='left',left_on=['second_pokemon'], right_on = ['#'],suffixes=('_f', '_s'))
df=final_df.drop(['#_f', '#_s'], axis=1)

In [10]:
df.sample()

Unnamed: 0,first_pokemon,second_pokemon,winner,name_f,type_1_f,type_2_f,hp_f,attack_f,defense_f,sp._atk_f,...,type_1_s,type_2_s,hp_s,attack_s,defense_s,sp._atk_s,sp._def_s,speed_s,generation_s,legendary_s
399,31,80,1,Pikachu,Electric,,35,55,40,50,...,Water,Poison,80,70,65,80,120,100,1,0


In [11]:
#Train Test Split
X = df.drop("winner", axis=1)
y = df.winner

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [12]:
#Mapper
mapper = DataFrameMapper([
   ('first_pokemon', None),
   ('second_pokemon', None),
   ('name_f', MultiLabelBinarizer()),
   ('type_1_f', MultiLabelBinarizer()),
   ('type_2_f', MultiLabelBinarizer()),
   (['hp_f'], StandardScaler()),
   (['attack_f'], StandardScaler()),
   (['defense_f'], StandardScaler()),
   (['sp._atk_f'], StandardScaler()),
   (['sp._def_f'], StandardScaler()),
   (['speed_f'], StandardScaler()),
   ('generation_f', None),
   ('legendary_f', None),
   ('name_s', MultiLabelBinarizer()),
   ('type_1_s', MultiLabelBinarizer()),
   ('type_2_s', MultiLabelBinarizer()),
   (['hp_s'], StandardScaler()),
   (['attack_s'], StandardScaler()),
   (['defense_s'], StandardScaler()),
   (['sp._atk_s'], StandardScaler()),
   (['sp._def_s'], StandardScaler()),
   (['speed_s'], StandardScaler()),
   ('generation_s', None),
   ('legendary_s', None)], df_out=True)

In [13]:
# Model
model = BaggingClassifier()

In [14]:
#Pipleline
pipe = make_pipeline(mapper, model)
pipe.fit(X_train, y_train)

# score the model
print(str(pipe.score(X_train, y_train)))
print(str(pipe.score(X_test, y_test)))

0.9923882017126546
0.8746438746438746


In [15]:
pickle.dump(pipe, open('pipe.pkl', 'wb'))