# Predict Pokemon battles using Machine Learning

Import libraries

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

Import Datasets

In [4]:
pokemon_df = pd.read_csv("pokemon.csv")
combats_df = pd.read_csv("combats.csv")
tests_df = pd.read_csv("tests.csv")
combined_df = pokemon_df

# __`pokemon_df`__

In [3]:
pokemon_df.head()

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,5,Charmander,Fire,,39,52,43,60,50,65,1,False


#### Print shape of pokemon_df

In [4]:
pokemon_df.shape

(800, 12)

# __`combats_df`__

In [5]:
combats_df.head()

Unnamed: 0,First_pokemon,Second_pokemon,Winner
0,266,298,298
1,702,701,701
2,191,668,668
3,237,683,683
4,151,231,151


#### Print shape of combats_df

In [6]:
combats_df.shape

(50000, 3)

In [7]:
combined_df.head()

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,5,Charmander,Fire,,39,52,43,60,50,65,1,False


In [8]:
combats_df.head()

Unnamed: 0,First_pokemon,Second_pokemon,Winner
0,266,298,298
1,702,701,701
2,191,668,668
3,237,683,683
4,151,231,151


In [9]:
combats_df['First_pokemon'].head()

0    266
1    702
2    191
3    237
4    151
Name: First_pokemon, dtype: int64

In [10]:
combats_df['First_pokemon'][0]

266

In [11]:
combined_df[combined_df['#'] == 1]['Name']


0    Bulbasaur
Name: Name, dtype: object

In [12]:
combined_df[combined_df['#'] == combats_df['First_pokemon'][0]]['Name']


265    Larvitar
Name: Name, dtype: object

In [13]:
print(combats_df['First_pokemon'].head())
print(combats_df['Second_pokemon'].head())
print(combats_df['Winner'].head())
lost_to_df = lambda x: combats[ (combats["First_pokemon"] == x) & (combats["Winner"] != x) ]
win_to_df = lambda x: combats[ (combats["First_pokemon"] == x) & (combats["Winner"] == x) ]
print(lost_to_df)
print(win_to_df)

0    266
1    702
2    191
3    237
4    151
Name: First_pokemon, dtype: int64
0    298
1    701
2    668
3    683
4    231
Name: Second_pokemon, dtype: int64
0    298
1    701
2    668
3    683
4    151
Name: Winner, dtype: int64
<function <lambda> at 0x10edee9d8>
<function <lambda> at 0x10edee400>


In [14]:
features = ['#', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
combined_df = combined_df[features]
combined_df.head()



Unnamed: 0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
0,1,Grass,Poison,45,49,49,65,65,45
1,2,Grass,Poison,60,62,63,80,80,60
2,3,Grass,Poison,80,82,83,100,100,80
3,4,Grass,Poison,80,100,123,122,120,80
4,5,Fire,,39,52,43,60,50,65


In [15]:
combats_df = combats_df.rename(index= str, columns= {'First_pokemon': '#', 'Second_pokemon': 'Fought'})
combats_df.head()
print(type(combats_df['#'][0]))

<class 'numpy.int64'>


In [16]:
combined_df = pd.merge(combined_df[features], combats_df, on = '#', how = 'left')
print(combined_df.shape)

combined_df = combined_df.where((pd.notnull(combined_df)), -1)
fought_to_int = combined_df['Fought'].map(lambda x: np.int64(x))
winner_to_int = combined_df['Winner'].map(lambda x: np.int64(x))
type_to_none = combined_df['Type 2'].map(lambda x: None if x == -1 else x)


#print(combined_df.head())
combined_df['Fought'] = fought_to_int
combined_df['Winner'] = winner_to_int
combined_df['Type 2'] = type_to_none

print(combined_df.shape)
combined_df.head()

(50016, 11)
(50016, 11)


Unnamed: 0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Fought,Winner
0,1,Grass,Poison,45,49,49,65,65,45,679,679
1,1,Grass,Poison,45,49,49,65,65,45,687,687
2,1,Grass,Poison,45,49,49,65,65,45,557,557
3,1,Grass,Poison,45,49,49,65,65,45,766,766
4,1,Grass,Poison,45,49,49,65,65,45,153,153


In [None]:
final_features = ['#', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Fought']
y = combined_df['Winner']
x = combined_df[final_features]


print(x.head())
print(y.head())


   # Type 1  Type 2  HP  Attack  Defense  Sp. Atk  Sp. Def  Speed  Fought
0  1  Grass  Poison  45      49       49       65       65     45     679
1  1  Grass  Poison  45      49       49       65       65     45     687
2  1  Grass  Poison  45      49       49       65       65     45     557
3  1  Grass  Poison  45      49       49       65       65     45     766
4  1  Grass  Poison  45      49       49       65       65     45     153
0    679
1    687
2    557
3    766
4    153
Name: Winner, dtype: int64


In [None]:
x_ohe = pd.get_dummies(x, columns = ['#', 'Type 1', 'Type 2', 'Fought'])
print(x_ohe.head())
x_ohe.shape

In [None]:
x_ohe_train, x_ohe_test, y_train, y_test = train_test_split(x_ohe, y, test_size = 0.33)


In [None]:
knn = KNeighborsClassifier(n_jobs = -1)

knn.fit(x_ohe_train, y_train)
y_predict = knn.predict(x_ohe_test)
accuracy = accuracy_score(y_test, y_predict)
print("Knn: ", accuracy)


In [None]:
decisiontree = DecisionTreeClassifier()
decisiontree.fit(x_ohe_train,y_train)
y_predict = decisiontree.predict(x_ohe_test)
accuracy = accuracy_score(y_test, y_predict)
print("Decision Tree: ", accuracy)

In [None]:
random_forest = RandomForestClassifier(n_jobs = -1)
random_forest.fit(x_ohe_train, y_train)
y_predict = random_forest.predict(x_ohe_test)
accuracy = accuracy_score(y_test, y_predict)
print("Random Forest: ", accuracy)

In [None]:
logreg = LogisticRegression()
logreg.fit(x_ohe_train,y_train)
y_predict = logreg.predict(x_ohe_test)
accuracy = accuracy_score(y_test, y_predict)
print("Log Reg: ", accuracy)


In [None]:
naive = GaussianNB()
naive.fit(x_ohe_train, y_train)
acc_test_gaussiannb = naive.score(x_ohe_test, y_test)
print(acc_test_gaussiannb)


In [None]:
acc_train_gaussiannb = naive.score(x_ohe_train, y_train)
print(acc_train_gaussiannb)

In [None]:
lsvm = SVC(kernel='linear', C = 1.0, probability=True)
lsvm.fit(x_ohe_train, y_train)
print(lsvm.score(x_ohe_train, y_train))
#compute test accuracy
print("test accuracy ", lsvm.score(x_ohe_test, y_test))
y_pred = lsvm.predict(x_ohe_test)
print(classification_report(y_test, y_pred))

# NEW STUFF

### Functions

In [10]:
# split up columns so it's easier to iterate over..
first_matches_won_df = lambda x: combats_df[ (combats_df["First_pokemon"] == x) & (combats_df["Winner"] == x) ]
second_matches_won_df = lambda x: combats_df[ (combats_df["Second_pokemon"] == x) & (combats_df["Winner"] == x) ]

### Mark where pokemon 1 won against pokemon 2

In [11]:
create_range = range(1, 800 + 1) # from 1 to 800

num_T = pd.DataFrame(0, index=create_range, columns=create_range) # 800 x 800 - values should be all zeros

print(num_T.shape)
print(type(num_T) )

(800, 800)
<class 'pandas.core.frame.DataFrame'>


### This loops through 2 sets of dataframes where pokemon i has won

In [14]:
pvp_wins = num_T.copy()

for i in range(1, 801):
    for j in first_matches_won_df(i)["Second_pokemon"]: # Second_pokemon column: j is the loser to i
        pvp_wins.loc[i][j] += 1
    for k in second_matches_won_df(i)["First_pokemon"]: # First_pokemon column: k is the loser to i
        pvp_wins.loc[i][k] += 1

In [15]:
pvp_wins.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,791,792,793,794,795,796,797,798,799,800
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
5,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
