# Predict Pokemon battles using Machine Learning
---

__ToDo:__ 
  1. OHE
    1. Handle Type 2 NaN values?
  2. Combats
    1. Find way to combine with `pokemon_df`

#### Import libraries

In [50]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
%matplotlib inline

#### Import Datasets

In [51]:
pokemon = pd.read_csv("pokemon.csv")
combats = pd.read_csv("combats.csv")
tests = pd.read_csv("tests.csv")

# __`pokemon`__ Dataset
---

#### Print __`pokemon`__ csv import

In [52]:
pokemon.head()

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,5,Charmander,Fire,,39,52,43,60,50,65,1,False


#### Print shape of __`pokemon`__

In [53]:
print(pokemon.shape)

(800, 12)


#### See info about data/features

In [54]:
print(pokemon.info() )

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 12 columns):
#             800 non-null int64
Name          799 non-null object
Type 1        800 non-null object
Type 2        414 non-null object
HP            800 non-null int64
Attack        800 non-null int64
Defense       800 non-null int64
Sp. Atk       800 non-null int64
Sp. Def       800 non-null int64
Speed         800 non-null int64
Generation    800 non-null int64
Legendary     800 non-null bool
dtypes: bool(1), int64(8), object(3)
memory usage: 69.6+ KB
None


#### Define Feature Columns

In [55]:
pokemon_feature_cols = [
    "#",
    "Name",
    "Type 1", "Type 2",
    "HP",
    "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed",
    "Generation",
    "Legendary"
]

### Create __`pokemon_df`__ with feature columns

In [56]:
pokemon_df = pokemon[pokemon_feature_cols]

#### Print  __`pokemon_df`__

In [57]:
pokemon_df.head()

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,5,Charmander,Fire,,39,52,43,60,50,65,1,False


#### Check shape

In [58]:
print(pokemon_df.shape)

(800, 12)


In [110]:
pokemon_df.describe()

Unnamed: 0,#,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,400.5,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
std,231.0844,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129
min,1.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,200.75,50.0,55.0,50.0,49.75,50.0,45.0,2.0
50%,400.5,65.0,75.0,70.0,65.0,70.0,65.0,3.0
75%,600.25,80.0,100.0,90.0,95.0,90.0,90.0,5.0
max,800.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


### Define features to OHE

In [59]:
features_to_ohe = [
    "#", "Type 1", "Type 2", "Generation", "Legendary"
]

## These features are NOT ohe'd
# Saving so we can combine these as a new DF with feats that are OHE
features_non_OHE = [
    "HP", "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed"
]

In [60]:
pokemon_df_ohe = pd.get_dummies(pokemon_df[features_to_ohe], columns = features_to_ohe)

In [61]:
pokemon_df_ohe.head()

Unnamed: 0,#_1,#_2,#_3,#_4,#_5,#_6,#_7,#_8,#_9,#_10,...,Type 2_Steel,Type 2_Water,Generation_1,Generation_2,Generation_3,Generation_4,Generation_5,Generation_6,Legendary_False,Legendary_True
0,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0


#### Combine ohe features with non ohe features

In [100]:
X = pd.concat([pokemon_df_ohe, pokemon_df[features_non_OHE] ], axis=1)

#### Print out - (Don't like how columns are not in original order -- Fixed below)

In [101]:
X.head()

Unnamed: 0,#_1,#_2,#_3,#_4,#_5,#_6,#_7,#_8,#_9,#_10,...,Generation_5,Generation_6,Legendary_False,Legendary_True,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
0,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,45,49,49,65,65,45
1,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,60,62,63,80,80,60
2,0,0,1,0,0,0,0,0,0,0,...,0,0,1,0,80,82,83,100,100,80
3,0,0,0,1,0,0,0,0,0,0,...,0,0,1,0,80,100,123,122,120,80
4,0,0,0,0,1,0,0,0,0,0,...,0,0,1,0,39,52,43,60,50,65


### Testing OHE each column individually

In [102]:
num = ["#"] # brackets so when used in get_dummies it returns a DF and not a Series
type1 = ["Type 1"]
type2 = ["Type 2"]
gen = ["Generation"]
legend = ["Legendary"]

In [103]:
num_ohe = pd.get_dummies(pokemon_df[num], columns = num)
type1_ohe = pd.get_dummies(pokemon_df[type1], columns = type1)
type2_ohe = pd.get_dummies(pokemon_df[type2], columns = type2)
gen_ohe = pd.get_dummies(pokemon_df[gen])
legend_ohe = pd.get_dummies(pokemon_df[legend], columns = legend)

In [106]:
type2_ohe.head() ## doesn't have NaN column... Maybe don't need it? (All zeros)

Unnamed: 0,Type 2_Bug,Type 2_Dark,Type 2_Dragon,Type 2_Electric,Type 2_Fairy,Type 2_Fighting,Type 2_Fire,Type 2_Flying,Type 2_Ghost,Type 2_Grass,Type 2_Ground,Type 2_Ice,Type 2_Normal,Type 2_Poison,Type 2_Psychic,Type 2_Rock,Type 2_Steel,Type 2_Water
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [105]:
legend_ohe.head()

Unnamed: 0,Legendary_False,Legendary_True
0,1,0
1,1,0
2,1,0
3,1,0
4,1,0


#### Preserve original order of columns.. nice

In [66]:
# used this one to give an order to the columns
X_pokemon = pd.concat( [num_ohe, type1_ohe, type2_ohe, pokemon_df[features_non_OHE], gen_ohe, legend_ohe ], axis=1)

In [67]:
X_pokemon.head(10)

Unnamed: 0,#_1,#_2,#_3,#_4,#_5,#_6,#_7,#_8,#_9,#_10,...,Type 2_Water,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary_False,Legendary_True
0,1,0,0,0,0,0,0,0,0,0,...,0,45,49,49,65,65,45,1,1,0
1,0,1,0,0,0,0,0,0,0,0,...,0,60,62,63,80,80,60,1,1,0
2,0,0,1,0,0,0,0,0,0,0,...,0,80,82,83,100,100,80,1,1,0
3,0,0,0,1,0,0,0,0,0,0,...,0,80,100,123,122,120,80,1,1,0
4,0,0,0,0,1,0,0,0,0,0,...,0,39,52,43,60,50,65,1,1,0
5,0,0,0,0,0,1,0,0,0,0,...,0,58,64,58,80,65,80,1,1,0
6,0,0,0,0,0,0,1,0,0,0,...,0,78,84,78,109,85,100,1,1,0
7,0,0,0,0,0,0,0,1,0,0,...,0,78,130,111,130,85,100,1,1,0
8,0,0,0,0,0,0,0,0,1,0,...,0,78,104,78,159,115,100,1,1,0
9,0,0,0,0,0,0,0,0,0,1,...,0,44,48,65,50,64,43,1,1,0


# `combats` Dataset
---

In [68]:
pkm = ["First_pokemon", "Second_pokemon"]
combats_df = combats[pkm]
combats_df.head()

Unnamed: 0,First_pokemon,Second_pokemon
0,266,298
1,702,701
2,191,668
3,237,683
4,151,231


In [69]:
y = combats["Winner"]
y.head()

0    298
1    701
2    668
3    683
4    151
Name: Winner, dtype: int64

### OHE

In [70]:
X = pd.get_dummies(combats_df[pkm], columns = pkm)
X.head()

Unnamed: 0,First_pokemon_1,First_pokemon_2,First_pokemon_3,First_pokemon_4,First_pokemon_5,First_pokemon_6,First_pokemon_7,First_pokemon_8,First_pokemon_9,First_pokemon_10,...,Second_pokemon_791,Second_pokemon_792,Second_pokemon_793,Second_pokemon_794,Second_pokemon_795,Second_pokemon_796,Second_pokemon_797,Second_pokemon_798,Second_pokemon_799,Second_pokemon_800
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


---
## `Combats` Classification
---

#### Imports

In [71]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score
from sklearn import metrics

In [72]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

### Random Forest

In [92]:
random_forest = RandomForestClassifier(n_estimators = 15, bootstrap = True, n_jobs=-1,random_state = 2)
random_forest.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=15, n_jobs=-1,
            oob_score=False, random_state=2, verbose=0, warm_start=False)

In [93]:
y_random_forest_predict = random_forest.predict(X_test)

In [94]:
random_forest_accuracy = accuracy_score(y_test, y_random_forest_predict)
print("Random Forest: ", random_forest_accuracy)

Random Forest:  0.832133333333


### Decision Tree

In [11]:
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [12]:
y_dt_predict = decision_tree.predict(X_test)

In [13]:
dt_accuracy = accuracy_score(y_test, y_dt_predict)
print("Decision Tree: ", dt_accuracy)

Decision Tree:  0.801733333333


In [None]:
## for sprite filtering
alolan = [
    "Rattata", "Raticate", "Raichu", "Sandshrew", "Sandslash",
    "Vulpix", "Ninetales", "Diglett", "Dugtrio", "Meowth",
    "Persian", "Geodude", "Graveler", "Golem", "Grimer",
    "Muk", "Exeggutor", "Marowak"
]

alolan_dict = {
    19:["Alolan", "Rattata"], 20:["Alolan", "Raticate"], 26:["Raichu", "Alolan"], 27:["Alolan", "Sandshrew"], 28:["Sandslash", "Alolan"],
    37:["Vulpix", "Alolan"], 38:["Alolan", "Ninetales"], 50:["Diglett", "Alolan"], 51:["Alolan", "Dugtrio"], 52:["Meowth", "Alolan"],
    53:["Alolan", "Persian"], 74:["Geodude", "Alolan"], 75:["Alolan", "Graveler"], 76:["Alolan", "Golem"], 88:["Alolan", "Grimer"],
    89:["Alolan", "Muk"], 103:["Alolan", "Exeggutor"], 105:["Alolan", "Marowak"]
}

---
# Testing
---

  1. Going to try and combine __`First_pokemon`__ and __`Second_pokemon`__ in a single row with the winner as the label
  
  Example:
  
  __`First_pokemon_type_1,...,First_pokemon_Attack,..., Second_pokemon_type_2,...,Second_pokemon_Defense,...`__ 

### Functions

In [97]:
# returns a df where x (first pokemon) is the winner
won_df = lambda x: combats[ (combats["First_pokemon"] == x) & (combats["Winner"] == x) ]

# returns a DF where x (first pokemon) has lost
lost_to_df = lambda x: combats[ (combats["First_pokemon"] == x) & (combats["Winner"] != x) ]

# regardless of which function used... won_df or lost_to_df. Returns the Winner
winner_s = lambda y: y["Winner"] # returns a Series
winner_df = lambda y: y[["Winner"]] # returns a dataframe

In [None]:
# get row from combats
# get first_pokemon number
# find that pokemon number (row) in pokemon_df
# rename each feature to first_pokemon where

### Feature Columns

In [23]:
pokemon_feature_cols = [
    "#",
    "Name",
    "Type 1", "Type 2",
    "HP",
    "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed",
    "Generation",
    "Legendary"
]

### Features to OHE

In [None]:
features_to_ohe = [
    "#", "Type 1", "Type 2", "Generation", "Legendary"
]

## These features are NOT ohe'd
# Saving so we can combine these as a new DF with feats that are OHE
features_non_OHE = [
    "HP", "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed"
]