# Predict Pokemon battles using Machine Learning
---

__ToDo:__ 
  1. OHE
    1. Handle Type 2 NaN values?
  2. Combats
    1. Find way to combine with `pokemon_df`

#### Import libraries

In [1]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
%matplotlib inline

#### Import Datasets

In [2]:
pokemon = pd.read_csv("pokemon.csv")
combats = pd.read_csv("combats.csv")
tests = pd.read_csv("tests.csv")

# __`pokemon`__ Dataset
---

#### Print __`pokemon`__ csv import

In [None]:
pokemon.head()

#### Print shape of __`pokemon`__

In [None]:
print(pokemon.shape)

#### See info about data/features

In [None]:
print(pokemon.info() )

#### Define Feature Columns

In [None]:
pokemon_feature_cols = [
    "#",
    "Name",
    "Type 1", "Type 2",
    "HP",
    "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed",
    "Generation",
    "Legendary"
]

## Create __`pokemon_df`__ with feature columns

In [None]:
pokemon_df = pokemon[pokemon_feature_cols]

#### Print  __`pokemon_df`__

In [None]:
pokemon_df.head()

#### Check shape

In [None]:
print(pokemon_df.shape)

## Define features to OHE

In [None]:
features_to_ohe = [
    "#", "Type 1", "Type 2", "Generation", "Legendary"
]

## These features are NOT ohe'd
# Saving so we can combine these as a new DF with feats that are OHE
features_non_OHE = [
    "HP", "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed"
]

In [None]:
pokemon_df_ohe = pd.get_dummies(pokemon_df[features_to_ohe], columns = features_to_ohe)

In [None]:
pokemon_df_ohe.head()

### Testing OHE each column individually

In [None]:
num = ["#"]
type1 = ["Type 1"]
type2 = ["Type 2"]
gen = ["Generation"]
legend = ["Legendary"]

In [None]:
num_ohe = pd.get_dummies(pokemon_df[num], columns = num)
type1_ohe = pd.get_dummies(pokemon_df[type1], columns = type1)
type2_ohe = pd.get_dummies(pokemon_df[type2], columns = type2)
gen_ohe = pd.get_dummies(pokemon_df[gen])
legend_ohe = pd.get_dummies(pokemon_df[legend], columns = legend)

In [None]:
type2_ohe.head() ## doesn't have NaN column... Maybe don't need it?

In [None]:
legend_ohe.head()

In [None]:
#X = pd.concat([pokemon_df_ohe, pokemon_df[features_non_OHE] ], axis=1)
X_pokemon = pd.concat( [num_ohe, type1_ohe, type2_ohe, pokemon_df[features_non_OHE], gen_ohe, legend_ohe ], axis=1)

In [None]:
X_pokemon.head(10)

# `combats` Dataset
---

In [3]:
pkm = ["First_pokemon", "Second_pokemon"]
combats_df = combats[pkm]
combats_df.head()

Unnamed: 0,First_pokemon,Second_pokemon
0,266,298
1,702,701
2,191,668
3,237,683
4,151,231


In [4]:
y = combats["Winner"]
y.head()

0    298
1    701
2    668
3    683
4    151
Name: Winner, dtype: int64

#### OHE

In [5]:
X = pd.get_dummies(combats_df[pkm], columns = pkm)
X.head()

Unnamed: 0,First_pokemon_1,First_pokemon_2,First_pokemon_3,First_pokemon_4,First_pokemon_5,First_pokemon_6,First_pokemon_7,First_pokemon_8,First_pokemon_9,First_pokemon_10,...,Second_pokemon_791,Second_pokemon_792,Second_pokemon_793,Second_pokemon_794,Second_pokemon_795,Second_pokemon_796,Second_pokemon_797,Second_pokemon_798,Second_pokemon_799,Second_pokemon_800
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Classification

#### Imports

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score
from sklearn import metrics

In [7]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

#### Random Forest

In [8]:
random_forest = RandomForestClassifier(n_estimators = 10, bootstrap = True, random_state = 2)
random_forest.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=2, verbose=0, warm_start=False)

In [9]:
y_random_forest_predict = random_forest.predict(X_test)

In [10]:
random_forest_accuracy = accuracy_score(y_test, y_random_forest_predict)
print("Random Forest: ", random_forest_accuracy)

Random Forest:  0.822733333333


#### Decision Tree

In [11]:
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [12]:
y_dt_predict = decision_tree.predict(X_test)

In [13]:
dt_accuracy = accuracy_score(y_test, y_dt_predict)
print("Decision Tree: ", dt_accuracy)

Decision Tree:  0.801733333333


### Functions

#### Returns a df where x (first pokemon) is the winner

In [None]:
won_df = lambda x: combats[ (combats["First_pokemon"] == x) & (combats["Winner"] == x) ]

Example:

In [None]:
pokemon_number = 1

won_df(pokemon_number).head() # matches where pokemon 1 wins

#### Returns a DF where x (first pokemon) has lost

In [None]:
lost_to_df = lambda x: combats[ (combats["First_pokemon"] == x) & (combats["Winner"] != x) ]

Example:

In [None]:
pokemon_number = 1

lost_to_df(pokemon_number).head() # pokemon 1 losses

#### Returns a Series or DF of the winning column (Handles both `lost_to` and `won` functions)

In [None]:
# regardless of which function used... won_df or lost_to_df. Returns the Winner
winner_s = lambda y: y["Winner"] # returns a Series
winner_df = lambda y: y[["Winner"]] # returns a dataframe

In [None]:
winner_df(won_df(1) ).head() # which pokemon pokemon 1 has lost to - DataFrame

In [None]:
winner_s(lost_to_df(1) ).head() # which pokemon pokemon 1 has lost to - Series

In [None]:
alolan = [
    "Rattata", "Raticate", "Raichu", "Sandshrew", "Sandslash",
    "Vulpix", "Ninetales", "Diglett", "Dugtrio", "Meowth",
    "Persian", "Geodude", "Graveler", "Golem", "Grimer",
    "Muk", "Exeggutor", "Marowak"
]

In [None]:
alolan_dict = {
    19:["Alolan", "Rattata"], 20:["Alolan", "Raticate"], 26:["Raichu", "Alolan"], 27:["Alolan", "Sandshrew"], 28:["Sandslash", "Alolan"],
    37:["Vulpix", "Alolan"], 38:["Alolan", "Ninetales"], 50:["Diglett", "Alolan"], 51:["Alolan", "Dugtrio"], 52:["Meowth", "Alolan"],
    53:["Alolan", "Persian"], 74:["Geodude", "Alolan"], 75:["Alolan", "Graveler"], 76:["Alolan", "Golem"], 88:["Alolan", "Grimer"],
    89:["Alolan", "Muk"], 103:["Alolan", "Exeggutor"], 105:["Alolan", "Marowak"]
}