# Predict Pokemon battles using Machine Learning
---

__Questions:__
  1. What do we do with NaN values?
    * ex: Look at Charmander Type 2
    * Maybe change it to 0 ?
  2. Combine Type 1 and Type 2?
    * But if Type 1 > Type 2?

__ToDo:__ 
  1. OHE
    1. Handle Type 2 NaN values?
  2. Combats
    1. Find way to combine with `pokemon_df`

#### Import libraries

In [440]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
%matplotlib inline

#### Import Datasets

In [198]:
pokemon = pd.read_csv("pokemon.csv")
combats = pd.read_csv("combats.csv")
tests = pd.read_csv("tests.csv")

# __`pokemon`__ Dataset
---

#### Print __`pokemon`__ csv import

In [199]:
pokemon.head()

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,5,Charmander,Fire,,39,52,43,60,50,65,1,False


#### Print shape of __`pokemon`__

In [200]:
print(pokemon.shape)

(800, 12)


#### See info about data/features

In [268]:
print(pokemon.info() )

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 800 entries, 0 to 799
Data columns (total 12 columns):
#             800 non-null int64
Name          799 non-null object
Type 1        800 non-null object
Type 2        414 non-null object
HP            800 non-null int64
Attack        800 non-null int64
Defense       800 non-null int64
Sp. Atk       800 non-null int64
Sp. Def       800 non-null int64
Speed         800 non-null int64
Generation    800 non-null int64
Legendary     800 non-null bool
dtypes: bool(1), int64(8), object(3)
memory usage: 69.6+ KB
None


#### Define Feature Columns

In [202]:
pokemon_feature_cols = [
    "#",
    "Name",
    "Type 1", "Type 2",
    "HP",
    "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed",
    "Generation",
    "Legendary"
]

## Create __`pokemon_df`__ with feature columns

In [397]:
pokemon_df = pokemon[pokemon_feature_cols]

#### Print  __`pokemon_df`__

In [216]:
pokemon_df.head()

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,5,Charmander,Fire,,39,52,43,60,50,65,1,False


#### Check shape

In [281]:
print(pokemon_df.shape)

(800, 12)


In [228]:
pokemon_df.head()

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,5,Charmander,Fire,,39,52,43,60,50,65,1,False


## Define features to OHE

In [309]:
features_to_ohe = [
    "#", "Type 1", "Type 2", "Generation", "Legendary"
]

## These features are NOT ohe'd
# Saving so we can combine these as a new DF with feats that are OHE
features_non_OHE = [
    "HP", "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed"
]

In [310]:
pokemon_df_ohe = pd.get_dummies(pokemon_df[features_to_ohe], columns = features_to_ohe)

In [311]:
pokemon_df_ohe.head()

Unnamed: 0,#_1,#_2,#_3,#_4,#_5,#_6,#_7,#_8,#_9,#_10,...,Type 2_Steel,Type 2_Water,Generation_1,Generation_2,Generation_3,Generation_4,Generation_5,Generation_6,Legendary_False,Legendary_True
0,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0


### Testing OHE each column individually

In [411]:
num = ["#"]
type1 = ["Type 1"]
type2 = ["Type 2"]
gen = ["Generation"]
legend = ["Legendary"]

In [412]:
num_ohe = pd.get_dummies(pokemon_df[num], columns = num)
type1_ohe = pd.get_dummies(pokemon_df[type1], columns = type1)
type2_ohe = pd.get_dummies(pokemon_df[type2], columns = type2)
gen_ohe = pd.get_dummies(pokemon_df[gen])
legend_ohe = pd.get_dummies(pokemon_df[legend], columns = legend)

In [413]:
type2_ohe.head() ## doesn't have NaN column... Maybe don't need it?

Unnamed: 0,Type 2_Bug,Type 2_Dark,Type 2_Dragon,Type 2_Electric,Type 2_Fairy,Type 2_Fighting,Type 2_Fire,Type 2_Flying,Type 2_Ghost,Type 2_Grass,Type 2_Ground,Type 2_Ice,Type 2_Normal,Type 2_Poison,Type 2_Psychic,Type 2_Rock,Type 2_Steel,Type 2_Water
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [414]:
legend_ohe.head()

Unnamed: 0,Legendary_False,Legendary_True
0,1,0
1,1,0
2,1,0
3,1,0
4,1,0


In [451]:
#X = pd.concat([pokemon_df_ohe, pokemon_df[features_non_OHE] ], axis=1)
X_pokemon = pd.concat( [num_ohe, type1_ohe, type2_ohe, pokemon_df[features_non_OHE], gen_ohe, legend_ohe ], axis=1)

In [453]:
X_pokemon.head(10)

Unnamed: 0,#_1,#_2,#_3,#_4,#_5,#_6,#_7,#_8,#_9,#_10,...,Type 2_Water,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary_False,Legendary_True
0,1,0,0,0,0,0,0,0,0,0,...,0,45,49,49,65,65,45,1,1,0
1,0,1,0,0,0,0,0,0,0,0,...,0,60,62,63,80,80,60,1,1,0
2,0,0,1,0,0,0,0,0,0,0,...,0,80,82,83,100,100,80,1,1,0
3,0,0,0,1,0,0,0,0,0,0,...,0,80,100,123,122,120,80,1,1,0
4,0,0,0,0,1,0,0,0,0,0,...,0,39,52,43,60,50,65,1,1,0
5,0,0,0,0,0,1,0,0,0,0,...,0,58,64,58,80,65,80,1,1,0
6,0,0,0,0,0,0,1,0,0,0,...,0,78,84,78,109,85,100,1,1,0
7,0,0,0,0,0,0,0,1,0,0,...,0,78,130,111,130,85,100,1,1,0
8,0,0,0,0,0,0,0,0,1,0,...,0,78,104,78,159,115,100,1,1,0
9,0,0,0,0,0,0,0,0,0,1,...,0,44,48,65,50,64,43,1,1,0


#### Train Data

In [431]:
from sklearn.model_selection import train_test_split

X_train, X_test = train_test_split(X, test_size = 0.3)

In [432]:
X_train.shape

(560, 845)

# `combats` Dataset
---

### Functions

#### Returns a df where x (first pokemon) is the winner

In [415]:
won_df = lambda x: combats[ (combats["First_pokemon"] == x) & (combats["Winner"] == x) ]

Example:

In [416]:
pokemon_number = 1

won_df(pokemon_number).head() # pokemon 1 wins

Unnamed: 0,First_pokemon,Second_pokemon,Winner
4154,1,194,1
4883,1,285,1
5425,1,579,1
8156,1,324,1
8570,1,385,1


#### Returns a DF where x (first pokemon) has lost

In [417]:
lost_to_df = lambda x: combats[ (combats["First_pokemon"] == x) & (combats["Winner"] != x) ]

Example:

In [418]:
pokemon_number = 1

lost_to_df(pokemon_number).head() # pokemon 1 losses

Unnamed: 0,First_pokemon,Second_pokemon,Winner
426,1,679,679
995,1,687,687
1040,1,557,557
1446,1,766,766
1889,1,153,153


#### Returns a Series or DF of the winning column (Handles both `lost_to` and `won` functions)

In [419]:
# regardless of which function used... won_df or lost_to_df. Returns the Winner
winner_s = lambda y: y["Winner"] # returns a Series
winner_df = lambda y: y[["Winner"]] # returns a dataframe

In [420]:
winner_df(won_df(1) ).head() # which pokemon pokemon 1 has lost to - df

Unnamed: 0,Winner
4154,1
4883,1
5425,1
8156,1
8570,1


In [422]:
winner_s(lost_to_df(1) ).head() # which pokemon pokemon 1 has lost to - Series

426     679
995     687
1040    557
1446    766
1889    153
Name: Winner, dtype: int64

In [None]:
alolan = [
    "Rattata", "Raticate", "Raichu", "Sandshrew", "Sandslash",
    "Vulpix", "Ninetales", "Diglett", "Dugtrio", "Meowth",
    "Persian", "Geodude", "Graveler", "Golem", "Grimer",
    "Muk", "Exeggutor", "Marowak"
]

In [None]:
alolan_dict = {
    19:["Alolan", "Rattata"], 20:["Alolan", "Raticate"], 26:["Raichu", "Alolan"], 27:["Alolan", "Sandshrew"], 28:["Sandslash", "Alolan"],
    37:["Vulpix", "Alolan"], 38:["Alolan", "Ninetales"], 50:["Diglett", "Alolan"], 51:["Alolan", "Dugtrio"], 52:["Meowth", "Alolan"],
    53:["Alolan", "Persian"], 74:["Geodude", "Alolan"], 75:["Alolan", "Graveler"], 76:["Alolan", "Golem"], 88:["Alolan", "Grimer"],
    89:["Alolan", "Muk"], 103:["Alolan", "Exeggutor"], 105:["Alolan", "Marowak"]
}