# *Pokemon Legendary Predict*

## Import Libraries

In [45]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from time import time

## Reading data

In [None]:
# Load the raw Pokemon table from the CSV that sits next to this notebook.
data = pd.read_csv(filepath_or_buffer='./pokemon.csv')
data.head()

## Setting Data

In [None]:
# Build the working frame without the '#', 'Name' and 'Type 2' columns;
# `data` itself is left untouched because drop() returns a new frame.
df = data.drop(columns=['#', 'Name', 'Type 2'])
df.head()

Encode the `Legendary` target as an integer: `True` becomes 1 and `False` becomes 0.

In [29]:
# Encode the target as 0/1 with an integer cast rather than a
# {False: 0, True: 1} mapping. A dict-based map is not idempotent: re-running
# the cell on the already-encoded column would find no matching keys and turn
# every value into NaN. The cast yields the same 0/1 values on the first run
# and is a no-op on later runs.
df['Legendary'] = df['Legendary'].astype('int64')
df.head()

Unnamed: 0,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Type 1_Bug,Type 1_Dark,...,Type 1_Ghost,Type 1_Grass,Type 1_Ground,Type 1_Ice,Type 1_Normal,Type 1_Poison,Type 1_Psychic,Type 1_Rock,Type 1_Steel,Type 1_Water
0,45,49,49,65,65,45,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,60,62,63,80,80,60,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,80,82,83,100,100,80,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,80,100,123,122,120,80,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,39,52,43,60,50,65,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
# One-hot encode the remaining non-numeric column(s) (the 'Type 1_*' columns
# come from here). The indicator dtype is pinned to int because the default
# differs between pandas versions (bool since pandas 2.0), so the 0/1 values
# shown in this notebook are reproduced regardless of the installed version.
df = pd.get_dummies(df, dtype=int)
df.head()

Unnamed: 0,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Type 1_Bug,Type 1_Dark,...,Type 1_Ghost,Type 1_Grass,Type 1_Ground,Type 1_Ice,Type 1_Normal,Type 1_Poison,Type 1_Psychic,Type 1_Rock,Type 1_Steel,Type 1_Water
0,45,49,49,65,65,45,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,60,62,63,80,80,60,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,80,82,83,100,100,80,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,80,100,123,122,120,80,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,39,52,43,60,50,65,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
# Target vector: the 0/1 Legendary flag.
y = df.Legendary
y.head()

0    0
1    0
2    0
3    0
4    0
Name: Legendary, dtype: int64

In [32]:
# Feature matrix: every column except the target.
x = df.drop(columns=['Legendary'])
x.head()

Unnamed: 0,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Type 1_Bug,Type 1_Dark,Type 1_Dragon,...,Type 1_Ghost,Type 1_Grass,Type 1_Ground,Type 1_Ice,Type 1_Normal,Type 1_Poison,Type 1_Psychic,Type 1_Rock,Type 1_Steel,Type 1_Water
0,45,49,49,65,65,45,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,60,62,63,80,80,60,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,80,82,83,100,100,80,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,80,100,123,122,120,80,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,39,52,43,60,50,65,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


The data is now prepared: the Pokemon types were one-hot encoded with `pd.get_dummies`, and the feature matrix `x` and the target `y` were defined.

## Train Test Split

In [33]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=0,
)

x_train.head()

Unnamed: 0,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Type 1_Bug,Type 1_Dark,Type 1_Dragon,...,Type 1_Ghost,Type 1_Grass,Type 1_Ground,Type 1_Ice,Type 1_Normal,Type 1_Poison,Type 1_Psychic,Type 1_Rock,Type 1_Steel,Type 1_Water
172,50,65,64,44,48,43,2,0,0,0,...,0,0,0,0,0,0,0,0,0,1
76,65,90,50,85,45,55,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
64,90,110,80,100,80,95,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
765,62,55,52,109,94,109,6,0,0,0,...,0,0,0,0,0,0,0,0,0,0
52,60,95,80,60,80,30,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0


## Model Determination

To evaluate every candidate model in the same way, we create a training and predicting pipeline: it fits a model on the training data, predicts on both the training and the testing data, and records the timings, the accuracy, and the F-score (beta = 0.5) of each model.

In [34]:
from sklearn.metrics import fbeta_score
from sklearn.metrics import accuracy_score

def train_predict(learner, X_train, y_train, X_test, y_test):
    """Fit ``learner`` and score it on the training and the testing data.

    Parameters
    ----------
    learner : estimator exposing ``fit(X, y)`` (returning the fitted
        estimator) and ``predict(X)``.
    X_train, y_train : features and labels used for fitting.
    X_test, y_test : features and labels used for evaluation.

    Returns
    -------
    dict with the keys, in this order:
        ``train_time`` - seconds spent in ``fit``;
        ``pred_time``  - seconds spent predicting on the test AND the
                         training data (both calls are timed together);
        ``acc_train``, ``acc_test`` - accuracy on each split;
        ``f_train``, ``f_test``     - binary F-beta score (beta=0.5) on each split.

    A one-line confirmation naming the learner's class is printed as a side
    effect.
    """
    # Time the fit on its own.
    fit_began = time()
    learner = learner.fit(X_train, y_train)
    fit_seconds = time() - fit_began

    # Time both prediction passes as a single measurement (test first).
    predict_began = time()
    predictions_test = learner.predict(X_test)
    predictions_train = learner.predict(X_train)
    predict_seconds = time() - predict_began

    results = {
        'train_time': fit_seconds,
        'pred_time': predict_seconds,
        'acc_train': accuracy_score(y_train, predictions_train),
        'acc_test': accuracy_score(y_test, predictions_test),
        'f_train': fbeta_score(y_train, predictions_train, average='binary', beta=0.5),
        'f_test': fbeta_score(y_test, predictions_test, average='binary', beta=0.5),
    }

    # Success
    print("{} trained on all samples.".format(learner.__class__.__name__))

    return results

**Note:** Depending on which algorithms you chose, the following implementation may take some time to run!

In [53]:
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, BaggingClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression, SGDClassifier

# Candidate models, each constructed with random_state=1.
clf_A, clf_B, clf_C, clf_D, clf_E, clf_F = (
    AdaBoostClassifier(random_state=1),
    DecisionTreeClassifier(random_state=1),
    SVC(random_state=1),
    LogisticRegression(random_state=1, max_iter=1000),
    BaggingClassifier(random_state=1),
    RandomForestClassifier(random_state=1),
)

# Train and score every candidate on the same split; entries are keyed by the
# estimator's class name.
results = {}
for clf in (clf_A, clf_B, clf_C, clf_D, clf_E, clf_F):
    clf_name = clf.__class__.__name__
    results[clf_name] = train_predict(clf, x_train, y_train, x_test, y_test)
    

AdaBoostClassifier trained on all samples.
DecisionTreeClassifier trained on all samples.
SVC trained on all samples.
LogisticRegression trained on all samples.
BaggingClassifier trained on all samples.
RandomForestClassifier trained on all samples.


In [54]:
# Show the timing and score dictionary collected for each classifier.
results

{'AdaBoostClassifier': {'train_time': 0.1255040168762207,
  'pred_time': 0.023434877395629883,
  'acc_train': 0.9875,
  'acc_test': 0.93125,
  'f_train': 0.9200000000000002,
  'f_test': 0.6363636363636365},
 'DecisionTreeClassifier': {'train_time': 0.003214120864868164,
  'pred_time': 0.0014178752899169922,
  'acc_train': 1.0,
  'acc_test': 0.9125,
  'f_train': 1.0,
  'f_test': 0.5223880597014926},
 'SVC': {'train_time': 0.005173921585083008,
  'pred_time': 0.004690885543823242,
  'acc_train': 0.95,
  'acc_test': 0.93125,
  'f_train': 0.7191780821917809,
  'f_test': 0.6382978723404255},
 'LogisticRegression': {'train_time': 0.19551467895507812,
  'pred_time': 0.002522706985473633,
  'acc_train': 0.9640625,
  'acc_test': 0.9375,
  'f_train': 0.7943925233644861,
  'f_test': 0.6716417910447763},
 'BaggingClassifier': {'train_time': 0.04117989540100098,
  'pred_time': 0.006119966506958008,
  'acc_train': 0.99375,
  'acc_test': 0.93125,
  'f_train': 0.9829059829059831,
  'f_test': 0.6338028

So we are choosing RandomForestClassifier, because it has the highest f_test and acc_test scores. It also has a good pred_time.

## Model Tuning

In [62]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import make_scorer
from sklearn.ensemble import RandomForestClassifier

In [79]:
# Fit a fresh random forest on the training split, then predict the test labels.
clf = RandomForestClassifier(random_state=1)
clf.fit(x_train, y_train)

predictions = clf.predict(x_test)
predictions

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0])

In [81]:
# Show the true test labels, for comparison with the predicted array.
y_test

299    0
500    0
303    0
40     0
495    0
      ..
193    0
337    0
310    0
266    0
243    0
Name: Legendary, Length: 160, dtype: int64

In [74]:
# Report how the random-forest predictions score against the held-out labels.
print("Model\n------")

test_accuracy = accuracy_score(y_test, predictions)
print("Accuracy score on testing data: {:.4f}".format(test_accuracy))

test_fbeta = fbeta_score(y_test, predictions, beta = 0.5)
print("F-score on testing data: {:.4f}".format(test_fbeta))

Model
------
Accuracy score on testing data: 0.9437
F-score on testing data: 0.7447


## Final

First we prepared the Pokemon data. Then we needed to choose the best model for the data, so we tested several models on it. We chose RandomForestClassifier: it has the highest F-score (beta = 0.5) and the highest accuracy on the testing data.

The model could be optimized further, for example by tuning its hyperparameters.

### **Hasret Özkan**
hasretozkan.me