In [73]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score

from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier



In [3]:
df = pd.read_csv('pokemon_alopez247.csv')
pd.set_option('display.max_columns', None)
df.head()

Unnamed: 0,Number,Name,Type_1,Type_2,Total,HP,Attack,Defense,Sp_Atk,Sp_Def,Speed,Generation,isLegendary,Color,hasGender,Pr_Male,Egg_Group_1,Egg_Group_2,hasMegaEvolution,Height_m,Weight_kg,Catch_Rate,Body_Style
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False,Green,True,0.875,Monster,Grass,False,0.71,6.9,45,quadruped
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False,Green,True,0.875,Monster,Grass,False,0.99,13.0,45,quadruped
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False,Green,True,0.875,Monster,Grass,True,2.01,100.0,45,quadruped
3,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False,Red,True,0.875,Monster,Dragon,False,0.61,8.5,45,bipedal_tailed
4,5,Charmeleon,Fire,,405,58,64,58,80,65,80,1,False,Red,True,0.875,Monster,Dragon,False,1.09,19.0,45,bipedal_tailed


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 721 entries, 0 to 720
Data columns (total 23 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Number            721 non-null    int64  
 1   Name              721 non-null    object 
 2   Type_1            721 non-null    object 
 3   Type_2            350 non-null    object 
 4   Total             721 non-null    int64  
 5   HP                721 non-null    int64  
 6   Attack            721 non-null    int64  
 7   Defense           721 non-null    int64  
 8   Sp_Atk            721 non-null    int64  
 9   Sp_Def            721 non-null    int64  
 10  Speed             721 non-null    int64  
 11  Generation        721 non-null    int64  
 12  isLegendary       721 non-null    bool   
 13  Color             721 non-null    object 
 14  hasGender         721 non-null    bool   
 15  Pr_Male           644 non-null    float64
 16  Egg_Group_1       721 non-null    object 
 1

In [11]:
df['Pr_Male'].fillna(0,inplace=True)

In [12]:
df['Color'].value_counts()

Blue      134
Brown     110
Green      79
Red        75
Grey       69
Purple     65
Yellow     64
White      52
Pink       41
Black      32
Name: Color, dtype: int64

In [14]:
df['Body_Style'].value_counts()

bipedal_tailed      158
quadruped           135
bipedal_tailless    109
two_wings            63
head_arms            39
head_only            34
with_fins            31
insectoid            30
head_base            30
serpentine_body      29
four_wings           18
head_legs            17
multiple_bodies      15
several_limbs        13
Name: Body_Style, dtype: int64

In [15]:
df.drop(['Number','Name'],axis=1, inplace=True)

In [17]:
df.head()

Unnamed: 0,Type_1,Type_2,Total,HP,Attack,Defense,Sp_Atk,Sp_Def,Speed,Generation,isLegendary,Color,hasGender,Pr_Male,Egg_Group_1,Egg_Group_2,hasMegaEvolution,Height_m,Weight_kg,Catch_Rate,Body_Style
0,Grass,Poison,318,45,49,49,65,65,45,1,False,Green,True,0.875,Monster,Grass,False,0.71,6.9,45,quadruped
1,Grass,Poison,405,60,62,63,80,80,60,1,False,Green,True,0.875,Monster,Grass,False,0.99,13.0,45,quadruped
2,Grass,Poison,525,80,82,83,100,100,80,1,False,Green,True,0.875,Monster,Grass,True,2.01,100.0,45,quadruped
3,Fire,,309,39,52,43,60,50,65,1,False,Red,True,0.875,Monster,Dragon,False,0.61,8.5,45,bipedal_tailed
4,Fire,,405,58,64,58,80,65,80,1,False,Red,True,0.875,Monster,Dragon,False,1.09,19.0,45,bipedal_tailed


In [20]:
df[['hasGender','hasMegaEvolution']] = df[['hasGender','hasMegaEvolution']] * 1

In [21]:
df.head()

Unnamed: 0,Type_1,Type_2,Total,HP,Attack,Defense,Sp_Atk,Sp_Def,Speed,Generation,isLegendary,Color,hasGender,Pr_Male,Egg_Group_1,Egg_Group_2,hasMegaEvolution,Height_m,Weight_kg,Catch_Rate,Body_Style
0,Grass,Poison,318,45,49,49,65,65,45,1,False,Green,1,0.875,Monster,Grass,0,0.71,6.9,45,quadruped
1,Grass,Poison,405,60,62,63,80,80,60,1,False,Green,1,0.875,Monster,Grass,0,0.99,13.0,45,quadruped
2,Grass,Poison,525,80,82,83,100,100,80,1,False,Green,1,0.875,Monster,Grass,1,2.01,100.0,45,quadruped
3,Fire,,309,39,52,43,60,50,65,1,False,Red,1,0.875,Monster,Dragon,0,0.61,8.5,45,bipedal_tailed
4,Fire,,405,58,64,58,80,65,80,1,False,Red,1,0.875,Monster,Dragon,0,1.09,19.0,45,bipedal_tailed


In [30]:
cat_df = df.select_dtypes(object)

In [34]:
cat_dummies = pd.get_dummies(cat_df)

In [35]:
ndf = df.copy()

In [40]:
ndf.drop(cat_df.columns.to_list(),axis=1,inplace=True)

In [48]:
total = pd.concat([ndf,cat_dummies],axis=1)

In [52]:
total.head()

Unnamed: 0,Total,HP,Attack,Defense,Sp_Atk,Sp_Def,Speed,Generation,isLegendary,hasGender,Pr_Male,hasMegaEvolution,Height_m,Weight_kg,Catch_Rate,Type_1_Bug,Type_1_Dark,Type_1_Dragon,Type_1_Electric,Type_1_Fairy,Type_1_Fighting,Type_1_Fire,Type_1_Flying,Type_1_Ghost,Type_1_Grass,Type_1_Ground,Type_1_Ice,Type_1_Normal,Type_1_Poison,Type_1_Psychic,Type_1_Rock,Type_1_Steel,Type_1_Water,Type_2_Bug,Type_2_Dark,Type_2_Dragon,Type_2_Electric,Type_2_Fairy,Type_2_Fighting,Type_2_Fire,Type_2_Flying,Type_2_Ghost,Type_2_Grass,Type_2_Ground,Type_2_Ice,Type_2_Normal,Type_2_Poison,Type_2_Psychic,Type_2_Rock,Type_2_Steel,Type_2_Water,Color_Black,Color_Blue,Color_Brown,Color_Green,Color_Grey,Color_Pink,Color_Purple,Color_Red,Color_White,Color_Yellow,Egg_Group_1_Amorphous,Egg_Group_1_Bug,Egg_Group_1_Ditto,Egg_Group_1_Dragon,Egg_Group_1_Fairy,Egg_Group_1_Field,Egg_Group_1_Flying,Egg_Group_1_Grass,Egg_Group_1_Human-Like,Egg_Group_1_Mineral,Egg_Group_1_Monster,Egg_Group_1_Undiscovered,Egg_Group_1_Water_1,Egg_Group_1_Water_2,Egg_Group_1_Water_3,Egg_Group_2_Amorphous,Egg_Group_2_Bug,Egg_Group_2_Dragon,Egg_Group_2_Fairy,Egg_Group_2_Field,Egg_Group_2_Flying,Egg_Group_2_Grass,Egg_Group_2_Human-Like,Egg_Group_2_Mineral,Egg_Group_2_Monster,Egg_Group_2_Water_1,Egg_Group_2_Water_2,Egg_Group_2_Water_3,Body_Style_bipedal_tailed,Body_Style_bipedal_tailless,Body_Style_four_wings,Body_Style_head_arms,Body_Style_head_base,Body_Style_head_legs,Body_Style_head_only,Body_Style_insectoid,Body_Style_multiple_bodies,Body_Style_quadruped,Body_Style_serpentine_body,Body_Style_several_limbs,Body_Style_two_wings,Body_Style_with_fins
0,318,45,49,49,65,65,45,1,False,1,0.875,0,0.71,6.9,45,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
1,405,60,62,63,80,80,60,1,False,1,0.875,0,0.99,13.0,45,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2,525,80,82,83,100,100,80,1,False,1,0.875,1,2.01,100.0,45,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,309,39,52,43,60,50,65,1,False,1,0.875,0,0.61,8.5,45,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,405,58,64,58,80,65,80,1,False,1,0.875,0,1.09,19.0,45,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [64]:
feature = total.drop('isLegendary',axis=1)
target = total['isLegendary']*1 

In [65]:
feature.head()

Unnamed: 0,Total,HP,Attack,Defense,Sp_Atk,Sp_Def,Speed,Generation,hasGender,Pr_Male,hasMegaEvolution,Height_m,Weight_kg,Catch_Rate,Type_1_Bug,Type_1_Dark,Type_1_Dragon,Type_1_Electric,Type_1_Fairy,Type_1_Fighting,Type_1_Fire,Type_1_Flying,Type_1_Ghost,Type_1_Grass,Type_1_Ground,Type_1_Ice,Type_1_Normal,Type_1_Poison,Type_1_Psychic,Type_1_Rock,Type_1_Steel,Type_1_Water,Type_2_Bug,Type_2_Dark,Type_2_Dragon,Type_2_Electric,Type_2_Fairy,Type_2_Fighting,Type_2_Fire,Type_2_Flying,Type_2_Ghost,Type_2_Grass,Type_2_Ground,Type_2_Ice,Type_2_Normal,Type_2_Poison,Type_2_Psychic,Type_2_Rock,Type_2_Steel,Type_2_Water,Color_Black,Color_Blue,Color_Brown,Color_Green,Color_Grey,Color_Pink,Color_Purple,Color_Red,Color_White,Color_Yellow,Egg_Group_1_Amorphous,Egg_Group_1_Bug,Egg_Group_1_Ditto,Egg_Group_1_Dragon,Egg_Group_1_Fairy,Egg_Group_1_Field,Egg_Group_1_Flying,Egg_Group_1_Grass,Egg_Group_1_Human-Like,Egg_Group_1_Mineral,Egg_Group_1_Monster,Egg_Group_1_Undiscovered,Egg_Group_1_Water_1,Egg_Group_1_Water_2,Egg_Group_1_Water_3,Egg_Group_2_Amorphous,Egg_Group_2_Bug,Egg_Group_2_Dragon,Egg_Group_2_Fairy,Egg_Group_2_Field,Egg_Group_2_Flying,Egg_Group_2_Grass,Egg_Group_2_Human-Like,Egg_Group_2_Mineral,Egg_Group_2_Monster,Egg_Group_2_Water_1,Egg_Group_2_Water_2,Egg_Group_2_Water_3,Body_Style_bipedal_tailed,Body_Style_bipedal_tailless,Body_Style_four_wings,Body_Style_head_arms,Body_Style_head_base,Body_Style_head_legs,Body_Style_head_only,Body_Style_insectoid,Body_Style_multiple_bodies,Body_Style_quadruped,Body_Style_serpentine_body,Body_Style_several_limbs,Body_Style_two_wings,Body_Style_with_fins
0,318,45,49,49,65,65,45,1,1,0.875,0,0.71,6.9,45,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
1,405,60,62,63,80,80,60,1,1,0.875,0,0.99,13.0,45,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2,525,80,82,83,100,100,80,1,1,0.875,1,2.01,100.0,45,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,309,39,52,43,60,50,65,1,1,0.875,0,0.61,8.5,45,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,405,58,64,58,80,65,80,1,1,0.875,0,1.09,19.0,45,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [66]:
X_train , X_test , y_train , y_test = train_test_split(feature,target,test_size=0.2,random_state=123)

In [67]:
scaller = MinMaxScaler(feature_range=(0,1))
X_scalled = pd.DataFrame(data=scaller.fit_transform(X_train),columns=X_train.columns)
Xt_scalled = pd.DataFrame(data=scaller.transform(X_test),columns=X_test.columns)

In [74]:
os = SMOTE()
X_TRAIN_BALANCE , Y_TRAIN_BALANCE = os.fit_resample(X_scalled,y_train)
X_TEST_BALANCE , Y_TEST_BALANCE = os.fit_resample(Xt_scalled,y_test)

In [77]:
lg = LogisticRegression()
lg.fit(X_TRAIN_BALANCE,Y_TRAIN_BALANCE)
prediction = lg.predict(X_TEST_BALANCE)
F1 = f1_score(Y_TEST_BALANCE,prediction)
ACCURACY = accuracy_score(Y_TEST_BALANCE,prediction)
PRECISION = precision_score(Y_TEST_BALANCE,prediction)
RECALL = recall_score(Y_TEST_BALANCE,prediction)
m1 = pd.DataFrame({"F1SCORE":F1 , 'PRECISION':PRECISION, 'RECALL':RECALL , 'ACCURACY':ACCURACY},index=['LOGISTIC_REGRESSIO'])
m1

Unnamed: 0,F1SCORE,PRECISION,RECALL,ACCURACY
LOGISTIC_REGRESSIO,0.98917,0.978571,1.0,0.989051


In [78]:
tree = DecisionTreeClassifier()
tree.fit(X_TRAIN_BALANCE,Y_TRAIN_BALANCE)
prediction = tree.predict(X_TEST_BALANCE)
F1 = f1_score(Y_TEST_BALANCE,prediction)
ACCURACY = accuracy_score(Y_TEST_BALANCE,prediction)
PRECISION = precision_score(Y_TEST_BALANCE,prediction)
RECALL = recall_score(Y_TEST_BALANCE,prediction)
m2 = pd.DataFrame({"F1SCORE":F1 , 'PRECISION':PRECISION, 'RECALL':RECALL , 'ACCURACY':ACCURACY},index=['DECISION_TREE'])
m2

Unnamed: 0,F1SCORE,PRECISION,RECALL,ACCURACY
DECISION_TREE,0.97037,0.984962,0.956204,0.970803


In [79]:
forest = RandomForestClassifier()
forest.fit(X_TRAIN_BALANCE,Y_TRAIN_BALANCE)
prediction = forest.predict(X_TEST_BALANCE)
F1 = f1_score(Y_TEST_BALANCE,prediction)
ACCURACY = accuracy_score(Y_TEST_BALANCE,prediction)
PRECISION = precision_score(Y_TEST_BALANCE,prediction)
RECALL = recall_score(Y_TEST_BALANCE,prediction)
m3 = pd.DataFrame({"F1SCORE":F1 , 'PRECISION':PRECISION, 'RECALL':RECALL , 'ACCURACY':ACCURACY},index=['RANDOM_FOREST'])
m3

Unnamed: 0,F1SCORE,PRECISION,RECALL,ACCURACY
RANDOM_FOREST,0.992754,0.985612,1.0,0.992701


In [80]:
xgb = XGBClassifier()
xgb.fit(X_TRAIN_BALANCE,Y_TRAIN_BALANCE)
prediction = xgb.predict(X_TEST_BALANCE)
F1 = f1_score(Y_TEST_BALANCE,prediction)
ACCURACY = accuracy_score(Y_TEST_BALANCE,prediction)
PRECISION = precision_score(Y_TEST_BALANCE,prediction)
RECALL = recall_score(Y_TEST_BALANCE,prediction)
m4 = pd.DataFrame({"F1SCORE":F1 , 'PRECISION':PRECISION, 'RECALL':RECALL , 'ACCURACY':ACCURACY},index=['XGB_REGRESSOR'])
m4

Unnamed: 0,F1SCORE,PRECISION,RECALL,ACCURACY
XGB_REGRESSOR,0.992754,0.985612,1.0,0.992701


In [81]:
knn = KNeighborsClassifier()
knn.fit(X_TRAIN_BALANCE,Y_TRAIN_BALANCE)
prediction = knn.predict(X_TEST_BALANCE)
F1 = f1_score(Y_TEST_BALANCE,prediction)
ACCURACY = accuracy_score(Y_TEST_BALANCE,prediction)
PRECISION = precision_score(Y_TEST_BALANCE,prediction)
RECALL = recall_score(Y_TEST_BALANCE,prediction)
m5 = pd.DataFrame({"F1SCORE":F1 , 'PRECISION':PRECISION, 'RECALL':RECALL , 'ACCURACY':ACCURACY},index=['Kneighbors'])
m5

Unnamed: 0,F1SCORE,PRECISION,RECALL,ACCURACY
Kneighbors,0.98917,0.978571,1.0,0.989051


In [82]:
sv = SVC()
sv.fit(X_TRAIN_BALANCE,Y_TRAIN_BALANCE)
prediction = sv.predict(X_TEST_BALANCE)
F1 = f1_score(Y_TEST_BALANCE,prediction)
ACCURACY = accuracy_score(Y_TEST_BALANCE,prediction)
PRECISION = precision_score(Y_TEST_BALANCE,prediction)
RECALL = recall_score(Y_TEST_BALANCE,prediction)
m6 = pd.DataFrame({"F1SCORE":F1 , 'PRECISION':PRECISION, 'RECALL':RECALL , 'ACCURACY':ACCURACY},index=['SVC'])
m6

Unnamed: 0,F1SCORE,PRECISION,RECALL,ACCURACY
SVC,0.985401,0.985401,0.985401,0.985401


In [86]:
result = pd.concat([m1,m2,m3,m4,m5,m6],axis=0)
result.style.highlight_max(axis=0)

Unnamed: 0,F1SCORE,PRECISION,RECALL,ACCURACY
LOGISTIC_REGRESSIO,0.98917,0.978571,1.0,0.989051
DECISION_TREE,0.97037,0.984962,0.956204,0.970803
RANDOM_FOREST,0.992754,0.985612,1.0,0.992701
XGB_REGRESSOR,0.992754,0.985612,1.0,0.992701
Kneighbors,0.98917,0.978571,1.0,0.989051
SVC,0.985401,0.985401,0.985401,0.985401
