# Pokemon

Pokemon is a media franchise managed by The Pokemon Company, a Japanese consortium between Nintendo, Game Freak, and Creatures. The franchise was created by Satoshi Tajiri in 1995, and is centered on fictional creatures called "Pokemon", which humans, known as Pokemon Trainers, catch and train to battle each other for sport. Works within the franchise are set in the Pokemon universe.

# Data

The data contains information for 801 Pokemon. The information contained in this dataset includes Base Stats, Performance against Other Types, Height, Weight, Classification, Egg Steps, Experience Points, Abilities, etc. The data was scraped from http://serebii.net/.

# Task
- Exploratory Data Analysis (EDA)
- Scaling and Normalization
- Feature Selection
- Training Process
- Split Training and Testing
- Cross Validation
- Performance Evaluation
- Refined composite multiscale entropy (RCME)
- R2 Score

## Exploratory Data Analysis (EDA)

In [108]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import cm
import numpy as np

%matplotlib inline

In [109]:
# Read the Pokémon table from the CSV that sits next to the notebook and
# preview its first five rows.
pokemons = pd.read_csv(filepath_or_buffer='pokemon.csv')
pokemons.head(n=5)

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
0,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,1,65,65,45,grass,poison,6.9,1,0
1,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,2,80,80,60,grass,poison,13.0,1,0
2,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,3,122,120,80,grass,poison,100.0,1,0
3,"['Blaze', 'Solar Power']",0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,88.1,4,60,50,65,fire,,8.5,1,0
4,"['Blaze', 'Solar Power']",0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,88.1,5,80,65,80,fire,,19.0,1,0


In [110]:
# Summary statistics for the numeric columns, transposed so that each
# column of the dataset becomes one row of the report.
pokemons.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
against_bug,801.0,0.9962547,0.597248,0.25,0.5,1.0,1.0,4.0
against_dark,801.0,1.057116,0.438142,0.25,1.0,1.0,1.0,4.0
against_dragon,801.0,0.968789,0.353058,0.0,1.0,1.0,1.0,2.0
against_electric,801.0,1.07397,0.654962,0.0,0.5,1.0,1.0,4.0
against_fairy,801.0,1.068976,0.522167,0.25,1.0,1.0,1.0,4.0
against_fight,801.0,1.065543,0.717251,0.0,0.5,1.0,1.0,4.0
against_fire,801.0,1.135456,0.691853,0.25,0.5,1.0,2.0,4.0
against_flying,801.0,1.192884,0.604488,0.25,1.0,1.0,1.0,4.0
against_ghost,801.0,0.9850187,0.558256,0.0,1.0,1.0,1.0,4.0
against_grass,801.0,1.03402,0.788896,0.25,0.5,1.0,1.0,4.0


In [111]:
# Count nulls per column once, then report only the columns that actually
# have missing entries, followed by the grand total across the frame.
null_counts = pokemons.isnull().sum()
print(null_counts[null_counts > 0])
print("Total Missing values : ", null_counts.sum())

height_m            20
percentage_male     98
type2              384
weight_kg           20
dtype: int64
Total Missing values :  522


In [112]:
# Print the frame's structure: index range, and for every column its
# non-null count and dtype.
pokemons.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 801 entries, 0 to 800
Data columns (total 41 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   abilities          801 non-null    object 
 1   against_bug        801 non-null    float64
 2   against_dark       801 non-null    float64
 3   against_dragon     801 non-null    float64
 4   against_electric   801 non-null    float64
 5   against_fairy      801 non-null    float64
 6   against_fight      801 non-null    float64
 7   against_fire       801 non-null    float64
 8   against_flying     801 non-null    float64
 9   against_ghost      801 non-null    float64
 10  against_grass      801 non-null    float64
 11  against_ground     801 non-null    float64
 12  against_ice        801 non-null    float64
 13  against_normal     801 non-null    float64
 14  against_poison     801 non-null    float64
 15  against_psychic    801 non-null    float64
 16  against_rock       801 non

In [113]:
# Partition the column names by dtype (object vs. everything else) and
# collect the columns that contain at least one missing value.
null_cols = pokemons.columns[pokemons.isnull().any()]
obj_cols = pokemons.select_dtypes(include=['object']).columns.tolist()
num_cols = pokemons.select_dtypes(exclude=['object']).columns.tolist()

print("Number of numeric columns:", len(num_cols))
print("Number of object columns:", len(obj_cols))
print("Number of null columns:", len(null_cols))

Number of numeric columns: 34
Number of object columns: 7
Number of null columns: 4


In [114]:
# Peek at the columns that contain missing values.
pokemons.loc[:, null_cols].head()

Unnamed: 0,height_m,percentage_male,type2,weight_kg
0,0.7,88.1,poison,6.9
1,1.0,88.1,poison,13.0
2,2.0,88.1,poison,100.0
3,0.6,88.1,,8.5
4,1.1,88.1,,19.0


In [115]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Number of Pokémon per primary / secondary type, sorted ascending so the
# largest group ends up at the top of a horizontal bar chart. Rows with a
# missing type are dropped by groupby. Type names are capitalised so they
# match the keys of the colour table below.
count_type1 = pokemons.groupby('type1').size().sort_values(ascending=True)
count_type1.index = count_type1.index.map(str.capitalize)
count_type2 = pokemons.groupby('type2').size().sort_values(ascending=True)
count_type2.index = count_type2.index.map(str.capitalize)

# Bar colour for each (capitalised) type name.
dict_color_types = {
    "Water": "rgb(51, 153, 255)",
    "Normal": "#e0e0d1",
    "Grass": "#47d147",
    "Bug": "#669900",
    "Psychic": "#ff33cc",
    "Fire": "#ff0000",
    "Rock": "#ffd24d",
    "Electric": "#ffff00",
    "Poison": "#990099",
    "Ground": "#cc9900",
    "Dark": "#1a001a",
    "Fighting": "#ff4000",
    "Ghost": "#1a001a",
    "Dragon": "#0033cc",
    "Steel": "#999966",
    "Ice": "#00ffff",
    "Fairy": "#ffccee",
    "Flying": "#d1e0e0",
}

fig = make_subplots(rows=1, cols=2, subplot_titles=("Type 1", "Type 2"))

# One horizontal bar trace per subplot. Each panel gets its own fixed x-axis
# upper bound so the count labels drawn outside the bars stay visible.
panels = zip([count_type1, count_type2], [130, 500])
for col_no, (counts, x_upper) in enumerate(panels, start=1):
    bars = go.Bar(
        y=counts.index,
        x=counts.values,
        text=counts.values,
        textposition='outside',
        orientation='h',
        marker_color=counts.index.map(dict_color_types),
        name="Type " + str(col_no),
    )
    fig.add_trace(bars, row=1, col=col_no)
    fig.update_xaxes(visible=False, range=[0, x_upper], row=1, col=col_no)

fig.update_layout(
    title='Nº of Pokemon Types',
    template='plotly_white',
    width=1000,
    height=600,
    showlegend=False,
    margin={"l": 40, "r": 10, "t": 100, "b": 20},
    yaxis_title="Type",
)

fig.show()

In [116]:
# Secondary-type counts, computed separately for non-legendary and legendary
# Pokémon so that each subplot shows exactly the group named in its title.
# (Previously the "Non-legendary" panel was built from every Pokémon,
# legendaries included.) Rows with a missing type2 are dropped by groupby.
count_type2_nonleg = (
    pokemons[pokemons.is_legendary == 0]
    .groupby('type2').size().sort_values(ascending=True)
)
count_type2_nonleg.index = count_type2_nonleg.index.map(lambda x: x.capitalize())
count_type2_leg = (
    pokemons[pokemons.is_legendary == 1]
    .groupby('type2').size().sort_values(ascending=True)
)
count_type2_leg.index = count_type2_leg.index.map(lambda x: x.capitalize())

panel_titles = ("Non-legendary", "Legendary")
fig = make_subplots(rows=1, cols=2, subplot_titles=panel_titles)

for idx, typ in enumerate([count_type2_nonleg, count_type2_leg]):
    fig.add_trace(go.Bar(y=typ.index, x=typ.values,
                         text=typ.values, textposition='outside',
                         orientation='h',
                         # colours come from the type -> colour table built
                         # in the previous plotting cell
                         marker_color=typ.index.map(dict_color_types),
                         # name the trace after its panel, not "Type N"
                         name=panel_titles[idx],
                         ),
                  row=1, col=idx+1)
    # Same fixed range on both panels so the bar lengths are directly
    # comparable and the outside text labels are not clipped.
    fig.update_xaxes(visible=False, range=[0, 500], row=1, col=idx+1)

fig.update_layout(title="Nº of Legendary Pokémon's Types",
                  template='plotly_white', width=1000, height=600,
                  showlegend=False,
                  margin=dict(l=40, r=10, t=100, b=20),
                  yaxis_title="Type",
                  )

fig.show()

In [117]:
print("The strongest Pokémon! (including their mega-evolution)")
# Rank by total base stats, highest first, and show the top 20.
ranking_cols = ["base_total", "name", "is_legendary"]
ranked = pokemons[ranking_cols].sort_values(by="base_total", ascending=False)
ranked.head(20)

The strongest Pokémon! (including their mega-evolution)


Unnamed: 0,base_total,name,is_legendary
149,780,Mewtwo,1
383,780,Rayquaza,1
382,770,Groudon,1
381,770,Kyogre,1
492,720,Arceus,1
717,708,Zygarde,1
380,700,Latios,1
372,700,Salamence,0
247,700,Tyranitar,0
444,700,Garchomp,0


## Scaling and Normalization

In [126]:
from sklearn.preprocessing import StandardScaler

# Column names of the model inputs (the six base stats) and of the target.
x = ['sp_attack', 'sp_defense', 'attack', 'defense', 'speed', 'hp']
y = ['is_legendary']

# Standardise the feature columns (zero mean, unit variance) and write the
# scaled values back into the frame.
# NOTE(review): the scaler is fitted on all rows, before the train/test
# split performed later in the notebook, so test-set statistics influence
# the scaling — consider fitting on the training rows only.
scaler = StandardScaler()
scaler.fit(pokemons[x])
pokemons[x] = scaler.transform(pokemons[x])
pokemons[x].head()

Unnamed: 0,sp_attack,sp_defense,attack,defense,speed,hp
0,-0.195025,-0.211687,-0.897909,-0.780773,-0.738486,-0.902083
1,0.268888,0.325465,-0.493413,-0.325488,-0.219268,-0.337312
2,1.567846,1.757871,0.68896,1.625735,0.473022,0.415717
3,-0.349663,-0.748839,-0.804564,-0.975896,-0.046196,-1.127992
4,0.268888,-0.211687,-0.431183,-0.48809,0.473022,-0.412614


## Feature Selection

In [127]:
from sklearn.preprocessing import LabelEncoder

# Encode the target column as consecutive integer class labels and preview
# the encoded values.
le = LabelEncoder()
le.fit(pokemons['is_legendary'])
pokemons['is_legendary'] = le.transform(pokemons['is_legendary'])
pokemons['is_legendary'].head()

0    0
1    0
2    0
3    0
4    0
Name: is_legendary, dtype: int64

## Split Training and Testing

In [128]:
from sklearn.model_selection import train_test_split

# Feature matrix: the scaled base-stat columns listed in `x`.
X = pokemons[x]
# Target as a one-column DataFrame. The column is selected by name rather
# than via `pokemons[y]`: this cell rebinds `y` from a list of column names
# to the target frame, so indexing with `y` again would fail if the cell
# were re-run.
y = pokemons[['is_legendary']]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Cross Validation

In [129]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation on the training split only, so the held-out test
# set stays untouched until the final evaluation. Each fold fits a fresh
# LinearRegression and is scored with R2.
cv_scores = cross_val_score(LinearRegression(), X_train, y_train, cv=5, scoring='r2')
print("Cross-validation R2 per fold:", cv_scores)
print("Cross-validation R2 mean: %.4f (std: %.4f)" % (cv_scores.mean(), cv_scores.std()))

# Final model: fit on the whole training split and predict the test split.
# NOTE(review): the target is a 0/1 label, so this is a linear probability
# model; the predictions are continuous scores, not class labels.
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

## Performance Evaluation

In [132]:
from sklearn.metrics import r2_score

# Coefficient of determination of the test-set predictions.
test_r2 = r2_score(y_true=y_test, y_pred=y_pred)
print("R2 Score: ", test_r2)

R2 Score:  0.22831086622621277


## Mean Squared Error (MSE)

In [131]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the test-set predictions.
test_mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("MSE: ", test_mse)

MSE:  0.07663006173888848


## Mean Absolute Error (MAE)

In [130]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error of the test-set predictions.
test_mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print("MAE: ", test_mae)

MAE:  0.18000894311067803
