In [1]:
## for data
import pandas as pd
import numpy as np
## for plotting
import matplotlib.pyplot as plt
import seaborn as sns
## for statistical tests
import scipy
import statsmodels.formula.api as smf
import statsmodels.api as sm
## for machine learning
from sklearn import model_selection, preprocessing, feature_selection, ensemble, linear_model, metrics, decomposition


In [2]:
sns.set_theme(style='darkgrid', palette=sns.color_palette('bright6'))

In [3]:
def utils_recognize_type(dtf, col, max_cat=20):
    if (dtf[col].dtype == "O") | (dtf[col].nunique() < max_cat):
        return "cat"
    else:
        return "num"

In [4]:
dtf = pd.read_csv('../assets/pokemon.csv')
dtf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1007 entries, 0 to 1006
Data columns (total 24 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   pokemon_id                    1007 non-null   int64  
 1   pokemon_name                  1007 non-null   object 
 2   base_attack                   1007 non-null   int64  
 3   base_defense                  1007 non-null   int64  
 4   base_stamina                  1007 non-null   int64  
 5   type                          1007 non-null   object 
 6   rarity                        1007 non-null   object 
 7   charged_moves                 1007 non-null   object 
 8   fast_moves                    1007 non-null   object 
 9   candy_required                471 non-null    float64
 10  distance                      1007 non-null   int64  
 11  max_cp                        1007 non-null   int64  
 12  attack_probability            904 non-null    float64
 13  bas

In [5]:
dtf.head()

Unnamed: 0,pokemon_id,pokemon_name,base_attack,base_defense,base_stamina,type,rarity,charged_moves,fast_moves,candy_required,...,base_flee_rate,dodge_probability,max_pokemon_action_frequency,min_pokemon_action_frequency,found_egg,found_evolution,found_wild,found_research,found_raid,found_photobomb
0,1,Bulbasaur,118,111,128,"['Grass', 'Poison']",Standard,"['Sludge Bomb', 'Seed Bomb', 'Power Whip']","['Vine Whip', 'Tackle']",,...,-1.0,0.15,1.6,0.2,True,False,True,True,True,True
1,2,Ivysaur,151,143,155,"['Grass', 'Poison']",Standard,"['Sludge Bomb', 'Solar Beam', 'Power Whip']","['Razor Leaf', 'Vine Whip']",25.0,...,-1.0,0.15,1.6,0.2,False,True,True,True,True,True
2,3,Venusaur,198,189,190,"['Grass', 'Poison']",Standard,"['Sludge Bomb', 'Petal Blizzard', 'Solar Beam']","['Razor Leaf', 'Vine Whip']",100.0,...,-1.0,0.15,1.6,0.2,False,True,True,True,True,True
3,4,Charmander,116,93,118,['Fire'],Standard,"['Flame Charge', 'Flame Burst', 'Flamethrower']","['Ember', 'Scratch']",,...,-1.0,0.15,1.6,0.2,True,False,True,True,True,True
4,5,Charmeleon,158,126,151,['Fire'],Standard,"['Fire Punch', 'Flame Burst', 'Flamethrower']","['Ember', 'Fire Fang']",25.0,...,-1.0,0.15,1.6,0.2,False,True,True,True,True,True


In [6]:
dtf.type.unique()

array(["['Grass', 'Poison']", "['Fire']", "['Fire', 'Flying']",
       "['Water']", "['Bug']", "['Bug', 'Flying']", "['Bug', 'Poison']",
       "['Normal', 'Flying']", "['Dark', 'Normal']", "['Poison']",
       "['Electric']", "['Electric', 'Psychic']", "['Ice', 'Steel']",
       "['Poison', 'Ground']", "['Fairy']", "['Ice']", "['Ice', 'Fairy']",
       "['Normal', 'Fairy']", "['Poison', 'Flying']", "['Bug', 'Grass']",
       "['Ground', 'Steel']", "['Dark']", "['Fighting']",
       "['Fire', 'Rock']", "['Water', 'Fighting']", "['Psychic']",
       "['Water', 'Poison']", "['Rock', 'Electric']",
       "['Psychic', 'Fairy']", "['Water', 'Psychic']",
       "['Electric', 'Steel']", "['Water', 'Ice']", "['Poison', 'Dark']",
       "['Ghost', 'Poison']", "['Rock', 'Ground']",
       "['Electric', 'Grass']", "['Grass', 'Psychic']",
       "['Grass', 'Dragon']", "['Ground']", "['Fire', 'Ghost']",
       "['Normal']", "['Poison', 'Fairy']", "['Ground', 'Rock']",
       "['Grass']", "['Ice', '

In [7]:
def str_to_str_list(src: str) -> str:
    res = []
    s = src.replace('[', '').replace(']','').replace(", ", ",").replace("'", "")
    res = s.split(',')
    return res

In [8]:
def flatten_list(src: list) -> list:
    return [ x for xs in src for x in xs]

In [9]:
dtf.type.unique()

array(["['Grass', 'Poison']", "['Fire']", "['Fire', 'Flying']",
       "['Water']", "['Bug']", "['Bug', 'Flying']", "['Bug', 'Poison']",
       "['Normal', 'Flying']", "['Dark', 'Normal']", "['Poison']",
       "['Electric']", "['Electric', 'Psychic']", "['Ice', 'Steel']",
       "['Poison', 'Ground']", "['Fairy']", "['Ice']", "['Ice', 'Fairy']",
       "['Normal', 'Fairy']", "['Poison', 'Flying']", "['Bug', 'Grass']",
       "['Ground', 'Steel']", "['Dark']", "['Fighting']",
       "['Fire', 'Rock']", "['Water', 'Fighting']", "['Psychic']",
       "['Water', 'Poison']", "['Rock', 'Electric']",
       "['Psychic', 'Fairy']", "['Water', 'Psychic']",
       "['Electric', 'Steel']", "['Water', 'Ice']", "['Poison', 'Dark']",
       "['Ghost', 'Poison']", "['Rock', 'Ground']",
       "['Electric', 'Grass']", "['Grass', 'Psychic']",
       "['Grass', 'Dragon']", "['Ground']", "['Fire', 'Ghost']",
       "['Normal']", "['Poison', 'Fairy']", "['Ground', 'Rock']",
       "['Grass']", "['Ice', '

In [10]:
types = set(flatten_list([str_to_str_list(x) for x in dtf.type.unique()]))
types

{'Bug',
 'Dark',
 'Dragon',
 'Electric',
 'Fairy',
 'Fighting',
 'Fire',
 'Flying',
 'Ghost',
 'Grass',
 'Ground',
 'Ice',
 'Normal',
 'Poison',
 'Psychic',
 'Rock',
 'Steel',
 'Water'}

In [11]:
dtf['type'] = [str_to_str_list(x) for x in list(dtf.type)]

In [12]:
dtf.type

0          [Grass, Poison]
1          [Grass, Poison]
2          [Grass, Poison]
3                   [Fire]
4                   [Fire]
               ...        
1002          [Dark, Fire]
1003        [Dragon, Dark]
1004     [Fairy, Fighting]
1005    [Fighting, Dragon]
1006    [Electric, Dragon]
Name: type, Length: 1007, dtype: object

In [13]:
for type in types:
    M = [type]
    dtf['Is'+type] = dtf.type.isin(M)

In [14]:
for type in types:
    M = [type]
    print(M)

['Grass']
['Ground']
['Normal']
['Ghost']
['Bug']
['Water']
['Flying']
['Steel']
['Fairy']
['Dark']
['Electric']
['Poison']
['Ice']
['Rock']
['Psychic']
['Fire']
['Dragon']
['Fighting']


In [15]:
dtf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1007 entries, 0 to 1006
Data columns (total 42 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   pokemon_id                    1007 non-null   int64  
 1   pokemon_name                  1007 non-null   object 
 2   base_attack                   1007 non-null   int64  
 3   base_defense                  1007 non-null   int64  
 4   base_stamina                  1007 non-null   int64  
 5   type                          1007 non-null   object 
 6   rarity                        1007 non-null   object 
 7   charged_moves                 1007 non-null   object 
 8   fast_moves                    1007 non-null   object 
 9   candy_required                471 non-null    float64
 10  distance                      1007 non-null   int64  
 11  max_cp                        1007 non-null   int64  
 12  attack_probability            904 non-null    float64
 13  bas

In [16]:
dtf.head()

Unnamed: 0,pokemon_id,pokemon_name,base_attack,base_defense,base_stamina,type,rarity,charged_moves,fast_moves,candy_required,...,IsFairy,IsDark,IsElectric,IsPoison,IsIce,IsRock,IsPsychic,IsFire,IsDragon,IsFighting
0,1,Bulbasaur,118,111,128,"[Grass, Poison]",Standard,"['Sludge Bomb', 'Seed Bomb', 'Power Whip']","['Vine Whip', 'Tackle']",,...,False,False,False,False,False,False,False,False,False,False
1,2,Ivysaur,151,143,155,"[Grass, Poison]",Standard,"['Sludge Bomb', 'Solar Beam', 'Power Whip']","['Razor Leaf', 'Vine Whip']",25.0,...,False,False,False,False,False,False,False,False,False,False
2,3,Venusaur,198,189,190,"[Grass, Poison]",Standard,"['Sludge Bomb', 'Petal Blizzard', 'Solar Beam']","['Razor Leaf', 'Vine Whip']",100.0,...,False,False,False,False,False,False,False,False,False,False
3,4,Charmander,116,93,118,[Fire],Standard,"['Flame Charge', 'Flame Burst', 'Flamethrower']","['Ember', 'Scratch']",,...,False,False,False,False,False,False,False,False,False,False
4,5,Charmeleon,158,126,151,[Fire],Standard,"['Fire Punch', 'Flame Burst', 'Flamethrower']","['Ember', 'Fire Fang']",25.0,...,False,False,False,False,False,False,False,False,False,False


In [17]:
dtf.to_csv('unfolded.csv')