In [177]:
# Librerias

import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import plotly.graph_objs as go
import jupyterlab_plotly as jp
import plotly.express as px

In [178]:
# Cargar los datos

pokemon_data = pd.read_csv('pokemon.csv')
pokemon_data

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


In [179]:
# Vamos a ver si hay valores nulos

pokemon_data.isnull().sum()

#               0
Name            0
Type 1          0
Type 2        386
Total           0
HP              0
Attack          0
Defense         0
Sp. Atk         0
Sp. Def         0
Speed           0
Generation      0
Legendary       0
dtype: int64

In [180]:
# Vamos a sustituir los valores nulos por la palabra 'None'

pokemon_data.fillna('None', inplace=True)
pokemon_data.isnull().sum()

#             0
Name          0
Type 1        0
Type 2        0
Total         0
HP            0
Attack        0
Defense       0
Sp. Atk       0
Sp. Def       0
Speed         0
Generation    0
Legendary     0
dtype: int64

In [181]:
# Vamos a trabajar la columna Name para eliminar espacio y caracteres especiales y convertir todo a formato Title

pokemon_data['Name'] = pokemon_data['Name'].str.replace(' ', '').str.replace('.', '').str.replace('-', '').str.title()
pokemon_data.head()


The default value of regex will change from True to False in a future version. In addition, single character regular expressions will *not* be treated as literal strings when regex=True.



Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,Venusaurmegavenusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


In [182]:
# Eliminar valores nulos

pokemon_data.dropna(inplace=True)

In [183]:
# Codificar variables categóricas

pokemon_data = pd.get_dummies(pokemon_data, columns=['Type 1', 'Type 2'])
pokemon_data.head()

# pd.get_dummies sirve para convertir variables categóricas en variables numéricas

Unnamed: 0,#,Name,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,...,Type 2_Grass,Type 2_Ground,Type 2_Ice,Type 2_None,Type 2_Normal,Type 2_Poison,Type 2_Psychic,Type 2_Rock,Type 2_Steel,Type 2_Water
0,1,Bulbasaur,318,45,49,49,65,65,45,1,...,0,0,0,0,0,1,0,0,0,0
1,2,Ivysaur,405,60,62,63,80,80,60,1,...,0,0,0,0,0,1,0,0,0,0
2,3,Venusaur,525,80,82,83,100,100,80,1,...,0,0,0,0,0,1,0,0,0,0
3,3,Venusaurmegavenusaur,625,80,100,123,122,120,80,1,...,0,0,0,0,0,1,0,0,0,0
4,4,Charmander,309,39,52,43,60,50,65,1,...,0,0,0,1,0,0,0,0,0,0


In [184]:
# Entrenar el modelo

X = pokemon_data.drop(['Name', 'Legendary'], axis=1) # Tomamos todas las columnas menos Name y Legendary porque son las que queremos predecir
y = pokemon_data['Legendary'] # Tomamos la columna Legendary porque es la que queremos predecir
model = DecisionTreeClassifier()
model.fit(X, y)

In [215]:
# Creamos una función para predecir el ganador de una batalla

def predict_battle(pokemon1, pokemon2):
    stats1 = pokemon_data[pokemon_data['Name'] == pokemon1][['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']].values[0]
    stats2 = pokemon_data[pokemon_data['Name'] == pokemon2][['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']].values[0]

    if stats1.sum() > stats2.sum():
        return pokemon1
    else:
        return pokemon2


In [218]:
def plot_stats(pokemon1, pokemon2):
    winner = predict_battle(pokemon1, pokemon2)

    if winner is None:
        print("No se encontraron uno o ambos Pokémon en el conjunto de datos. Por favor, verifica los nombres e intenta de nuevo.")
        return

    stats1 = pokemon_data[pokemon_data['Name'] == pokemon1][['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']].values[0]
    stats2 = pokemon_data[pokemon_data['Name'] == pokemon2][['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']].values[0]

    data = pd.DataFrame({'Estadísticas': ['HP', 'Ataque', 'Defensa', 'Atq. Esp.', 'Def. Esp.', 'Velocidad'],
                         pokemon1: stats1,
                         pokemon2: stats2})

    data_melted = data.melt(id_vars='Estadísticas', var_name='Pokémon', value_name='Valor')

    fig = px.bar(data_melted, x='Estadísticas', y='Valor', color='Pokémon', barmode='group', text='Valor')

    fig.show()
    print(f'{winner} gana la batalla!')

In [219]:
plot_stats('Pikachu', 'Charizard')

Charizard gana la batalla!


In [223]:
# Extraer los nombres de las características de las estadísticas base
feature_names = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']

# Obtener las importancias de las características
importances = model.feature_importances_[:6] # Solo queremos las importancias de las características de las estadísticas base

# Crear un DataFrame para almacenar las importancias de las características
importances_df = pd.DataFrame({'feature': feature_names, 'importance': importances})

# Ordenar el DataFrame por importancia (de mayor a menor)
importances_df.sort_values(by='importance', ascending=False, inplace=True)

# Mostrar las importancias de las características
print(importances_df)


   feature  importance
1   Attack    0.596113
0       HP    0.193924
2  Defense    0.072695
4  Sp. Def    0.022327
3  Sp. Atk    0.008993
5    Speed    0.008373


In [224]:
# Crear un gráfico de barras para mostrar las importancias de las características

fig = go.Figure()
fig.add_trace(go.Bar(x=importances_df['feature'], y=importances_df['importance']))
fig.update_layout(title='Importancia de las características', xaxis_title='Características', yaxis_title='Importancia')
fig.show()
