# Embedding pokemon and moves

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


I will use preexisting pokemon embeddings created by Max Woolf from the Hugging Face Hub.

In [2]:
pokemonEmbeddings = pd.read_parquet("hf://datasets/minimaxir/pokemon-embeddings/pokemon_embeddings.parquet")
pokemonEmbeddings.head(386)[["id", "text_embedding"]]

Unnamed: 0,id,text_embedding
0,1,"[0.023699194, 0.098256916, -0.113635585, -0.02..."
1,2,"[0.020813923, 0.08126713, -0.13159968, -0.0227..."
2,3,"[0.01392012, 0.079689376, -0.15031755, -0.0238..."
3,4,"[0.021281311, 0.048030715, -0.13584474, -0.066..."
4,5,"[0.011542956, 0.06856951, -0.1338552, -0.03754..."
...,...,...
381,382,"[0.026272489, 0.08711059, -0.14678231, -0.0860..."
382,383,"[-0.004261688, 0.08824785, -0.14485669, -0.049..."
383,384,"[-1.8315268e-05, 0.08927726, -0.14492452, -0.0..."
384,385,"[-0.015184594, 0.07384356, -0.1294859, -0.0504..."


In [3]:
import requests

query = """
{
  pokemon_v2_pokemonspecies(where: {pokemon_v2_generation: {name: {_in: ["generation-i", "generation-ii", "generation-iii"]}}}, order_by: {id: asc}) {
    name
    id
      }
}
"""

r = requests.post(
    "https://beta.pokeapi.co/graphql/v1beta",
    json = {
        "query": query,
    },
)

mons = r.json()["data"]["pokemon_v2_pokemonspecies"]

df = pokemonEmbeddings.head(386)

monDF = pd.DataFrame(mons)
df = pd.concat([monDF["name"], df], axis = 1)

df

embeddingDict = {}
for i, j in zip(df["name"], df["text_embedding"]) :
    embeddingDict.update({i : j})

product = pd.DataFrame.from_dict(embeddingDict)

product

Unnamed: 0,bulbasaur,ivysaur,venusaur,charmander,charmeleon,charizard,squirtle,wartortle,blastoise,caterpie,...,regirock,regice,registeel,latias,latios,kyogre,groudon,rayquaza,jirachi,deoxys
0,0.023699,0.020814,0.013920,0.021281,0.011543,0.024877,0.016634,0.013249,0.027176,0.020452,...,0.027651,0.003434,0.028039,-0.008763,-0.014489,0.026272,-0.004262,-0.000018,-0.015185,-0.000274
1,0.098257,0.081267,0.079689,0.048031,0.068570,0.068339,0.098808,0.095270,0.059752,0.057449,...,0.061335,0.074844,0.094368,0.070113,0.104725,0.087111,0.088248,0.089277,0.073844,0.073857
2,-0.113636,-0.131600,-0.150318,-0.135845,-0.133855,-0.145211,-0.099926,-0.123792,-0.125123,-0.137710,...,-0.121841,-0.145589,-0.122804,-0.127923,-0.124292,-0.146782,-0.144857,-0.144925,-0.129486,-0.118046
3,-0.029429,-0.022770,-0.023809,-0.066563,-0.037545,-0.020529,-0.072942,-0.068186,-0.050244,-0.046679,...,-0.061592,-0.039345,-0.053246,-0.077166,-0.072119,-0.086041,-0.049501,-0.035046,-0.050409,-0.057487
4,0.027496,0.052761,0.047402,0.068088,0.095889,0.093023,0.078210,0.089622,0.087191,0.068475,...,0.092220,0.084009,0.099121,0.069408,0.056872,0.064153,0.059923,0.062013,0.035229,0.084424
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
763,0.007767,0.003410,0.012276,0.013154,0.005987,0.011857,0.006344,0.015051,0.005662,-0.000514,...,0.012562,0.020601,0.010592,0.022415,0.007357,-0.013426,-0.000842,0.018386,0.012947,0.018271
764,-0.048871,-0.055848,-0.052815,-0.058147,-0.054453,-0.059677,-0.054495,-0.062924,-0.061142,-0.036217,...,-0.059045,-0.082881,-0.082491,-0.087123,-0.077268,-0.050088,-0.061086,-0.062657,-0.072424,-0.063789
765,0.011726,-0.016407,-0.006640,-0.018654,0.012999,-0.003816,-0.005198,0.001689,-0.014739,-0.019395,...,0.002333,0.004368,-0.010159,-0.019254,0.015021,0.004458,0.016872,-0.007906,-0.004744,-0.006778
766,-0.011219,-0.016188,-0.011576,0.019359,-0.000120,0.009497,0.016567,-0.001354,-0.000863,0.004637,...,0.017730,0.038298,-0.004936,0.004251,-0.012786,0.003884,0.009679,0.007626,0.008228,-0.004261


The following cell executes a graphql query on PokeAPI to retrieve all of the metadata for gen 3 moves.

In [4]:
import requests

query = """
{
  pokemon_v2_move(where: {pokemon_v2_generation: {name: {_in: ["generation-i", "generation-ii", "generation-iii"]}}}, order_by: {id: asc}) {
    name
    id
    pp
    power
    type_id
    move_damage_class_id
    accuracy
    move_effect_id
    move_effect_chance
      }
}
"""

r = requests.post(
    "https://beta.pokeapi.co/graphql/v1beta",
    json = {
        "query": query,
    },
)

moves = r.json()["data"]["pokemon_v2_move"]

moves[0:10]

[{'name': 'pound',
  'id': 1,
  'pp': 35,
  'power': 40,
  'type_id': 1,
  'move_damage_class_id': 2,
  'accuracy': 100,
  'move_effect_id': 1,
  'move_effect_chance': None},
 {'name': 'karate-chop',
  'id': 2,
  'pp': 25,
  'power': 50,
  'type_id': 2,
  'move_damage_class_id': 2,
  'accuracy': 100,
  'move_effect_id': 44,
  'move_effect_chance': None},
 {'name': 'double-slap',
  'id': 3,
  'pp': 10,
  'power': 15,
  'type_id': 1,
  'move_damage_class_id': 2,
  'accuracy': 85,
  'move_effect_id': 30,
  'move_effect_chance': None},
 {'name': 'comet-punch',
  'id': 4,
  'pp': 15,
  'power': 18,
  'type_id': 1,
  'move_damage_class_id': 2,
  'accuracy': 85,
  'move_effect_id': 30,
  'move_effect_chance': None},
 {'name': 'mega-punch',
  'id': 5,
  'pp': 20,
  'power': 80,
  'type_id': 1,
  'move_damage_class_id': 2,
  'accuracy': 85,
  'move_effect_id': 1,
  'move_effect_chance': None},
 {'name': 'pay-day',
  'id': 6,
  'pp': 20,
  'power': 40,
  'type_id': 1,
  'move_damage_class_id': 2

In [5]:
moveTable = pd.DataFrame(moves)
moveTable = moveTable.dropna(axis = 0, subset = "pp")
moveTable.fillna(0)

Unnamed: 0,name,id,pp,power,type_id,move_damage_class_id,accuracy,move_effect_id,move_effect_chance
0,pound,1,35.0,40.0,1,2,100.0,1,0.0
1,karate-chop,2,25.0,50.0,2,2,100.0,44,0.0
2,double-slap,3,10.0,15.0,1,2,85.0,30,0.0
3,comet-punch,4,15.0,18.0,1,2,85.0,30,0.0
4,mega-punch,5,20.0,80.0,1,2,85.0,1,0.0
...,...,...,...,...,...,...,...,...,...
349,rock-blast,350,10.0,25.0,6,2,90.0,30,0.0
350,shock-wave,351,20.0,60.0,13,3,0.0,18,0.0
351,water-pulse,352,20.0,60.0,11,3,100.0,77,20.0
352,doom-desire,353,5.0,140.0,9,3,100.0,149,0.0


The following cells will build an autoencoder to assign embeddings to each move. It will take the structure of a multilayer perceptron with one hidden layer, an embedding layer, and an output layer that tries to reconstruct the input vector from the embedding layer. The model will be trained on the reconstructed input vectors. I will then run the feature vectors through the model to obtain custom embeddings for each move.

In [6]:
def one_hot_encoding(pokeAPICode, variable_to_encode) :
    assert (variable_to_encode in {"type_id", "move_damage_class_id", "move_effect_id"}), "Error: must be a valid variable (type_id, move_damage_class_id, move_effect_id)"
    array = np.zeros(max(moveTable[f"{variable_to_encode}"].to_numpy()))
    array[pokeAPICode - 1] = 1
    return array

In [7]:
identifiers = moveTable[["name", "id"]]

moveTable["accuracy"] = moveTable["accuracy"].fillna(100)
moveTable["move_effect_chance"] = moveTable["move_effect_chance"].fillna(0)
moveTable.at[11, "power"] = 200
moveTable.at[31, "power"] = 200
moveTable.at[328, "power"] = 200
moveTable.at[89, "power"] = 200
moveTable["power"] = moveTable["power"].fillna(0)

scaler = StandardScaler()
scaled_power = scaler.fit_transform(moveTable[["power"]])
powerFrame = pd.DataFrame(scaled_power).rename({0 : "scaled power"}, axis = 1)
scaled_accuracy = scaler.fit_transform(moveTable[["accuracy"]])
accuracyFrame = pd.DataFrame(scaled_accuracy).rename({0 : "scaled accuracy"}, axis = 1)
scaled_pp = scaler.fit_transform(moveTable[["pp"]])
ppFrame = pd.DataFrame(scaled_pp).rename({0 : "scaled pp"}, axis = 1)
scaled_chance = scaler.fit_transform(moveTable[["move_effect_chance"]])
chanceFrame = pd.DataFrame(scaled_chance).rename({0 : "scaled chance"}, axis = 1)



type_ids = moveTable["type_id"].to_numpy()
labels = ["type_id"] * moveTable.shape[0]
one_hot_encoded_types = pd.DataFrame(list(map(one_hot_encoding, type_ids, labels)))

move_class_ids = moveTable["move_damage_class_id"].to_numpy()
labels = ["move_damage_class_id"] * moveTable.shape[0]
one_hot_encoded_move_classes = pd.DataFrame(list(map(one_hot_encoding, move_class_ids, labels)))

move_effect_ids = moveTable["move_effect_id"].to_numpy()
labels = ["move_effect_id"] * moveTable.shape[0]
one_hot_encoded_effects = pd.DataFrame(list(map(one_hot_encoding, move_effect_ids, labels)))

prepared_moves = pd.concat([identifiers, powerFrame, accuracyFrame, ppFrame, chanceFrame, one_hot_encoded_types, one_hot_encoded_move_classes, one_hot_encoded_effects], axis = 1)

features = prepared_moves.drop("name", axis = 1).drop("id", axis = 1).to_numpy()
features

array([[-0.02044019,  0.4318579 ,  1.97870125, ...,  0.        ,
         0.        ,  0.        ],
       [ 0.18868774,  0.4318579 ,  0.8957386 , ...,  0.        ,
         0.        ,  0.        ],
       [-0.54326   , -0.92907115, -0.72870538, ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [ 0.39781567,  0.4318579 ,  0.35425727, ...,  0.        ,
         0.        ,  0.        ],
       [ 2.07083908,  0.4318579 , -1.2701867 , ...,  0.        ,
         0.        ,  0.        ],
       [ 2.07083908, -0.47542813, -1.2701867 , ...,  0.        ,
         0.        ,  0.        ]])

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim

class Autoencoder(nn.Module) :
    def __init__(self) :
        super(Autoencoder, self).__init__()

        self.encoder = nn.Sequential(
            nn.Linear(features.shape[1], 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU()
        )

        self.decoder = nn.Sequential(
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, features.shape[1]),
            nn.Sigmoid()
        )
        
    def forward(self, x) :
        x = self.encoder(x)
        x = self.decoder(x)
        return x

moveEncoder = Autoencoder()

loss_fn = nn.MSELoss()
optimizer = optim.Adam(moveEncoder.parameters(), lr = 0.001)

In [9]:
input_tensor = torch.FloatTensor(features)

num_epochs = 5000
for epoch in range(num_epochs) :
    output = moveEncoder(input_tensor)
    loss = loss_fn(output, input_tensor)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 100 == 0 :
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [100/5000], Loss: 0.0186
Epoch [200/5000], Loss: 0.0179
Epoch [300/5000], Loss: 0.0149
Epoch [400/5000], Loss: 0.0143
Epoch [500/5000], Loss: 0.0135
Epoch [600/5000], Loss: 0.0119
Epoch [700/5000], Loss: 0.0112
Epoch [800/5000], Loss: 0.0106
Epoch [900/5000], Loss: 0.0098
Epoch [1000/5000], Loss: 0.0094
Epoch [1100/5000], Loss: 0.0092
Epoch [1200/5000], Loss: 0.0091
Epoch [1300/5000], Loss: 0.0090
Epoch [1400/5000], Loss: 0.0090
Epoch [1500/5000], Loss: 0.0090
Epoch [1600/5000], Loss: 0.0090
Epoch [1700/5000], Loss: 0.0089
Epoch [1800/5000], Loss: 0.0089
Epoch [1900/5000], Loss: 0.0088
Epoch [2000/5000], Loss: 0.0088
Epoch [2100/5000], Loss: 0.0088
Epoch [2200/5000], Loss: 0.0087
Epoch [2300/5000], Loss: 0.0087
Epoch [2400/5000], Loss: 0.0087
Epoch [2500/5000], Loss: 0.0087
Epoch [2600/5000], Loss: 0.0087
Epoch [2700/5000], Loss: 0.0087
Epoch [2800/5000], Loss: 0.0087
Epoch [2900/5000], Loss: 0.0087
Epoch [3000/5000], Loss: 0.0087
Epoch [3100/5000], Loss: 0.0087
Epoch [3200/5000]

In [10]:
input_tensor = torch.FloatTensor(features)
embeddingModel = Autoencoder()
embeddingModel.load_state_dict(torch.load("C:/Users/coliv/summerProjects/Summer-Repository/embeddingModel.pt"))
embeddingModel.eval()

with torch.no_grad():
    moveEmbeddings = embeddingModel.encoder(input_tensor)

moveEmbeddings.shape

torch.Size([354, 32])

In [11]:
"""
embeddingDF = pd.DataFrame(moveEmbeddings.numpy())
names = moveTable["name"]

moveEmbeddingDF = pd.concat([names, embeddingDF], axis = 1)
moveEmbeddingDF

"""
moveDictionary = {}
embeddingArray = moveEmbeddings.numpy()
for i, j in zip(moveTable["name"], embeddingArray) :
    moveDictionary.update({i : j})

In [12]:
pd.read_csv("C:/Users/coliv/summerProjects/Summer-Repository/moveEmbeddings.csv")

Unnamed: 0.1,Unnamed: 0,pound,karate-chop,double-slap,comet-punch,mega-punch,pay-day,fire-punch,ice-punch,thunder-punch,...,magical-leaf,water-sport,calm-mind,leaf-blade,dragon-dance,rock-blast,shock-wave,water-pulse,doom-desire,psycho-boost
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2.696851,3.786913,2.51158,2.388641,3.538375,2.246343,4.771016,4.111046,5.312578,...,2.697376,2.153226,0.0,3.684766,1.758792,2.459598,3.941864,2.800724,3.336219,5.563962
2,2,2.152655,3.577251,4.831429,4.567638,4.141576,2.269519,3.957352,3.552237,3.153275,...,1.510539,3.285356,1.097018,3.117794,2.10814,4.563623,1.621274,2.697959,2.507022,6.096521
3,3,1.747984,2.98275,4.560847,4.620383,2.067173,1.445493,0.0,3.151987,2.442618,...,0.736958,1.623204,4.331639,1.246977,3.630155,5.096465,1.931218,2.192497,1.805429,1.877586
4,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,6,1.167794,1.64291,0.956939,0.846676,1.177349,2.302423,1.850318,1.58448,1.136545,...,2.615313,2.997191,3.515262,2.541995,0.339515,0.360095,2.198762,1.173156,2.704708,2.286018
7,7,0.06168,0.617143,2.676614,2.444113,1.93072,1.826223,2.789597,1.336361,1.901158,...,2.950134,4.491783,5.749531,1.734311,1.36559,2.341716,2.743745,2.182761,3.722554,9.489176
8,8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,9,0.0,0.0,0.122283,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.370503,0.0,0.237848,0.0,0.0,0.0,0.32012,0.975338,2.1442


Max Woolf's embeddings are overly dimensional. Considering that I am only operating on pokemon up to generation three, his embeddings are higher dimensional than a simple one-hot encoding. I will recreate my own custom embeddings by creating a model that maps pokemon attributes to pokemon names. I will then take the last hidden layer to use as embeddings for each of the pokemon. Woolf's embeddings contain a lot of data that is not relevant to competitive pokemon battling. Additionally, it takes all pokemon into account, including non-canon mons. My embeddings should be more appropriate to the task at hand. 

In [13]:
import requests 

query = """
{
  pokemon_v2_pokemon(limit: 386, order_by: {id: asc}) {
    id
    name
    pokemon_v2_pokemontypes(order_by: { id: asc}) {
      pokemon_v2_type {
        name
        id
      }
    }
    pokemon_v2_pokemonstats {
      pokemon_v2_stat {
        name
      }
      base_stat
    }
    pokemon_v2_pokemonabilities(
      distinct_on: ability_id
    	order_by: { ability_id: asc}
    ) {
      pokemon_v2_ability {
        name
      }
    }
    pokemon_v2_pokemonmoves(distinct_on: move_id, order_by : { move_id: asc }) {
      pokemon_v2_move {
        name
      }
    }
      }
}
"""

r = requests.post(
    "https://beta.pokeapi.co/graphql/v1beta",
    json = {
        "query": query,
    },
)

pokemon = r.json()["data"]["pokemon_v2_pokemon"]

pd.DataFrame(pokemon)

Unnamed: 0,id,name,pokemon_v2_pokemontypes,pokemon_v2_pokemonstats,pokemon_v2_pokemonabilities,pokemon_v2_pokemonmoves
0,1,bulbasaur,"[{'pokemon_v2_type': {'name': 'grass', 'id': 1...","[{'pokemon_v2_stat': {'name': 'hp'}, 'base_sta...",[{'pokemon_v2_ability': {'name': 'chlorophyll'...,"[{'pokemon_v2_move': {'name': 'razor-wind'}}, ..."
1,2,ivysaur,"[{'pokemon_v2_type': {'name': 'grass', 'id': 1...","[{'pokemon_v2_stat': {'name': 'hp'}, 'base_sta...",[{'pokemon_v2_ability': {'name': 'chlorophyll'...,[{'pokemon_v2_move': {'name': 'swords-dance'}}...
2,3,venusaur,"[{'pokemon_v2_type': {'name': 'grass', 'id': 1...","[{'pokemon_v2_stat': {'name': 'hp'}, 'base_sta...",[{'pokemon_v2_ability': {'name': 'chlorophyll'...,[{'pokemon_v2_move': {'name': 'swords-dance'}}...
3,4,charmander,"[{'pokemon_v2_type': {'name': 'fire', 'id': 10}}]","[{'pokemon_v2_stat': {'name': 'hp'}, 'base_sta...","[{'pokemon_v2_ability': {'name': 'blaze'}}, {'...","[{'pokemon_v2_move': {'name': 'mega-punch'}}, ..."
4,5,charmeleon,"[{'pokemon_v2_type': {'name': 'fire', 'id': 10}}]","[{'pokemon_v2_stat': {'name': 'hp'}, 'base_sta...","[{'pokemon_v2_ability': {'name': 'blaze'}}, {'...","[{'pokemon_v2_move': {'name': 'mega-punch'}}, ..."
...,...,...,...,...,...,...
381,382,kyogre,"[{'pokemon_v2_type': {'name': 'water', 'id': 1...","[{'pokemon_v2_stat': {'name': 'hp'}, 'base_sta...",[{'pokemon_v2_ability': {'name': 'drizzle'}}],"[{'pokemon_v2_move': {'name': 'headbutt'}}, {'..."
382,383,groudon,"[{'pokemon_v2_type': {'name': 'ground', 'id': ...","[{'pokemon_v2_stat': {'name': 'hp'}, 'base_sta...",[{'pokemon_v2_ability': {'name': 'drought'}}],"[{'pokemon_v2_move': {'name': 'mega-punch'}}, ..."
383,384,rayquaza,"[{'pokemon_v2_type': {'name': 'dragon', 'id': ...","[{'pokemon_v2_stat': {'name': 'hp'}, 'base_sta...",[{'pokemon_v2_ability': {'name': 'air-lock'}}],[{'pokemon_v2_move': {'name': 'swords-dance'}}...
384,385,jirachi,"[{'pokemon_v2_type': {'name': 'steel', 'id': 9...","[{'pokemon_v2_stat': {'name': 'hp'}, 'base_sta...",[{'pokemon_v2_ability': {'name': 'serene-grace...,"[{'pokemon_v2_move': {'name': 'mega-punch'}}, ..."


In [14]:
query = """
{
  pokemon_v2_pokemon(limit: 386, order_by: {id: asc}) {
    id
    name
    pokemon_v2_pokemontypes(order_by: { id: asc}) {
      pokemon_v2_type {
        id
      }
    }
    pokemon_v2_pokemonstats {
      base_stat
    }
    pokemon_v2_pokemonabilities(
      distinct_on: ability_id
    	order_by: { ability_id: asc}
    ) {
      pokemon_v2_ability {
        id
      }
    }
    pokemon_v2_pokemonmoves(distinct_on: move_id, order_by : { move_id: asc }) {
      pokemon_v2_move {
        id
      }
    }
      }
}
"""

r = requests.post(
    "https://beta.pokeapi.co/graphql/v1beta",
    json = {
        "query": query,
    },
)

pokemon = r.json()["data"]["pokemon_v2_pokemon"]

df = pd.DataFrame(pokemon)

df.at[385, "name"] = "deoxys"

df

Unnamed: 0,id,name,pokemon_v2_pokemontypes,pokemon_v2_pokemonstats,pokemon_v2_pokemonabilities,pokemon_v2_pokemonmoves
0,1,bulbasaur,"[{'pokemon_v2_type': {'id': 12}}, {'pokemon_v2...","[{'base_stat': 45}, {'base_stat': 49}, {'base_...","[{'pokemon_v2_ability': {'id': 34}}, {'pokemon...","[{'pokemon_v2_move': {'id': 13}}, {'pokemon_v2..."
1,2,ivysaur,"[{'pokemon_v2_type': {'id': 12}}, {'pokemon_v2...","[{'base_stat': 60}, {'base_stat': 62}, {'base_...","[{'pokemon_v2_ability': {'id': 34}}, {'pokemon...","[{'pokemon_v2_move': {'id': 14}}, {'pokemon_v2..."
2,3,venusaur,"[{'pokemon_v2_type': {'id': 12}}, {'pokemon_v2...","[{'base_stat': 80}, {'base_stat': 82}, {'base_...","[{'pokemon_v2_ability': {'id': 34}}, {'pokemon...","[{'pokemon_v2_move': {'id': 14}}, {'pokemon_v2..."
3,4,charmander,[{'pokemon_v2_type': {'id': 10}}],"[{'base_stat': 39}, {'base_stat': 52}, {'base_...","[{'pokemon_v2_ability': {'id': 66}}, {'pokemon...","[{'pokemon_v2_move': {'id': 5}}, {'pokemon_v2_..."
4,5,charmeleon,[{'pokemon_v2_type': {'id': 10}}],"[{'base_stat': 58}, {'base_stat': 64}, {'base_...","[{'pokemon_v2_ability': {'id': 66}}, {'pokemon...","[{'pokemon_v2_move': {'id': 5}}, {'pokemon_v2_..."
...,...,...,...,...,...,...
381,382,kyogre,[{'pokemon_v2_type': {'id': 11}}],"[{'base_stat': 100}, {'base_stat': 100}, {'bas...",[{'pokemon_v2_ability': {'id': 2}}],"[{'pokemon_v2_move': {'id': 29}}, {'pokemon_v2..."
382,383,groudon,[{'pokemon_v2_type': {'id': 5}}],"[{'base_stat': 100}, {'base_stat': 150}, {'bas...",[{'pokemon_v2_ability': {'id': 70}}],"[{'pokemon_v2_move': {'id': 5}}, {'pokemon_v2_..."
383,384,rayquaza,"[{'pokemon_v2_type': {'id': 16}}, {'pokemon_v2...","[{'base_stat': 105}, {'base_stat': 150}, {'bas...",[{'pokemon_v2_ability': {'id': 76}}],"[{'pokemon_v2_move': {'id': 14}}, {'pokemon_v2..."
384,385,jirachi,"[{'pokemon_v2_type': {'id': 9}}, {'pokemon_v2_...","[{'base_stat': 100}, {'base_stat': 100}, {'bas...",[{'pokemon_v2_ability': {'id': 32}}],"[{'pokemon_v2_move': {'id': 5}}, {'pokemon_v2_..."


In [15]:
#test = df["pokemon_v2_pokemontypes"][0]
#test[0].get("pokemon_v2_type").get("id")

def get_type_id(row_entry) :
    assert len(row_entry) <= 2, "Pokemon should not have more than two types."
    result = np.zeros(18)
    count = 0
    for i in row_entry :
        index = (i.get("pokemon_v2_type").get("id")) - 1
        result[index] = 1
    return result


def get_bst(row_entry) : 
    assert len(row_entry) == 6, "All pokemon have six base stats."
    result = np.empty(6)
    count = 0
    for i in row_entry :
        result[count] = i.get("base_stat")
        count += 1
    return result


def get_ability(row_entry) :
    assert len(row_entry) <= 3, "Pokemon should not have more than two abilities in gen3."
    result = np.zeros(3)
    count = 0
    for i in row_entry :
        result[count] = i.get("pokemon_v2_ability").get("id")
        count += 1
    return result


def get_moveset(row_entry) :
    result = np.zeros(354)
    for i in row_entry :
        index = i.get("pokemon_v2_move").get("id") - 1
        if index > 353 :
            continue
        result[index] = 1
    return result

type_ids = df["pokemon_v2_pokemontypes"].apply(get_type_id)
bsts = df["pokemon_v2_pokemonstats"].apply(get_bst)
abilities = df["pokemon_v2_pokemonabilities"].apply(get_ability)
moves = df["pokemon_v2_pokemonmoves"].apply(get_moveset)


abilities


0      [34.0, 65.0, 0.0]
1      [34.0, 65.0, 0.0]
2      [34.0, 65.0, 0.0]
3      [66.0, 94.0, 0.0]
4      [66.0, 94.0, 0.0]
             ...        
381      [2.0, 0.0, 0.0]
382     [70.0, 0.0, 0.0]
383     [76.0, 0.0, 0.0]
384     [32.0, 0.0, 0.0]
385     [46.0, 0.0, 0.0]
Name: pokemon_v2_pokemonabilities, Length: 386, dtype: object

In [16]:
moves

0      [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
1      [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
2      [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
3      [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, ...
4      [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, ...
                             ...                        
381    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
382    [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, ...
383    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
384    [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, ...
385    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, ...
Name: pokemon_v2_pokemonmoves, Length: 386, dtype: object

In [17]:
typeMatrix = np.vstack(type_ids.to_numpy())
bstMatrix = np.vstack(bsts.to_numpy())
abilityMatrix = np.vstack(abilities.to_numpy())
moveMatrix = np.vstack(moves.to_numpy())

inputMatrix = np.concatenate([typeMatrix, bstMatrix, abilityMatrix, moveMatrix], axis = 1)

abilityTable = pd.DataFrame(abilityMatrix)
#abilities = abilityTable[[0, 1, 2]]


enc = OneHotEncoder()


encoded = enc.fit_transform(abilityTable)

encoded_abilities = pd.DataFrame(encoded.toarray())
abilityDF = pd.DataFrame(abilityMatrix)

typeDF = pd.DataFrame(typeMatrix)
moveDF = pd.DataFrame(moveMatrix)

statTable = pd.DataFrame(bstMatrix)
scaler = StandardScaler()
scaled0 = pd.DataFrame(scaler.fit_transform(statTable[[0]]))
scaled1 = pd.DataFrame(scaler.fit_transform(statTable[[1]]))
scaled2 = pd.DataFrame(scaler.fit_transform(statTable[[2]]))
scaled3 = pd.DataFrame(scaler.fit_transform(statTable[[3]]))
scaled4 = pd.DataFrame(scaler.fit_transform(statTable[[4]]))
scaled5 = pd.DataFrame(scaler.fit_transform(statTable[[5]]))

processed_inputs = pd.concat([df["id"], df["name"], typeDF, scaled0, scaled1, scaled2, scaled3, scaled4, scaled5, encoded_abilities, moveDF], axis = 1)



In [29]:
moveDictionary = pd.concat([df["name"], moveDF], axis = 1)
moveDictionary.at[82, "name"] = "farfetch’d"


In [34]:
processed_inputs["id"] = processed_inputs["id"].apply(lambda x : x - 1)

In [327]:
statTable.iloc[3]

0    39.0
1    52.0
2    43.0
3    60.0
4    50.0
5    65.0
Name: 3, dtype: float64

In [328]:
statTable.iloc[154]

0    39.0
1    52.0
2    43.0
3    60.0
4    50.0
5    65.0
Name: 154, dtype: float64

In [315]:
arr = processed_inputs.to_numpy()
np.random.shuffle(arr)
arr



array([[78, 'rapidash', 0.0, ..., 0.0, 0.0, 0.0],
       [227, 'skarmory', 0.0, ..., 0.0, 0.0, 0.0],
       [310, 'manectric', 0.0, ..., 0.0, 0.0, 0.0],
       ...,
       [265, 'wurmple', 0.0, ..., 0.0, 0.0, 0.0],
       [59, 'arcanine', 0.0, ..., 0.0, 0.0, 0.0],
       [332, 'cacturne', 0.0, ..., 0.0, 0.0, 0.0]], dtype=object)

In [49]:
class PokemonDataset(Dataset) :
    def __init__(self, data, labels) :
        self.data = data
        self.labels = labels

    def __len__(self) :
        return len(self.data)
    
    def __getitem__(self, idx) :
        return torch.tensor(self.data[idx], dtype = torch.float32), torch.tensor(self.labels[idx], dtype = torch.long)
    
dataset = PokemonDataset(processed_inputs.to_numpy()[:, 2:].astype(float), processed_inputs.to_numpy()[:, 0].astype(float))
dataloader = DataLoader(dataset, batch_size = 2, shuffle = True)

class PokemonClassificationNet(nn.Module) :
    def __init__(self, input_dim, embedding_dim, num_classes) :
        super(PokemonClassificationNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, embedding_dim)
        self.fc3 = nn.Linear(embedding_dim, num_classes)

    def forward(self, x) :
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        embeddings = x
        x = self.fc3(x)
        return x, embeddings
    
num_classes = 386
model = PokemonClassificationNet(processed_inputs.shape[1] - 2, 64, num_classes)

optimizer = optim.Adam(model.parameters(), lr = 0.001)
criterion = nn.CrossEntropyLoss()


In [50]:
epochs = 100
for epoch in range(epochs) :
    for batch in dataloader:
        inputs, targets = batch
        optimizer.zero_grad()
        outputs, _ = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    
    if (epoch + 1) % 10 == 0 :
        print(f"Epoch [{epoch + 1} / {epochs}], Loss: {loss.item():.4f}")

Epoch [10 / 100], Loss: 0.0154
Epoch [20 / 100], Loss: 0.1190


KeyboardInterrupt: 

In [45]:
inputTensorEmb = torch.FloatTensor(processed_inputs.to_numpy()[:, 2:].astype(float))

classes, embeddings = model.(inputTensorEmb)

In [47]:
embeddings

tensor([[ 1.7651,  0.0000,  0.2689,  ...,  5.4958,  4.1589,  0.0000],
        [ 2.3636,  1.6800,  6.7665,  ...,  0.0000,  5.2155,  0.0800],
        [ 5.8946,  4.0762, 14.3093,  ...,  0.0000,  7.0911,  4.8802],
        ...,
        [ 2.6367,  5.8525,  7.8737,  ...,  0.0000,  3.7256, 14.3066],
        [ 3.8767,  7.7772, 14.2702,  ...,  0.0000,  5.1161, 11.6694],
        [ 7.9241,  5.9373, 12.3183,  ...,  0.0000, 14.4944, 11.0546]],
       grad_fn=<ReluBackward0>)

In [22]:
arr = processed_inputs.to_numpy()
np.random.shuffle(arr)
arr

class PokemonEncoder(nn.Module) :
    def __init__(self) :
        super(PokemonEncoder, self).__init__()

        self.encoder = nn.Sequential(
            nn.Linear(arr[:, 2:].shape[1], 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU()
        )

        self.decoder = nn.Sequential(
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, arr[:, 2:].shape[1]),
            nn.Sigmoid()
        )
        
    def forward(self, x) :
        x = self.encoder(x)
        x = self.decoder(x)
        return x

moveEncoder = PokemonEncoder()

loss_fn = nn.MSELoss()
optimizer = optim.Adam(moveEncoder.parameters(), lr = 0.001)

In [317]:
arr[:, 2:]

array([[1.0, 0.0, 1.0, ..., 0.0, 0.0, 0.0],
       [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
       [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
       ...,
       [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
       [0.0, 0.0, 0.0, ..., 1.0, 0.0, 0.0],
       [0.0, 0.0, 0.0, ..., 1.0, 0.0, 0.0]], dtype=object)

In [23]:
processed_tensor = torch.FloatTensor(arr[:, 2:].astype(float))
num_epochs = 10000
for epoch in range(num_epochs) :
    output = moveEncoder(processed_tensor)
    loss = loss_fn(output, processed_tensor)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 100 == 0 :
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [100/10000], Loss: 0.0700
Epoch [200/10000], Loss: 0.0564
Epoch [300/10000], Loss: 0.0473
Epoch [400/10000], Loss: 0.0386
Epoch [500/10000], Loss: 0.0338
Epoch [600/10000], Loss: 0.0308
Epoch [700/10000], Loss: 0.0290
Epoch [800/10000], Loss: 0.0278
Epoch [900/10000], Loss: 0.0271
Epoch [1000/10000], Loss: 0.0267
Epoch [1100/10000], Loss: 0.0265
Epoch [1200/10000], Loss: 0.0264
Epoch [1300/10000], Loss: 0.0263
Epoch [1400/10000], Loss: 0.0263
Epoch [1500/10000], Loss: 0.0263
Epoch [1600/10000], Loss: 0.0263
Epoch [1700/10000], Loss: 0.0262
Epoch [1800/10000], Loss: 0.0262
Epoch [1900/10000], Loss: 0.0262
Epoch [2000/10000], Loss: 0.0262
Epoch [2100/10000], Loss: 0.0262
Epoch [2200/10000], Loss: 0.0262
Epoch [2300/10000], Loss: 0.0262
Epoch [2400/10000], Loss: 0.0262
Epoch [2500/10000], Loss: 0.0262
Epoch [2600/10000], Loss: 0.0262
Epoch [2700/10000], Loss: 0.0262
Epoch [2800/10000], Loss: 0.0262
Epoch [2900/10000], Loss: 0.0262
Epoch [3000/10000], Loss: 0.0262
Epoch [3100/10000],

In [319]:
with torch.no_grad():
    monEmbeddings = moveEncoder.encoder(processed_tensor)

In [320]:
embeddings = pd.DataFrame(monEmbeddings.numpy())
pokeDict = pd.concat([pd.DataFrame({"id" :arr[:, 0]}), pd.DataFrame({"name" :arr[:, 1]}), embeddings], axis = 1)
pokeDict = pokeDict.sort_values(by = "id")
pokeDict = pokeDict.set_index("name").transpose()




In [322]:
np.dot(pokeDict["chansey"].to_numpy(), pokeDict["snorlax"].to_numpy()) / (np.linalg.norm(pokeDict["chansey"].to_numpy()) * np.linalg.norm(pokeDict["snorlax"].to_numpy()))
np.dot(pokeDict["chansey"].to_numpy(), pokeDict["blissey"].to_numpy()) / (np.linalg.norm(pokeDict["chansey"].to_numpy()) * np.linalg.norm(pokeDict["blissey"].to_numpy()))
np.dot(pokeDict["charizard"].to_numpy(), pokeDict["typhlosion"].to_numpy()) / (np.linalg.norm(pokeDict["charmander"].to_numpy()) * np.linalg.norm(pokeDict["cyndaquil"].to_numpy()))

0.7259878014484039

In [323]:
df[df["name"] == "snorlax"]
pokeDict["chansey"]


id          113
0     30.468021
1     43.795952
2     23.611874
3      40.11808
4     39.122253
5      54.55072
6     32.610924
7     41.782402
8           0.0
9           0.0
10    31.370684
11     3.244185
12     1.047812
13    50.752121
14    22.591076
15    46.022797
16    25.825954
17    22.497046
18    15.659537
19      47.9716
20    32.036781
21    41.171253
22          0.0
23          0.0
24    21.447227
25    14.095514
26    17.137753
27          0.0
28    64.620102
29          0.0
30    25.306595
31    22.583336
Name: chansey, dtype: object

In [324]:
from sklearn.preprocessing import normalize

pokeDict

#np.linalg.norm(normalized_df.iloc[0], 2)

normalized_df = pd.DataFrame(normalize(pokeDict.T, norm = "l2")).T
normalized_df.columns = pokeDict.columns

np.linalg.norm(normalized_df["bulbasaur"])


correlation_matrix = pd.DataFrame(np.matmul(normalized_df.T.to_numpy(), normalized_df.to_numpy())).set_index(pokeDict.columns)
correlation_matrix.columns = pokeDict.columns

correlation_matrix


name,bulbasaur,ivysaur,venusaur,charmander,charmeleon,charizard,squirtle,wartortle,blastoise,caterpie,...,regirock,regice,registeel,latias,latios,kyogre,groudon,rayquaza,jirachi,deoxys
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
bulbasaur,1.000000,0.999158,0.996475,0.824439,0.825302,0.809051,0.707315,0.716026,0.714112,0.743064,...,0.236322,0.222168,0.230233,0.259298,0.249811,0.214811,0.268544,0.242890,0.257004,0.271082
ivysaur,0.999158,1.000000,0.997341,0.825994,0.827169,0.810967,0.696000,0.704391,0.702260,0.744153,...,0.243545,0.227977,0.237244,0.267047,0.257412,0.219848,0.276976,0.249058,0.264627,0.276862
venusaur,0.996475,0.997341,1.000000,0.822715,0.823999,0.814705,0.691580,0.699941,0.703086,0.734340,...,0.252820,0.235743,0.245256,0.274843,0.265494,0.229614,0.286096,0.259341,0.267674,0.284050
charmander,0.824439,0.825994,0.822715,1.000000,0.999384,0.991461,0.767776,0.765015,0.767925,0.706202,...,0.264739,0.242407,0.264523,0.273543,0.266035,0.249950,0.320212,0.297365,0.269971,0.266138
charmeleon,0.825302,0.827169,0.823999,0.999384,1.000000,0.991811,0.774276,0.771771,0.775357,0.694054,...,0.273742,0.251288,0.273008,0.280176,0.272455,0.257596,0.328897,0.304577,0.277997,0.273114
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
kyogre,0.214811,0.219848,0.229614,0.249950,0.257596,0.267405,0.308191,0.315915,0.324328,0.378363,...,0.988324,0.995117,0.992668,0.987932,0.989086,1.000000,0.983208,0.995687,0.981043,0.984147
groudon,0.268544,0.276976,0.286096,0.320212,0.328897,0.341471,0.292064,0.300529,0.312420,0.373925,...,0.993145,0.986282,0.993174,0.980899,0.981941,0.983208,1.000000,0.992397,0.974558,0.978918
rayquaza,0.242890,0.249058,0.259341,0.297365,0.304577,0.317473,0.307931,0.315700,0.326952,0.389216,...,0.988615,0.990412,0.992569,0.988911,0.989990,0.995687,0.992397,1.000000,0.977520,0.982419
jirachi,0.257004,0.264627,0.267674,0.269971,0.277997,0.278817,0.295411,0.303708,0.302247,0.399164,...,0.973570,0.981680,0.980450,0.988724,0.987591,0.981043,0.974558,0.977520,1.000000,0.991949


In [254]:
pokeDict.T

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
bulbasaur,45.422272,38.957497,0.000000,68.783150,0.0,57.208935,0.0,125.187668,0.0,0.0,...,98.333740,45.766521,94.647430,70.701134,0.0,119.426849,0.0,0.0,65.822479,0.000000
ivysaur,40.261860,32.523327,0.000000,67.050163,0.0,54.715733,0.0,115.975563,0.0,0.0,...,97.188957,41.406387,94.586044,69.496933,0.0,111.781212,0.0,0.0,64.507179,0.000000
venusaur,38.355293,26.223263,0.000000,68.305702,0.0,48.386902,0.0,116.799522,0.0,0.0,...,101.495560,38.631615,91.048401,67.253830,0.0,110.224380,0.0,0.0,69.007988,0.000000
charmander,66.363686,26.754160,5.716218,0.000000,0.0,20.817135,0.0,121.809036,0.0,0.0,...,99.104530,55.818867,127.624268,80.734367,0.0,83.590294,0.0,0.0,92.433578,0.000000
charmeleon,64.062172,26.224554,4.236176,0.000000,0.0,19.648956,0.0,113.666977,0.0,0.0,...,95.879272,53.882713,123.446877,78.712776,0.0,79.317886,0.0,0.0,88.807747,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
kyogre,41.656879,49.447723,0.000000,0.000000,0.0,42.792068,0.0,21.874123,0.0,0.0,...,42.956554,70.611397,105.588661,29.978325,0.0,41.365288,0.0,0.0,72.437881,0.000000
groudon,42.078789,23.614719,8.306293,0.000000,0.0,24.830399,0.0,76.205353,0.0,0.0,...,80.660385,36.198055,110.277733,81.699318,0.0,42.989780,0.0,0.0,72.753632,0.000000
rayquaza,28.470453,17.437136,14.944954,0.000000,0.0,23.024975,0.0,42.850079,0.0,0.0,...,48.610188,49.833263,97.444618,33.405369,0.0,29.873241,0.0,0.0,74.574745,0.000000
jirachi,12.622849,13.509622,5.805139,0.000000,0.0,46.203373,0.0,0.000000,0.0,0.0,...,11.131160,42.561134,155.917145,55.092873,0.0,44.100998,0.0,0.0,38.721622,2.388888
