In [None]:
from google.colab import drive
drive.mount('/content/drive')
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import datasets
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.transforms import ToTensor, Compose, Resize, ToPILImage
import matplotlib.pyplot as plt
from tqdm import tqdm
import pickle
from PIL import Image

Mounted at /content/drive


In [None]:
# Prefer the GPU when one is available, otherwise fall back to the CPU.
# Hard-coding "cuda" makes `.to(device)` raise "Found no NVIDIA driver on your
# system" on CPU-only runtimes, which stops the notebook at model creation.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Integer class index -> type name, as needed for cross entropy loss targets.
# The position of a name in the list below is its class index.
label_map = dict(enumerate([
    'Grass',
    'Fire',
    'Water',
    'Bug',
    'Normal',
    'Poison',
    'Electric',
    'Ground',
    'Fairy',
    'Fighting',
    'Psychic',
    'Rock',
    'Ghost',
    'Ice',
    'Dragon',
    'Dark',
    'Steel',
    'Flying',
]))
# Reverse lookup: type name -> integer class index.
inverse_label_map = dict(zip(label_map.values(), label_map.keys()))

In [None]:
# train_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/train_dataset.csv')
# test_df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/test_dataset.csv')
# train_df.head()

In [None]:
# Read the Pokemon metadata table and index it by the Pokedex number rendered
# as a string left-padded with zeros to at least three characters.
data = (
    pd.read_csv('/content/drive/MyDrive/Colab Notebooks/pokemon_data_pokeapi.csv')
    .assign(**{'Pokedex Number': lambda frame: frame['Pokedex Number'].astype(str).str.zfill(3)})
    .set_index('Pokedex Number')
)
data.head()

Unnamed: 0_level_0,Name,Type1,Type2,Classification,Height (m),Weight (kg),Abilities,Generation,Legendary Status
Pokedex Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,Bulbasaur,Grass,Poison,Seed Pokémon,0.7,6.9,"Overgrow, Chlorophyll",1,No
2,Ivysaur,Grass,Poison,Seed Pokémon,1.0,13.0,"Overgrow, Chlorophyll",1,No
3,Venusaur,Grass,Poison,Seed Pokémon,2.0,100.0,"Overgrow, Chlorophyll",1,No
4,Charmander,Fire,,Lizard Pokémon,0.6,8.5,"Blaze, Solar-power",1,No
5,Charmeleon,Fire,,Flame Pokémon,1.1,19.0,"Blaze, Solar-power",1,No


In [None]:
# train_df['Type'] = train_df['Type'].map(inverse_label_map)
# # Don't map test_df since the format is a little different
# train_df['Pokedex Number'] = train_df['Pokedex Number'].astype(str).str.zfill(3)
# test_df['Pokedex Number'] = test_df['Pokedex Number'].astype(str).str.zfill(3)
# train_df['Image'] = None
# test_df['Image'] = None

In [None]:
# Load the thumbnail for Pokedex numbers 001..905, resize each one to 156x156
# and keep it as a channel-first array holding only the first three channels.
# The transforms are stateless, so they are built once up front.
thumbnail_dir = '/content/drive/MyDrive/Colab Notebooks/thumbnails/'
to_pil = ToPILImage()
resizer = Resize((156, 156))

numbers = [f'{i:03d}' for i in range(1, 906)]
data['Image'] = None
images = []
for string in numbers:
  raw = plt.imread(thumbnail_dir + string + '.png')
  # (H, W, C) -> (C, H, W), then drop any channel past the third.
  img = np.array(resizer(to_pil(raw))).transpose(2, 0, 1)[:3, :, :]
  images.append(img)
  # Also store the image (wrapped in a leading axis of length 1) on the row.
  data.loc[string, 'Image'] = np.array([img])

In [None]:
# Build the type vocabulary from BOTH type columns. Using Type1 alone would
# make convert_to_multi_hot raise a KeyError for any type that only ever
# appears as a secondary type. Missing Type2 entries (NaN) are dropped before
# sorting so the list contains type names only.
pokemon_types = sorted(pd.concat([data['Type1'], data['Type2']]).dropna().unique())
# Type name -> column position in the multi-hot label vector.
type_to_idx = {type_name: idx for idx, type_name in enumerate(pokemon_types)}
def convert_to_multi_hot(row, type_to_idx, num_types):
  """Multi-hot encode the type(s) of one Pokemon.

  Args:
    row: Mapping-like record (e.g. a DataFrame row) with 'Type1' and 'Type2'
      entries; 'Type2' may be missing (NaN/None).
    type_to_idx: Dict mapping a type name to its position in the vector.
    num_types: Length of the returned vector.

  Returns:
    A float tensor of zeros of length ``num_types`` with 1.0 at the position
    of 'Type1' and, when present, of 'Type2'.

  Raises:
    KeyError: If a type name is not a key of ``type_to_idx``.
  """
  encoded = torch.zeros(num_types)

  # The primary type is always present.
  primary_slot = type_to_idx[row['Type1']]
  encoded[primary_slot] = 1.0

  # The secondary type is optional; skip it when the cell is empty.
  secondary = row['Type2']
  if not pd.notna(secondary):
      return encoded

  encoded[type_to_idx[secondary]] = 1.0
  return encoded

# One label slot per known type; encode every row into its multi-hot vector.
num_types = len(pokemon_types)
data['type_vector'] = data.apply(
    convert_to_multi_hot,
    axis=1,
    args=(type_to_idx, num_types),
)

In [None]:
# train_data = train_df[['Image','Type']].values
# test_data = test_df[['Image','Type']].values

In [None]:
# class PokemonDataset(Dataset):
#     def __init__(self, data):
#         self.data = data

#     def __len__(self):
#         return len(self.data)

#     def __getitem__(self, idx):
#         image, label = self.data[idx]
#         image = torch.tensor(image, dtype=torch.float)
#         label = torch.tensor(label, dtype=torch.long)
#         return image, label

In [None]:
class PokemonTypeDataset(Dataset):
    """Dataset pairing each image with its multi-hot encoded type vector.

    Args:
        images: Indexable collection of images; its length is the dataset size.
        type_vectors: Indexable collection of labels, looked up with the same
            index as ``images``.
        transform: Optional callable applied to an image before it is returned.
    """

    def __init__(self, images, type_vectors, transform=None):
        self.images, self.type_vectors, self.transform = images, type_vectors, transform

    def __len__(self):
        """Number of samples, taken from the image collection."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, type_vector)`` for position ``idx``."""
        sample = self.images[idx]
        # Only run the transform when one was supplied.
        processed = self.transform(sample) if self.transform else sample
        return processed, self.type_vectors[idx]

In [None]:
class PokemonTypeCNN(nn.Module):
  """ImageNet-pretrained ResNet-18 with its head replaced for type prediction.

  The network outputs one raw logit per type. No sigmoid is applied here
  because BCEWithLogitsLoss applies it internally for better numerical
  stability; apply ``torch.sigmoid`` to the output to get probabilities.

  Args:
    num_types: Number of output logits (one per Pokemon type).
  """

  def __init__(self, num_types):
    super().__init__()

    # `pretrained=True` is deprecated in torchvision in favour of `weights=`.
    # IMAGENET1K_V1 is the checkpoint `pretrained=True` used to select.
    self.base_model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

    # Replace the final fully connected layer so it emits `num_types` logits.
    in_features = self.base_model.fc.in_features
    self.base_model.fc = nn.Linear(in_features, num_types)

  def forward(self, x):
    """Return the raw per-type logits for a batch of images ``x``."""
    return self.base_model(x)

# Build the classifier with one output per known type, then move it to the
# selected device (Module.to returns the module itself).
net = PokemonTypeCNN(num_types=len(pokemon_types))
net = net.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 109MB/s]


RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

In [None]:
# class Net(nn.Module):
#     def __init__(self,drop):
#         super(Net, self).__init__()
#         self.drop = drop
#         # Initial input is 4*156*156
#         self.conv1 = nn.Conv2d(
#             in_channels = 4,
#             out_channels = 16,
#             kernel_size = 5,
#             stride = 1,
#             padding = 2,
#         )
#         self.conv2 = nn.Conv2d(
#             in_channels = 16,
#             out_channels = 32,
#             kernel_size = 5,
#             stride = 1,
#             padding = 2,
#         )
#         self.conv3 = nn.Conv2d(
#             in_channels = 32,
#             out_channels = 64,
#             kernel_size = 5,
#             stride = 1,
#             padding = 2,
#         )
#         self.maxpool2 = nn.MaxPool2d(
#             kernel_size = 2,
#             stride = 2,
#         )
#         self.maxpool3 = nn.MaxPool2d(
#             kernel_size = 3,
#             stride = 3,
#         )
#         self.fc1 = nn.Linear(
#             in_features = 16 * 78 * 78,
#             out_features = 1024,
#         )
#         self.dropout = nn.Dropout(p=0.4)
#         self.fc2 = nn.Linear(
#             in_features = 1024,
#             out_features = 18,
#         )
#     def forward(self, x):
#         x = self.conv1(x)
#         x = F.relu(x)
#         x = self.maxpool2(x)
#         # x = self.conv2(x)
#         # x = F.relu(x)
#         # x = self.maxpool2(x)
#         x = torch.flatten(x, 1)
#         x = self.fc1(x)
#         x = F.relu(x)
#         # if self.drop:
#         #     x = self.dropout(x)
#         x = self.fc2(x)
#         output = F.log_softmax(x, dim=1)
#         return output
# net = Net(drop=True).to(device)
# print(net)

In [None]:
# Pair images and labels by position.
# NOTE(review): assumes the row order of `data` matches the order in which
# `images` was filled — TODO confirm.
full_data = PokemonTypeDataset(images, data['type_vector'].reset_index(drop=True))

# Seed the split so the same samples land in train/test on every run;
# an unseeded random_split makes reported accuracies incomparable across runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(full_data, [0.8, 0.2], generator=split_generator)

# Shuffle only the training batches; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=200, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=200, shuffle=False)

In [None]:
# NOTE(review): this rebinds train_loader to iterate over ALL of full_data,
# replacing the loader built from the 80% split. The samples held out in
# test_dataset are therefore also used for training — confirm this is intended.
train_loader = DataLoader(full_data, batch_size=200, shuffle=True)

In [None]:
# Define loss function and optimizer.
# BCEWithLogitsLoss scores every type as an independent yes/no target, which
# matches the multi-hot label vectors (a Pokemon can have two types).
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-3)

# Train the model
num_epochs = 15
for epoch in range(num_epochs):
  net.train()
  running_loss = 0.0

  # The batch variables are deliberately not named `images`/`labels`: this is
  # notebook-global scope, and rebinding `images` would replace the list of
  # loaded thumbnails, breaking a re-run of the dataset-building cell.
  for batch_images, batch_labels in tqdm(train_loader):
    # Images are stored as integer arrays; the network needs floats.
    batch_images = batch_images.float().to(device)
    batch_labels = batch_labels.to(device)

    # Zero the parameter gradients
    optimizer.zero_grad()

    # Forward + backward + optimize
    outputs = net(batch_images)
    loss = criterion(outputs, batch_labels)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()

  print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader):.4f}')

  # Evaluate on test_loader rather than re-scoring train_loader.
  net.eval()
  correct = 0
  total = 0

  with torch.no_grad():
    for batch_images, batch_labels in test_loader:
      batch_images = batch_images.float().to(device)
      batch_labels = batch_labels.to(device)
      outputs = net(batch_images)

      # Apply sigmoid to get probabilities, then threshold at 0.5
      predictions = (torch.sigmoid(outputs) > 0.5).float()

      # Count correct predictions (exactly matching all types)
      correct += (predictions == batch_labels).all(dim=1).sum().item()
      total += batch_labels.size(0)

  # Guard against an empty evaluation loader.
  accuracy = correct / total if total else 0.0
  print(f'Accuracy: {accuracy:.4f}')

# Inference function to get predicted types
def predict_types(model, image_tensor, pokemon_types, threshold=0.5):
  """Predict the type name(s) for a single image.

  Args:
    model: Module returning one logit per type for a batched input.
    image_tensor: One unbatched image (tensor or NumPy array). It is cast to
      float32 and given a leading batch axis before being passed to the model.
    pokemon_types: Sequence of type names; position i names logit i.
    threshold: A type is predicted when its sigmoid probability is strictly
      greater than this value.

  Returns:
    ``(predicted_types, probabilities)``: the list of predicted type names
    (never empty — the most probable type is used when none exceeds the
    threshold) and the 1-D tensor of per-type probabilities.
  """
  model.eval()

  # Run on the device the model's weights live on instead of relying on a
  # notebook-global `device`, so the function works wherever the model is.
  first_param = next(model.parameters(), None)

  with torch.no_grad():
    # Cast to float like the training loop does (images are stored as integer
    # arrays) and add the batch axis the model expects.
    batch = torch.as_tensor(image_tensor).float().unsqueeze(0)
    if first_param is not None:
      batch = batch.to(first_param.device)

    outputs = model(batch)
    probabilities = torch.sigmoid(outputs)[0]

    # Get predicted types based on threshold
    selected = (probabilities > threshold).nonzero(as_tuple=True)[0].tolist()
    predicted_types = [pokemon_types[i] for i in selected]

    # If no type exceeds threshold, take the highest probability
    if not predicted_types:
      predicted_types.append(pokemon_types[probabilities.argmax().item()])

    return predicted_types, probabilities

In [None]:
# def train(train_data, net, to_train, opt, epochs=10, learning_rate=1e-2):
#     # Initialize loss
#     criterion = nn.CrossEntropyLoss()

#     # Store loss and accuracy for visualization purpose
#     losslist = []
#     acclist = []

#     # Select optimizer
#     if(opt=='adam'):
#         optimizer = optim.Adam(to_train, lr=learning_rate)
#     else:
#         optimizer = optim.SGD(to_train, lr=learning_rate, momentum=0.99)

#     # Set model to training mode
#     net.train()
#     for k in tqdm(range(epochs)):
#         for it, (X, y) in enumerate(train_data):
#             # Send to device
#             X, y = X.to(device), y.to(device)

#             optimizer.zero_grad()

#             pred = net(X)
#             y = y.type(torch.long)

#             loss = criterion(pred, y)
#             loss.backward()
#             optimizer.step()

#             _, predicted = torch.max(pred, 1)
#             correct = (predicted == y).sum().item()
#             acc = correct / len(y)

#             losslist.append(loss.item())
#             acclist.append(acc)

#     return losslist, acclist

# def test(test_data, net, prob_threshold=0.6):
#     size = 0
#     correct = 0

#     # Set model to eval mode
#     net.eval()

#     with torch.no_grad():
#         for X, y in test_data:
#             batch_size = X.size(0)
#             size += batch_size

#             X = X.to(device)

#             y_labels = [str(y[i].item()) for i in range(batch_size)]

#             outputs = net(X)

#             # Apply softmax to get probabilities
#             probabilities = F.softmax(outputs, dim=1)

#             for i in range(batch_size):
#                 # Get values and indices sorted by probability (descending)
#                 probs, indices = torch.sort(probabilities[i], dim=0, descending=True)

#                 # Get top two class indices and their probabilities
#                 top1_idx = indices[0].item()
#                 top2_idx = indices[1].item()
#                 top1_prob = probs[0].item()
#                 top2_prob = probs[1].item()

#                 # Convert indices to type names
#                 # print(top2_prob, top2_idx)
#                 top1_class = label_map[top1_idx]
#                 top2_class = label_map[top2_idx]

#                 # Create possible label combinations
#                 pred_types = []
#                 pred_types.append(top1_class)  # Single type prediction

#                 # Add second type if probability exceeds threshold
#                 if top2_prob >= prob_threshold:
#                     pred_types.append(f"{top1_class}, {top2_class}")
#                     pred_types.append(f"{top2_class}, {top1_class}")  # Order doesn't matter

#                 # Get true label
#                 true_label = y_labels[i]

#                 # print(true_label,pred_types)

#                 # Check if any prediction matches the true label
#                 if true_label in pred_types or true_label.split(', ')[0] in pred_types:
#                     correct += 1

#     # Calculate accuracy
#     accuracy = correct / size if size > 0 else 0
#     return accuracy

In [None]:
# def train_multi_label(train_data, net, to_train, opt, epochs=10, learning_rate=1e-3):
#     # Initialize loss for multi-label classification
#     criterion = nn.BCEWithLogitsLoss()

#     # Store loss and accuracy
#     losslist = []
#     acclist = []

#     # Select optimizer
#     if(opt=='adam'):
#         optimizer = optim.Adam(to_train, lr=learning_rate)
#     else:
#         optimizer = optim.SGD(to_train, lr=learning_rate, momentum=0.99)

#     # Set model to training mode
#     net.train()
#     for k in tqdm(range(epochs)):
#         for it, (X, y) in enumerate(train_data):
#             # Send to device
#             X, y = X.to(device), y.to(device)

#             # Zero gradients
#             optimizer.zero_grad()

#             # Forward pass
#             pred = net(X)

#             # Compute loss
#             loss = criterion(pred, y)

#             # Backward pass
#             loss.backward()
#             optimizer.step()

#             # Calculate accuracy for multi-label (using threshold of 0.5)
#             predicted = (torch.sigmoid(pred) > 0.5).float()
#             correct = (predicted == y).all(dim=1).sum().item()
#             acc = correct / len(y)

#             losslist.append(loss.item())
#             acclist.append(acc)

#     return losslist, acclist

# def test_multi_label(test_data, net, threshold=0.5):
#     size = 0
#     correct = 0

#     # Set model to eval mode
#     net.eval()

#     with torch.no_grad():
#         for X, y in test_data:
#             batch_size = X.size(0)
#             size += batch_size

#             # Send to device
#             X, y = X.to(device), y.to(device)

#             # Forward pass
#             outputs = net(X)

#             # Apply sigmoid to get probabilities
#             probabilities = torch.sigmoid(outputs)

#             # Convert to binary predictions based on threshold
#             predictions = (probabilities > threshold).float()

#             # A prediction is correct if all labels match
#             correct += (predictions == y).all(dim=1).sum().item()

#     # Calculate accuracy
#     accuracy = correct / size if size > 0 else 0
#     return accuracy

In [None]:
# train_dataset = PokemonDataset(train_df[['Image', 'Type']].values)
# test_dataset = PokemonDataset(test_df[['Image', 'Type']].values)

# train_loader = DataLoader(train_dataset, batch_size=200, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=200, shuffle=False)

In [None]:
# net = Net(drop=False).to(device)
# loss1, acc1 = train(train_loader, net, net.parameters(), 'adam')
# ax=range(len(loss1))
# plt.plot(ax, loss1, ax, acc1)
# plt.legend(['loss', 'accuracy'])
# plt.show()
# print('Train Accuracy:{}'.format(test(train_loader, net)))
# #print('Test Accuracy:{}'.format(test(test_loader, net)))