In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from torchsummary import summary
from PIL import Image
import numpy as np
import os

running_on_colab = False
dataset_path = "/content/Pokemon151to10k/dataset"

if "COLAB_RELEASE_TAG" in os.environ:
    print("Running on Google Colab")
    running_on_colab = True
    !unzip /content/drive/MyDrive/Datasets/Pokemon151to10k.zip -d /content/Pokemon151to10k
    dataset_path = "/content/Pokemon151to10k/dataset"
else:
    print("Running locally")

# 1. Define transformations for preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),          # resize all images to 224x224
    transforms.Lambda(lambda img: img.convert("RGB")),  # drop alpha channel
    transforms.ToTensor(),                  # convert images to PyTorch tensors
    # transforms.Normalize(
    #     mean=[0.485, 0.456, 0.406],
    #     std=[0.229, 0.224, 0.225]
    transforms.Normalize(
        mean=[0, 0, 0],
        std=[1, 1, 1]
    )
])

def rgb_loader(path):
    with open(path, "rb") as f:
        img = Image.open(f)
        return img.convert("RGB")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/Pokemon151to10k/dataset/Mewtwo/00000200.jpg  
 extracting: /content/Pokemon151to10k/dataset/Mewtwo/00000201.png  
  inflating: /content/Pokemon151to10k/dataset/Mewtwo/00000202.png  
  inflating: /content/Pokemon151to10k/dataset/Mewtwo/00000203.jpg  
  inflating: /content/Pokemon151to10k/dataset/Mewtwo/00000204.jpg  
  inflating: /content/Pokemon151to10k/dataset/Mewtwo/00000205.png  
  inflating: /content/Pokemon151to10k/dataset/Mewtwo/00000206.png  
 extracting: /content/Pokemon151to10k/dataset/Mewtwo/00000207.png  
  inflating: /content/Pokemon151to10k/dataset/Mewtwo/00000208.png  
 extracting: /content/Pokemon151to10k/dataset/Mewtwo/00000209.png  
  inflating: /content/Pokemon151to10k/dataset/Mewtwo/00000210.jpg  
  inflating: /content/Pokemon151to10k/dataset/Mewtwo/00000211.jpg  
  inflating: /content/Pokemon151to10k/dataset/Mewtwo/00000212.png  
  inflating: /content/Pokemon151to10k/dataset/Mewtw

In [4]:
dataset = datasets.ImageFolder(root=dataset_path, transform=transform, loader=rgb_loader)
train_size = int(0.9 * len(dataset))  # 80% for training
test_size  = len(dataset) - train_size  # remaining 20%

train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset,batch_size=64, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=1, shuffle=False)
num_classes = len(dataset.classes)

In [5]:
class PokemonRecognizer(nn.Module):
    def __init__(self):
        super(PokemonRecognizer, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)

        self.pool = nn.MaxPool2d(2, 2)
        self.flatten_size = 64 * 28 * 28
        self.fc = nn.Linear(self.flatten_size, 128)
        self.deep_output = nn.Linear(128, 1)
        self.class_output = nn.Linear(128, num_classes)

    def forward(self, x):
        # Convolution + Pooling
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = self.pool(torch.relu(self.conv3(x)))

        # Flatten
        x = x.view(x.size(0), -1)  # batch_size x flatten_size

        # Fully connected layers
        x = torch.relu(self.fc(x))

        # Two heads
        deep_out = self.deep_output(x)      # e.g., regression output
        class_out = self.class_output(x)    # classification output

        return deep_out, class_out

In [6]:
model = PokemonRecognizer()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

cuda


In [8]:
epochs = 10000
model.train()
for epoch in range(epochs):

    # Load all training data at once
    all_inputs, all_targets = next(iter(train_loader))  # one big batch
    all_inputs, all_targets = all_inputs.to(device), all_targets.to(device)
    optimizer.zero_grad()
    # Forward pass
    outputs = model(all_inputs)
    classification_outputs = outputs[1]

    # Compute loss
    loss = criterion(classification_outputs, all_targets)

    # Backpropagation and optimization
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")


Epoch [1/10000], Loss: 4.5364
Epoch [11/10000], Loss: 4.0795
Epoch [21/10000], Loss: 4.4314
Epoch [31/10000], Loss: 4.5029
Epoch [41/10000], Loss: 4.4956
Epoch [51/10000], Loss: 4.0571
Epoch [61/10000], Loss: 4.4726
Epoch [71/10000], Loss: 4.3498
Epoch [81/10000], Loss: 4.0344
Epoch [91/10000], Loss: 4.2971
Epoch [101/10000], Loss: 4.0599
Epoch [111/10000], Loss: 3.7992
Epoch [121/10000], Loss: 4.1868
Epoch [131/10000], Loss: 3.9301
Epoch [141/10000], Loss: 3.8999
Epoch [151/10000], Loss: 3.8281
Epoch [161/10000], Loss: 3.2426
Epoch [171/10000], Loss: 3.8897
Epoch [181/10000], Loss: 3.7594
Epoch [191/10000], Loss: 3.5990
Epoch [201/10000], Loss: 3.7825
Epoch [211/10000], Loss: 3.4921
Epoch [221/10000], Loss: 3.2132
Epoch [231/10000], Loss: 3.3154
Epoch [241/10000], Loss: 3.6458
Epoch [251/10000], Loss: 3.0454
Epoch [261/10000], Loss: 3.3204
Epoch [271/10000], Loss: 3.2978
Epoch [281/10000], Loss: 2.7814
Epoch [291/10000], Loss: 2.9924
Epoch [301/10000], Loss: 3.0232
Epoch [311/10000], 

KeyboardInterrupt: 

In [10]:
model.eval()
class_names = dataset.classes # Get class names from the dataset
break_iter =0
break_at = 100
accuracy = 0

with torch.no_grad():
    for inputs, target in test_loader:
        break_iter+=1
        if break_iter == break_at:
            break
        inputs, target = inputs.to(device), target.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs[1].data, 1)
        actual_class = class_names[target]
        predicted_class = class_names[predicted]
        if predicted_class == actual_class:
            accuracy += 1
        print("Actual: {:s}, Predicted: {:s}".format(actual_class, predicted_class))
        # plt.imshow(inputs_cpu[0].permute(1,2,0))
        # plt.title(f"Actual: {actual_class}, Predicted: {predicted_class}")
        # plt.axis('off')
        # plt.show()
accuracy = accuracy/break_at

print("Accuracy: {:.4f}".format(accuracy*100))

Actual: Rattata, Predicted: Sandshrew
Actual: Pidgey, Predicted: Pidgey
Actual: Aerodactyl, Predicted: Onix
Actual: Victreebel, Predicted: Weepinbell
Actual: Dodrio, Predicted: Charizard
Actual: Marowak, Predicted: Sandshrew
Actual: Graveler, Predicted: Geodude
Actual: Rhyhorn, Predicted: Rhydon
Actual: Caterpie, Predicted: Victreebel
Actual: Tentacool, Predicted: Tentacool
Actual: Pikachu, Predicted: Psyduck
Actual: Slowpoke, Predicted: MrMime
Actual: Gloom, Predicted: Oddish
Actual: Farfetchd, Predicted: Arcanine
Actual: Muk, Predicted: Grimer
Actual: Staryu, Predicted: Sandshrew
Actual: Meowth, Predicted: Doduo
Actual: Muk, Predicted: Muk
Actual: Chansey, Predicted: Chansey
Actual: Arbok, Predicted: Victreebel
Actual: Exeggcute, Predicted: Mankey
Actual: Meowth, Predicted: Magnemite
Actual: Pidgeot, Predicted: Ninetales
Actual: Venomoth, Predicted: Muk
Actual: Pikachu, Predicted: Pikachu
Actual: Nidoqueen, Predicted: Golduck
Actual: Geodude, Predicted: Tentacool
Actual: Seel, Predic