In [14]:
import torch
import fastai

In [15]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

data_trans = transforms.Compose([
                                transforms.Resize((224,224)),
                                  transforms.ToTensor(), 
                                transforms.Normalize(mean = [0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

dataset = datasets.ImageFolder(root = "PokemonData", transform = data_trans)

train_size = int(len(dataset) * 0.8)
dev_size = int(len(dataset) - train_size)

train_set, dev_set = random_split(dataset, [train_size, dev_size])

batchSize = 32

train_loader = DataLoader(train_set, batch_size = batchSize, shuffle = True)
dev_loader = DataLoader(dev_set, batch_size = batchSize, shuffle = False)

In [16]:
import torchvision.models as models
import torch.nn as nn

model = models.resnet101(pretrained = True)
model.fc = nn.Linear(model.fc.in_features, 150)
model = model.cuda()




In [17]:
print(torch.cuda.is_available())


True


In [18]:
device = ('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [19]:
from torch import optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 10e-3)

In [20]:
num_epochs = 5

for epoch in range(num_epochs):
    loss = 0.0

    for inputs, labels in train_loader:
        
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        loss += loss.item()
        print(f'Epoch : {epoch} Loss: {loss / len(train_loader)}')

Epoch : 0 Loss: 0.05840962380170822
Epoch : 0 Loss: 0.17848260700702667
Epoch : 0 Loss: 0.183319553732872
Epoch : 0 Loss: 0.08374486118555069
Epoch : 0 Loss: 0.07170655578374863
Epoch : 0 Loss: 0.061117734760046005
Epoch : 0 Loss: 0.06908971071243286
Epoch : 0 Loss: 0.06790728867053986
Epoch : 0 Loss: 0.0941804051399231
Epoch : 0 Loss: 0.06356559693813324
Epoch : 0 Loss: 0.07608997821807861
Epoch : 0 Loss: 0.07806649804115295
Epoch : 0 Loss: 0.07186968624591827
Epoch : 0 Loss: 0.07123015075922012
Epoch : 0 Loss: 0.06121000647544861
Epoch : 0 Loss: 0.06608516722917557
Epoch : 0 Loss: 0.060740504413843155
Epoch : 0 Loss: 0.05834100395441055
Epoch : 0 Loss: 0.06006963551044464
Epoch : 0 Loss: 0.0612688846886158
Epoch : 0 Loss: 0.059783171862363815
Epoch : 0 Loss: 0.058146532624959946
Epoch : 0 Loss: 0.05904241278767586
Epoch : 0 Loss: 0.0580902062356472
Epoch : 0 Loss: 0.05863013118505478
Epoch : 0 Loss: 0.05937424674630165
Epoch : 0 Loss: 0.05879533290863037
Epoch : 0 Loss: 0.05828123539

In [21]:
torch.save(model.state_dict(), 'pokemodel.pt')

In [22]:
import torchvision.models as models
import torch.nn as nn

model = models.resnet101()
model.fc = nn.Linear(model.fc.in_features, 150)
model.load_state_dict(torch.load('pokemodel.pt'))

<All keys matched successfully>

In [56]:
import cv2
import torch
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import os
model.eval()

img_path = 'test/charmander.jpg'
img = cv2.imread(img_path)

# Convert BGR image to RGB
rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Convert the NumPy array image to a PIL Image
cv_img = Image.fromarray(rgb_img)

# Define the same transformations used during training
data_trans = transforms.Compose([
                                transforms.Resize((224,224)),
                                  transforms.ToTensor(), 
                                transforms.Normalize(mean = [0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

# Preprocess the image
input_tensor = data_trans(cv_img).unsqueeze(0)  # Add batch dimension

# Perform inference
with torch.no_grad():
    output = model(input_tensor)

# Get the predicted class (you need to map class indices to Pokemon names)
predicted_class = output.argmax().item()
# Assuming 'output' is your model's output tensor
class_probabilities = torch.softmax(output, dim=1)
predicted_class = torch.argmax(class_probabilities, dim=1).item()


# Define the path to your dataset directory
dataset_dir = 'PokemonData'  # Replace with your dataset path

# Get a list of class names from the subfolder names
class_names = sorted(os.listdir(dataset_dir))

# Create a dictionary to map class indices to Pokemon names
class_to_name = {i: class_name for i, class_name in enumerate(class_names)}

# Now, when you have a predicted class index (predicted_class), you can get the Pokemon name
predicted_pokemon = class_to_name[predicted_class]

cv_img = np.array(img)
# Display the image with the predicted classification
cv2.putText(cv_img, f' {predicted_pokemon}', (60, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

# Show the image with the classification
cv2.imshow('Pokemon Classification', cv_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
