In [100]:
import torch
import fastai

In [101]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Preprocessing applied to every image: resize to 224x224, convert to a float
# tensor, then normalise each RGB channel with the given mean / std.
data_trans = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# ImageFolder derives one class per sub-directory of the root folder.
dataset = datasets.ImageFolder(root="PokemonData", transform=data_trans)

# 80 / 20 train / dev split; dev takes the remainder so the sizes always sum
# to len(dataset).
train_size = int(len(dataset) * 0.8)
dev_size = len(dataset) - train_size

# Seed the split so the same images land in train / dev on every run.
# Without a fixed generator the dev set changes whenever the cell is re-run,
# which makes results non-reproducible.
split_seed = 42
train_set, dev_set = random_split(
    dataset,
    [train_size, dev_size],
    generator=torch.Generator().manual_seed(split_seed),
)

batchSize = 32

# Shuffle only the training data; the dev loader keeps a fixed order.
train_loader = DataLoader(train_set, batch_size=batchSize, shuffle=True)
dev_loader = DataLoader(dev_set, batch_size=batchSize, shuffle=False)

In [None]:
# random_split returns Subset objects, which do not expose `.classes`;
# the class-name list lives on the underlying ImageFolder dataset.
dataset.classes

In [102]:
import torchvision.models as models
import torch.nn as nn
from facenet_pytorch import InceptionResnetV1

# Start from the VGGFace2-pretrained backbone. classify=True is required so
# that forward() routes through `logits`; with the library default
# (classify=False) the network returns normalised embeddings and the
# replacement head below would never be used.
model = InceptionResnetV1(pretrained='vggface2', classify=True)

# Swap the pretrained head for a 150-way classifier.
# NOTE(review): 150 is presumably the number of class folders in PokemonData;
# verify against len(dataset.classes).
model.logits = nn.Linear(model.logits.in_features, 150)

# The network must emit raw logits: nn.CrossEntropyLoss applies log-softmax
# itself, so a Softmax layer here would be applied twice during training.
# The one-element Sequential wrapper is kept only so state_dict keys retain
# their "0." prefix and stay loadable by code that expects that layout.
model = nn.Sequential(model)

# Fall back to the CPU instead of crashing when no CUDA device is present.
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')

In [103]:
# Report whether PyTorch can see a CUDA device.
has_cuda = torch.cuda.is_available()
print(has_cuda)


True


In [104]:
# Pick the compute device name once; the training loop moves batches to it.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Move the model's parameters to that device (last expression, so the
# module repr is displayed as the cell output).
model.to(device)

Sequential(
  (0): InceptionResnetV1(
    (conv2d_1a): BasicConv2d(
      (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (conv2d_2a): BasicConv2d(
      (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (conv2d_2b): BasicConv2d(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU()
    )
    (maxpool_3a): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2d_3b): BasicConv2d(
      (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_r

In [105]:
from torch import optim

# Adam over every model parameter. The learning rate is 1e-2, i.e. the same
# value as the literal 10e-3.
# NOTE(review): that is ten times Adam's default of 1e-3 - confirm it is intended.
optimizer = optim.Adam(params=model.parameters(), lr=1e-2)

# Multi-class classification loss; it expects raw (un-normalised) scores.
criterion = nn.CrossEntropyLoss()

In [106]:
num_epochs = 1

for epoch in range(num_epochs):
    # Make sure dropout / batch-norm layers are in training mode, even if an
    # earlier cell switched the model to eval().
    model.train()

    # Sum of per-batch losses for this epoch. Kept under its own name so it
    # is not overwritten by the per-batch `loss` tensor below.
    running_loss = 0.0

    for inputs, labels in train_loader:
        # Batches must live on the same device as the model.
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # .item() detaches the scalar so the autograd graph is not retained.
        running_loss += loss.item()

    # Report the mean batch loss once per epoch; max(..., 1) avoids a
    # division by zero when the loader is empty.
    print(f'Epoch : {epoch + 1} Loss: {running_loss / max(len(train_loader), 1)}')

TypeError: 'tuple' object cannot be interpreted as an integer

In [None]:
# Persist only the learned weights (the state_dict), not the module object.
weights = model.state_dict()
torch.save(weights, 'pokemodel.pt')

In [None]:
import torch.nn as nn
from facenet_pytorch import InceptionResnetV1

# Rebuild the architecture before loading the saved weights.
# classify=True makes forward() go through the `logits` head; with the
# library default (classify=False) the network returns embeddings and the
# 150-way head would be ignored.
model = InceptionResnetV1(pretrained='vggface2', classify=True)
model.logits = nn.Linear(model.logits.in_features, 150)

# One-element Sequential wrapper: keeps the "0." key prefix used by the saved
# checkpoint while letting the model output raw logits (no extra Softmax
# layer; Softmax has no parameters, so the keys are unaffected).
model = nn.Sequential(model)

# map_location='cpu' lets a checkpoint saved from a CUDA model load on a
# machine without a GPU. Last expression, so the key-matching report is shown.
model.load_state_dict(torch.load('pokemodel.pt', map_location='cpu'))

<All keys matched successfully>

In [None]:
import cv2
import torch
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import os
# Inference mode: disables dropout and uses batch-norm running statistics.
model.eval()

img_path = 'test/charmander.jpg'
img = cv2.imread(img_path)
# cv2.imread returns None instead of raising when the file is missing or
# unreadable; fail here with a clear message rather than inside cvtColor.
if img is None:
    raise FileNotFoundError(f'Could not read image: {img_path}')

# OpenCV loads images as BGR; the torchvision transforms operate on an RGB
# PIL image.
rgb_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
pil_img = Image.fromarray(rgb_img)

# Same preprocessing as used for the training data.
data_trans = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Add the batch dimension and place the input on whatever device the model's
# parameters live on, so the cell works for both CPU and GPU models.
model_device = next(model.parameters()).device
input_tensor = data_trans(pil_img).unsqueeze(0).to(model_device)

with torch.no_grad():
    output = model(input_tensor)

# argmax is unchanged by softmax, so the predicted index can be read directly
# from the model output whether it holds logits or probabilities.
predicted_class = output.argmax(dim=1).item()

# Map class indices to names from the sorted sub-directory names of the
# dataset folder. Only directories are considered, so stray files in the
# folder cannot shift the indices.
dataset_dir = 'PokemonData'  # Replace with your dataset path
class_names = sorted(entry.name for entry in os.scandir(dataset_dir) if entry.is_dir())
class_to_name = dict(enumerate(class_names))

predicted_pokemon = class_to_name[predicted_class]

# Draw the label on a copy of the original BGR image (the channel order
# cv2.imshow expects) so `img` itself stays unmodified.
cv_img = img.copy()
cv2.putText(cv_img, f' {predicted_pokemon}', (60, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

# Show the annotated image in a native window until a key is pressed.
# NOTE(review): cv2.imshow needs a GUI session; it will not work on a
# headless server.
cv2.imshow('Pokemon Classification', cv_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
