In [3]:
import os
import sys
import pandas as pd
import sklearn
import numpy as np
from matplotlib import pyplot as plt
import random
import math
from torch import nn

import torch
import torchvision
import torchvision.transforms as transforms

In [6]:

    
def load_class_names(dataset_path=''):
  
  names = {}
  
  with open(os.path.join(dataset_path, 'classes.txt')) as f:
    for line in f:
      pieces = line.strip().split()
      class_id = pieces[0]
      names[class_id] = ' '.join(pieces[1:])
  
  return names

def load_image_labels(dataset_path=''):
  labels = {}
  
  with open(os.path.join(dataset_path, 'image_class_labels.txt')) as f:
    for line in f:
      pieces = line.strip().split()
      image_id = pieces[0]
      class_id = pieces[1]
      labels[image_id] = class_id
  
  return labels
        
def load_image_paths(dataset_path='', path_prefix=''):
  
  paths = {}
  
  with open(os.path.join(dataset_path, 'images.txt')) as f:
    for line in f:
      pieces = line.strip().split()
      image_id = pieces[0]
      path = os.path.join(path_prefix, pieces[1])
      paths[image_id] = path
  
  return paths

def load_train_test_split(dataset_path=''):
  train_images = []
  test_images = []
  
  with open(os.path.join(dataset_path, 'train_test_split.txt')) as f:
    for line in f:
      pieces = line.strip().split()
      image_id = pieces[0]
      is_train = int(pieces[1])
      if is_train:
        train_images.append(image_id)
      else:
        test_images.append(image_id)
        
  return train_images, test_images 
      
if __name__ == '__main__':
  
  dataset_path = '/home/sanzio/Uni/Intro2ML/proveResNet50/nabirds'
  image_path  = 'images'
  
  # Load in the image data
  # Assumes that the images have been extracted into a directory called "images"
  image_paths = load_image_paths(dataset_path, path_prefix=image_path)
  image_class_labels = load_image_labels(dataset_path)
  
  # Load in the class data
  class_names = load_class_names(dataset_path)


  # Load in the train / test split
  train_images, test_images = load_train_test_split(dataset_path)

In [31]:
from torch.utils.data import Dataset
from torchvision.io import read_image
from torchvision.io import ImageReadMode


class CustomImageDataset(Dataset):
    def __init__(self, image_class_labels, images, image_paths, dataset_path, test=False):
        self.img_labels = image_class_labels
        self.images = images
        self.image_paths = image_paths
        self.dataset_path = dataset_path
        self.test = test

    def __len__(self):
        # Return the number of samples in the split
        return len(self.images)
    
    def __getitem__(self, idx):
        if self.test == False:
            transform = transforms.Compose([
                transforms.Resize((256, 256)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
                transforms.CenterCrop(224)
                ])
        else:
            transform = transforms.Compose([transforms.Resize((256, 256))])
        img_id = self.images[idx]
        img_path = os.path.join(dataset_path, image_paths[img_id])
        image = read_image(img_path, ImageReadMode.RGB )
        image = transform(image)
        image = image/255
        
        label = int(self.img_labels[img_id])

        return image, label

In [5]:
train_data = CustomImageDataset(image_class_labels, train_images, image_paths, dataset_path)

In [6]:
train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)

In [7]:
model = torchvision.models.resnet50(weights='DEFAULT')

In [8]:
model.fc = nn.Linear(2048, 1011)

In [9]:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [17]:
# Set device
device = 'cpu'

# Set hyperparameters
num_epochs = 20
batch_size = 64
learning_rate = 0.001



# Set the model to run on the device
model = model.to(device)

# Define the loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    step = 0
    for inputs, labels in train_loader:
        # Move input and label tensors to the device
        inputs = inputs.float().to(device)
        labels = labels.to(device)

        # Zero out the optimizer
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        # Backward pass
        loss.backward()
        optimizer.step()

    # Print the loss for every epoch
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}')




Epoch 3/20, Loss: 0.0706




Epoch 4/20, Loss: 0.0849




Epoch 5/20, Loss: 0.0942




Epoch 6/20, Loss: 0.0981




Epoch 7/20, Loss: 0.1437




Epoch 8/20, Loss: 0.0466




Epoch 9/20, Loss: 0.0310




Epoch 10/20, Loss: 0.0461




Epoch 11/20, Loss: 0.0414


KeyboardInterrupt: 

In [18]:
torch.save(model, "resnet_birds_31iter.pth")

In [27]:
model = torch.load("resnet_birds_31iter.pth")

In [32]:
test_data = CustomImageDataset(image_class_labels, test_images, image_paths, dataset_path, test=True)

In [33]:
test_loader = torch.utils.data.DataLoader(test_data, shuffle=True)

In [34]:
device = 'cpu'

test_loss, test_acc = 0, 0
loss_fn = nn.CrossEntropyLoss()
model.to(device)

def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    acc = (correct / len(y_pred)) * 100 
    return acc

with torch.inference_mode(): 
    for X, y in test_loader:
        # Send data to device
        X, y = X.float().to(device), y.to(device)
            
        # 1. Forward pass
        test_pred = model(X)
        # 2. Calculate loss and accuracy
        test_loss += loss_fn(test_pred, y)
        test_acc += accuracy_fn(y_true=y,
            y_pred=test_pred.argmax(dim=1) # Go from logits -> pred labels
            )
        
    # Adjust metrics and print out
    test_loss /= len(test_loader)
    test_acc /= len(test_loader)
    print(f"Test loss: {test_loss:.5f} | Test accuracy: {test_acc:.2f}%\n")

In [56]:
transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
        transforms.CenterCrop(224)
        ])

img_id = test_images[82]
img_path = os.path.join(dataset_path, image_paths[img_id])
image = read_image(img_path, ImageReadMode.RGB )
image = transform(image)
print(image.shape)
image = (image.float()/255).to('cpu')


torch.Size([3, 224, 224])


In [57]:
m = torch.nn.Softmax(dim=1)
output = m(model(image.unsqueeze(0)))
output.argmax()

tensor(940)

In [58]:
label = int(image_class_labels[img_id])
label

946

In [29]:
test_data.__getitem__(1)[0].shape

torch.Size([3, 224, 224])

In [None]:
transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
        transforms.CenterCrop(224)
])

In [None]:
img_id = test_images[1]
img_path = os.path.join(dataset_path, image_paths[img_id])
image = read_image(img_path, ImageReadMode.RGB )
image = transform(image)
image = image/255


tensor([[[0.3608, 0.3569, 0.3569,  ..., 0.2549, 0.2510, 0.2549],
         [0.3569, 0.3608, 0.3569,  ..., 0.2706, 0.2667, 0.2667],
         [0.3569, 0.3569, 0.3529,  ..., 0.2784, 0.2784, 0.2784],
         ...,
         [0.3216, 0.2706, 0.2078,  ..., 0.3059, 0.3255, 0.3333],
         [0.3059, 0.2275, 0.2000,  ..., 0.3176, 0.3294, 0.3412],
         [0.2471, 0.2039, 0.1961,  ..., 0.3255, 0.3333, 0.3490]],

        [[0.5216, 0.5176, 0.5137,  ..., 0.2667, 0.2627, 0.2627],
         [0.5216, 0.5216, 0.5216,  ..., 0.2784, 0.2745, 0.2745],
         [0.5294, 0.5294, 0.5255,  ..., 0.2784, 0.2784, 0.2784],
         ...,
         [0.3216, 0.2941, 0.2510,  ..., 0.4118, 0.4275, 0.4392],
         [0.3176, 0.2627, 0.2157,  ..., 0.4275, 0.4314, 0.4471],
         [0.2941, 0.2314, 0.2078,  ..., 0.4392, 0.4431, 0.4588]],

        [[0.3961, 0.3961, 0.3922,  ..., 0.2549, 0.2549, 0.2549],
         [0.3961, 0.3961, 0.3961,  ..., 0.2706, 0.2706, 0.2667],
         [0.3961, 0.3961, 0.3922,  ..., 0.2784, 0.2784, 0.