In [16]:
import random
from PIL import Image
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torchvision
import mlxtend
import matplotlib.pyplot as plt
from typing import Tuple
from typing import Dict
from typing import List
from timeit import default_timer as timer
from tqdm.auto import tqdm
import gc
from numba import cuda
import shutil
import numpy as np
import os
import torch
from torch.utils.data import DataLoader, TensorDataset
import torchvision.transforms as transforms
import torch.optim as optim

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [33]:
import os
import numpy as np
from PIL import Image
import random

def load_images(data_path, target_size=(224, 224), num_images_per_class=100):
    images = []
    labels = []
    class_names = os.listdir(data_path)
    class_names.sort()
    if '.DS_Store' in class_names:
        class_names.remove('.DS_Store')

    for label, class_name in enumerate(class_names):
        class_path = os.path.join(data_path, class_name)
        image_names = os.listdir(class_path)
        if '.DS_Store' in image_names:
            image_names.remove('.DS_Store')

        # Randomly select images from each class
        selected_images = random.sample(image_names, min(len(image_names), num_images_per_class))
        for image_name in selected_images:
            image_path = os.path.join(class_path, image_name)
            try:
                image = Image.open(image_path).convert('RGB')
                image = image.resize(target_size)
                images.append(np.array(image))
                labels.append(label)
            except Exception as e:
                print(f"Error loading image {image_path}: {e}")

    return np.array(images), np.array(labels), class_names

data_path = "/content/drive/MyDrive/colab_data/animals"
images, labels, class_names = load_images(data_path)


In [6]:
print(images.shape)
print(labels.shape)

(100, 224, 224, 3)
(100,)


In [34]:
zipped_data = list(zip(labels, images))

random.seed(42)
random.shuffle(zipped_data)

labels, images = zip(*zipped_data)

labels = np.array(labels)
images = np.array(images)
print(images.shape)
print(labels.shape)

(1000, 224, 224, 3)
(1000,)


In [35]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

print(f"Training set size: {X_train.shape[0]}")
print(f"Validation set size: {X_val.shape[0]}")
print(f"Test set size: {X_test.shape[0]}")


Training set size: 600
Validation set size: 200
Test set size: 200


In [36]:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) #default values for ImageNet dataset, where model is trained
])

#transform np images to pil images.
X_train = torch.stack([transform(img) for img in X_train])
X_val = torch.stack([transform(img) for img in X_val])
X_test = torch.stack([transform(img) for img in X_test])

y_train = torch.tensor(y_train)
y_val = torch.tensor(y_val)
y_test = torch.tensor(y_test)

train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
test_dataset = TensorDataset(X_test, y_test)

batch_size = 8
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print("Data loaders prepared!")


Data loaders prepared!


In [9]:
weights = torchvision.models.VGG16_Weights.DEFAULT
auto_transforms = weights.transforms()

device = "cuda" if torch.cuda.is_available() else "cpu"
model = torchvision.models.vgg16(weights=weights).to(device)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:07<00:00, 76.0MB/s]


In [13]:
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [25]:
#OPTIONAL
num_classes = len(class_names)
model.classifier[6] = nn.Linear(model.classifier[6].in_features, num_classes) #reshape model output layer to adapt Animals dataset

In [20]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [27]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the selected device
model = model.to(device)
device

device(type='cuda')

In [31]:
import torch

def train_model(model, train_loader, criterion, optimizer, num_epochs=10, freeze=True):
    model.train()  # Set the model to training mode
    train_loss_history = []
    train_accuracy_history = []

    #freeze feature extraction layer params. only fine tunes classification layers.
    if freeze:
      for param in model.features.parameters():
            param.requires_grad = False

    for epoch in range(num_epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to('cuda'), labels.to('cuda')  # Move data to GPU
            optimizer.zero_grad()  # Zero the parameter gradients
            outputs = model(inputs)  # Forward pass
            loss = criterion(outputs, labels)  # Calculate loss
            loss.backward()  # Backward pass
            optimizer.step()  # Optimize

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(train_loader)
        epoch_accuracy = 100 * correct / total
        train_loss_history.append(epoch_loss)
        train_accuracy_history.append(epoch_accuracy)

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

    return train_loss_history, train_accuracy_history


def validate_model(model, val_loader, criterion):
    model.eval()  # Set the model to evaluation mode
    val_loss_history = []
    val_accuracy_history = []

    with torch.no_grad():
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in val_loader:
            inputs, labels = inputs.to('cuda'), labels.to('cuda')  # Move data to GPU
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_loss = running_loss / len(val_loader)
        epoch_accuracy = 100 * correct / total
        val_loss_history.append(epoch_loss)
        val_accuracy_history.append(epoch_accuracy)

        print(f'Validation Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%')

    return val_loss_history, val_accuracy_history


def test_model(model, test_loader, criterion):
    model.eval()  # Set the model to evaluation mode
    running_loss = 0.0
    correct = 0
    total = 0
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to('cuda'), labels.to('cuda')  # Move data to GPU
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_predictions.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    test_loss = running_loss / len(test_loader)
    test_accuracy = 100 * correct / total

    print(f'Test Loss: {test_loss:.4f}, Accuracy: {test_accuracy:.2f}%')

    return test_loss, test_accuracy, all_predictions, all_labels


In [37]:
num_epochs = 10
train_loss, train_accuracy = train_model(model, train_loader, criterion, optimizer, num_epochs)
val_loss, val_accuracy = validate_model(model, val_loader, criterion)
test_loss, test_accuracy, predictions, true_labels = test_model(model, test_loader, criterion)

Epoch 1/10, Loss: 1.4580, Accuracy: 54.00%
Epoch 2/10, Loss: 0.3807, Accuracy: 91.33%
Epoch 3/10, Loss: 0.1741, Accuracy: 97.17%
Epoch 4/10, Loss: 0.1105, Accuracy: 98.17%
Epoch 5/10, Loss: 0.0643, Accuracy: 99.50%
Epoch 6/10, Loss: 0.0462, Accuracy: 99.50%
Epoch 7/10, Loss: 0.0361, Accuracy: 99.83%
Epoch 8/10, Loss: 0.0237, Accuracy: 100.00%
Epoch 9/10, Loss: 0.0242, Accuracy: 99.83%
Epoch 10/10, Loss: 0.0217, Accuracy: 99.83%
Validation Loss: 0.1673, Accuracy: 94.50%
Test Loss: 0.1660, Accuracy: 94.00%
