# Image Classification

In [1]:
# Install required Python packages (quiet mode)
!pip install -q torch torchvision scikit-learn

In [2]:
# Import required libraries
import os, zipfile, shutil
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
import torch.optim as optim
from sklearn.metrics import classification_report

In [3]:
# Pick the compute device: use CUDA when it is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [4]:
# Extract the ZIP dataset once and set DATASET_PATH.
# The archive is only unpacked when the target folder does not exist yet,
# so re-running this cell does not repeat the extraction.
DATASET_PATH = "/content/dataset"

if not os.path.isdir(DATASET_PATH):
    with zipfile.ZipFile("/content/dataset.zip", 'r') as zip_ref:
        zip_ref.extractall("/content")

# List the extracted entries (one folder per class is expected by ImageFolder)
print(os.listdir(DATASET_PATH))

['accordion', 'gramophone', 'windsor_chair', 'pyramid', 'pizza', 'sea_horse', 'crocodile', 'camera', 'emu', 'crocodile_head', 'cup', 'bass', 'dollar_bill', 'nautilus', 'hedgehog']


In [5]:
# Split dataset into training (images 0001-0040) and test (remaining) subsets based on filename

def split_by_filename(dataset, train_range=(1, 40), prefix='image_'):
    """
    Split the dataset into training and test subsets based on the image filenames.

    The number embedded in each filename (e.g. 'image_0001.jpg' → 1) decides the split:
    images whose number lies inside `train_range` (inclusive on both ends) go to the
    training subset, all other images go to the test subset.

    Args:
        dataset (torchvision.datasets.ImageFolder): The dataset to be split. Only its
            `samples` attribute (a list of `(path, label)` pairs) is read.
        train_range (tuple, optional): `(first, last)` image numbers, inclusive, that form
            the training subset. Defaults to (1, 40).
        prefix (str, optional): Text removed from the filename stem before the remainder
            is parsed as an integer. Defaults to 'image_'.

    Returns:
        tuple: A tuple containing two Subset objects. The first Subset contains the training images,
        and the second Subset contains the test images. Both wrap the same `dataset` object.

    Raises:
        ValueError: If a filename does not reduce to an integer after removing `prefix`.
    """
    first, last = train_range
    train_idx, test_idx = [], []
    # Iterate over each image in the dataset
    for idx, (path, _) in enumerate(dataset.samples):
        # Filename without directory and extension, e.g. 'image_0001'
        stem = os.path.splitext(os.path.basename(path))[0]
        try:
            # Extract the numeric part, e.g. 'image_0001' → 1
            num = int(stem.replace(prefix, ''))
        except ValueError as exc:
            # Report which file broke the naming convention instead of a bare int() error
            raise ValueError(
                f"Cannot extract an image number from {path!r}; "
                f"expected a name like '{prefix}0001.jpg'"
            ) from exc
        # Numbers inside the inclusive range are training images, the rest are test images
        if first <= num <= last:
            train_idx.append(idx)
        else:
            test_idx.append(idx)
    # Return the training and test subsets of the dataset
    return Subset(dataset, train_idx), Subset(dataset, test_idx)

In [6]:
# Training pipeline:
#   - resize all images to 224x224
#   - random horizontal flip (data augmentation, training only)
#   - convert PIL Image to tensor
#   - normalize with the mean and standard deviation of ImageNet images
train_tfms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

# Test pipeline: same as training, but deterministic (no random flip)
test_tfms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

# Load the dataset from the specified path with the training transforms
full_dataset = datasets.ImageFolder(DATASET_PATH, transform=train_tfms)

# Compute the train/test split from the filenames
train_set, test_split = split_by_filename(full_dataset)

# A Subset only stores indices and shares its parent dataset, so assigning
# `test_set.dataset.transform = test_tfms` would also replace the transform used by
# `train_set` and silently disable the training augmentation.
# Give the test subset its own ImageFolder so each split keeps its own transform.
eval_dataset = datasets.ImageFolder(DATASET_PATH, transform=test_tfms)
# The split indices are only valid if both scans of the folder list the same files in the same order
assert eval_dataset.samples == full_dataset.samples, "dataset folder changed between scans"
test_set = Subset(eval_dataset, test_split.indices)

# Every image must end up in exactly one of the two subsets
assert len(train_set) + len(test_set) == len(full_dataset)

# Create data loaders for the train and test sets
# Load images in batches of size 32; only the training data is shuffled
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False)

# Names of the classes in the dataset (one per sub-folder)
class_names = full_dataset.classes

# Number of classes, derived from the dataset instead of being hard-coded
num_classes = len(class_names)

In [7]:
# Utility functions: training loop and evaluation (accuracy & classification report)

def train_model(model, epochs=10, lr=1e-4):
    """
    Train `model` on the batches of the global `train_loader`.

    The model is moved to the global `device`, optimized with Adam over all of its
    parameters against a cross-entropy loss, and the mean per-batch loss is printed
    at the end of every epoch.

    Args:
        model (nn.Module): The model to be trained.
        epochs (int, optional): The number of epochs to train the model for. Defaults to 10.
        lr (float, optional): The learning rate for the optimizer. Defaults to 1e-4.
    """
    model.to(device)
    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(model.parameters(), lr=lr)

    for epoch_no in range(1, epochs + 1):
        # Training mode (re-applied each epoch)
        model.train()
        loss_total = 0
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            # Standard step: clear gradients, forward pass, backward pass, update
            adam.zero_grad()
            logits = model(images)
            batch_loss = loss_fn(logits, labels)
            batch_loss.backward()
            adam.step()

            loss_total += batch_loss.item()

        # Average of the per-batch losses over the epoch
        mean_loss = loss_total / len(train_loader)
        print(f"Epoch [{epoch_no}/{epochs}], Loss: {mean_loss:.4f}")

def evaluate_model(model):
    """
    Evaluates the given model on the test data and prints the classification report.

    The model is run over the global `test_loader` on the global `device`; the report
    (precision, recall, F1 and support per class) uses the global `class_names`.

    Args:
        model (nn.Module): The model to be evaluated.
    """
    # Make sure the model is on the same device as the inputs, so evaluation also
    # works for a model that was not passed through train_model first
    model.to(device)
    model.eval()  # Set model to evaluation mode
    y_true, y_pred = [], []  # True labels and predicted labels over the whole test set
    with torch.no_grad():  # Disable gradient computation to save memory
        for x, y in test_loader:  # Iterate over the test data batch by batch
            x = x.to(device)  # Move the data to the device
            preds = torch.argmax(model(x), dim=1)  # Class index with the highest score
            y_true.extend(y.tolist())  # Add the true labels to the list
            y_pred.extend(preds.tolist())  # Add the predicted labels to the list

    # Pass the label indices explicitly so every name in `class_names` is matched to its
    # own index even when a class is absent from both the labels and the predictions
    print(classification_report(
        y_true,
        y_pred,
        labels=list(range(len(class_names))),
        target_names=class_names,
        digits=4,
    ))  # Print the classification report

# ===================== Q2 =====================

### Finetuned VGG19

In [None]:
# Load a VGG19 model with ImageNet-pretrained weights.
# The `weights=` enum replaces the deprecated `pretrained=True` flag;
# IMAGENET1K_V1 is the checkpoint that `pretrained=True` loaded.
vgg19_ft = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1)

# Set all the parameters in the features to require gradients
# This allows us to update the convolutional weights during training (full fine-tuning)
for param in vgg19_ft.features.parameters():
    param.requires_grad = True

# Replace the last two entries of the classifier with new linear layers
# The first linear layer has 4096 inputs and 1024 outputs
# The second linear layer has 1024 inputs and num_classes outputs
# NOTE(review): in torchvision's VGG, classifier[5] is a Dropout layer, so this removes
# the dropout and stacks two Linear layers with no activation in between — confirm intended.
vgg19_ft.classifier[5] = nn.Linear(4096, 1024)
vgg19_ft.classifier[6] = nn.Linear(1024, num_classes)

# Train the model for 10 epochs with a learning rate of 1e-5
train_model(vgg19_ft, epochs=10, lr=1e-5)

# Evaluate the trained model's performance on the test set
evaluate_model(vgg19_ft)



Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth


100%|██████████| 548M/548M [00:08<00:00, 70.2MB/s]


Epoch [1/10], Loss: 2.1129
Epoch [2/10], Loss: 0.3782
Epoch [3/10], Loss: 0.0777
Epoch [4/10], Loss: 0.0243
Epoch [5/10], Loss: 0.0069
Epoch [6/10], Loss: 0.0069
Epoch [7/10], Loss: 0.0047
Epoch [8/10], Loss: 0.0020
Epoch [9/10], Loss: 0.0008
Epoch [10/10], Loss: 0.0008
                precision    recall  f1-score   support

     accordion     1.0000    1.0000    1.0000        15
          bass     1.0000    1.0000    1.0000        14
        camera     1.0000    1.0000    1.0000        10
     crocodile     0.8000    0.8000    0.8000        10
crocodile_head     0.8182    0.8182    0.8182        11
           cup     1.0000    0.9412    0.9697        17
   dollar_bill     1.0000    1.0000    1.0000        12
           emu     1.0000    1.0000    1.0000        13
    gramophone     1.0000    0.9091    0.9524        11
      hedgehog     1.0000    0.9286    0.9630        14
      nautilus     0.9375    1.0000    0.9677        15
         pizza     1.0000    1.0000    1.0000        13
