In [1]:
!unzip -q /content/img_cls_scenes_classification.zip

In [2]:
import os
import random

# PyTorch imports
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Other libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

# Scikit-learn import
from sklearn.model_selection import train_test_split

In [3]:
def set_seed(seed):
    """
    Sets the seed for reproducibility across various libraries.
    """
    # Python's random module
    random.seed(seed)

    # NumPy
    np.random.seed(seed)

    # PyTorch
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # If using multiple GPUs

    # Ensures reproducibility in PyTorch's backend operations
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


# Set the seed value
seed = 59
set_seed(seed)

In [4]:
# Define root directory and subdirectories for training and testing
root_dir = 'scenes_classification'
train_dir = os.path.join(root_dir, 'train')
test_dir = os.path.join(root_dir, 'val')

# Create a dictionary mapping label indices to class names
classes = {
    label_idx: class_name
    for label_idx, class_name in enumerate(sorted(os.listdir(train_dir)))
}

# Initialize datasets
X_train = []
y_train = []
X_test = []
y_test = []

# Loop through train and test directories to populate datasets
for dataset_path in [train_dir, test_dir]:
    for label_idx, class_name in classes.items():
        class_dir = os.path.join(dataset_path, class_name)  # Directory for each class
        for img_filename in os.listdir(class_dir):
            img_path = os.path.join(class_dir, img_filename)  # Full path to the image
            if 'train' in dataset_path:
                X_train.append(img_path)  # Append image path to training data
                y_train.append(label_idx)  # Append label index to training labels
            else:
                X_test.append(img_path)  # Append image path to test data
                y_test.append(label_idx)  # Append label index to test labels

In [5]:
# Define split parameters
seed = 0           # Random seed for reproducibility
val_size = 0.2     # Proportion of the training data to use for validation
is_shuffle = True  # Whether to shuffle the data before splitting

# Perform the train-validation split
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train,       # Input data and labels
    test_size=val_size,     # Size of the validation set
    random_state=seed,      # Seed for reproducibility
    shuffle=is_shuffle      # Shuffle data before splitting
)

In [6]:
class ScenesDataset(Dataset):
    """
    Custom Dataset for loading scene classification data.
    """
    def __init__(self, X, y, transform=None):
        """
        Initialize the dataset.

        Args:
            X (list): List of image file paths.
            y (list): List of labels corresponding to the images.
            transform (callable, optional): Transformations to apply to the images.
        """
        self.img_paths = X
        self.labels = y
        self.transform = transform

    def __len__(self):
        """
        Returns the total number of samples.
        """
        return len(self.img_paths)

    def __getitem__(self, idx):
        """
        Retrieve an image and its label by index.

        Args:
            idx (int): Index of the sample to retrieve.

        Returns:
            tuple: (transformed image, label)
        """
        # Load image and convert to RGB
        img_path = self.img_paths[idx]
        img = Image.open(img_path).convert("RGB")

        # Apply transformations if any
        if self.transform:
            img = self.transform(img)

        # Return the image and its corresponding label
        return img, self.labels[idx]

In [7]:
def transform(img, img_size=(224, 224)):
    """
    Transforms an input image to a tensor suitable for deep learning models.

    Args:
        img (PIL.Image.Image): Input image.
        img_size (tuple): Target size for the image (width, height).

    Returns:
        torch.Tensor: Normalized and resized image tensor.
    """
    # Resize the image to the specified dimensions
    img = img.resize(img_size)

    # Convert the image to a NumPy array and retain only the RGB channels
    img = np.array(img)[..., :3]

    # Convert the NumPy array to a PyTorch tensor and rearrange dimensions
    # Channels are moved to the first position: (H, W, C) -> (C, H, W)
    img = torch.tensor(img).permute(2, 0, 1).float()

    # Normalize the image to [0, 1] range
    normalized_img = img / 255.0

    return normalized_img

In [8]:
# Create the training dataset
train_dataset = ScenesDataset(
    X_train, y_train,
    transform=transform  # Apply the transform function to the training images
)

# Create the validation dataset
val_dataset = ScenesDataset(
    X_val, y_val,
    transform=transform  # Apply the transform function to the validation images
)

# Create the test dataset
test_dataset = ScenesDataset(
    X_test, y_test,
    transform=transform  # Apply the transform function to the test images
)

In [9]:
from torch.utils.data import DataLoader

# Batch sizes
train_batch_size = 64  # Batch size for training
test_batch_size = 8    # Batch size for validation and testing

# DataLoader for training dataset
train_loader = DataLoader(
    train_dataset,
    batch_size=train_batch_size,
    shuffle=True  # Shuffle the training data for better generalization
)

# DataLoader for validation dataset
val_loader = DataLoader(
    val_dataset,
    batch_size=test_batch_size,
    shuffle=False  # No need to shuffle validation data
)

# DataLoader for test dataset
test_loader = DataLoader(
    test_dataset,
    batch_size=test_batch_size,
    shuffle=False  # No need to shuffle test data
)

In [10]:
class BottleneckBlock(nn.Module):
    """
    A bottleneck block for DenseNet.
    Applies two convolutional layers with BatchNorm and ReLU, followed by concatenation.
    """
    def __init__(self, in_channels, growth_rate):
        super(BottleneckBlock, self).__init__()
        # First BatchNorm and 1x1 convolution
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv1 = nn.Conv2d(
            in_channels,
            4 * growth_rate,
            kernel_size=1,
            bias=False
        )

        # Second BatchNorm and 3x3 convolution
        self.bn2 = nn.BatchNorm2d(4 * growth_rate)
        self.conv2 = nn.Conv2d(
            4 * growth_rate,
            growth_rate,
            kernel_size=3,
            padding=1,
            bias=False
        )

        # Activation function
        self.relu = nn.ReLU()

    def forward(self, x):
        """
        Forward pass for the bottleneck block.
        Args:
            x (torch.Tensor): Input tensor.
        Returns:
            torch.Tensor: Output tensor with concatenated features.
        """
        res = x.clone().detach()  # Preserve input for concatenation
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv1(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = torch.cat([res, x], dim=1)  # Concatenate along the channel dimension
        return x

class DenseBlock(nn.Module):
    """
    A dense block containing multiple bottleneck blocks.
    Each layer in the block receives input from all previous layers.
    """
    def __init__(self, num_layers, in_channels, growth_rate):
        super(DenseBlock, self).__init__()
        # Add BottleneckBlock layers
        layers = [
            BottleneckBlock(in_channels + i * growth_rate, growth_rate)
            for i in range(num_layers)
        ]
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """
        Forward pass for the dense block.
        Args:
            x (torch.Tensor): Input tensor.
        Returns:
            torch.Tensor: Output tensor with features from all layers concatenated.
        """
        return self.block(x)

In [11]:
class DenseNet(nn.Module):
    """
    DenseNet architecture implementation.
    Combines DenseBlocks with transition layers for efficient feature reuse.
    """
    def __init__(self, num_blocks, growth_rate, num_classes):
        """
        Initialize the DenseNet model.

        Args:
            num_blocks (list): Number of layers in each dense block.
            growth_rate (int): Growth rate for the number of channels.
            num_classes (int): Number of output classes.
        """
        super(DenseNet, self).__init__()

        # Initial convolution layer
        self.conv1 = nn.Conv2d(
            in_channels=3,
            out_channels=2 * growth_rate,
            kernel_size=7,
            padding=3,
            stride=2,
            bias=False
        )
        self.bn1 = nn.BatchNorm2d(2 * growth_rate)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Dense blocks and transition layers
        self.dense_blocks = nn.ModuleList()
        in_channels = 2 * growth_rate

        for i, num_layers in enumerate(num_blocks):
            # Add a dense block
            self.dense_blocks.append(DenseBlock(num_layers, in_channels, growth_rate))
            in_channels += num_layers * growth_rate  # Update channel count

            # Add a transition layer if not the last block
            if i != len(num_blocks) - 1:
                out_channels = in_channels // 2
                self.dense_blocks.append(nn.Sequential(
                    nn.BatchNorm2d(in_channels),
                    nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
                    nn.AvgPool2d(kernel_size=2, stride=2)
                ))
                in_channels = out_channels  # Update channel count

        # Final layers
        self.bn2 = nn.BatchNorm2d(in_channels)
        self.pool2 = nn.AvgPool2d(kernel_size=7)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(in_channels, num_classes)

    def forward(self, x):
        """
        Forward pass through the DenseNet model.

        Args:
            x (torch.Tensor): Input tensor.
        Returns:
            torch.Tensor: Output tensor after passing through DenseNet.
        """
        # Initial convolution
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.pool1(x)

        # Dense blocks and transition layers
        for block in self.dense_blocks:
            x = block(x)

        # Final batch normalization and pooling
        x = self.bn2(x)
        x = self.relu(x)
        x = self.pool2(x)

        # Flatten and fully connected layer
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [12]:
# Determine the number of classes
n_classes = len(list(classes.keys()))

# Select the device: CUDA if available, otherwise CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Initialize the DenseNet model
model = DenseNet(
    [6, 12, 24, 16],  # Number of layers in each dense block
    growth_rate=32,   # Growth rate for feature maps
    num_classes=n_classes  # Number of output classes
).to(device)  # Move the model to the selected device

In [13]:
lr = 1e-2
epochs = 15

criteration = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

In [14]:
def fit(model, train_loader, val_loader, optimizer, criterion, epochs, device):
    """
    Trains the given model using the provided data and training parameters.

    Args:
        model: The neural network model to be trained.
        train_loader: DataLoader for the training dataset.
        val_loader: DataLoader for the validation dataset.
        optimizer: Optimization algorithm used for updating model weights.
        criterion: Loss function to calculate the training error.
        epochs: Number of training iterations over the entire dataset.
        device: Device (CPU or GPU) to perform training on.

    Returns:
        Tuple[List[float], List[float]]: Lists containing training and validation losses
                                         across epochs.
    """
    train_losses = []
    val_losses = []

    # Training loop
    for epoch in range(epochs):
        model.train()  # Set the model to training mode
        epoch_loss = 0

        # Iterate over batches in the training dataset
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()  # Reset gradients
            loss.backward()        # Calculate gradients
            optimizer.step()       # Update model weights

            epoch_loss += loss.item() * images.size(0)  # Accumulate batch loss

        # Calculate average training loss for the epoch
        epoch_loss /= len(train_loader.dataset)
        train_losses.append(epoch_loss)

        # Evaluate on the validation dataset
        val_loss = evaluate(model, val_loader, criterion, device)[0]
        val_losses.append(val_loss)

        # Print epoch summary
        print(f'Epoch {epoch + 1}/{epochs}, '
              f'Training Loss: {epoch_loss:.4f}, '
              f'Validation Loss: {val_loss:.4f}')

    return train_losses, val_losses

def evaluate(model, data_loader, criterion, device):
    """
    Evaluates the model on the given dataset.

    Args:
        model: The neural network model to evaluate.
        data_loader: DataLoader for the evaluation dataset.
        criterion: Loss function to calculate the evaluation error.
        device: Device (CPU or GPU) to perform evaluation on.

    Returns:
        Tuple[float, float]: Average loss and accuracy on the dataset.
    """
    model.eval()  # Set the model to evaluation mode
    total_loss = 0
    correct = 0
    total = 0

    # No need to calculate gradients during evaluation
    with torch.no_grad():
        # Iterate over batches in the dataset
        for images, labels in data_loader:
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            total_loss += loss.item() * images.size(0)  # Accumulate batch loss

            # Calculate predictions and accuracy
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Calculate average loss and accuracy
    avg_loss = total_loss / len(data_loader.dataset)
    accuracy = correct / total

    return avg_loss, accuracy

In [15]:
train_losses, val_losses = fit(model, train_loader, val_loader, optimizer, criteration, epochs, device)

Epoch 1/15, Training Loss: 1.4434, Validation Loss: 1.2657
Epoch 2/15, Training Loss: 1.2100, Validation Loss: 1.1482
Epoch 3/15, Training Loss: 1.1276, Validation Loss: 1.0951
Epoch 4/15, Training Loss: 1.0720, Validation Loss: 1.0341
Epoch 5/15, Training Loss: 1.0205, Validation Loss: 0.9810
Epoch 6/15, Training Loss: 0.9743, Validation Loss: 0.9365
Epoch 7/15, Training Loss: 0.9335, Validation Loss: 0.8877
Epoch 8/15, Training Loss: 0.8882, Validation Loss: 0.8493
Epoch 9/15, Training Loss: 0.8524, Validation Loss: 0.8183
Epoch 10/15, Training Loss: 0.8240, Validation Loss: 0.8068
Epoch 11/15, Training Loss: 0.7868, Validation Loss: 0.7810
Epoch 12/15, Training Loss: 0.7576, Validation Loss: 0.7439
Epoch 13/15, Training Loss: 0.7301, Validation Loss: 0.7222
Epoch 14/15, Training Loss: 0.6906, Validation Loss: 0.6791
Epoch 15/15, Training Loss: 0.6633, Validation Loss: 0.6657


In [17]:
# Evaluate the model on the validation dataset
val_loss, val_acc = evaluate(
    model,        # Trained model
    val_loader,   # Validation DataLoader
    criteration,    # Loss function # Changed from criterion to criteration
    device        # Device (CPU or GPU)
)

# Evaluate the model on the test dataset
test_loss, test_acc = evaluate(
    model,        # Trained model
    test_loader,  # Test DataLoader
    criteration,    # Loss function # Changed from criterion to criteration
    device        # Device (CPU or GPU)
)

# Print the evaluation results
print('Evaluation on val/test dataset:')
print('Validation Accuracy:', val_acc)
print('Test Accuracy:', test_acc)

Evaluation on val/test dataset:
Validation Accuracy: 0.7623797648735304
Test Accuracy: 0.759
