In [1]:
"""
Imports for building a deep learning model using PyTorch, Torchvision, and other essential libraries.

- PyTorch: Provides deep learning functionality, including neural networks, optimization, and custom datasets.
- Torchvision: Contains utilities for vision-based tasks, including datasets and image transformations.
- Image handling and visualization: Libraries for handling and displaying images, including OpenCV.
- Data manipulation: Libraries for handling data, file processing, and randomization.
"""

# PyTorch core imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader, Dataset, ConcatDataset, SubsetRandomSampler  # Dataset for custom dataset creation
from torchvision.models import ResNet18_Weights

# CUDA
from torch.cuda import amp

# Torchvision imports for vision-based tasks
import torchvision
from torchvision import transforms, models, datasets

# Image handling and visualization
import matplotlib.pyplot as plt
from PIL import Image
import cv2  # OpenCV for image processing
from tqdm import tqdm # For progress bars 
from sklearn.metrics import classification_report

# Data manipulation and file handling
import numpy as np
import pandas as pd
from collections import Counter  # Counting utility for analyzing data
import glob  # File path handling
import os  # Operating system interface for directory management

# Randomization
from random import shuffle, seed  # Random shuffling and seeding for reproducibility

# Optimization
from torch.optim import lr_scheduler

# Tensorboard
from torch.utils.tensorboard import SummaryWriter

In [2]:
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

In [3]:
torch.cuda.empty_cache()  # Free unused memory

In [4]:
# Directories
TRAIN_DIR = './data/data_256'
TEST_DIR = './data/test_256'
VAL_DIR = './data/val_256'

In [5]:
def count_files_in_dir(directory):
    '''
    Counts the number of files in each subdirectory of the given directory,
    returning a DataFrame with folder names, subfolder names, and their image counts.

    Type 'quit' to exit, or 'default' to display all folders.

    Args:
        directory (str): The path to the main directory containing subfolders.

    Returns:
        df (pd.DataFrame): DataFrame containing folder names, subfolder names, and image counts.
    '''
    data = []

    # Iterate through subdirectories (folders)
    for folder in os.listdir(directory):
        folder_path = os.path.join(directory, folder)

        # Check if it's a directory (i.e., a folder)
        if os.path.isdir(folder_path):
            # Iterate through subdirectories (subfolders)
            for subdir in os.listdir(folder_path):
                subdir_path = os.path.join(folder_path, subdir)

                # Check if it's a directory (i.e., a subfolder)
                if os.path.isdir(subdir_path):
                    # Count number of files in the subdirectory
                    file_count = len(os.listdir(subdir_path))
                    data.append({'Folder': folder, 'Subfolder': subdir, 'Image Count': file_count})

    # Create a DataFrame from the collected data
    df = pd.DataFrame(data)

    user_input = input("Enter your choice: ").strip().lower()

    if user_input == 'quit':
        print("Exiting the program.")
        return None  # or you can raise an exception or return a specific value if needed
    elif user_input == 'default':
        pd.set_option('display.max_rows', None)  # Show all rows in DataFrame
        return df
    elif user_input.isalpha() and len(user_input) == 1:
        # Display the selected folder's subfolders
        filtered_df = df[df['Folder'].str.lower() == user_input]  # Filter by folder
        if not filtered_df.empty:
            pd.set_option('display.max_rows', None)  # Show all rows in DataFrame
            return filtered_df
        else:
            print(f"No subfolders found for folder: {user_input}")
            return None  # or handle this case as needed
    else:
        print("Invalid input. Please enter a valid folder letter or 'quit'.")
        return None  # or handle this case as needed

In [6]:
# Count files in train and test directories
train_class_counts = count_files_in_dir(TRAIN_DIR)
train_class_counts

Exiting the program.


In [7]:
class CustomDataset(Dataset):
    def __init__(self, root_dir, transform=None, labeled=False):
        self.root_dir = root_dir
        self.transform = transform
        self.images = []
        self.labels = []
        self.labeled = labeled
        
        # Load all images and labels if available
        for root, _, filenames in os.walk(root_dir):
            for filename in filenames:
                if filename.endswith(('.png', '.jpg', '.jpeg')):
                    img_path = os.path.join(root, filename)
                    self.images.append(img_path)
                    if labeled:
                        # Assuming label is the parent directory name
                        label = os.path.basename(os.path.dirname(img_path))
                        self.labels.append(label)
        
    def __getitem__(self, idx):
        img_path = self.images[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        if self.labeled:
            label = self.labels[idx]
            return image, label
        else:
            return image
        
    def __len__(self):
        # Return the total number of samples in the dataset
        return len(self.images)

def load_preprocess_resnet18(train_dir=TRAIN_DIR, val_dir=VAL_DIR, test_dir=TEST_DIR, batch_size=64):
    '''
    Load and preprocess the data for ResNet50 by iterating through directories
    a-z in the train_dir and applying ImageFolder to each one. Also loads 
    validation and test data.

    Args:
        train_dir (str): Path to the training data directory.
        val_dir (str): Path to the validation data directory.
        test_dir (str): Path to the test data directory.
        batch_size (int): The batch size for the DataLoader.

    Returns:
        tuple: DataLoader for training, validation, and test datasets, and list of class names.
    '''

    # Define the transformations for ResNet50
    transform = transforms.Compose([
        transforms.Resize((224, 224)),  # Resize images to 224x224
        transforms.ToTensor(),  # Convert images to PyTorch tensors
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with ImageNet stats
    ])

    def create_dataset_from_folders(parent_dir):
        """
        Iterates through directories (a-z) at the same level in parent_dir 
        and applies ImageFolder to each directory.
        """
        datasets_list = []
        class_names = set()  # Use a set to avoid duplicates
        for folder in os.listdir(parent_dir):  # Loop through folders a-z
            folder_path = os.path.join(parent_dir, folder)

            if os.path.isdir(folder_path):
                # Apply ImageFolder on each folder (a, b, c, etc.)
                folder_dataset = datasets.ImageFolder(root=folder_path, transform=transform)
                datasets_list.append(folder_dataset)
                # Collect class names from this folder's dataset
                class_names.update(folder_dataset.classes)

        return ConcatDataset(datasets_list), sorted(list(class_names))  # Concatenate all folder datasets and return class names

    # Create the training dataset and get the unique class names
    train_dataset, class_names = create_dataset_from_folders(train_dir)

    # Create DataLoader for the training dataset
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Create DataLoader for the validation dataset using CustomDataset
    val_dataset = CustomDataset(root_dir=val_dir, transform=transform)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Create DataLoader for the test dataset using CustomDataset
    test_dataset = CustomDataset(root_dir=test_dir, transform=transform)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader, class_names

In [8]:
def print_loader_samples(loader, loader_name, num_batches=1):
    print(f"\n{loader_name}:")
    for i, data in enumerate(loader):
        print(f"Batch {i+1}:")
        if isinstance(data, tuple) or isinstance(data, list):
            images, labels = data
            print(f"Images shape: {images.shape}")
            print(f"Labels: {labels}")
        else:
            images = data
            print(f"Images shape: {images.shape}")
            print("Labels: None")
        if i+1 == num_batches:
            break

# Assuming you have already loaded your data loaders
train_loader, val_loader, test_loader, class_names = load_preprocess_resnet18()

# Print the class names
print("Class Names:", class_names)

# Print samples from each loader
print_loader_samples(train_loader, "Train Loader", num_batches=1)
print_loader_samples(val_loader, "Validation Loader", num_batches=1)
print_loader_samples(test_loader, "Test Loader", num_batches=1)

Class Names: ['airfield', 'airplane_cabin', 'airport_terminal', 'alcove', 'alley', 'amphitheater', 'amusement_arcade', 'amusement_park', 'apartment_building', 'aquarium', 'aqueduct', 'arcade', 'arch', 'archaelogical_excavation', 'archive', 'arena', 'army_base', 'art_gallery', 'art_school', 'art_studio', 'artists_loft', 'assembly_line', 'athletic_field', 'atrium', 'attic', 'auditorium', 'auto_factory', 'auto_showroom', 'badlands', 'bakery', 'balcony', 'ball_pit', 'ballroom', 'bamboo_forest', 'bank_vault', 'banquet_hall', 'bar', 'barn', 'barndoor', 'baseball_field', 'basement', 'basketball_court', 'bathroom', 'bazaar', 'beach', 'beach_house', 'beauty_salon', 'bedchamber', 'bedroom', 'beer_garden', 'beer_hall', 'berth', 'biology_laboratory', 'boardwalk', 'boat_deck', 'boathouse', 'bookstore', 'booth', 'botanical_garden', 'bow_window', 'bowling_alley', 'boxing_ring', 'bridge', 'building_facade', 'bullring', 'burial_chamber', 'bus_interior', 'bus_station', 'butchers_shop', 'butte', 'cabin',

In [9]:
class ResNet18_CNN(nn.Module):
    def __init__(self, num_classes=343, no=128, kernel_size=1, freeze_resnet=True):
        super(ResNet18_CNN, self).__init__()
        
        # Load the pre-trained ResNet18 model with default weights
        self.resnet18 = models.resnet18(weights=ResNet18_Weights.DEFAULT)
        
        if freeze_resnet:
            # Freeze all ResNet18 layers
            for param in self.resnet18.parameters():
                param.requires_grad = False
        
        # Unfreeze the last block (layer4) for fine-tuning
        for param in self.resnet18.layer4.parameters():
            param.requires_grad = True
        
        # Remove the original fully connected layer and the average pool
        self.features = nn.Sequential(*list(self.resnet18.children())[:-2])  # Output: [batch, 512, 7, 7]
        
        # Add custom convolutional layer
        self.conv = nn.Conv2d(in_channels=512, out_channels=no, kernel_size=kernel_size, padding=kernel_size//2)
        self.relu = nn.ReLU()
        
        # Add Dropout for regularization
        self.dropout = nn.Dropout(p=0.5)
        
        # Add global average pooling and a fully connected layer
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(no, num_classes)
    
    def forward(self, x):
        x = self.features(x)          # [batch, 512, 7, 7]
        x = self.conv(x)              # [batch, no, 7, 7]
        x = self.relu(x)
        x = self.dropout(x)           # Apply Dropout
        x = self.avgpool(x)           # [batch, no, 1, 1]
        x = torch.flatten(x, 1)       # [batch, no]
        x = self.fc(x)                # [batch, num_classes]
        return x

In [10]:
def train_val_cnn(num_epochs=20, learning_rate=0.001, patience=5, max_batches_per_epoch=1000):
    model = ResNet18_CNN()
    model = model.to(device)

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True)
    scaler = amp.GradScaler()

    writer = SummaryWriter('runs/resnet18_experiment')

    best_val_accuracy = 0.0
    best_model_state = None
    epochs_no_improve = 0

    num_train_samples = len(train_loader.dataset)
    subset_size = max_batches_per_epoch * train_loader.batch_size  # e.g., 1000 * 128 = 128,000

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        # Generate a random subset of indices for this epoch
        indices = np.random.choice(num_train_samples, subset_size, replace=False)
        sampler = SubsetRandomSampler(indices)
        train_subset_loader = DataLoader(train_loader.dataset, batch_size=train_loader.batch_size, sampler=sampler, num_workers=4, pin_memory=True)

        progress_bar = tqdm(enumerate(train_subset_loader), total=max_batches_per_epoch, desc=f"Epoch {epoch+1}/{num_epochs} - Training")

        for batch_idx, (inputs, targets) in progress_bar:
            if batch_idx >= max_batches_per_epoch:
                break  # This condition is redundant here but kept for safety

            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()

            with amp.autocast():
                outputs = model(inputs)
                loss = criterion(outputs, targets)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

            avg_loss = running_loss / (batch_idx + 1)
            avg_accuracy = 100 * correct / total

            progress_bar.set_postfix(loss=f"{avg_loss:.4f}", acc=f"{avg_accuracy:.2f}%")

        avg_train_loss = running_loss / max_batches_per_epoch
        train_accuracy = 100 * correct / total

        # Validation phase
        model.eval()
        running_val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            num_val_batches = len(val_loader)
            val_progress_bar = tqdm(enumerate(val_loader), total=num_val_batches, desc=f"Epoch {epoch+1}/{num_epochs} - Validation")
            for batch_idx, (inputs, targets) in val_progress_bar:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                val_loss = criterion(outputs, targets)

                running_val_loss += val_loss.item()
                _, val_predicted = torch.max(outputs.data, 1)
                val_total += targets.size(0)
                val_correct += (val_predicted == targets).sum().item()

                avg_val_loss = running_val_loss / (batch_idx + 1)
                avg_val_accuracy = 100 * val_correct / val_total

                val_progress_bar.set_postfix(val_loss=f"{avg_val_loss:.4f}", val_acc=f"{avg_val_accuracy:.2f}%")

        avg_val_loss = running_val_loss / len(val_loader)
        val_accuracy = 100 * val_correct / val_total

        # Step the scheduler
        scheduler.step(avg_val_loss)

        # Check for improvement
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            best_model_state = model.state_dict()
            torch.save(best_model_state, 'best_model.pth')  # Save the best model
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        # Log to TensorBoard
        writer.add_scalar('Loss/Train', avg_train_loss, epoch)
        writer.add_scalar('Loss/Validation', avg_val_loss, epoch)
        writer.add_scalar('Accuracy/Train', train_accuracy, epoch)
        writer.add_scalar('Accuracy/Validation', val_accuracy, epoch)

        print(f'Epoch [{epoch+1}/{num_epochs}], '
              f'Train Loss: {avg_train_loss:.4f}, '
              f'Train Accuracy: {train_accuracy:.2f}%, '
              f'Val Loss: {avg_val_loss:.4f}, '
              f'Val Accuracy: {val_accuracy:.2f}%')

        # Early stopping
        if epochs_no_improve >= patience:
            print(f'Early stopping triggered after {epoch+1} epochs.')
            break

    writer.close()
    print("Training complete. Best Validation Accuracy: {:.2f}%".format(best_val_accuracy))

    # Load the best model
    model.load_state_dict(torch.load('best_model.pth'))
    return model

def test_model(model, test_loader, criterion, device, class_names):
    model.eval()
    running_test_loss = 0.0
    test_correct = 0
    test_total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        num_test_batches = len(test_loader)
        test_progress_bar = tqdm(enumerate(test_loader), total=num_test_batches, desc="Testing")
        for batch_idx, (inputs, targets) in test_progress_bar:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            test_loss = criterion(outputs, targets)

            running_test_loss += test_loss.item()
            _, test_predicted = torch.max(outputs.data, 1)
            test_total += targets.size(0)
            test_correct += (test_predicted == targets).sum().item()
            all_preds.extend(test_predicted.cpu().numpy())
            all_labels.extend(targets.cpu().numpy())

            avg_test_loss = running_test_loss / (batch_idx + 1)
            avg_test_accuracy = 100 * test_correct / test_total

            test_progress_bar.set_postfix(test_loss=f"{avg_test_loss:.4f}", test_acc=f"{avg_test_accuracy:.2f}%")

    avg_test_loss = running_test_loss / len(test_loader)
    test_accuracy = 100 * test_correct / test_total

    print(f'Test Loss: {avg_test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%')
    print("Classification Report:")
    print(classification_report(all_labels, all_preds, target_names=class_names))

# Initialize device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load data loaders with batch_size=128
train_loader, val_loader, test_loader, class_names = load_preprocess_resnet18(batch_size=128)

# Print the class names
print("Class Names:", class_names)

# Train and validate the model with limited batches per epoch
trained_model = train_val_cnn(num_epochs=20, learning_rate=0.001, patience=5, max_batches_per_epoch=1000)

# Define criterion for testing
criterion = torch.nn.CrossEntropyLoss()

# Test the model
test_model(trained_model, test_loader, criterion, device, class_names)

Using device: cuda
Class Names: ['airfield', 'airplane_cabin', 'airport_terminal', 'alcove', 'alley', 'amphitheater', 'amusement_arcade', 'amusement_park', 'apartment_building', 'aquarium', 'aqueduct', 'arcade', 'arch', 'archaelogical_excavation', 'archive', 'arena', 'army_base', 'art_gallery', 'art_school', 'art_studio', 'artists_loft', 'assembly_line', 'athletic_field', 'atrium', 'attic', 'auditorium', 'auto_factory', 'auto_showroom', 'badlands', 'bakery', 'balcony', 'ball_pit', 'ballroom', 'bamboo_forest', 'bank_vault', 'banquet_hall', 'bar', 'barn', 'barndoor', 'baseball_field', 'basement', 'basketball_court', 'bathroom', 'bazaar', 'beach', 'beach_house', 'beauty_salon', 'bedchamber', 'bedroom', 'beer_garden', 'beer_hall', 'berth', 'biology_laboratory', 'boardwalk', 'boat_deck', 'boathouse', 'bookstore', 'booth', 'botanical_garden', 'bow_window', 'bowling_alley', 'boxing_ring', 'bridge', 'building_facade', 'bullring', 'burial_chamber', 'bus_interior', 'bus_station', 'butchers_shop'

  return t.to(
Epoch 1/20 - Training: 100%|██████████| 1000/1000 [03:16<00:00,  5.09it/s, acc=25.01%, loss=2.6211]
Epoch 1/20 - Validation:   0%|          | 0/286 [00:00<?, ?it/s]


ValueError: too many values to unpack (expected 2)