In [1]:
import os
import random
import numpy as np
# Employ PIL for effective image processing capabilities
from PIL import Image
# Integrate tqdm to include progress bars for enhanced visibility in loops
from tqdm import tqdm
# Utilize Matplotlib and seaborn for comprehensive plotting capabilities
import matplotlib.pyplot as plt
# PyTorch for constructing and training neural networks
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
# Leverage torchvision for seamless management of image data and utilization of pretrained models
from torchvision import transforms
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from torchvision.models import resnet50, resnet152, ResNet50_Weights, ResNet152_Weights

## Initialization and Data Loading

In [2]:
# Function to set up the computation device
def initialize_device():
    computation_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Computation Device: {computation_device}")
    return computation_device

# Function to load dataset
def load_dataset(data_file, label_file):
    data = np.load(data_file)
    label_set = np.load(label_file)
    image_count = data.shape[1]
    reshaped_data = data.reshape(image_count, 3, 300, 300).transpose(3, 0, 1, 2)
    # return reshaped_data, label_set
    return data, label_set

# Initialize device
device = initialize_device()

# Load data and labels
train_data_file = 'data_train.npy'
train_label_file = 'labels_train.npy'
train_data, train_labels = load_dataset(train_data_file, train_label_file)


Computation Device: cuda


In [3]:
train_data.shape

(270000, 8443)

## Transformations and Dataset

In [4]:
# Function to define transformation pipeline
def create_transformations():
    image_transformations = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.1, contrast=0.1),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    return image_transformations

# Custom dataset class
class ImageDataset(Dataset):
    def __init__(self, images, labels, transformations=None):
        self.images = images
        self.labels = labels
        self.transformations = transformations

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        image_data = self.images[:, index].reshape(300, 300, 3)
        pil_image = Image.fromarray(image_data)
        if self.transformations:
            transformed_image = self.transformations(pil_image)
        return transformed_image, self.labels[index]

# Create transformations
data_transformations = create_transformations()


## Dataset Splitting and Data Loaders

In [5]:
# Function to split dataset
def split_dataset(dataset, train_split=0.7, validate_split=0.15):
    training_size = int(train_split * len(dataset))
    remaining_size = len(dataset) - training_size
    validation_size = int(validate_split * remaining_size)
    test_size = remaining_size - validation_size

    training_dataset, remaining_dataset = random_split(dataset, [training_size, remaining_size])
    validation_dataset, test_dataset = random_split(remaining_dataset, [validation_size, test_size])
    return training_dataset, validation_dataset, test_dataset

# Function to create data loaders
def create_data_loaders(train_set, val_set, test_set, batch_size=128):
    loaders = {
        'train': DataLoader(train_set, batch_size=batch_size, shuffle=True),
        'validate': DataLoader(val_set, batch_size=batch_size),
        'test': DataLoader(test_set, batch_size=batch_size)
    }
    return loaders

# Function to modify ResNet152 model
def custom_resnet152(num_output_classes):
    custom_model = resnet152(weights=ResNet152_Weights.DEFAULT)
    features_in_last_layer = custom_model.fc.in_features
    custom_model.fc = nn.Linear(features_in_last_layer, num_output_classes)
    return custom_model

# Split the dataset
dataset = ImageDataset(train_data, train_labels, data_transformations)

train_set, val_set, test_set = split_dataset(dataset)

# Create data loaders
data_loaders = create_data_loaders(train_set, val_set, test_set)

# Modify the ResNet152 model
output_classes = 9
resnet_model = custom_resnet152(output_classes)
resnet_model.to(device)


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

## Model Modification and Functions

In [6]:
# Function to define model save path
def define_model_save_path(save_directory):
    os.makedirs(save_directory, exist_ok=True)
    return save_directory

# Function for predictions with a confidence threshold
def predictions_with_threshold(outputs, threshold=0.45):
    prob_scores = torch.nn.functional.softmax(outputs, dim=1)
    max_probs, pred_classes = torch.max(prob_scores, dim=1)
    pred_classes[max_probs < threshold] = -1  # -1 for low confidence predictions
    return pred_classes

# Define the path where the model will be saved
model_save_path = define_model_save_path('model/')

# Define loss function and optimizer
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet_model.parameters(), lr=0.0001, weight_decay=1e-05)


##  Training Loop and Model Evaluation

In [7]:
# Function for a single training epoch
def perform_training_epoch(epoch, model, train_loader, device, optimizer, loss_function):
    model.train()  # Set model to training mode
    total_loss, total_correct, total_samples = 0.0, 0, 0
    
    for batch_images, batch_labels in tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs} [Training]"):
        batch_images, batch_labels = batch_images.to(device, dtype=torch.float), batch_labels.to(device, dtype=torch.long)
        predictions = model(batch_images)
        loss = loss_function(predictions, batch_labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted_labels = torch.max(predictions.data, 1)
        total_samples += batch_labels.size(0)
        total_correct += (predicted_labels == batch_labels).sum().item()

    return total_loss / len(train_loader), 100 * total_correct / total_samples

# Function for a single validation epoch
def perform_validation_epoch(epoch, model, val_loader, device, loss_function):
    model.eval()  # Set model to evaluation mode
    total_loss, total_correct, total_samples, total_unknown = 0.0, 0, 0, 0
    all_labels, all_probabilities = [], []
    
    with torch.no_grad():
        for batch_images, batch_labels in tqdm(val_loader, desc=f"Epoch {epoch}/{num_epochs} [Validation]"):
            batch_images, batch_labels = batch_images.to(device, dtype=torch.float), batch_labels.to(device, dtype=torch.long)
            predictions = model(batch_images)
            loss = loss_function(predictions, batch_labels)

            total_loss += loss.item()
            probabilities = torch.nn.functional.softmax(predictions, dim=1)

            all_labels.extend(batch_labels.cpu().numpy())
            all_probabilities.extend(probabilities.cpu().numpy())
            
            predicted_labels = predictions_with_threshold(predictions, threshold=0.7)
            total_unknown += (predicted_labels == -1).sum().item()

            known_predictions = predicted_labels[predicted_labels != -1]
            known_labels = batch_labels[predicted_labels != -1]
            total_samples += known_labels.size(0)
            total_correct += (known_predictions == known_labels).sum().item()

    accuracy = 100 * total_correct / total_samples if total_samples > 0 else 0
    return total_loss / len(val_loader), accuracy, total_unknown, all_labels, all_probabilities

# Main training loop
best_accuracy = 0
num_epochs = 20  
for epoch in range(1, num_epochs + 1):
    train_loss, train_accuracy = perform_training_epoch(epoch, resnet_model, data_loaders['train'], device, optimizer, loss_function)
    val_loss, val_accuracy, val_unknown, saved_labels, saved_probabilities = perform_validation_epoch(epoch, resnet_model, data_loaders['validate'], device, loss_function)

    np.save("saved_labels.npy", np.array(saved_labels))
    np.save("saved_probabilities.npy", np.array(saved_probabilities))

    print(f"Epoch [{epoch}/{num_epochs}], Train Loss: {train_loss:.2f}, Train Acc: {train_accuracy:.2f}%, Val Loss: {val_loss:.2f}, Val Acc: {val_accuracy:.2f}%, Unknown Predictions: {val_unknown}")

    if val_accuracy > best_accuracy:
        torch.save(resnet_model.state_dict(), os.path.join(model_save_path, 'resnet_model_final.pth'))
        print(f"Model saved for epoch {epoch}")
        best_accuracy = val_accuracy



  return F.conv2d(input, weight, bias, self.stride,
Epoch 1/20 [Training]: 100%|██████████| 47/47 [00:48<00:00,  1.03s/it]
Epoch 1/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.25it/s]


Epoch [1/20], Train Loss: 1.03, Train Acc: 70.46%, Val Loss: 0.21, Val Acc: 96.07%, Unknown Predictions: 23
Model saved for epoch 1


Epoch 2/20 [Training]: 100%|██████████| 47/47 [00:47<00:00,  1.01s/it]
Epoch 2/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.30it/s]


Epoch [2/20], Train Loss: 0.09, Train Acc: 97.31%, Val Loss: 0.12, Val Acc: 98.36%, Unknown Predictions: 13
Model saved for epoch 2


Epoch 3/20 [Training]: 100%|██████████| 47/47 [00:49<00:00,  1.05s/it]
Epoch 3/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.29it/s]


Epoch [3/20], Train Loss: 0.05, Train Acc: 98.54%, Val Loss: 0.10, Val Acc: 98.11%, Unknown Predictions: 9


Epoch 4/20 [Training]: 100%|██████████| 47/47 [00:49<00:00,  1.05s/it]
Epoch 4/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.27it/s]


Epoch [4/20], Train Loss: 0.04, Train Acc: 98.97%, Val Loss: 0.09, Val Acc: 97.86%, Unknown Predictions: 5


Epoch 5/20 [Training]: 100%|██████████| 47/47 [00:44<00:00,  1.05it/s]
Epoch 5/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.36it/s]


Epoch [5/20], Train Loss: 0.01, Train Acc: 99.61%, Val Loss: 0.08, Val Acc: 98.13%, Unknown Predictions: 4


Epoch 6/20 [Training]: 100%|██████████| 47/47 [00:45<00:00,  1.03it/s]
Epoch 6/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.38it/s]


Epoch [6/20], Train Loss: 0.02, Train Acc: 99.54%, Val Loss: 0.06, Val Acc: 98.93%, Unknown Predictions: 5
Model saved for epoch 6


Epoch 7/20 [Training]: 100%|██████████| 47/47 [00:50<00:00,  1.07s/it]
Epoch 7/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.49it/s]


Epoch [7/20], Train Loss: 0.01, Train Acc: 99.58%, Val Loss: 0.07, Val Acc: 98.14%, Unknown Predictions: 3


Epoch 8/20 [Training]: 100%|██████████| 47/47 [00:51<00:00,  1.09s/it]
Epoch 8/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.34it/s]


Epoch [8/20], Train Loss: 0.02, Train Acc: 99.42%, Val Loss: 0.13, Val Acc: 96.78%, Unknown Predictions: 6


Epoch 9/20 [Training]: 100%|██████████| 47/47 [00:53<00:00,  1.14s/it]
Epoch 9/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.28it/s]


Epoch [9/20], Train Loss: 0.01, Train Acc: 99.63%, Val Loss: 0.09, Val Acc: 98.40%, Unknown Predictions: 5


Epoch 10/20 [Training]: 100%|██████████| 47/47 [00:47<00:00,  1.00s/it]
Epoch 10/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.27it/s]


Epoch [10/20], Train Loss: 0.01, Train Acc: 99.68%, Val Loss: 0.08, Val Acc: 98.40%, Unknown Predictions: 4


Epoch 11/20 [Training]: 100%|██████████| 47/47 [00:44<00:00,  1.05it/s]
Epoch 11/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.38it/s]


Epoch [11/20], Train Loss: 0.01, Train Acc: 99.71%, Val Loss: 0.11, Val Acc: 97.86%, Unknown Predictions: 5


Epoch 12/20 [Training]: 100%|██████████| 47/47 [00:45<00:00,  1.04it/s]
Epoch 12/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.32it/s]


Epoch [12/20], Train Loss: 0.01, Train Acc: 99.66%, Val Loss: 0.08, Val Acc: 98.13%, Unknown Predictions: 5


Epoch 13/20 [Training]: 100%|██████████| 47/47 [00:46<00:00,  1.01it/s]
Epoch 13/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.29it/s]


Epoch [13/20], Train Loss: 0.00, Train Acc: 99.90%, Val Loss: 0.11, Val Acc: 97.35%, Unknown Predictions: 1


Epoch 14/20 [Training]: 100%|██████████| 47/47 [00:45<00:00,  1.03it/s]
Epoch 14/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.08it/s]


Epoch [14/20], Train Loss: 0.00, Train Acc: 99.95%, Val Loss: 0.09, Val Acc: 98.39%, Unknown Predictions: 6


Epoch 15/20 [Training]: 100%|██████████| 47/47 [00:49<00:00,  1.05s/it]
Epoch 15/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.37it/s]


Epoch [15/20], Train Loss: 0.01, Train Acc: 99.81%, Val Loss: 0.07, Val Acc: 98.67%, Unknown Predictions: 3


Epoch 16/20 [Training]: 100%|██████████| 47/47 [00:45<00:00,  1.04it/s]
Epoch 16/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.16it/s]


Epoch [16/20], Train Loss: 0.01, Train Acc: 99.76%, Val Loss: 0.06, Val Acc: 98.93%, Unknown Predictions: 4
Model saved for epoch 16


Epoch 17/20 [Training]: 100%|██████████| 47/47 [00:48<00:00,  1.04s/it]
Epoch 17/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.37it/s]


Epoch [17/20], Train Loss: 0.01, Train Acc: 99.75%, Val Loss: 0.10, Val Acc: 98.41%, Unknown Predictions: 2


Epoch 18/20 [Training]: 100%|██████████| 47/47 [00:43<00:00,  1.08it/s]
Epoch 18/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.31it/s]


Epoch [18/20], Train Loss: 0.01, Train Acc: 99.76%, Val Loss: 0.07, Val Acc: 98.92%, Unknown Predictions: 7


Epoch 19/20 [Training]: 100%|██████████| 47/47 [00:43<00:00,  1.08it/s]
Epoch 19/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.21it/s]


Epoch [19/20], Train Loss: 0.01, Train Acc: 99.76%, Val Loss: 0.09, Val Acc: 98.41%, Unknown Predictions: 1


Epoch 20/20 [Training]: 100%|██████████| 47/47 [00:45<00:00,  1.02it/s]
Epoch 20/20 [Validation]: 100%|██████████| 3/3 [00:02<00:00,  1.38it/s]

Epoch [20/20], Train Loss: 0.01, Train Acc: 99.54%, Val Loss: 0.10, Val Acc: 98.13%, Unknown Predictions: 5



