In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!unzip /content/drive/MyDrive/cs444-final-project/project/data/small_data/train.zip


In [None]:
!unzip /content/drive/MyDrive/cs444-final-project/project/data/small_data/test.zip


In [4]:
MODEL_DIR = '/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/'

In [None]:
!pip install efficientnet-pytorch

In [None]:
!pip install timm

In [5]:
from torchvision import datasets, models, transforms
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class LandmarkImageDataset(Dataset):
    def __init__(self, annotations_file, transform=None, id_path_mapping="", is_pt=False):
        """
        Args:
            annotations_file (string): Path to the CSV file with annotations.
            img_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied on a sample.
            file_extension (string, optional): Extension of the image files in the directory.
        """
        self.img_labels = pd.read_csv(annotations_file)
        self.id_to_label = {str(row[0]): row[3] for row in self.img_labels.values}
        self.transform = transform
        self.id_path_mapping = pd.read_csv(id_path_mapping)
        self.length = len(self.id_path_mapping)
        self.is_pt = is_pt


    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        # Retrieve the image ID and path from the mapping
        image_id = self.id_path_mapping.iloc[idx]['id']
        image_path = self.id_path_mapping.iloc[idx]['path']

        label = self.id_to_label[str(image_id)]

        if self.is_pt:
            tensor = torch.load(image_path)
            if self.transform:
                tensor = self.transform(tensor)
            return tensor, label
        else:
            # Load the image
            image = Image.open(image_path)
            # Apply transform if any
            if self.transform:
                image = self.transform(image)
            return image, label


# Metrics

In [6]:
class RunningAverage:
    '''
      Computes and keeps track of the running average and the current value of metrics
    '''
    def __init__(self) -> None:
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val: float, n: int = 1) -> None:
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def GAP(predicts: torch.Tensor, confs: torch.Tensor, targets: torch.Tensor) -> float:
    """
    Calculates the simplified Global Average Precision.
    https://www.kaggle.com/competitions/landmark-recognition-2020/overview/evaluation

    Args:
        predicts (torch.Tensor): Predicted labels of shape (n_samples,).
        confs (torch.Tensor): Confidence scores associated with predictions, of shape (n_samples,).
        targets (torch.Tensor): Ground truth labels, of shape (n_samples,).

    Returns:
        float: The GAP score for the given predictions and targets.
    """
    assert len(predicts.shape) == 1 and len(confs.shape) == 1 and len(targets.shape) == 1 and predicts.shape == confs.shape and confs.shape == targets.shape

    _, indices = torch.sort(confs, descending=True)

    confs = confs.cpu().numpy()
    predicts = predicts[indices].cpu().numpy()
    targets = targets[indices].cpu().numpy()

    res, true_pos = 0.0, 0

    for i, (c, p, t) in enumerate(zip(confs, predicts, targets)):
        rel = int(p == t)
        true_pos += rel

        res += true_pos / (i + 1) * rel

    res /= targets.shape[0]
    return res

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
loss_fn = torch.nn.CrossEntropyLoss()

if torch.cuda.is_available():
  print("CUDA USED")
else:
  print("CPU USED")

CUDA USED


# Test and Eavluation

In [8]:
from tqdm import tqdm
def evaluate_model(model, dataloader, loss_fn, device):
    model.eval()
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0  # Track total samples processed
    avg_gap = RunningAverage()
    with torch.no_grad():
        progress_bar = tqdm(enumerate(dataloader), total=len(dataloader), desc="Evaluating", leave=True)
        for i, (inputs, labels) in progress_bar:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)

            running_loss += loss.item()
            confs, preds = torch.max(outputs, 1)
            correct_predictions += torch.sum(preds == labels.data)
            total_samples += labels.size(0)

            # Calculate GAP for this batch and update the running average
            avg_gap.update(GAP(preds, confs, labels))

            # Update the progress bar with average loss and accuracy
            average_loss = running_loss / total_samples
            average_accuracy = correct_predictions.double() / total_samples
            progress_bar.set_postfix({
                'avg_loss': f'{average_loss:.4f}',
                'avg_acc': f'{average_accuracy:.2f}',
                'gap_score': f'{avg_gap.avg:.4f}'
            })


    total_loss = running_loss / len(dataloader)
    accuracy = correct_predictions.double() / len(dataloader.dataset)
    gap_score = avg_gap.avg
    # print(f'Validation Loss: {total_loss}, Accuracy: {accuracy}')
    return total_loss, accuracy, gap_score


# Prepares Input

In [9]:
import os
# Define transformations for the training data
test_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


label_csv = "/content/drive/MyDrive/cs444-final-project/project/data/small_data/small_data.csv"
test_img_id_to_path_mapping = "/content/drive/MyDrive/cs444-final-project/project/data/small_data/test_img_id_to_path_mapping_drive.csv"
# Create an instance of the dataset
test_dataset = LandmarkImageDataset(
    annotations_file=label_csv,
    transform=test_transform,
    id_path_mapping=test_img_id_to_path_mapping
)


test_loader = DataLoader(dataset=test_dataset, batch_size=4, shuffle=False, num_workers=2)

model_files = [f for f in os.listdir(MODEL_DIR) if f.endswith('.pth')]

for model_file in model_files:
    model_path = os.path.join(MODEL_DIR, model_file)
    print(model_path)


/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/resnet-50_best_model.pth
/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/vit_base_224_ep=10_best_model.pth
/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/MobileNetV2_ep=24_best_model.pth
/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/DenseNet_ep=21_best_model.pth
/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/vit_base_224_ep=30_best_model.pth
/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/resnet-18_best_model.pth
/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/resnet-50_ep30_lr=0001_best_model.pth
/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/SqueezeNet_best_model.pth
/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/DenseNet_best_model.pth
/c

# ResNet-18

In [None]:
from torchvision import models
import torch

model_path = "/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/resnet-18_best_model.pth"
def initialize_model(num_classes):
    # Load a pretrained ResNet-18 model
    model = models.resnet18(pretrained=True)
    num_ftrs = model.fc.in_features
    model.fc = torch.nn.Linear(num_ftrs, num_classes)
    return model

# Determine the number of unique classes
num_classes = 25
model = initialize_model(num_classes=num_classes)

model.load_state_dict(torch.load(model_path))
model.to(device)
total_loss, accuracy, gap_score = evaluate_model(model, test_loader, loss_fn, device)
print(f'ResNet-18: Total Loss: {total_loss}, Accuracy: {accuracy}, GAP Score: {gap_score}')

  self.pid = os.fork()
Evaluating: 100%|██████████| 184/184 [00:03<00:00, 60.27it/s, avg_loss=0.4228, avg_acc=0.67, gap_score=0.6423]


ResNet-18: Total Loss: 1.6888280155482602, Accuracy: 0.6748299319727891, GAP Score: 0.6423233695652176


# ResNet-50

In [None]:
from torchvision import models
import torch

model_path = "/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/resnet-50_best_model.pth"
def initialize_model(num_classes):
    # Load a pretrained ResNet-18 model
    model = models.resnet50(pretrained=True)
    num_ftrs = model.fc.in_features
    model.fc = torch.nn.Linear(num_ftrs, num_classes)
    return model

# Determine the number of unique classes
num_classes = 25
model = initialize_model(num_classes=num_classes)

model.load_state_dict(torch.load(model_path))
model.to(device)
total_loss, accuracy, gap_score = evaluate_model(model, test_loader, loss_fn, device)
print(f'ResNet-50: Total Loss: {total_loss}, Accuracy: {accuracy}, GAP Score: {gap_score}')

Evaluating: 100%|██████████| 184/184 [00:03<00:00, 59.89it/s, avg_loss=0.0992, avg_acc=0.88, gap_score=0.8697]

ResNet-50: Total Loss: 1.1964374558253558, Accuracy: 0.7429931972789115, GAP Score: 0.7496784420289856





# Efficient Net B0

In [None]:
from torchvision import models
import torch
import torch.nn as nn
import efficientnet_pytorch
class EfficientNetEncoderHead(nn.Module):
    """
    Defining an EfficientNet encoder head for image classification.

    Attributes:
        depth (int): The depth version of the EfficientNet.
        num_classes (int): The number of classes for the classifier output.
        base (EfficientNet): The base EfficientNet model preloaded with pretrained weights.
        avg_pool (nn.AdaptiveAvgPool2d): Adaptive average pooling to reduce spatial dimensions to 1x1.
        output_filter (int): The number of output features from the EfficientNet.
        classifier (nn.Linear): The linear classifier that outputs the class probabilities.

    Methods:
        forward(x): Defines the forward pass of the model.
    """
    def __init__(self, depth, num_classes):
        super(EfficientNetEncoderHead, self).__init__()
        self.depth = depth
        self.base = efficientnet_pytorch.EfficientNet.from_pretrained(f'efficientnet-b{self.depth}')
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.output_filter = self.base._fc.in_features
        self.classifier = nn.Linear(self.output_filter, num_classes)
    def forward(self, x):
        x = self.base.extract_features(x)
        x = self.avg_pool(x).squeeze(-1).squeeze(-1)
        x = self.classifier(x)
        return x


model_path = "/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/efficient_net_b0_best_model.pth"

# Determine the number of unique classes
num_classes = 25
model = EfficientNetEncoderHead(depth=0, num_classes=num_classes)
model.load_state_dict(torch.load(model_path))
model.to(device)
total_loss, accuracy, gap_score = evaluate_model(model, test_loader, loss_fn, device)
print(f'EfficientNet-b0: Total Loss: {total_loss}, Accuracy: {accuracy}, GAP Score: {gap_score}')

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth
100%|██████████| 20.4M/20.4M [00:00<00:00, 363MB/s]


Loaded pretrained weights for efficientnet-b0


Evaluating: 100%|██████████| 184/184 [00:03<00:00, 48.11it/s, avg_loss=0.2300, avg_acc=0.81, gap_score=0.7980]


EfficientNet-b0: Total Loss: 0.9187016650719907, Accuracy: 0.8136054421768707, GAP Score: 0.7980072463768116


# Efficient Net B7

In [None]:
from torchvision import models
import torch
import torch.nn as nn
import efficientnet_pytorch
class EfficientNetEncoderHead(nn.Module):
    """
    Defining an EfficientNet encoder head for image classification.

    Attributes:
        depth (int): The depth version of the EfficientNet.
        num_classes (int): The number of classes for the classifier output.
        base (EfficientNet): The base EfficientNet model preloaded with pretrained weights.
        avg_pool (nn.AdaptiveAvgPool2d): Adaptive average pooling to reduce spatial dimensions to 1x1.
        output_filter (int): The number of output features from the EfficientNet.
        classifier (nn.Linear): The linear classifier that outputs the class probabilities.

    Methods:
        forward(x): Defines the forward pass of the model.
    """
    def __init__(self, depth, num_classes):
        super(EfficientNetEncoderHead, self).__init__()
        self.depth = depth
        self.base = efficientnet_pytorch.EfficientNet.from_pretrained(f'efficientnet-b{self.depth}')
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.output_filter = self.base._fc.in_features
        self.classifier = nn.Linear(self.output_filter, num_classes)
    def forward(self, x):
        x = self.base.extract_features(x)
        x = self.avg_pool(x).squeeze(-1).squeeze(-1)
        x = self.classifier(x)
        return x


model_path = "/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/efficient_net_b7_best_model.pth"

# Determine the number of unique classes
num_classes = 25
model = EfficientNetEncoderHead(depth=7, num_classes=num_classes)
model.load_state_dict(torch.load(model_path))
model.to(device)
total_loss, accuracy, gap_score = evaluate_model(model, test_loader, loss_fn, device)
print(f'EfficientNet-b7: Total Loss: {total_loss}, Accuracy: {accuracy}, GAP Score: {gap_score}')

Loaded pretrained weights for efficientnet-b7


  self.pid = os.fork()
Evaluating: 100%|██████████| 184/184 [00:09<00:00, 18.56it/s, avg_loss=0.2228, avg_acc=0.83, gap_score=0.8196]


EfficientNet-b7: Total Loss: 0.8899004632821546, Accuracy: 0.8812925170068026, GAP Score: 0.8696331521739131


# ViT

In [None]:
from torchvision import models
import torch
import timm

model_path = "/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/vit_base_224_ep=30_best_model.pth"
model = timm.create_model('vit_base_patch16_224', pretrained=True, num_classes=25)

model.load_state_dict(torch.load(model_path))
model.to(device)
total_loss, accuracy, gap_score = evaluate_model(model, test_loader, loss_fn, device)
print(f'ViT: Total Loss: {total_loss}, Accuracy: {accuracy}, GAP Score: {gap_score}')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Evaluating: 100%|██████████| 184/184 [00:04<00:00, 42.93it/s, avg_loss=0.5278, avg_acc=0.74, gap_score=0.7137]

ViT: Total Loss: 1.082392482979726, Accuracy: 0.8387755102040816, GAP Score: 0.8136548913043481





# SWIN Transformer

In [None]:
from torchvision import models
import torch
import timm

model_path = "/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/swin_ep=30_best_model.pth"
model = timm.create_model('swin_tiny_patch4_window7_224', pretrained=True, num_classes=25)

model.load_state_dict(torch.load(model_path))
model.to(device)
total_loss, accuracy, gap_score = evaluate_model(model, test_loader, loss_fn, device)
print(f'SWIN: GAP Score: {gap_score}')

Evaluating: 100%|██████████| 184/184 [00:03<00:00, 55.67it/s, avg_loss=0.2962, avg_acc=0.81, gap_score=0.7935]


SWIN: GAP Score: 0.7934782608695654


# MobileNetV2


In [12]:
from torchvision import models
import torch
import torch.nn as nn

model_path = "/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/MobileNetV2_ep=24_best_model.pth"
model = models.mobilenet_v2(pretrained=True)
model.classifier = nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(in_features=model.classifier[1].in_features, out_features=25)
)

model.load_state_dict(torch.load(model_path))
model.to(device)
total_loss, accuracy, gap_score = evaluate_model(model, test_loader, loss_fn, device)
print(f'SWIN: GAP Score: {gap_score}')

  self.pid = os.fork()
Evaluating: 100%|██████████| 184/184 [00:04<00:00, 39.26it/s, avg_loss=0.3947, avg_acc=0.70, gap_score=0.6675]

SWIN: GAP Score: 0.6674592391304349





# DenseNet


In [13]:
from torchvision import models
import torch
import torch.nn as nn

model_path = '/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/DenseNet_best_model.pth'
model = models.densenet121(pretrained=True)
model.classifier = nn.Linear(model.classifier.in_features, 25)

model.load_state_dict(torch.load(model_path))
model.to(device)
total_loss, accuracy, gap_score = evaluate_model(model, test_loader, loss_fn, device)
print(f'SWIN: GAP Score: {gap_score}')

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 153MB/s]
Evaluating: 100%|██████████| 184/184 [00:05<00:00, 35.73it/s, avg_loss=0.2915, avg_acc=0.73, gap_score=0.7077]

SWIN: GAP Score: 0.7076539855072465





# SqueezeNet

In [14]:
from torchvision import models
import torch
import torch.nn as nn

model_path = '/content/drive/MyDrive/cs444-final-project/project/models/baselines/saved_models/SqueezeNet_best_model.pth'
model = models.squeezenet1_1(pretrained=True)
final_conv = nn.Conv2d(512, 25, kernel_size=(1,1))
model.classifier[1] = final_conv
model.num_classes = 25

model.load_state_dict(torch.load(model_path))
model.to(device)
total_loss, accuracy, gap_score = evaluate_model(model, test_loader, loss_fn, device)
print(f'SWIN: GAP Score: {gap_score}')

Downloading: "https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth" to /root/.cache/torch/hub/checkpoints/squeezenet1_1-b8a52dc0.pth
100%|██████████| 4.73M/4.73M [00:00<00:00, 84.9MB/s]
  self.pid = os.fork()
Evaluating: 100%|██████████| 184/184 [00:03<00:00, 55.63it/s, avg_loss=0.4734, avg_acc=0.64, gap_score=0.6042]

SWIN: GAP Score: 0.604166666666667





## Plot

In [None]:
import matplotlib.pyplot as plt

def plot_metrics_acc(train_losses, train_accuracies, test_accuracies):
    plt.figure(figsize=(12, 6))

    # Create the first axis for the training loss
    ax1 = plt.gca()  # Get current axis
    line1, = ax1.plot(train_losses, label='Training Loss', color='red', marker='o')
    ax1.set_xlabel('Epochs')  # Common x-axis label
    ax1.set_ylabel('Loss', color='red')  # Set y-axis label for loss
    ax1.tick_params(axis='y', labelcolor='red')  # Set the color of y-axis ticks to red

    # Create a second y-axis for training accuracy using twinx()
    ax2 = ax1.twinx()  # Create a second y-axis that shares the same x-axis
    line2, = ax2.plot(train_accuracies, label='Training Accuracy', color='blue', marker='x')
    ax2.set_ylabel('Accuracy', color='blue')  # Set y-axis label for accuracy
    ax2.tick_params(axis='y', labelcolor='blue')  # Set the color of y-axis ticks to blue

    # Create a third y-axis using a new set of axes positioned to the right
    ax3 = ax1.twinx()  # New axis that shares x-axis
    ax3.spines['right'].set_position(('outward', 60))  # Move the third axis out to avoid overlap
    line3, = ax3.plot(test_accuracies, label='Test Accuracy', color='green', marker='s')
    ax3.set_ylabel('Test Accuracy', color='green')  # Set y-axis label for test accuracy
    ax3.tick_params(axis='y', labelcolor='green')  # Set the color of y-axis ticks to green

    # Title and grid
    plt.title('ResNet-18: Training Loss, Training Accuracy, and Test Accuracy')
    ax1.grid(True)

    # Create legends and position them appropriately
    lines = [line1, line2, line3]
    labels = [l.get_label() for l in lines]
    plt.legend(lines, labels, loc='upper center', bbox_to_anchor=(0.5, -0.1), ncol=3)  # Adjust legend position

    plt.show()


# Save logs

In [None]:
import json

data = {
    "train_loss": train_losses,
    "train_accuracy": train_acc,
    "train_gap": train_gap,
    "test_accuracy": test_acc
}
LOG_FILE
with open(LOG_FILE, "w") as json_file:
    json.dump(data, json_file, indent=4)
