# Project 3

#### Importing libraries

In [None]:
import numpy as np
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset
import os
import torch
from torchvision import transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
# from torchvision.models import resnet50, ResNet50_Weights
# from torchvision.models import convnext_small, ConvNeXt_Small_Weights
from torchvision.models import regnet_y_16gf, RegNet_Y_16GF_Weights
# from torchvision.models import regnet_y_128gf, RegNet_Y_128GF_Weights
import torch.optim as optim

In [None]:
# The device is automatically set to GPU if available, otherwise CPU
# If you want to force the device to CPU, you can change the line to
# device = torch.device("cpu")
# When using the GPU, it is important that your model and all data are on the 
# same device.
device = torch.device("mps")

In [None]:
"""
Transform, resize and normalize the images and then use a pretrained model to extract 
the embeddings.
"""
# TODO: define a transform to pre-process the images
# The required pre-processing depends on the pre-trained model you choose 
# below. 
# See https://pytorch.org/vision/stable/models.html#using-the-pre-trained-models
train_transforms = RegNet_Y_16GF_Weights.IMAGENET1K_SWAG_E2E_V1.transforms()
train_dataset = datasets.ImageFolder(root="dataset/", transform=train_transforms)
# Hint: adjust batch_size and num_workers to your PC configuration, so that you don't 
# run out of memory (VRAM if on GPU, RAM if on CPU)
batch = 64
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch,
                          shuffle=False,
                          pin_memory=True,
                          num_workers=10)

# TODO: define a model for extraction of the embeddings (Hint: load a pretrained model,
# more info here: https://pytorch.org/vision/stable/models.html)
model = regnet_y_16gf(weights=RegNet_Y_16GF_Weights.IMAGENET1K_SWAG_E2E_V1)
# print(model)
embedding_size = 3024 # Dummy variable, replace with the actual embedding size once you pick your model
num_images = len(train_dataset)


In [None]:
# embeddings = np.zeros((num_images, embedding_size))
# # TODO: Use the model to extract the embeddings. Hint: remove the last layers of the 
# # model to access the embeddings the model generates. 
# model.fc = nn.Identity()
# # model.classifier[2] = nn.Identity()
# model.to(device)
# i = 0
# with torch.no_grad():
#     for inputs, _ in train_loader:
#         inputs = inputs.to(device)
#         outputs = model(inputs)
#         embeddings[batch*i : batch*(i+1)] = outputs.cpu().numpy()
#         del inputs
#         del outputs
#         print(i, end="--")
#         i += 1
# np.save('dataset/embeddings.npy', embeddings)

In [None]:
def get_data(file, train=True):
    """
    Load the triplets from the file and generate the features and labels.

    input: file: string, the path to the file containing the triplets
           train: boolean, whether the data is for training or testing

    output: X: numpy array, the features
            y: numpy array, the labels
    """
    triplets = []
    with open(file) as f:
        for line in f:
            triplets.append(line)

    # generate training data from triplets
    train_dataset = datasets.ImageFolder(root="dataset/",
                                         transform=None)
    filenames = [s[0].split('/')[-1].replace('.jpg', '') for s in train_dataset.samples]
    embeddings = np.load('dataset/embeddings.npy')
    # TODO: Normalize the embeddings
    embeddings = (embeddings - np.mean(embeddings, axis=1)[:, np.newaxis]) / np.std(embeddings, axis=1)[:, np.newaxis]
    file_to_embedding = {}
    for i in range(len(filenames)):
        file_to_embedding[filenames[i]] = embeddings[i]
    X = []
    y = []
    # use the individual embeddings to generate the features and labels for triplets
    for t in triplets:
        emb = [file_to_embedding[a] for a in t.split()]
        X.append(np.hstack([emb[0], emb[1], emb[2]]))
        y.append(1)
        # Generating negative samples (data augmentation)
        if train:
            X.append(np.hstack([emb[0], emb[2], emb[1]]))
            y.append(0)
    X = np.vstack(X)
    y = np.hstack(y)
    return X, y

In [None]:
def create_loader_from_np(X, y = None, train = True, batch_size=batch, shuffle=True, num_workers = 10):
    """
    Create a torch.utils.data.DataLoader object from numpy arrays containing the data.

    input: X: numpy array, the features
           y: numpy array, the labels
    
    output: loader: torch.data.util.DataLoader, the object containing the data
    """
    if train:
        # Attention: If you get type errors you can modify the type of the
        # labels here
        dataset = TensorDataset(torch.from_numpy(X).type(torch.float), 
                                torch.from_numpy(y).type(torch.float))
    else:
        dataset = TensorDataset(torch.from_numpy(X).type(torch.float))
    loader = DataLoader(dataset=dataset,
                        batch_size=batch_size,
                        shuffle=shuffle,
                        pin_memory=True, num_workers=num_workers)
    return loader

In [None]:
TRAIN_TRIPLETS = 'train_triplets.txt'
# load the training data
X, y = get_data(TRAIN_TRIPLETS)
# Create data loaders for the training data
train_loader = create_loader_from_np(X[0:round(0.8*X.shape[0])], y[0:round(0.8*len(y))], train = True, batch_size=batch)
valid_loader = create_loader_from_np(X[round(0.8*X.shape[0]):], y[round(0.8*len(y)):], train = True, batch_size=batch)
train_loader_final = create_loader_from_np(X, y, train = True, batch_size=batch)
# delete the loaded training data to save memory, as the data loader copies
del X
del y

TODO: define a model. Here, the basic structure is defined, but you need to fill in the details

In [59]:
layer1_size = 1024
layer2_size = 64
dropout_prop = 0.3

class Net(nn.Module):
    """
    The model class, which defines our classifier.
    """
    def __init__(self):
        """
        The constructor of the model.
        """
        super().__init__()

        self.dropout0 = nn.Dropout(dropout_prop)

        self.fc1 = nn.Linear(3*embedding_size, layer1_size)
        nn.init.kaiming_normal_(self.fc1.weight, mode='fan_in', nonlinearity='relu')
        self.bn1 = nn.BatchNorm1d(layer1_size)
        self.dropout1 = nn.Dropout(dropout_prop)

        self.fc2 = nn.Linear(layer1_size, layer2_size)
        nn.init.kaiming_normal_(self.fc2.weight, mode='fan_in', nonlinearity='relu')
        self.bn2 = nn.BatchNorm1d(layer2_size)
        self.dropout2 = nn.Dropout(dropout_prop)
        
        self.fc3 = nn.Linear(layer2_size, 1)
        nn.init.kaiming_normal_(self.fc3.weight, mode='fan_in', nonlinearity='relu')

    def forward(self, x):
        """
        The forward pass of the model.

        input: x: torch.Tensor, the input to the model

        output: x: torch.Tensor, the output of the model
        """
        x = self.dropout0(x)

        x = self.fc1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.dropout1(x)

        x = self.fc2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.dropout2(x)

        x = self.fc3(x)
        return x

In [60]:
"""
The training procedure of the model; it accepts the training data, defines the model 
and then trains it.

input: train_loader: torch.data.util.DataLoader, the object containing the training data
    
compute: model: torch.nn.Module, the trained model
"""
model = Net()
model.train()
model.to(device)
n_epochs = 10
patience = 2
min_delta = 0.01
best_val_loss = float('inf')
epochs_no_improve = 0
# TODO: define a loss function, optimizer and proceed with training. Hint: use the part 
# of the training data as a validation split. After each epoch, compute the loss on the 
# validation split and print it out. This enables you to see how your model is performing 
# on the validation data before submitting the results on the server. After choosing the 
# best model, train it on the whole training data.
loss_function = nn.BCEWithLogitsLoss()
L2_lambda = 0.0002
learn_rate = 0.001
optimizer = optim.Adam(model.parameters(), lr=learn_rate, weight_decay=L2_lambda)

for epoch in range(n_epochs):

    for batch_id, (X, y) in enumerate(train_loader):
        X = X.to(device)
        y = y.to(device)
        output = model(X)
        loss = loss_function(torch.flatten(output), y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch_id % 200 == 0:
            print('Epoch {}, Batch id {}, training loss {}'.format(epoch+1, batch_id, loss.item()))

    valid_losses = []
    correct = 0
    with torch.no_grad():
        for X, y in valid_loader: 
            X = X.to(device)   
            y = y.to(device)        
            output_valid = model(X)
            valid_loss = loss_function(torch.flatten(output_valid), y).cpu().numpy()
            valid_losses.append(valid_loss)
            output_valid = output_valid.cpu().numpy()
            output_valid[output_valid >= 0.5] = 1
            output_valid[output_valid < 0.5] = 0
            y = y.cpu().numpy()[:, np.newaxis]
            correct += np.count_nonzero(output_valid == y)
        print('Epoch {}, valid loss {}'.format(epoch+1, np.mean(valid_losses)))
        print('Epoch {}, accuracy: ({:.0f}%)'.format(epoch+1, 100. * correct / len(valid_loader.dataset)))

    if np.mean(valid_losses) < best_val_loss - min_delta:
        best_val_loss = np.mean(valid_losses)
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1

    # Check if early stopping criteria met
    if epochs_no_improve >= patience:
        print(f'Early stopping after {epoch+1} epochs.')
        break

Epoch 1, Batch id 0, training loss 0.8238517045974731
Epoch 1, Batch id 200, training loss 0.5929189920425415
Epoch 1, Batch id 400, training loss 0.6181861162185669
Epoch 1, Batch id 600, training loss 0.5212967395782471
Epoch 1, Batch id 800, training loss 0.506117582321167
Epoch 1, Batch id 1000, training loss 0.4672303795814514
Epoch 1, Batch id 1200, training loss 0.5662831664085388
Epoch 1, Batch id 1400, training loss 0.5545528531074524
Epoch 1, valid loss 0.5430616140365601
Epoch 1, accuracy: (70%)
Epoch 2, Batch id 0, training loss 0.4670337438583374
Epoch 2, Batch id 200, training loss 0.6115789413452148
Epoch 2, Batch id 400, training loss 0.6194772720336914
Epoch 2, Batch id 600, training loss 0.5318030714988708
Epoch 2, Batch id 800, training loss 0.51119065284729
Epoch 2, Batch id 1000, training loss 0.5637558698654175
Epoch 2, Batch id 1200, training loss 0.43741393089294434
Epoch 2, Batch id 1400, training loss 0.4386410713195801
Epoch 2, valid loss 0.524290919303894
Ep

In [61]:
model = Net()
model.train()
model.to(device)
n_epochs = 8
optimizer = optim.Adam(model.parameters(), lr=learn_rate, weight_decay=L2_lambda)
for epoch in range(n_epochs):
    for batch_id, (X, y) in enumerate(train_loader_final):
        X = X.to(device)
        y = y.to(device)
        output = model(X)
        loss = loss_function(torch.flatten(output), y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch_id % 200 == 0:
            print('Epoch {}, Batch id {}, training loss {}'.format(epoch+1, batch_id, loss.item()))

Epoch 1, Batch id 0, training loss 1.0154438018798828
Epoch 1, Batch id 200, training loss 0.5264649391174316
Epoch 1, Batch id 400, training loss 0.5525493621826172
Epoch 1, Batch id 600, training loss 0.5857006907463074
Epoch 1, Batch id 800, training loss 0.6069153547286987
Epoch 1, Batch id 1000, training loss 0.5840517282485962
Epoch 1, Batch id 1200, training loss 0.5669487714767456
Epoch 1, Batch id 1400, training loss 0.5469427108764648
Epoch 1, Batch id 1600, training loss 0.4686805009841919
Epoch 1, Batch id 1800, training loss 0.42942625284194946
Epoch 2, Batch id 0, training loss 0.4790855646133423
Epoch 2, Batch id 200, training loss 0.5497839450836182
Epoch 2, Batch id 400, training loss 0.570398211479187
Epoch 2, Batch id 600, training loss 0.48341870307922363
Epoch 2, Batch id 800, training loss 0.49982285499572754
Epoch 2, Batch id 1000, training loss 0.5324530005455017
Epoch 2, Batch id 1200, training loss 0.513380765914917
Epoch 2, Batch id 1400, training loss 0.4732

In [None]:
TEST_TRIPLETS = 'test_triplets.txt'
# repeat for testing data
X_test, y_test = get_data(TEST_TRIPLETS, train=False)
test_loader = create_loader_from_np(X_test, train = False, batch_size=2048, shuffle=False)
del X_test
del y_test

In [62]:
"""
The testing procedure of the model; it accepts the testing data and the trained model and 
then tests the model on it.

input: model: torch.nn.Module, the trained model
       loader: torch.data.util.DataLoader, the object containing the testing data
        
compute: None, the function saves the predictions to a results.txt file
"""
model.eval()
predictions = []
preds_orig = []
# Iterate over the test data
with torch.no_grad(): # We don't need to compute gradients for testing
    for [x_batch] in test_loader:
        x_batch = x_batch.to(device)
        predicted_vals = model(x_batch)
        predicted = predicted_vals.cpu().numpy()
        pred_orig = predicted_vals.cpu().numpy()
        preds_orig.append(pred_orig)
        # Rounding the predictions to 0 or 1
        predicted[predicted >= 0.5] = 1
        predicted[predicted < 0.5] = 0
        predictions.append(predicted)
    predictions = np.vstack(predictions)
np.savetxt("results.txt", predictions, fmt='%i')
print("Results saved to results.txt")

Results saved to results.txt
