# Project 3

#### Importing libraries

In [142]:
import numpy as np
from torchvision import transforms
from torch.utils.data import DataLoader, TensorDataset
import os
import torch
from torchvision import transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
# from torchvision.models import resnet50, ResNet50_Weights
# from torchvision.models import convnext_small, ConvNeXt_Small_Weights
from torchvision.models import regnet_y_16gf, RegNet_Y_16GF_Weights
import torch.optim as optim
from sklearn.model_selection import KFold

In [22]:
# The device is automatically set to GPU if available, otherwise CPU
# If you want to force the device to CPU, you can change the line to
# device = torch.device("cpu")
# When using the GPU, it is important that your model and all data are on the 
# same device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [20]:
"""
Transform, resize and normalize the images and then use a pretrained model to extract 
the embeddings.
"""
# TODO: define a transform to pre-process the images
# The required pre-processing depends on the pre-trained model you choose 
# below. 
# See https://pytorch.org/vision/stable/models.html#using-the-pre-trained-models
train_transforms = RegNet_Y_16GF_Weights.IMAGENET1K_SWAG_E2E_V1.transforms()
train_dataset = datasets.ImageFolder(root="dataset/", transform=train_transforms)
# Hint: adjust batch_size and num_workers to your PC configuration, so that you don't 
# run out of memory (VRAM if on GPU, RAM if on CPU)
batch = 64
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch,
                          shuffle=False,
                          pin_memory=True,
                          num_workers=10)
# TODO: define a model for extraction of the embeddings (Hint: load a pretrained model,
# more info here: https://pytorch.org/vision/stable/models.html)
model = regnet_y_16gf(weights=RegNet_Y_16GF_Weights.IMAGENET1K_SWAG_E2E_V1)
# print(model)
embedding_size = 3024 # Dummy variable, replace with the actual embedding size once you pick your model
num_images = len(train_dataset)
embeddings = np.zeros((num_images, embedding_size))
# TODO: Use the model to extract the embeddings. Hint: remove the last layers of the 
# model to access the embeddings the model generates. 
model.fc = nn.Identity()
# model.classifier[2] = nn.Identity()
model.to(device)
i = 0
with torch.no_grad():
    for inputs, _ in train_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        embeddings[batch*i : batch*(i+1)] = outputs.cpu().numpy()
        del inputs
        del outputs
        print(i, end="--")
        i += 1
np.save('dataset/embeddings.npy', embeddings)

0--1--2--3--4--5--6--7--8--9--10--11--12--13--14--15--16--17--18--19--20--21--22--23--24--25--26--27--28--29--30--31--32--33--34--35--36--37--38--39--40--41--42--43--44--45--46--47--48--49--50--51--52--53--54--55--56--57--58--59--60--61--62--63--64--65--66--67--68--69--70--71--72--73--74--75--76--77--78--79--80--81--82--83--84--85--86--87--88--89--90--91--92--93--94--95--96--97--98--99--100--101--102--103--104--105--106--107--108--109--110--111--112--113--114--115--116--117--118--119--120--121--122--123--124--125--126--127--128--129--130--131--132--133--134--135--136--137--138--139--140--141--142--143--144--145--146--147--148--149--150--151--152--153--154--155--156--

In [39]:
def get_data(file, train=True):
    """
    Load the triplets from the file and generate the features and labels.

    input: file: string, the path to the file containing the triplets
           train: boolean, whether the data is for training or testing

    output: X: numpy array, the features
            y: numpy array, the labels
    """
    triplets = []
    with open(file) as f:
        for line in f:
            triplets.append(line)

    # generate training data from triplets
    train_dataset = datasets.ImageFolder(root="dataset/",
                                         transform=None)
    filenames = [s[0].split('\\')[-1].replace('.jpg', '') for s in train_dataset.samples]
    embeddings = np.load('dataset/embeddings.npy')
    # TODO: Normalize the embeddings
    embeddings = (embeddings - np.mean(embeddings, axis=1)[:, np.newaxis]) / np.std(embeddings, axis=1)[:, np.newaxis]
    file_to_embedding = {}
    for i in range(len(filenames)):
        file_to_embedding[filenames[i]] = embeddings[i]
    X = []
    y = []
    # use the individual embeddings to generate the features and labels for triplets
    for t in triplets:
        emb = [file_to_embedding[a] for a in t.split()]
        X.append(np.hstack([emb[0], emb[1], emb[2]]))
        y.append(1)
        # Generating negative samples (data augmentation)
        if train:
            X.append(np.hstack([emb[0], emb[2], emb[1]]))
            y.append(0)
    X = np.vstack(X)
    y = np.hstack(y)
    return X, y

In [16]:
def create_loader_from_np(X, y = None, train = True, batch_size=batch, shuffle=True, num_workers = 10):
    """
    Create a torch.utils.data.DataLoader object from numpy arrays containing the data.

    input: X: numpy array, the features
           y: numpy array, the labels
    
    output: loader: torch.data.util.DataLoader, the object containing the data
    """
    if train:
        # Attention: If you get type errors you can modify the type of the
        # labels here
        dataset = TensorDataset(torch.from_numpy(X).type(torch.float), 
                                torch.from_numpy(y).type(torch.float))
    else:
        dataset = TensorDataset(torch.from_numpy(X).type(torch.float))
    loader = DataLoader(dataset=dataset,
                        batch_size=batch_size,
                        shuffle=shuffle,
                        pin_memory=True, num_workers=num_workers)
    return loader

In [149]:
TRAIN_TRIPLETS = 'train_triplets.txt'

# load the training data
X, y = get_data(TRAIN_TRIPLETS)
# Create data loaders for the training data
train_loader = create_loader_from_np(X[0:round(0.9*X.shape[0])], y[0:round(0.9*len(y))], train = True, batch_size=batch, shuffle=False)
valid_loader = create_loader_from_np(X[round(0.9*X.shape[0]):], y[round(0.9*len(y)):], train = True, batch_size=batch, shuffle=False)
# delete the loaded training data to save memory, as the data loader copies
del X
del y

In [41]:
TEST_TRIPLETS = 'test_triplets.txt'

# repeat for testing data
X_test, y_test = get_data(TEST_TRIPLETS, train=False)
test_loader = create_loader_from_np(X_test, train = False, batch_size=2048, shuffle=False)
del X_test
del y_test

TODO: define a model. Here, the basic structure is defined, but you need to fill in the details

In [140]:
class Net(nn.Module):
    """
    The model class, which defines our classifier.
    """
    def __init__(self):
        """
        The constructor of the model.
        """
        super().__init__()
        self.fc1 = nn.Linear(3*embedding_size, 256)
        self.dropout1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(256, 24)
        self.dropout2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(24, 1)

    def forward(self, x):
        """
        The forward pass of the model.

        input: x: torch.Tensor, the input to the model

        output: x: torch.Tensor, the output of the model
        """
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

In [150]:
"""
The training procedure of the model; it accepts the training data, defines the model 
and then trains it.

input: train_loader: torch.data.util.DataLoader, the object containing the training data
    
compute: model: torch.nn.Module, the trained model
"""
model = Net()
model.train()
model.to(device)
n_epochs = 10
patience = 2
min_delta = 0.01
best_val_loss = float('inf')
epochs_no_improve = 0
# TODO: define a loss function, optimizer and proceed with training. Hint: use the part 
# of the training data as a validation split. After each epoch, compute the loss on the 
# validation split and print it out. This enables you to see how your model is performing 
# on the validation data before submitting the results on the server. After choosing the 
# best model, train it on the whole training data.
loss_function = nn.BCEWithLogitsLoss()
L2_lambda = 0.0003
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=L2_lambda)
for epoch in range(n_epochs): 
    for batch_id, (X, y) in enumerate(train_loader):
        X = X.to(device)
        y = y.to(device)
        output = model(X)
        loss = loss_function(torch.flatten(output), y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch_id % 200 == 0:
            print('Epoch {}, Batch id {}, training loss {}'.format(epoch, batch_id, loss.item()))

    valid_losses = []
    correct = 0
    with torch.no_grad():
        for X, y in valid_loader: 
            X = X.to(device)   
            y = y.to(device)        
            output_valid = model(X)
            valid_loss = loss_function(torch.flatten(output_valid), y).cpu().numpy()
            valid_losses.append(valid_loss)
            output_valid = output_valid.cpu().numpy()
            output_valid[output_valid >= 0.5] = 1
            output_valid[output_valid < 0.5] = 0
            y = y.cpu().numpy()[:, np.newaxis]
            correct += np.count_nonzero(output_valid == y)
        print('Epoch {}, valid loss {}'.format(epoch, np.mean(valid_losses)))
        print('Epoch {}, accuracy: ({:.0f}%)'.format(epoch, 100. * correct / len(valid_loader.dataset)))
        
    if np.mean(valid_losses) < best_val_loss - min_delta:
        best_val_loss = np.mean(valid_losses)
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1

    # Check if early stopping criteria met
    if epochs_no_improve >= patience:
        print(f'Early stopping after {epoch+1} epochs.')
        break



Epoch 0, Batch id 0, training loss 0.680663526058197
Epoch 0, Batch id 200, training loss 0.5810276865959167
Epoch 0, Batch id 400, training loss 0.683556318283081
Epoch 0, Batch id 600, training loss 0.5561156272888184
Epoch 0, Batch id 800, training loss 0.5641984939575195
Epoch 0, Batch id 1000, training loss 0.400823175907135
Epoch 0, Batch id 1200, training loss 0.43358874320983887
Epoch 0, Batch id 1400, training loss 0.689350962638855
Epoch 0, Batch id 1600, training loss 0.5215140581130981
Epoch 0, valid loss 0.5016735196113586
Epoch 0, accuracy: (73%)
Epoch 1, Batch id 0, training loss 0.5540558099746704
Epoch 1, Batch id 200, training loss 0.4811534881591797
Epoch 1, Batch id 400, training loss 0.40108951926231384
Epoch 1, Batch id 600, training loss 0.4215273857116699
Epoch 1, Batch id 800, training loss 0.49112823605537415
Epoch 1, Batch id 1000, training loss 0.29144996404647827
Epoch 1, Batch id 1200, training loss 0.37641462683677673
Epoch 1, Batch id 1400, training loss

In [151]:
# load the training data
X, y = get_data(TRAIN_TRIPLETS)
# Create data loaders for the training data
train_loader = create_loader_from_np(X, y, train = True, batch_size=batch)
# delete the loaded training data to save memory, as the data loader copies
del X
del y

n_epochs = 2
for epoch in range(n_epochs):
    for batch_id, (X, y) in enumerate(train_loader):
        X = X.to(device)
        y = y.to(device)
        output = model(X)
        loss = loss_function(torch.flatten(output), y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch_id % 200 == 0:
            print('Epoch {}, Batch id {}, training loss {}'.format( epoch, batch_id, loss.item()))
            print('Epoch {}, accuracy: ({:.0f}%)'.format(epoch, 100. * correct / len(valid_loader.dataset)))

Epoch 0, Batch id 0, training loss 0.07816499471664429
Epoch 0, Batch id 200, training loss 0.08063235878944397
Epoch 0, Batch id 400, training loss 0.09774332493543625
Epoch 0, Batch id 600, training loss 0.052607256919145584
Epoch 0, Batch id 800, training loss 0.22301548719406128
Epoch 0, Batch id 1000, training loss 0.09982940554618835
Epoch 0, Batch id 1200, training loss 0.09971988201141357
Epoch 0, Batch id 1400, training loss 0.11164381355047226
Epoch 0, Batch id 1600, training loss 0.1568707376718521
Epoch 0, Batch id 1800, training loss 0.10312601178884506
Epoch 1, Batch id 0, training loss 0.1043592095375061
Epoch 1, Batch id 200, training loss 0.128819540143013
Epoch 1, Batch id 400, training loss 0.09211992472410202
Epoch 1, Batch id 600, training loss 0.08813406527042389
Epoch 1, Batch id 800, training loss 0.11949218064546585
Epoch 1, Batch id 1000, training loss 0.0943281427025795
Epoch 1, Batch id 1200, training loss 0.13478833436965942
Epoch 1, Batch id 1400, training

In [139]:
"""
The testing procedure of the model; it accepts the testing data and the trained model and 
then tests the model on it.

input: model: torch.nn.Module, the trained model
       loader: torch.data.util.DataLoader, the object containing the testing data
        
compute: None, the function saves the predictions to a results.txt file
"""
model.eval()
predictions = []
# Iterate over the test data
with torch.no_grad(): # We don't need to compute gradients for testing
    for [x_batch] in test_loader:
        x_batch = x_batch.to(device)
        predicted = model(x_batch)
        predicted = predicted.cpu().numpy()
        # Rounding the predictions to 0 or 1
        predicted[predicted >= 0.5] = 1
        predicted[predicted < 0.5] = 0
        predictions.append(predicted)
    predictions = np.vstack(predictions)
np.savetxt("results_Nicola_13.txt", predictions, fmt='%i')
print("Results saved to results.txt")

Results saved to results.txt
