## Imports

In [1]:
import numpy as np
import os
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchvision import models
from torchvision.models import resnet50, ResNet50_Weights
from sklearn.model_selection import train_test_split
from PIL import Image
from tqdm import tqdm
import time


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
seed = 111
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)



cuda:0


## Get Data + EDA

In [2]:
%%time
def generate_embeddings(batch_size):
    """
    Transform, resize and normalize the images and then use a pretrained model to extract 
    the embeddings.
    """
    # Check if GPU is available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")
    
    
    # TODO: define a transform to pre-process the images
    train_transforms = transforms.Compose([
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    #torchvision.transforms.RandomHorizontalFlip(p=0.5),
    #torchvision.transforms.RandomVerticalFlip(p=0.5),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]) # tune this

    train_dataset = datasets.ImageFolder(root="/kaggle/input/imltask3/dataset/dataset/", transform=train_transforms)
    # Hint: adjust batch_size and num_workers to your PC configuration, so that you don't 
    # run out of memory
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size, # tune this
                              shuffle=False,
                              pin_memory=True, num_workers=2)
    # TODO: define a model for extraction of the embeddings (Hint: load a pretrained model,
    #  more info here: https://pytorch.org/vision/stable/models.html)
    model = resnet50(weights="IMAGENET1K_V2")
    # model.to(device)

    embeddings = []
    embedding_size = list(model.children())[-1].in_features # 2048
    num_images = len(train_dataset)
    embeddings = np.zeros((num_images, embedding_size))

    # TODO: Use the model to extract the embeddings. Hint: remove the last layers of the 
    # model to access the embeddings the model generates.
     
    model.eval() 

    model = nn.Sequential(*list(model.children())[:-1]) # remove last layer of the model

    print('Extracting features:')
    with torch.no_grad(): 
        for batch_idx, (image, image_idx) in enumerate(tqdm(train_loader)):
            embed_features = model(image) # get features from pretrained model  
            embed_features = embed_features.squeeze().cpu().numpy() # get to shape (256, 2048)
            embeddings[batch_idx * train_loader.batch_size : (batch_idx + 1) * train_loader.batch_size] = embed_features           
            
    np.save(f'embeddings_{batch_size}_cropped.npy', embeddings)
    
#generate_embeddings(batch_size=768) # -> both embeddings code and modelling do not work bc of memory issues

CPU times: user 5 µs, sys: 1e+03 ns, total: 6 µs
Wall time: 10 µs


In [3]:
def get_data(file, train=True):
    """
    Load the triplets from the file and generate the features and labels.

    input: file: string, the path to the file containing the triplets
          train: boolean, whether the data is for training or testing

    output: X: numpy array, the features
            y: numpy array, the labels
    """
    triplets = []
    with open(file) as f:
        for line in f:
            triplets.append(line)

    # generate training data from tripletsfiles = os.listdir(os.path.join(inputfolder,'dataset/food'))
    #files = os.listdir(os.path.join('dataset/food'))
    #filenames = [s[0].split('/')[-1].replace('.jpg', '') for s in train_dataset.samples]
    
    filenames = np.loadtxt("/kaggle/input/imltask3/filenames.txt", dtype=str) # if run on kaggle
    embeddings = np.load('/kaggle/input/imltask3/embeddings_512.npy') # if run on kaggle
    # TODO: Normalize the embeddings across the dataset
    
    embeddings = (embeddings - np.mean(embeddings, axis=0)) / np.std(embeddings, axis=0)
    
    file_to_embedding = {}
    for i in range(len(filenames)):
        file_name = filenames[i]
        file_to_embedding[file_name] = embeddings[i]
        
    X = []
    y = []
    # use the individual embeddings to generate the features and labels for triplets
    for t in tqdm(triplets):
        emb = [file_to_embedding[a] for a in t.split()]
        X.append(np.hstack([emb[0], emb[1], emb[2]]))
        y.append(1)
        # Generating negative samples (data augmentation) 
            # -> basically swap image1 with image2 which will get output 0
                # can we augment it even more? (not sure)
        if train:
            X.append(np.hstack([emb[0], emb[2], emb[1]]))
            y.append(0)
    X = np.vstack(X)
    y = np.hstack(y)
    return X, y


TRAIN_TRIPLETS = '/kaggle/input/imltask3/train_triplets.txt'
TEST_TRIPLETS = '/kaggle/input/imltask3/test_triplets.txt'

X, y = get_data(TRAIN_TRIPLETS)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)
X_train = X_train.reshape(-1, 512, 4, 3) # resize to 4dTensor for CNNs, maybe correct shape is (-1, 256, 6, 4), not sure
X_valid = X_valid.reshape(-1, 512, 4, 3)
X_test, _ = get_data(TEST_TRIPLETS, train=False)
X_test = X_test.reshape(-1, 512, 4, 3) # resize to 4dTensor for CNNs

100%|██████████| 59515/59515 [00:04<00:00, 14819.41it/s]
100%|██████████| 59544/59544 [00:00<00:00, 78547.85it/s]


## Create loader

In [4]:
# Hint: adjust batch_size and num_workers to your PC configuration, so that you don't run out of memory
def create_loader_from_np(X, y = None, train = True, batch_size=64, shuffle=True, num_workers = 2):
    """
    Create a torch.utils.data.DataLoader object from numpy arrays containing the data.

    input: X: numpy array, the features
           y: numpy array, the labels
    
    output: loader: torch.data.util.DataLoader, the object containing the data
    """
    print("Load data")
    if train:
        dataset = TensorDataset(torch.from_numpy(X).type(torch.float),
                                torch.from_numpy(y).type(torch.long))
    else:
        dataset = TensorDataset(torch.from_numpy(X).type(torch.float))
    loader = DataLoader(dataset=dataset,
                        batch_size=batch_size,
                        shuffle=shuffle,
                        num_workers=num_workers)
    return loader


train_loader = create_loader_from_np(X_train, y_train, train = True, batch_size=64)
valid_loader = create_loader_from_np(X_valid, y_valid, train = True, batch_size=64)
test_loader = create_loader_from_np(X_test, train = False, batch_size=2048, shuffle=False)

Load data
Load data
Load data


## Model

In [5]:
# TODO: define a model. Here, the basic structure is defined, but you need to fill in the details

class Net(nn.Module):
    """
    The model class, which defines our classifier.
    """
    def __init__(self, dropout=True, dropout_p=0.5): #0.4 -> tune this
        """
        The constructor of the model.
        """
        super().__init__()
        self.convlayer1 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.convlayer2 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=1, stride=1)
        )
        self.convlayer3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=8, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(8),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=1, stride=1)
        )
        
        self.fullycon1 = nn.Sequential(nn.Linear(8 * 2 * 1, 120), nn.ReLU())
        self.fullycon2 = nn.Sequential(nn.Linear(120, 84), nn.ReLU())
        if dropout:
            self.fullycon3 = nn.Sequential(nn.Dropout(p=dropout_p), nn.Linear(84, 64))
        else:
            self.fullycon3 = nn.Linear(84, 64)
            
    def forward(self, x):
        """
        The forward pass of the model.

        input: x: torch.Tensor, the input to the model

        output: x: torch.Tensor, the output of the model
        """
        x = self.convlayer1(x)
        x = self.convlayer2(x)
        x = self.convlayer3(x)
        x = x.view(-1, 8 * 2 * 1)
        x = self.fullycon1(x)
        x = self.fullycon2(x)
        x = self.fullycon3(x)
        return x

def train_model(train_loader):
    """
    The training procedure of the model; it accepts the training data, defines the model 
    and then trains it.

    input: train_loader: torch.data.util.DataLoader, the object containing the training data
    
    output: model: torch.nn.Module, the trained model
    """
    model = Net()
    model.train()
    model.to(device)
    print('device: ', device)
    n_epochs = 1 # tune this
    batch_size = 256 # and this

    losses = []
    acc = []
    valid_losses = []
    valid_acc = []
    # TODO: define a loss function, optimizer and proceed with training. Hint: use the part 
    # of the training data as a validation split. After each epoch, compute the loss on the 
    # validation
    # split and print it out. This enables you to see how your model is performing 
    # on the validation data before submitting the results on the server. After choosing the 
    # best model, train it on the whole training data.

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2) # tune lr

    print("Train model")
    
    model.train()
    for epoch in range(n_epochs):
        train_loss_epoch = []
        train_acc_epoch = []
        valid_loss_epoch = []
        valid_acc_epoch = []
        with tqdm(train_loader, unit="batch") as tepoch:
            for data, target in tepoch:
                tepoch.set_description(f"Epoch {epoch}")

                data, target = data.to(device), target.to(device)
                optimizer.zero_grad()
                output = model(data)
                predictions = output.argmax(dim=1, keepdim=True).squeeze()
                loss = criterion(output, target)
                train_loss_epoch.append(loss.item())
                correct = (predictions == target).sum().item()
                accuracy = correct / len(predictions)
                train_acc_epoch.append(accuracy)

                loss.backward()
                optimizer.step()
                
                train_loss_avg = np.sum(train_loss_epoch) / len(train_loss_epoch)
                train_acc_avg = np.sum(train_acc_epoch) / len(train_acc_epoch)
                tepoch.set_postfix({'Train loss': train_loss_avg, 'Train accuracy': 100. * train_acc_avg})
                
            with torch.no_grad():
                with tqdm(valid_loader, unit="batch") as tepoch:
                    for valid_data, valid_target in tepoch:
                        tepoch.set_description(f"Epoch {epoch} valid")
                        valid_data, valid_target = valid_data.to(device), valid_target.to(device)
                        valid_output = model(valid_data)
                        valid_predictions = valid_output.argmax(dim=1, keepdim=True).squeeze()
                        valid_loss = criterion(valid_output, valid_target)
                        valid_loss_epoch.append(valid_loss.item())
                        valid_correct = (valid_predictions == valid_target).sum().item()
                        valid_accuracy = valid_correct / len(valid_predictions)
                        valid_acc_epoch.append(valid_accuracy)
                        
                        valid_loss_avg = np.sum(valid_loss_epoch) / len(valid_loss_epoch)
                        valid_acc_avg = np.sum(valid_acc_epoch) / len(valid_acc_epoch)
                        tepoch.set_postfix({'Val loss': valid_loss.item(), 'Val accuracy': 100. * valid_accuracy})
        
        losses.append(train_loss_avg)
        acc.append(train_acc_avg)
        valid_losses.append(valid_loss_avg)
        valid_acc.append(valid_acc_avg)
        
        print('Final train accuracy: ', train_acc_avg, 'Final valid accuracy: ', valid_acc_avg,
             '\n Final train loss: ', train_loss_avg, 'Final valid loss: ', valid_loss_avg)
        
    return model, losses, acc, valid_losses, valid_acc

model, losses, acc, valid_losses, valid_acc = train_model(train_loader)

device:  cuda:0
Train model


Epoch 0: 100%|██████████| 1488/1488 [00:27<00:00, 53.56batch/s, Train loss=0.594, Train accuracy=68.5]
Epoch 0 valid: 100%|██████████| 372/372 [00:03<00:00, 113.46batch/s, Val loss=0.45, Val accuracy=82.3] 

Final train accuracy:  0.6852558563748079 Final valid accuracy:  0.7386863727020464 
 Final train loss:  0.5940255130170494 Final valid loss:  0.5299263106238458





In [6]:
import plotly.graph_objects as go

x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=x,
    y=losses,
    name = 'Train Loss',
    connectgaps=True
))
fig.add_trace(go.Scatter(
    x=x,
    y=acc,
    name='Train Accuracy',
))

fig.add_trace(go.Scatter(
    x=x,
    y=valid_losses,
    name = 'Valid Loss',
    connectgaps=True
))
fig.add_trace(go.Scatter(
    x=x,
    y=valid_acc,
    name='Valid Accuracy',
))

fig.show()

## Test model

In [8]:
def test_model(model, loader):
    """
    The testing procedure of the model; it accepts the testing data and the trained model and 
    then tests the model on it.

    input: model: torch.nn.Module, the trained model
           loader: torch.data.util.DataLoader, the object containing the testing data
        
    output: None, the function saves the predictions to a results.txt file
    """
    model.eval()
    predictions = []
    # Iterate over the test data
    with torch.no_grad(): # We don't need to compute gradients for testing
        for [x_batch] in loader:
            x_batch = x_batch.to(device)
            predicted = model(x_batch)
            predicted = predicted.argmax(dim=1, keepdim=True).squeeze().cpu().numpy()
            predicted[predicted >= 0.5] = 1
            predicted[predicted < 0.5] = 0
            predictions.append(predicted)
        predictions = np.hstack(predictions)
    np.savetxt("results.txt", predictions, fmt='%i')
    
test_model(model, test_loader)
print("Results saved to results.txt")

Results saved to results.txt
