In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import pickle


In [None]:
# function for loading the training data:

def load_data(file):
    """
    Read a pickled object from disk and hand it back to the caller.

    :param file: path of the pickle file to read
    :return: the unpickled object -- for this assignment a dict whose keys and values differ for the
     train data and the test data of each task (see the example cells below for the structure per data set).

    Note: unpickling can execute arbitrary code, so only open files that come from a trusted source.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle)

In [None]:
# Load the training set; the path points at the Kaggle input mount used by this notebook.
train_data = load_data('/kaggle/input/data-deep/data_A1_2AMM10_2023_2024/train_data.pkl')

In [None]:
# Quick look at the container type returned by load_data.
type(train_data)

In [None]:
# the structure of the training data is a dict, where the keys are strings indicating the alphabet.
# The values are again dicts, with the keys being the character and the values being a list of images of that character.

# see the code below for examples of working with the train data

alphabets = list(train_data.keys())


print('example alphabet names:', alphabets[:5])
print('\n')
print('how to get an example image for a specific character:')

alphabet_id = 4
alphabet = alphabets[alphabet_id]  # the alphabet name, i.e. a key of train_data (not a dict)
characters_for_this_alphabet = list(train_data[alphabet].keys())
character_id = 5
character = characters_for_this_alphabet[character_id]
image_id = 2

print(f'shape of image {image_id} of character {character} of alphabet {alphabet}:', train_data[alphabet][character][image_id].shape)

In [None]:
# List every alphabet name present in the training set.
print("Loaded training data keys:", train_data.keys())

In [None]:
# Character names of the third alphabet in the training set.
list(train_data.values())[2].keys()

In [None]:
# Shape of all images of the first character of the third alphabet, stacked into one array.
np.array(list(list(train_data.values())[2].values())[0]).shape

In [None]:
# function for plotting some examples:

def plot_example_data(data_dict):
    """
    Plot one randomly chosen image for each of up to ten randomly chosen alphabets.

    The images are drawn on a 2x5 grid. When the data holds fewer than ten alphabets, only that
    many panels are filled and the remaining panels stay blank.

    :param data_dict: dict with as keys a string specifying the alphabet, and as values a dict with as keys
     the character of the alphabet, and as values a list of images of that character
    """
    fig, axs = plt.subplots(2, 5, figsize=(15, 6))
    panels = axs.ravel()
    # Hide every panel up front so panels that stay empty do not show bare axes.
    for ax in panels:
        ax.axis('off')

    alphabet_names = list(data_dict.keys())
    # Sampling without replacement cannot draw more alphabets than exist.
    num_to_plot = min(len(panels), len(alphabet_names))
    if num_to_plot == 0:
        return
    alphabets_to_plot = np.random.choice(alphabet_names, size=num_to_plot, replace=False)

    for ax, alphabet in zip(panels, alphabets_to_plot):
        characters = data_dict[alphabet]
        character_to_plot = np.random.choice(list(characters.keys()), size=1)[0]
        images = characters[character_to_plot]
        im_idx = np.random.choice(len(images), size=1)[0]
        # Move the first axis to the end before plotting
        # (presumably channels-first -> channels-last; confirm against the data).
        ax.imshow(images[im_idx].permute(1, 2, 0))
        ax.set_title(alphabet + '\n' + character_to_plot, fontsize=8)


In [None]:
# plot_example_data creates its own figure, so no separate plt.figure() call is made here
# (an additional call would only add an empty figure to the cell output).
plot_example_data(train_data)
# plt.savefig('example_data.png', dpi=600)
plt.show()

# Task 1: character recognition

In [None]:
# our solution:

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

In [None]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import random
import torchvision.transforms as transforms
import numpy as np

# Data Transforms
# Round-trip through a PIL image and back to a tensor, then normalise with mean 0.5 and std 0.5
# (a single value each, i.e. one channel).
data_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

def create_character_dataset(data_dict):
    """
    Flatten the nested alphabet -> character -> images dict into a list of (image, label) pairs.

    Every character gets a unique integer label. Labels are assigned consecutively in iteration
    order: first all characters of the first alphabet, then those of the second, and so on.

    :param data_dict: dict mapping alphabet name to a dict that maps character name to a list of images
    :return: list of (image, label) tuples, one per image
    """
    dataset = []
    next_label = 0  # label of the first character of the alphabet currently being processed
    for characters in data_dict.values():
        # enumerate yields the position of each character directly, avoiding a list search per character
        for offset, img_list in enumerate(characters.values()):
            label = next_label + offset
            dataset.extend((img, label) for img in img_list)
        next_label += len(characters)
    return dataset

class TripletCharacterDataset(Dataset):
    """
    Dataset that turns a list of (image, label) pairs into (anchor, positive, negative) triplets.

    The anchor is the item at the requested index, the positive is another randomly chosen item
    with the same label, and the negative is a randomly chosen item with a different label.

    Labels must be hashable (the notebook uses ints). The per-label index is built once in the
    constructor, so ``character_dataset`` should not be modified afterwards.
    """

    def __init__(self, character_dataset, transform=None):
        """
        :param character_dataset: sequence of (image, label) pairs
        :param transform: optional callable applied to each of the three images of a triplet
        """
        self.data = character_dataset
        self.transform = transform
        # Positions of all samples per label, so a positive can be drawn directly instead of
        # repeatedly sampling the whole dataset until a matching label turns up.
        self._indices_by_label = {}
        for position, (_, label) in enumerate(self.data):
            self._indices_by_label.setdefault(label, []).append(position)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """
        :param idx: index of the anchor sample
        :return: tuple (anchor image, positive image, negative image), transformed if a transform is set
        :raises ValueError: if the dataset holds fewer than two distinct labels (no negative exists)
        """
        img_anchor, label_anchor = self.data[idx]
        # Normalise negative indices so the anchor can be recognised among its class members.
        anchor_pos = idx + len(self.data) if idx < 0 else idx

        # Select a positive: a different sample of the same class whenever the class has one.
        same_class = self._indices_by_label[label_anchor]
        positive_pos = anchor_pos
        if len(same_class) > 1:
            while positive_pos == anchor_pos:
                positive_pos = random.choice(same_class)
        img_positive = self.data[positive_pos][0]

        # Select a negative: rejection sampling is cheap here because most samples belong to
        # other classes, but it would never terminate if there were no other class at all.
        if len(self._indices_by_label) < 2:
            raise ValueError("Need at least two distinct labels to sample a negative example.")
        while True:
            img_negative, label_negative = random.choice(self.data)
            if label_anchor != label_negative:
                break

        if self.transform is not None:
            img_anchor = self.transform(img_anchor)
            img_positive = self.transform(img_positive)
            img_negative = self.transform(img_negative)

        return img_anchor, img_positive, img_negative

# Prepare the training dataset and dataloader
# (triplets are sampled on the fly; batches of 16 are reshuffled every epoch)
train_dataset = TripletCharacterDataset(create_character_dataset(train_data), transform=data_transforms)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Inspect the dataset
# Only the first batch is fetched to check the tensor shapes, then the loop stops.
for img_anchor, img_positive, img_negative in train_loader:
    print("Batch of anchor images shape:", img_anchor.shape)
    print("Batch of positive images shape:", img_positive.shape)
    print("Batch of negative images shape:", img_negative.shape)
    break


In [None]:
def compute_top1_accuracy(model, annotated_images, annotated_labels, unseen_images, unseen_labels):
    """
    Fraction of unseen images whose nearest annotated image carries the same label.

    Images are embedded with ``model`` and compared by Euclidean distance in embedding space.
    The inputs are moved to the device that holds the model's parameters, so the function does
    not rely on a global ``device`` variable.

    :param model: embedding network; it is switched to eval mode and left in that mode
    :param annotated_images: batch of reference images accepted by ``model``
    :param annotated_labels: labels of the reference images, indexable by position
    :param unseen_images: batch of query images accepted by ``model``
    :param unseen_labels: labels of the query images, used only for scoring
    :return: top-1 accuracy as a float in [0, 1]; 0.0 when there are no unseen images
    """
    model.eval()
    num_unseen = len(unseen_labels)
    if num_unseen == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    # Run on whatever device the model lives on (parameter-free models keep the inputs in place).
    first_param = next(model.parameters(), None)

    def embed(images):
        if first_param is not None:
            images = images.to(first_param.device)
        return model(images).cpu().numpy()

    with torch.no_grad():
        annotated_embeddings = embed(annotated_images)
        unseen_embeddings = embed(unseen_images)

    correct = 0
    for i, unseen_embedding in enumerate(unseen_embeddings):
        distances = np.linalg.norm(annotated_embeddings - unseen_embedding, axis=1)
        top1_idx = int(np.argmin(distances))
        if annotated_labels[top1_idx] == unseen_labels[i]:
            correct += 1
    return correct / num_unseen

# Build a small evaluation set from a single alphabet of the Task 1 test data.
# It is passed to the training function below to report top-1 accuracy after every epoch.
# NOTE(review): this monitors progress on test data; a split of the training alphabets would keep
# the test set untouched -- confirm this is acceptable for the assignment.
data_dict_test = load_data('/kaggle/input/data-deep/data_A1_2AMM10_2023_2024/test_data_task1.pkl')
test_alphabet = 'Mongolian'
annotated_images = torch.stack([data_transforms(img) for img in data_dict_test['annotated_images'][test_alphabet]])
annotated_labels = data_dict_test['annotated_images_labels'][test_alphabet]
unseen_images = torch.stack([data_transforms(img) for img in data_dict_test['unseen_images'][test_alphabet]])
unseen_labels = data_dict_test['unseen_images_labels'][test_alphabet]

# Encode labels
# The encoder is fitted on the annotated labels only, so every unseen label must also occur there.
label_encoder = LabelEncoder()
encoded_annotated_labels = label_encoder.fit_transform(annotated_labels)
encoded_unseen_labels = label_encoder.transform(unseen_labels)

# Convert to tensor
annotated_labels_tensor = torch.tensor(encoded_annotated_labels, dtype=torch.long)
unseen_labels_tensor = torch.tensor(encoded_unseen_labels, dtype=torch.long)

# Prepare test data for evaluation
test_data = (annotated_images, annotated_labels_tensor, unseen_images, unseen_labels_tensor)


In [None]:
def initialize_weights(m):
    """
    Initialise the parameters of a convolutional or linear layer; meant for ``model.apply``.

    Weights are drawn with Kaiming (He) uniform initialisation for ReLU networks, which is
    zero-centred and scaled by the layer's fan-in; biases start at zero. Drawing weights from
    U(0, 1) instead makes every weight positive and lets activations grow with each layer.
    Modules of any other type are left untouched.

    :param m: a module visited by ``nn.Module.apply``
    """
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
        # Layers created with bias=False have no bias tensor to initialise.
        if m.bias is not None:
            nn.init.zeros_(m.bias)

# Define the Network
class CharacterNetwork(nn.Module):
    """
    Small convolutional embedding network for single-channel images.

    Three 3x3 convolutions (64, 128 and 256 channels) with ReLU, separated by 2x2 max pooling,
    are followed by global average pooling and a two-layer head that outputs a 32-dimensional
    embedding.
    """

    def __init__(self):
        super().__init__()

        # Convolutional trunk: max pooling sits between consecutive conv stages.
        channel_plan = [(1, 64), (64, 128), (128, 256)]
        trunk = []
        for stage, (in_channels, out_channels) in enumerate(channel_plan):
            if stage > 0:
                trunk.append(nn.MaxPool2d(2))
            trunk.append(nn.Conv2d(in_channels, out_channels, 3, padding=1))
            trunk.append(nn.ReLU())
        # Global average pooling reduces each feature map to a single value.
        trunk.append(nn.AdaptiveAvgPool2d(1))
        self.cnn = nn.Sequential(*trunk)

        # Projection head; the final width is the embedding size.
        self.fc = nn.Sequential(
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
        )

    def forward(self, x):
        """Map a batch of images to a batch of 32-dimensional embeddings."""
        features = self.cnn(x)
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

# Triplet Loss
class TripletLoss(nn.Module):
    """
    Margin-based triplet loss on Euclidean distances.

    For each triplet the loss is max(0, d(anchor, positive) - d(anchor, negative) + margin);
    the result is averaged over the batch.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative):
        """Return the mean triplet loss for a batch of anchor/positive/negative embeddings."""
        dist_to_positive = F.pairwise_distance(anchor, positive)
        dist_to_negative = F.pairwise_distance(anchor, negative)
        # Positive when the positive is not closer than the negative by at least the margin.
        violation = dist_to_positive - dist_to_negative + self.margin
        return torch.mean(F.relu(violation))

# Initialize the model, loss function, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CharacterNetwork().to(device)
# Re-initialise every layer with the custom scheme defined in this cell.
model.apply(initialize_weights)

criterion = TripletLoss(margin=1.0)
optimizer = optim.Adam(model.parameters(), lr=0.01)
# Divide the learning rate by 10 when the monitored value has not improved for 3 epochs.
# NOTE(review): recent PyTorch releases deprecate the `verbose` argument of LR schedulers --
# confirm against the installed version.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True)

# Function for training the model
def train_triplet(model, train_loader, criterion, optimizer, scheduler, num_epochs=10, test_data=None):
    """
    Train an embedding model on triplets and report progress after every epoch.

    :param model: network that maps a batch of images to a batch of embeddings
    :param train_loader: DataLoader yielding (anchor, positive, negative) image batches
    :param criterion: loss taking (anchor, positive, negative) embeddings
    :param optimizer: optimizer over the model's parameters
    :param scheduler: scheduler whose ``step`` receives the mean training loss of the epoch
    :param num_epochs: number of passes over ``train_loader``
    :param test_data: optional tuple (annotated_images, annotated_labels, unseen_images, unseen_labels);
     when given, top-1 accuracy on it is printed after every epoch
    """
    for epoch in range(num_epochs):
        # Re-enter training mode every epoch: the accuracy check at the end of an epoch
        # switches the model to eval mode.
        model.train()
        running_loss = 0.0
        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch')
        for img_anchor, img_positive, img_negative in progress_bar:
            img_anchor, img_positive, img_negative = img_anchor.to(device), img_positive.to(device), img_negative.to(device)

            optimizer.zero_grad()
            output_anchor = model(img_anchor)
            output_positive = model(img_positive)
            output_negative = model(img_negative)
            loss = criterion(output_anchor, output_positive, output_negative)
            loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch loss is a per-sample mean even if the last batch is smaller.
            running_loss += loss.item() * img_anchor.size(0)
        epoch_loss = running_loss / len(train_loader.dataset)
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
        scheduler.step(epoch_loss)
        if test_data:
            annotated_images, annotated_labels, unseen_images, unseen_labels = test_data
            top1_accuracy = compute_top1_accuracy(model, annotated_images, annotated_labels, unseen_images, unseen_labels)
            print(f'Top-1 Accuracy: {top1_accuracy:.4f}')

        for param_group in optimizer.param_groups:
            print(f'Learning Rate: {param_group["lr"]}')

# Train the Task 1 model for 25 epochs, printing top-1 accuracy on the single-alphabet evaluation set each epoch.
train_triplet(model, train_loader, criterion, optimizer, scheduler, num_epochs=25, test_data=test_data)


In [None]:
def compute_topk_accuracy(model, annotated_images, annotated_labels, unseen_images, unseen_labels, k=1):
    """
    Fraction of unseen images whose true label is among the labels of their k nearest annotated images.

    Images are embedded with ``model`` and compared by Euclidean distance in embedding space.
    The inputs are moved to the device that holds the model's parameters, so the function does
    not rely on a global ``device`` variable.

    :param model: embedding network; it is switched to eval mode and left in that mode
    :param annotated_images: batch of reference images accepted by ``model``
    :param annotated_labels: labels of the reference images; must support indexing with an index array
     (the notebook passes a 1-D tensor)
    :param unseen_images: batch of query images accepted by ``model``
    :param unseen_labels: labels of the query images, used only for scoring
    :param k: number of nearest annotated images to consider
    :return: top-k accuracy as a float in [0, 1]; 0.0 when there are no unseen images
    """
    model.eval()
    num_unseen = len(unseen_labels)
    if num_unseen == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    # Run on whatever device the model lives on (parameter-free models keep the inputs in place).
    first_param = next(model.parameters(), None)

    def embed(images):
        if first_param is not None:
            images = images.to(first_param.device)
        return model(images).cpu().numpy()

    with torch.no_grad():
        annotated_embeddings = embed(annotated_images)
        unseen_embeddings = embed(unseen_images)

    correct = 0
    for i, unseen_embedding in enumerate(unseen_embeddings):
        distances = np.linalg.norm(annotated_embeddings - unseen_embedding, axis=1)
        topk_idx = np.argsort(distances)[:k]
        if unseen_labels[i] in annotated_labels[topk_idx]:
            correct += 1
    return correct / num_unseen

# Load the test data
# Evaluate the trained model on every alphabet of the Task 1 test set.
data_dict_test = load_data('/kaggle/input/data-deep/data_A1_2AMM10_2023_2024/test_data_task1.pkl')
test_data = []
for alphabet in data_dict_test['annotated_images'].keys():
    annotated_images = torch.stack([data_transforms(img) for img in data_dict_test['annotated_images'][alphabet]])
    annotated_labels = data_dict_test['annotated_images_labels'][alphabet]
    unseen_images = torch.stack([data_transforms(img) for img in data_dict_test['unseen_images'][alphabet]])
    unseen_labels = data_dict_test['unseen_images_labels'][alphabet]

    # Encode labels
    # A fresh encoder per alphabet, fitted on the annotated labels only.
    label_encoder = LabelEncoder()
    encoded_annotated_labels = label_encoder.fit_transform(annotated_labels)
    encoded_unseen_labels = label_encoder.transform(unseen_labels)

    # Convert to tensor
    annotated_labels_tensor = torch.tensor(encoded_annotated_labels, dtype=torch.long)
    unseen_labels_tensor = torch.tensor(encoded_unseen_labels, dtype=torch.long)

    # Prepare test data for evaluation
    test_data.append((annotated_images, annotated_labels_tensor, unseen_images, unseen_labels_tensor))

k_values = [1, 2, 4, 8]
accuracies = {k: [] for k in k_values}

# One accuracy per alphabet and per k; the report below is the unweighted mean over alphabets.
for data in test_data:
    for k in k_values:
        accuracies[k].append(compute_topk_accuracy(model, data[0], data[1], data[2], data[3], k=k))

for k in k_values:
    print(f'Mean Top-{k} Accuracy: {np.mean(accuracies[k]):.4f}')

In [None]:
# example: let's get some annotated images and their labels for an alphabet in the test data:
# (data_dict_test is whatever test set the most recently run evaluation cell loaded)

alphabets_test = list(data_dict_test['annotated_images'].keys())
alphabet_id = np.random.randint(0, len(alphabets_test))
alphabet = alphabets_test[alphabet_id]

alphabet_annotated = data_dict_test['annotated_images'][alphabet]  # a tensor of shape (num_images, 1, height, width)
print(f'Shape of {alphabet} annotated images:', alphabet_annotated.shape)

alphabet_annotated_labels = data_dict_test['annotated_images_labels'][alphabet]  # a list of length num_images
print(f'Number of {alphabet} annotated labels:', len(alphabet_annotated_labels))  # equals num_images

alphabet_unseen = data_dict_test['unseen_images'][alphabet]  # a tensor of shape (num_images, 1, height, width)
print(f'Shape of {alphabet} unseen images:', alphabet_unseen.shape)

alphabet_unseen_labels = data_dict_test['unseen_images_labels'][alphabet]  # a list of length num_images
print(f'Number of {alphabet} unseen labels: {len(alphabet_unseen_labels)}. Use the unseen labels only for evaluating your model!')  # equals num_images

# Task 2: rotation problem

In [None]:
# function for plotting some examples:

def plot_example_data(data_dict):
    """
    Plot one example image for each of the first five alphabets of a test-data image dict.

    Note that this redefines the plotting helper of the training-data section for a different
    input layout: here every alphabet maps directly to its images rather than to a dict of characters.

    :param data_dict: dict with as keys a string specifying the alphabet, and as values a sliceable
     collection of images of that alphabet
    """
    alphabets_to_plot = list(data_dict.keys())[:5]
    if not alphabets_to_plot:
        return
    # One row with exactly as many panels as there are alphabets to show.
    fig, axs = plt.subplots(1, len(alphabets_to_plot), figsize=(15, 3), squeeze=False)

    for ax, alphabet in zip(axs[0], alphabets_to_plot):
        # Pick a random image among the first five of this alphabet.
        images = data_dict[alphabet][:5]
        im_idx = np.random.choice(len(images), size=1)[0]
        # Move the first axis to the end before plotting
        # (presumably channels-first -> channels-last; confirm against the data).
        ax.imshow(images[im_idx].permute(1, 2, 0))
        ax.set_title(alphabet, fontsize=8)
        ax.axis('off')


In [None]:
# load the test data for task 2:
# the structure of the test data of task 2 is exactly the same as for task 1,
# but now the images are rotated by an unknown angle between 0 and 360 degrees.
# data_dict_test_task1 = load_data('/kaggle/input/data-deep/data_A1_2AMM10_2023_2024/test_data_task1.pkl')
data_dict_test_task2 = load_data('/kaggle/input/data-deep/data_A1_2AMM10_2023_2024/test_data_task2.pkl')

# plot_example_data(data_dict_test_task1['annotated_images'])
# Show a few of the rotated annotated images.
plot_example_data(data_dict_test_task2['annotated_images'])

In [None]:
# Alphabets available in the Task 2 test set.
data_dict_test_task2['annotated_images'].keys()

In [None]:
def compute_topk_accuracy(model, annotated_images, annotated_labels, unseen_images, unseen_labels, k=1):
    """
    Fraction of unseen images whose true label is among the labels of their k nearest annotated images.

    Images are embedded with ``model`` and compared by Euclidean distance in embedding space.
    The inputs are moved to the device that holds the model's parameters, so the function does
    not rely on a global ``device`` variable.

    :param model: embedding network; it is switched to eval mode and left in that mode
    :param annotated_images: batch of reference images accepted by ``model``
    :param annotated_labels: labels of the reference images; must support indexing with an index array
     (the notebook passes a 1-D tensor)
    :param unseen_images: batch of query images accepted by ``model``
    :param unseen_labels: labels of the query images, used only for scoring
    :param k: number of nearest annotated images to consider
    :return: top-k accuracy as a float in [0, 1]; 0.0 when there are no unseen images
    """
    model.eval()
    num_unseen = len(unseen_labels)
    if num_unseen == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    # Run on whatever device the model lives on (parameter-free models keep the inputs in place).
    first_param = next(model.parameters(), None)

    def embed(images):
        if first_param is not None:
            images = images.to(first_param.device)
        return model(images).cpu().numpy()

    with torch.no_grad():
        annotated_embeddings = embed(annotated_images)
        unseen_embeddings = embed(unseen_images)

    correct = 0
    for i, unseen_embedding in enumerate(unseen_embeddings):
        distances = np.linalg.norm(annotated_embeddings - unseen_embedding, axis=1)
        topk_idx = np.argsort(distances)[:k]
        if unseen_labels[i] in annotated_labels[topk_idx]:
            correct += 1
    return correct / num_unseen

# Load the test data
# Evaluate on the rotated Task 2 test set. When the notebook is run top to bottom, `model` is
# still the network trained in the Task 1 section at this point, so this acts as a baseline.
data_dict_test = load_data('/kaggle/input/data-deep/data_A1_2AMM10_2023_2024/test_data_task2.pkl')
test_data = []
for alphabet in data_dict_test['annotated_images'].keys():
    annotated_images = torch.stack([data_transforms(img) for img in data_dict_test['annotated_images'][alphabet]])
    annotated_labels = data_dict_test['annotated_images_labels'][alphabet]
    unseen_images = torch.stack([data_transforms(img) for img in data_dict_test['unseen_images'][alphabet]])
    unseen_labels = data_dict_test['unseen_images_labels'][alphabet]

    # Encode labels
    # A fresh encoder per alphabet, fitted on the annotated labels only.
    label_encoder = LabelEncoder()
    encoded_annotated_labels = label_encoder.fit_transform(annotated_labels)
    encoded_unseen_labels = label_encoder.transform(unseen_labels)

    # Convert to tensor
    annotated_labels_tensor = torch.tensor(encoded_annotated_labels, dtype=torch.long)
    unseen_labels_tensor = torch.tensor(encoded_unseen_labels, dtype=torch.long)

    # Prepare test data for evaluation
    test_data.append((annotated_images, annotated_labels_tensor, unseen_images, unseen_labels_tensor))

k_values = [1, 2, 4, 8]
accuracies = {k: [] for k in k_values}

# One accuracy per alphabet and per k; the report below is the unweighted mean over alphabets.
for data in test_data:
    for k in k_values:
        accuracies[k].append(compute_topk_accuracy(model, data[0], data[1], data[2], data[3], k=k))

for k in k_values:
    print(f'Mean Top-{k} Accuracy: {np.mean(accuracies[k]):.4f}')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import random
import torchvision.transforms as transforms
import numpy as np

# Data Transforms with Rotation Only
# The Task 2 images are rotated by an unknown angle anywhere between 0 and 360 degrees, so the
# augmentation samples from the full circle ([-180, 180] degrees) instead of only a quarter turn
# in either direction.
# NOTE(review): rotated corners are filled with the default fill value; confirm that it matches
# the image background.
data_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomRotation(180),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

def create_character_dataset(data_dict):
    """
    Flatten the nested alphabet -> character -> images dict into a list of (image, label) pairs.

    Every character gets a unique integer label. Labels are assigned consecutively in iteration
    order: first all characters of the first alphabet, then those of the second, and so on.

    :param data_dict: dict mapping alphabet name to a dict that maps character name to a list of images
    :return: list of (image, label) tuples, one per image
    """
    dataset = []
    next_label = 0  # label of the first character of the alphabet currently being processed
    for characters in data_dict.values():
        # enumerate yields the position of each character directly, avoiding a list search per character
        for offset, img_list in enumerate(characters.values()):
            label = next_label + offset
            dataset.extend((img, label) for img in img_list)
        next_label += len(characters)
    return dataset

class TripletCharacterDataset(Dataset):
    """
    Dataset that turns a list of (image, label) pairs into (anchor, positive, negative) triplets.

    The anchor is the item at the requested index, the positive is another randomly chosen item
    with the same label, and the negative is a randomly chosen item with a different label.

    Labels must be hashable (the notebook uses ints). The per-label index is built once in the
    constructor, so ``character_dataset`` should not be modified afterwards.
    """

    def __init__(self, character_dataset, transform=None):
        """
        :param character_dataset: sequence of (image, label) pairs
        :param transform: optional callable applied to each of the three images of a triplet
        """
        self.data = character_dataset
        self.transform = transform
        # Positions of all samples per label, so a positive can be drawn directly instead of
        # repeatedly sampling the whole dataset until a matching label turns up.
        self._indices_by_label = {}
        for position, (_, label) in enumerate(self.data):
            self._indices_by_label.setdefault(label, []).append(position)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """
        :param idx: index of the anchor sample
        :return: tuple (anchor image, positive image, negative image), transformed if a transform is set
        :raises ValueError: if the dataset holds fewer than two distinct labels (no negative exists)
        """
        img_anchor, label_anchor = self.data[idx]
        # Normalise negative indices so the anchor can be recognised among its class members.
        anchor_pos = idx + len(self.data) if idx < 0 else idx

        # Select a positive: a different sample of the same class whenever the class has one.
        same_class = self._indices_by_label[label_anchor]
        positive_pos = anchor_pos
        if len(same_class) > 1:
            while positive_pos == anchor_pos:
                positive_pos = random.choice(same_class)
        img_positive = self.data[positive_pos][0]

        # Select a negative: rejection sampling is cheap here because most samples belong to
        # other classes, but it would never terminate if there were no other class at all.
        if len(self._indices_by_label) < 2:
            raise ValueError("Need at least two distinct labels to sample a negative example.")
        while True:
            img_negative, label_negative = random.choice(self.data)
            if label_anchor != label_negative:
                break

        if self.transform is not None:
            img_anchor = self.transform(img_anchor)
            img_positive = self.transform(img_positive)
            img_negative = self.transform(img_negative)

        return img_anchor, img_positive, img_negative

# Prepare the training dataset and dataloader
# (the dataset keeps a reference to the transform object defined above, so rebinding the name
# `data_transforms` later does not change how training images are processed)
train_dataset = TripletCharacterDataset(create_character_dataset(train_data), transform=data_transforms)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Inspect the dataset
# Only the first batch is fetched to check the tensor shapes, then the loop stops.
for img_anchor, img_positive, img_negative in train_loader:
    print("Batch of anchor images shape:", img_anchor.shape)
    print("Batch of positive images shape:", img_positive.shape)
    print("Batch of negative images shape:", img_negative.shape)
    break


In [None]:
def compute_top1_accuracy(model, annotated_images, annotated_labels, unseen_images, unseen_labels):
    """
    Fraction of unseen images whose nearest annotated image carries the same label.

    Images are embedded with ``model`` and compared by Euclidean distance in embedding space.
    The inputs are moved to the device that holds the model's parameters, so the function does
    not rely on a global ``device`` variable.

    :param model: embedding network; it is switched to eval mode and left in that mode
    :param annotated_images: batch of reference images accepted by ``model``
    :param annotated_labels: labels of the reference images, indexable by position
    :param unseen_images: batch of query images accepted by ``model``
    :param unseen_labels: labels of the query images, used only for scoring
    :return: top-1 accuracy as a float in [0, 1]; 0.0 when there are no unseen images
    """
    model.eval()
    num_unseen = len(unseen_labels)
    if num_unseen == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    # Run on whatever device the model lives on (parameter-free models keep the inputs in place).
    first_param = next(model.parameters(), None)

    def embed(images):
        if first_param is not None:
            images = images.to(first_param.device)
        return model(images).cpu().numpy()

    with torch.no_grad():
        annotated_embeddings = embed(annotated_images)
        unseen_embeddings = embed(unseen_images)

    correct = 0
    for i, unseen_embedding in enumerate(unseen_embeddings):
        distances = np.linalg.norm(annotated_embeddings - unseen_embedding, axis=1)
        top1_idx = int(np.argmin(distances))
        if annotated_labels[top1_idx] == unseen_labels[i]:
            correct += 1
    return correct / num_unseen

# Validation images must be processed deterministically: the training pipeline's random rotation
# would rotate the (already rotated) Task 2 images once more and make the reported accuracy
# change from call to call. Use a separate transform without augmentation here.
eval_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Build a small validation set from a single alphabet of the Task 2 test data; it is used to
# print top-1 accuracy after every training epoch.
data_dict_test = load_data('/kaggle/input/data-deep/data_A1_2AMM10_2023_2024/test_data_task2.pkl')
test_alphabet = 'Mongolian'
annotated_images = torch.stack([eval_transforms(img) for img in data_dict_test['annotated_images'][test_alphabet]])
annotated_labels = data_dict_test['annotated_images_labels'][test_alphabet]
unseen_images = torch.stack([eval_transforms(img) for img in data_dict_test['unseen_images'][test_alphabet]])
unseen_labels = data_dict_test['unseen_images_labels'][test_alphabet]

# Encode labels
# The encoder is fitted on the annotated labels only, so every unseen label must also occur there.
label_encoder = LabelEncoder()
encoded_annotated_labels = label_encoder.fit_transform(annotated_labels)
encoded_unseen_labels = label_encoder.transform(unseen_labels)

# Convert to tensor
annotated_labels_tensor = torch.tensor(encoded_annotated_labels, dtype=torch.long)
unseen_labels_tensor = torch.tensor(encoded_unseen_labels, dtype=torch.long)

# Prepare test data for evaluation
valid_data = (annotated_images, annotated_labels_tensor, unseen_images, unseen_labels_tensor)


In [None]:
def initialize_weights(m):
    """
    Initialise the parameters of a convolutional or linear layer; meant for ``model.apply``.

    Weights are drawn with Kaiming (He) uniform initialisation for ReLU networks, which is
    zero-centred and scaled by the layer's fan-in; biases start at zero. Drawing weights from
    U(0, 1) instead makes every weight positive and lets activations grow with each layer.
    Modules of any other type (e.g. batch normalisation) are left untouched.

    :param m: a module visited by ``nn.Module.apply``
    """
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.kaiming_uniform_(m.weight, nonlinearity='relu')
        # Layers created with bias=False have no bias tensor to initialise.
        if m.bias is not None:
            nn.init.zeros_(m.bias)

# Define the Network
class CharacterNetwork(nn.Module):
    """
    Deeper convolutional embedding network for single-channel images.

    A 5x5 convolutional stem is followed by three stages of three conv/batch-norm/ReLU units
    each, every stage ending in 2x2 max pooling and dropout. Global average pooling and a
    linear layer then produce a 32-dimensional embedding.
    """

    def __init__(self):
        super().__init__()

        def conv_unit(in_channels, out_channels, kernel, pad):
            # Convolution followed by batch normalisation and ReLU.
            return [
                nn.Conv2d(in_channels, out_channels, kernel_size=kernel, padding=pad),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
            ]

        # Stem: unpadded 5x5 convolution, then pad by one pixel before halving the resolution.
        layers = conv_unit(1, 64, 5, 0)
        layers += [nn.ZeroPad2d(1), nn.MaxPool2d(kernel_size=2, stride=2)]

        # (input channels, inner channels, output channels, kernel size of the last convolution)
        stage_plan = [
            (64, 64, 256, 5),
            (256, 64, 256, 3),
            (256, 128, 512, 3),
        ]
        for in_channels, inner_channels, out_channels, last_kernel in stage_plan:
            layers += conv_unit(in_channels, inner_channels, 3, 1)
            layers += conv_unit(inner_channels, inner_channels, 3, 1)
            # The last convolution of a stage is unpadded and widens the channels.
            layers += conv_unit(inner_channels, out_channels, last_kernel, 0)
            layers += [nn.MaxPool2d(kernel_size=2, stride=2), nn.Dropout(0.3)]

        # Head: collapse the spatial dimensions and project to the embedding size.
        layers += [nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten(), nn.Linear(512, 32)]
        self.cnn = nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch of images to a batch of 32-dimensional embeddings."""
        return self.cnn(x)

# Triplet Loss
class TripletLoss(nn.Module):
    """
    Margin-based triplet loss on Euclidean distances.

    For each triplet the loss is max(0, d(anchor, positive) - d(anchor, negative) + margin);
    the result is averaged over the batch.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative):
        """Return the mean triplet loss for a batch of anchor/positive/negative embeddings."""
        dist_to_positive = F.pairwise_distance(anchor, positive)
        dist_to_negative = F.pairwise_distance(anchor, negative)
        # Positive when the positive is not closer than the negative by at least the margin.
        violation = dist_to_positive - dist_to_negative + self.margin
        return torch.mean(F.relu(violation))

# Initialize the model, loss function, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# This rebinds `model` to a fresh network; the Task 1 model is no longer referenced by that name.
model = CharacterNetwork().to(device)
model.apply(initialize_weights)

criterion = TripletLoss(margin=0.5)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)
# Divide the learning rate by 10 when the monitored value has not improved for 4 epochs.
# NOTE(review): recent PyTorch releases deprecate the `verbose` argument of LR schedulers --
# confirm against the installed version.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=4, verbose=True)

# Function for training the model
def train_triplet(model, train_loader, criterion, optimizer, scheduler, num_epochs=10, test_data=None):
    """
    Train an embedding model on triplets and report progress after every epoch.

    :param model: network that maps a batch of images to a batch of embeddings
    :param train_loader: DataLoader yielding (anchor, positive, negative) image batches
    :param criterion: loss taking (anchor, positive, negative) embeddings
    :param optimizer: optimizer over the model's parameters
    :param scheduler: scheduler whose ``step`` receives the mean training loss of the epoch
    :param num_epochs: number of passes over ``train_loader``
    :param test_data: optional tuple (annotated_images, annotated_labels, unseen_images, unseen_labels);
     when given, top-1 accuracy on it is printed after every epoch
    """
    for epoch in range(num_epochs):
        # Re-enter training mode every epoch: the accuracy check at the end of an epoch switches
        # the model to eval mode, which would otherwise freeze the batch-norm statistics and
        # disable dropout for all following epochs.
        model.train()
        running_loss = 0.0
        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}', unit='batch')

        for img_anchor, img_positive, img_negative in progress_bar:
            img_anchor, img_positive, img_negative = img_anchor.to(device), img_positive.to(device), img_negative.to(device)

            optimizer.zero_grad()
            output_anchor = model(img_anchor)
            output_positive = model(img_positive)
            output_negative = model(img_negative)
            loss = criterion(output_anchor, output_positive, output_negative)
            loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch loss is a per-sample mean even if the last batch is smaller.
            running_loss += loss.item() * img_anchor.size(0)
        epoch_loss = running_loss / len(train_loader.dataset)
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')
        scheduler.step(epoch_loss)
        if test_data:
            annotated_images, annotated_labels, unseen_images, unseen_labels = test_data
            top1_accuracy = compute_top1_accuracy(model, annotated_images, annotated_labels, unseen_images, unseen_labels)
            print(f'Top-1 Accuracy: {top1_accuracy:.4f}')

        for param_group in optimizer.param_groups:
            print(f'Learning Rate: {param_group["lr"]}')

# Train the Task 2 model for 40 epochs, printing top-1 accuracy on the validation alphabet each epoch.
train_triplet(model, train_loader, criterion, optimizer, scheduler, num_epochs=40, test_data=valid_data)


In [None]:
# Rebind data_transforms to a pipeline without random rotation, for use by the evaluation cells below.
data_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [None]:

def compute_topk_accuracy(model, annotated_images, annotated_labels, unseen_images, unseen_labels, k=1):
    """
    Fraction of unseen images whose true label is among the labels of their k nearest annotated images.

    Images are embedded with ``model`` and compared by Euclidean distance in embedding space.
    The inputs are moved to the device that holds the model's parameters, so the function does
    not rely on a global ``device`` variable.

    :param model: embedding network; it is switched to eval mode and left in that mode
    :param annotated_images: batch of reference images accepted by ``model``
    :param annotated_labels: labels of the reference images; must support indexing with an index array
     (the notebook passes a 1-D tensor)
    :param unseen_images: batch of query images accepted by ``model``
    :param unseen_labels: labels of the query images, used only for scoring
    :param k: number of nearest annotated images to consider
    :return: top-k accuracy as a float in [0, 1]; 0.0 when there are no unseen images
    """
    model.eval()
    num_unseen = len(unseen_labels)
    if num_unseen == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    # Run on whatever device the model lives on (parameter-free models keep the inputs in place).
    first_param = next(model.parameters(), None)

    def embed(images):
        if first_param is not None:
            images = images.to(first_param.device)
        return model(images).cpu().numpy()

    with torch.no_grad():
        annotated_embeddings = embed(annotated_images)
        unseen_embeddings = embed(unseen_images)

    correct = 0
    for i, unseen_embedding in enumerate(unseen_embeddings):
        distances = np.linalg.norm(annotated_embeddings - unseen_embedding, axis=1)
        topk_idx = np.argsort(distances)[:k]
        if unseen_labels[i] in annotated_labels[topk_idx]:
            correct += 1
    return correct / num_unseen

# Load the test data
# Evaluate the rotation-trained model on every alphabet of the rotated Task 2 test set.
data_dict_test = load_data('/kaggle/input/data-deep/data_A1_2AMM10_2023_2024/test_data_task2.pkl')
test_data = []
for alphabet in data_dict_test['annotated_images'].keys():
    annotated_images = torch.stack([data_transforms(img) for img in data_dict_test['annotated_images'][alphabet]])
    annotated_labels = data_dict_test['annotated_images_labels'][alphabet]
    unseen_images = torch.stack([data_transforms(img) for img in data_dict_test['unseen_images'][alphabet]])
    unseen_labels = data_dict_test['unseen_images_labels'][alphabet]

    # Encode labels
    # A fresh encoder per alphabet, fitted on the annotated labels only.
    label_encoder = LabelEncoder()
    encoded_annotated_labels = label_encoder.fit_transform(annotated_labels)
    encoded_unseen_labels = label_encoder.transform(unseen_labels)

    # Convert to tensor
    annotated_labels_tensor = torch.tensor(encoded_annotated_labels, dtype=torch.long)
    unseen_labels_tensor = torch.tensor(encoded_unseen_labels, dtype=torch.long)

    # Prepare test data for evaluation
    test_data.append((annotated_images, annotated_labels_tensor, unseen_images, unseen_labels_tensor))

k_values = [1, 2, 4, 8]
accuracies = {k: [] for k in k_values}

# One accuracy per alphabet and per k; the report below is the unweighted mean over alphabets.
for data in test_data:
    for k in k_values:
        accuracies[k].append(compute_topk_accuracy(model, data[0], data[1], data[2], data[3], k=k))

for k in k_values:
    print(f'Mean Top-{k} Accuracy: {np.mean(accuracies[k]):.4f}')

In [None]:
def compute_topk_accuracy(model, annotated_images, annotated_labels, unseen_images, unseen_labels, k=1):
    """
    Fraction of unseen images whose true label is among the labels of their k nearest annotated images.

    Images are embedded with ``model`` and compared by Euclidean distance in embedding space.
    The inputs are moved to the device that holds the model's parameters, so the function does
    not rely on a global ``device`` variable.

    :param model: embedding network; it is switched to eval mode and left in that mode
    :param annotated_images: batch of reference images accepted by ``model``
    :param annotated_labels: labels of the reference images; must support indexing with an index array
     (the notebook passes a 1-D tensor)
    :param unseen_images: batch of query images accepted by ``model``
    :param unseen_labels: labels of the query images, used only for scoring
    :param k: number of nearest annotated images to consider
    :return: top-k accuracy as a float in [0, 1]; 0.0 when there are no unseen images
    """
    model.eval()
    num_unseen = len(unseen_labels)
    if num_unseen == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    # Run on whatever device the model lives on (parameter-free models keep the inputs in place).
    first_param = next(model.parameters(), None)

    def embed(images):
        if first_param is not None:
            images = images.to(first_param.device)
        return model(images).cpu().numpy()

    with torch.no_grad():
        annotated_embeddings = embed(annotated_images)
        unseen_embeddings = embed(unseen_images)

    correct = 0
    for i, unseen_embedding in enumerate(unseen_embeddings):
        distances = np.linalg.norm(annotated_embeddings - unseen_embedding, axis=1)
        topk_idx = np.argsort(distances)[:k]
        if unseen_labels[i] in annotated_labels[topk_idx]:
            correct += 1
    return correct / num_unseen

# Load the test data
# Evaluate the rotation-trained model on the unrotated Task 1 test set as well.
data_dict_test = load_data('/kaggle/input/data-deep/data_A1_2AMM10_2023_2024/test_data_task1.pkl')
test_data = []
for alphabet in data_dict_test['annotated_images'].keys():
    annotated_images = torch.stack([data_transforms(img) for img in data_dict_test['annotated_images'][alphabet]])
    annotated_labels = data_dict_test['annotated_images_labels'][alphabet]
    unseen_images = torch.stack([data_transforms(img) for img in data_dict_test['unseen_images'][alphabet]])
    unseen_labels = data_dict_test['unseen_images_labels'][alphabet]

    # Encode labels
    # A fresh encoder per alphabet, fitted on the annotated labels only.
    label_encoder = LabelEncoder()
    encoded_annotated_labels = label_encoder.fit_transform(annotated_labels)
    encoded_unseen_labels = label_encoder.transform(unseen_labels)

    # Convert to tensor
    annotated_labels_tensor = torch.tensor(encoded_annotated_labels, dtype=torch.long)
    unseen_labels_tensor = torch.tensor(encoded_unseen_labels, dtype=torch.long)

    # Prepare test data for evaluation
    test_data.append((annotated_images, annotated_labels_tensor, unseen_images, unseen_labels_tensor))

k_values = [1, 2, 4, 8]
accuracies = {k: [] for k in k_values}

# One accuracy per alphabet and per k; the report below is the unweighted mean over alphabets.
for data in test_data:
    for k in k_values:
        accuracies[k].append(compute_topk_accuracy(model, data[0], data[1], data[2], data[3], k=k))

for k in k_values:
    print(f'Mean Top-{k} Accuracy: {np.mean(accuracies[k]):.4f}')

# Task 3: Domain knowledge injection

In [None]:
# load the test data for task 3:
# the structure of the data of task 3 is exactly the same as for task 1, but now the loaded dictionary contains some additional keys.
# These additional keys will be explained in the cells below:

data_dict_test_task3 = load_data('/kaggle/input/data-deep/data_A1_2AMM10_2023_2024/test_data_task3.pkl')
print(data_dict_test_task3.keys())

In [None]:
# First ten preceding-character types recorded for the 'Angelic' alphabet.
data_dict_test_task3['unseen_images_preceding_types']['Angelic'][:10]

In [None]:
# Character -> type mapping of the 'Angelic' alphabet.
data_dict_test_task3['character_to_type_mapping']['Angelic']

In [None]:
# Type-following probabilities of the 'Angelic' alphabet.
data_dict_test_task3['type_following_probs']['Angelic']

In [None]:
# The keys 'annotated_images', 'annotated_images_labels', 'unseen_images', 'unseen_images_labels' are the same as for task 1, and the structure of the data is exactly the same. 

# The key 'unseen_images_preceding_types' maps to the type of the preceding character in the sequence where the unseen image was observed, for each alphabet.
# The key 'character_to_type_mapping' maps to the mapping of each character to its type, for each alphabet.
# The key 'type_following_probs' maps to the probabilities of each character type being followed by another character type, for each alphabet.

In [None]:
# examples:
# Pick a random alphabet and show one example of each additional Task 3 key for it.

alphabet = np.random.choice(list(data_dict_test_task3['unseen_images_preceding_types'].keys()))
print(f'Alphabet: {alphabet}')


preceding_character_types_alphabet = data_dict_test_task3["unseen_images_preceding_types"][alphabet]  # a list
print(f'Some character types that preceded unseen images from the {alphabet} alphabet: {np.random.choice(preceding_character_types_alphabet, size=5)}')
print(f'There are {len(preceding_character_types_alphabet)} preceding character types in the {alphabet} alphabet, and {len(data_dict_test_task3["unseen_images"][alphabet])} unseen images.')


character_to_type_mapping_alphabet = data_dict_test_task3["character_to_type_mapping"][alphabet]  
# this is a dict, with as keys the characters and as values the types
random_character = np.random.choice(list(character_to_type_mapping_alphabet.keys()))
print(f'Type of {random_character} from the {alphabet} alphabet: {character_to_type_mapping_alphabet[random_character]}')



type_following_probs_alphabet = data_dict_test_task3["type_following_probs"][alphabet]  # a dict of dicts
# outer key: preceding type, inner key: following type, value: probability
preceding_type = np.random.choice(list(type_following_probs_alphabet.keys()))
following_type = np.random.choice(list(type_following_probs_alphabet[preceding_type].keys()))
print(f'Probability of a character of type {following_type} following a character of type {preceding_type} in the {alphabet} alphabet: {type_following_probs_alphabet[preceding_type][following_type]}')


In [None]:
# Evaluation pipeline without augmentation: PIL round-trip, then normalise with mean 0.5 and std 0.5.
data_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
def compute_topk_accuracy(model, annotated_images, annotated_labels, unseen_images, unseen_labels, k=1):
    """
    Top-k nearest-neighbour accuracy of `model` embeddings on one evaluation episode.

    Every unseen image is embedded and compared (Euclidean distance) with the
    embeddings of all annotated images. The prediction counts as correct when the
    true label of the unseen image is among the labels of its `k` closest
    annotated images.

    :param model: torch module mapping a batch of images to a 2-D batch of embeddings;
        it is switched to eval mode as a side effect
    :param annotated_images: tensor with the batch of labelled reference images
    :param annotated_labels: labels of `annotated_images` (tensor, array or list), same order
    :param unseen_images: tensor with the batch of images to classify
    :param unseen_labels: true labels of `unseen_images` (tensor, array or list), same order
    :param k: number of nearest annotated images that are inspected per unseen image
    :return: fraction of unseen images whose label is among the k nearest neighbours;
        0.0 when there are no unseen images
    """
    def _as_numpy(labels):
        # Labels may arrive as (possibly non-CPU) tensors, arrays or plain lists.
        if torch.is_tensor(labels):
            return labels.detach().cpu().numpy()
        return np.asarray(labels)

    model.eval()
    support_labels = _as_numpy(annotated_labels)
    query_labels = _as_numpy(unseen_labels)
    if len(query_labels) == 0:
        # Nothing to classify: avoid dividing by zero below.
        return 0.0

    # Run on whatever device the model lives on instead of relying on a notebook-level
    # `device` variable; a parameter-free model simply receives the inputs as they are.
    first_param = next(model.parameters(), None)

    def _embed(images):
        if first_param is not None:
            images = images.to(first_param.device)
        return model(images).cpu().numpy()

    with torch.no_grad():
        support_embeddings = _embed(annotated_images)
        query_embeddings = _embed(unseen_images)

    correct = 0
    for query_label, query_embedding in zip(query_labels, query_embeddings):
        distances = np.linalg.norm(support_embeddings - query_embedding, axis=1)
        nearest = np.argsort(distances)[:k]
        if query_label in support_labels[nearest]:
            correct += 1
    return correct / len(query_labels)

# Baseline evaluation on the Task 3 test file: nearest-neighbour classification on
# image embeddings only, without using the type-following information.
data_dict_test = load_data('/kaggle/input/data-deep/data_A1_2AMM10_2023_2024/test_data_task3.pkl')
test_data = []
for alphabet in data_dict_test['annotated_images']:
    # Preprocess and batch the labelled reference images and the images to classify.
    annotated_images = torch.stack(list(map(data_transforms, data_dict_test['annotated_images'][alphabet])))
    annotated_labels = data_dict_test['annotated_images_labels'][alphabet]
    unseen_images = torch.stack(list(map(data_transforms, data_dict_test['unseen_images'][alphabet])))
    unseen_labels = data_dict_test['unseen_images_labels'][alphabet]

    # Integer-encode the labels; the encoder is fitted on the annotated labels only,
    # and the unseen labels are mapped with that same encoder.
    label_encoder = LabelEncoder()
    encoded_annotated_labels = label_encoder.fit_transform(annotated_labels)
    encoded_unseen_labels = label_encoder.transform(unseen_labels)
    annotated_labels_tensor = torch.tensor(encoded_annotated_labels, dtype=torch.long)
    unseen_labels_tensor = torch.tensor(encoded_unseen_labels, dtype=torch.long)

    # One evaluation episode per alphabet, in the argument order of compute_topk_accuracy.
    test_data.append((annotated_images, annotated_labels_tensor, unseen_images, unseen_labels_tensor))

k_values = [1, 2, 4, 8]
accuracies = {k: [] for k in k_values}

# Per-alphabet accuracy for every k.
for data in test_data:
    for k in k_values:
        accuracies[k].append(compute_topk_accuracy(model, *data, k=k))

# Report the mean over alphabets.
for k in k_values:
    print(f'Mean Top-{k} Accuracy: {np.mean(accuracies[k]):.4f}')

In [None]:
from sklearn.metrics.pairwise import euclidean_distances

# Function to generate embeddings
def generate_embeddings(model, images, device):
    """
    Embed a batch of images with `model` and return the result as a NumPy array.

    :param model: torch module producing the embeddings; it is put into eval mode
    :param images: tensor with the batch of images
    :param device: device the images are moved to before the forward pass
    :return: NumPy array with the model output for the batch
    """
    model.eval()
    batch = images.to(device)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        outputs = model(batch)
    return outputs.cpu().numpy()

# Function to adjust distances based on type-following probabilities
def adjust_distances(distances, annotated_labels, unseen_types, character_to_type, type_following_probs, label_encoder):
    """
    Re-weight image distances with the type-following probabilities.

    Entry (i, j) of the result is ``distances[i, j] * (1 - p)``, where ``p`` is the
    probability that the type of annotated character j follows the type that
    preceded unseen image i. Likely candidates therefore move closer. Note that
    ``p == 1`` collapses the distance to 0 regardless of visual similarity.

    :param distances: array of shape (n_unseen, n_annotated); it is not modified
    :param annotated_labels: encoded labels of the annotated images (tensor, array or list)
    :param unseen_types: for each unseen image, the type of the preceding character
    :param character_to_type: dict mapping a character to its type
    :param type_following_probs: dict of dicts, ``probs[preceding_type][following_type]``
    :param label_encoder: fitted encoder whose ``inverse_transform`` maps the encoded
        labels back to characters
    :return: adjusted copy of `distances`
    :raises KeyError: if a character or type is missing from the lookup tables
    """
    adjusted_distances = distances.copy()

    # Decoding the annotated labels does not depend on the unseen image, so do it
    # once for the whole batch instead of once per (unseen, annotated) pair.
    if torch.is_tensor(annotated_labels):
        encoded_labels = annotated_labels.detach().cpu().numpy()
    else:
        encoded_labels = np.asarray(annotated_labels)
    characters = label_encoder.inverse_transform(encoded_labels)
    annotated_types = [character_to_type[character] for character in characters]

    # The scaling row only depends on the preceding type: build it once per type.
    scale_by_preceding_type = {}
    for i, unseen_type in enumerate(unseen_types):
        scale = scale_by_preceding_type.get(unseen_type)
        if scale is None:
            following_probs = type_following_probs[unseen_type]
            scale = np.array([1 - following_probs[annotated_type] for annotated_type in annotated_types], dtype=float)
            scale_by_preceding_type[unseen_type] = scale
        # Plain assignment (not *=) keeps the dtype of `distances`, as element-wise updates did.
        adjusted_distances[i] = adjusted_distances[i] * scale

    return adjusted_distances

# Function to classify unseen images with type-following probabilities
def classify_unseen_images_with_probs(model, annotated_images, annotated_labels, unseen_images, unseen_types, character_to_type, type_following_probs, label_encoder, k=1):
    """
    Predict the k most likely labels for every unseen image.

    Images are embedded with `model`, pairwise Euclidean distances between unseen
    and annotated embeddings are re-weighted by `adjust_distances`, and for each
    unseen image the labels of the k annotated images with the smallest adjusted
    distance are returned, closest first.

    :param model: embedding model
    :param annotated_images: tensor with the labelled reference images
    :param annotated_labels: encoded labels of `annotated_images`, same order
    :param unseen_images: tensor with the images to classify
    :param unseen_types: for each unseen image, the type of the preceding character
    :param character_to_type: dict mapping a character to its type
    :param type_following_probs: dict of dicts with the type-following probabilities
    :param label_encoder: fitted encoder used to decode `annotated_labels`
    :param k: number of predictions per unseen image
    :return: array with one row of (up to) k predicted labels per unseen image
    """
    # `device` is the notebook-level device variable.
    annotated_embeddings = generate_embeddings(model, annotated_images, device)
    unseen_embeddings = generate_embeddings(model, unseen_images, device)

    # Rows index unseen images, columns index annotated images.
    adjusted = adjust_distances(
        euclidean_distances(unseen_embeddings, annotated_embeddings),
        annotated_labels,
        unseen_types,
        character_to_type,
        type_following_probs,
        label_encoder,
    )

    nearest_columns = np.argsort(adjusted, axis=1)[:, :k]
    predicted_rows = []
    for columns in nearest_columns:
        predicted_rows.append([annotated_labels[column] for column in columns])
    return np.array(predicted_rows)

# Function to calculate top-k accuracy
def top_k_accuracy(true_labels, top_k_predictions, k):
    """
    Fraction of samples whose true label is among their first `k` predictions.

    :param true_labels: sequence with the true label of every sample
    :param top_k_predictions: per sample, a sequence of predicted labels ordered from
        most to least likely; only the first `k` entries are inspected, so rows may
        hold more than `k` predictions
    :param k: number of leading predictions that count as a hit
    :return: accuracy in [0, 1]; 0.0 when `true_labels` is empty
    """
    total = len(true_labels)
    if total == 0:
        # No samples: avoid dividing by zero.
        return 0.0
    hits = sum(
        1
        for true_label, ranked_labels in zip(true_labels, top_k_predictions)
        if true_label in ranked_labels[:k]
    )
    return hits / total

# Load the test data for task 3
data_dict_test_task3 = load_data('/kaggle/input/data-deep/data_A1_2AMM10_2023_2024/test_data_task3.pkl')

# Initialize variables to store results
k_values = [1, 2, 4, 8]
top_k_accuracies = {k: [] for k in k_values}
# The ranking is the same for every k; only the number of inspected predictions differs.
max_k = max(k_values)

# Loop through each alphabet in the test data
for alphabet in data_dict_test_task3['annotated_images'].keys():
    annotated_images = torch.stack([data_transforms(img) for img in data_dict_test_task3['annotated_images'][alphabet]])
    annotated_labels = data_dict_test_task3['annotated_images_labels'][alphabet]
    unseen_images = torch.stack([data_transforms(img) for img in data_dict_test_task3['unseen_images'][alphabet]])
    unseen_labels = data_dict_test_task3['unseen_images_labels'][alphabet]
    unseen_types = data_dict_test_task3['unseen_images_preceding_types'][alphabet]
    character_to_type = data_dict_test_task3['character_to_type_mapping'][alphabet]
    type_following_probs = data_dict_test_task3['type_following_probs'][alphabet]

    # Encode labels (the encoder is fitted on the annotated labels only)
    label_encoder = LabelEncoder()
    encoded_annotated_labels = label_encoder.fit_transform(annotated_labels)
    encoded_unseen_labels = label_encoder.transform(unseen_labels)

    # Convert to tensor
    annotated_labels_tensor = torch.tensor(encoded_annotated_labels, dtype=torch.long)
    unseen_labels_tensor = torch.tensor(encoded_unseen_labels, dtype=torch.long)

    # Embed and rank the candidates once per alphabet with the largest k, instead of
    # repeating the forward passes and the distance adjustment for every k.
    # top_k_accuracy only looks at the first k predictions of each row.
    top_k_predictions = classify_unseen_images_with_probs(model, annotated_images, annotated_labels_tensor, unseen_images, unseen_types, character_to_type, type_following_probs, label_encoder, k=max_k)
    true_labels = unseen_labels_tensor.numpy()
    for k in k_values:
        accuracy_k = top_k_accuracy(true_labels, top_k_predictions, k=k)
        top_k_accuracies[k].append(accuracy_k)

# Compute mean top-k accuracy over the alphabets
for k in k_values:
    mean_accuracy_k = np.mean(top_k_accuracies[k])
    print(f'Mean Top-{k} Accuracy: {mean_accuracy_k:.4f}')


In [None]:
# Save only the model's state_dict (not the module object itself) to 'model.pt',
# a path relative to the current working directory.
torch.save(model.state_dict(), 'model.pt')