In [2]:
from src.model import TripletNetwork, FasterRCNNEmbedder
from src.data import *
from src.transforms import albumentations_transform

from torch.nn import TripletMarginLoss
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger, CSVLogger

# Initialize feature extractor, model, loss, optimizer, lr_scheduler

model = FasterRCNNEmbedder()
loss = TripletMarginLoss(margin=1.0, p=2)
optimizer = Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
lr_sceduler = CosineAnnealingLR(optimizer, T_max=10, eta_min=0.0001, last_epoch=-1)

# initialize TripletNetwork for training
network = TripletNetwork(model,
                          loss,
                          optimizer,
                          lr_sceduler)

# initialize datamodule

dm = TripletDataModule(data_dir='/home/georg/projects/university/C5/task3/dataset/COCO',
                          json_file='/home/georg/projects/university/C5/task3/dataset/COCO/mcv_image_retrieval_annotations.json',
                          batch_size=96,
                          #transforms=albumentations_transform(),
                          num_workers=16,
                          dims=(224, 224))

# Initialize callbacks 
checkpointer = ModelCheckpoint(
    monitor="val_loss", save_top_k=1, mode="min", save_weights_only=True)
early_stopper = EarlyStopping(monitor="val_loss", patience=3, mode="min")
logger = CSVLogger("logs", name="TripletNetworkCSV")



In [3]:
# Train the network

trainer = pl.Trainer(max_epochs=20, 
                    devices=1,
                    accelerator='gpu',
                    callbacks=[checkpointer, early_stopper],
                    logger=logger,
                    num_sanity_val_steps=0) 
trainer.fit(network, dm)


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type               | Params
-------------------------------------------------
0 | model     | FasterRCNNEmbedder | 44.8 M
1 | criterion | TripletMarginLoss  | 0     
-------------------------------------------------
44.5 M    Trainable params
225 K     Non-trainable params
44.8 M    Total params
179.056   Total estimated model params size (MB)
Preparing train data: 100%|██████████| 80/80 [01:06<00:00,  1.21it/s]
Preparing

Epoch 11: 100%|██████████| 855/855 [07:46<00:00,  1.83it/s, v_num=19, train_loss_step=0.195, val_loss_step=0.951, val_loss_epoch=0.682, train_loss_epoch=0.200] 


In [3]:
# define helper functions to order the data 

def get_img_file_name(img_id, set):
    return 'COCO_{}2014_{:012d}.jpg'.format(set, img_id)

def prepare_data(json_file, mode):
        with open(json_file, 'r') as file:
            # Load the JSON data
            data = json.load(file)[mode]
        print(f'Loaded {len(data)} classes from {json_file}')
        img_ids = []
        labels = []
        # loop over classes 
        for key in tqdm(data.keys(), desc=f'Preparing {mode} data'):
            class_ = key
            images_with_class = data[key]
            # loop over images with the class
            for image_id in images_with_class:
                # if it's a new image, add it to the list of images and create a label list for it
                if image_id not in img_ids:
                    img_ids.append(image_id)
                    labels.append([])
            # loop over images and add the class to the label list if it's in the list of images
            for i, img_id in enumerate(img_ids):
                if img_id in images_with_class:
                    labels[i].append(int(class_))

        data_split = 'train' if mode in ['train', 'database'] else 'val'

        img_files = [get_img_file_name(img_id, data_split) for img_id in img_ids]
        return img_files, labels

In [4]:
import torch 

#load model from checkpoint and set to eval mode
model.load_state_dict(torch.load('/home/georg/projects/university/C5/task3/task_3e/logs/TripletNetworkCSV/version_19/checkpoints/epoch=8-step=7695.ckpt'), strict=False)
model.eval()

# specify json file path
data_json = '/home/georg/projects/university/C5/task3/dataset/COCO/mcv_image_retrieval_annotations.json'

In [12]:
import json
from tqdm import tqdm
import os 
from PIL import Image
from src.transforms import preprocess
import numpy as np

# define helper functions to extract embeddings from images using the model
def extract_embeddings(img_files, imgs_path, model):
    embeddings = []
    for img_file in tqdm(img_files):
        img_path = os.path.join(imgs_path, img_file)
        image = Image.open(img_path).convert('RGB')
        image = preprocess([224,224])(image)
        image = image.unsqueeze(0)
        pred = model(image)
        embeddings.append(pred.squeeze(0).cpu().detach().numpy())
    return np.array(embeddings)


# extract embeddings from the training images
train_imgs_path = '/home/georg/projects/university/C5/task3/dataset/COCO/train2014'
train_img_files, train_labels = prepare_data(json_file=data_json, mode='database')
train_embeddings = extract_embeddings(train_img_files, train_imgs_path, model)

Loaded 80 classes from /home/georg/projects/university/C5/task3/dataset/COCO/mcv_image_retrieval_annotations.json


Preparing database data: 100%|██████████| 80/80 [00:00<00:00, 3512.41it/s]
  0%|          | 0/1959 [00:00<?, ?it/s]

100%|██████████| 1959/1959 [01:26<00:00, 22.61it/s]


In [13]:
# Create FAISS index and add the training embeddings to it
import faiss  
             
index = faiss.IndexFlatL2(1024)   # build the index, d=size of vectors 
faiss.normalize_L2(train_embeddings)
print(train_embeddings.shape)
index.add(train_embeddings)                 # add vectors to the index
print(index.ntotal)


(1959, 1024)
1959


In [14]:
# Extract embeddings from the test/val images (can be configured using 'mode')

val_imgs_path = '/home/georg/projects/university/C5/task3/dataset/COCO/val2014'
val_img_files, val_labels = prepare_data(json_file=data_json, mode='test')
val_embeddings = extract_embeddings(val_img_files, val_imgs_path, model)

# Search for similar vectors k in the FAISS index
k = 5                       # we want 4 similar vectors
D, I = index.search(val_embeddings, k)     # actual search
print(I)

Loaded 80 classes from /home/georg/projects/university/C5/task3/dataset/COCO/mcv_image_retrieval_annotations.json


Preparing test data:   0%|          | 0/80 [00:00<?, ?it/s]

Preparing test data: 100%|██████████| 80/80 [00:00<00:00, 4848.49it/s]
100%|██████████| 1917/1917 [01:21<00:00, 23.47it/s]


[[  25   26   47   43   90]
 [1125   38 1118 1117 1122]
 [ 386  836  777  371  778]
 ...
 [ 400 1484 1535 1374 1722]
 [ 403 1656 1281 1892  670]
 [ 854 1506 1079  570  443]]


In [15]:
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score


# Calculate precision, recall, f1, accuracy, mAP @k=1
targets = val_labels
preds = []

preds = [train_labels[i[0]] for i in I]

targets = [[1 if i in target else 0 for i in range(80)] for target in targets]
preds = [[1 if i in pred else 0 for i in range(80)] for pred in preds]

precision, recall, f1, _ = precision_recall_fscore_support(targets, preds, average='micro')
accuracy = accuracy_score(targets, preds)
print(f'Precision: {precision}, \nRecall: {recall}, \nF1: {f1}, \nAccuracy: {accuracy}')

# get mean average precision
from sklearn.metrics import average_precision_score
average_precision = average_precision_score(targets, preds, average='micro')
print(f'Average precision: {average_precision}')

Precision: 0.2465753424657534, 
Recall: 0.2398609501738123, 
F1: 0.2431718061674009, 
Accuracy: 0.25508607198748046
Average precision: 0.06769882984041536


In [16]:
from sklearn.metrics import recall_score, precision_score

# Calculate precision, recall, f1, accuracy, mAP @k=5

preds = []
targets = []
def get_predicted_classes(indices):
    classes = []
    for index in indices:
        classes.extend(train_labels[index])
    classes = list(set(classes))
    return classes

for pred, target in zip(I, val_labels):
    predicted_classes = get_predicted_classes(pred)
    target_classes = target
    predicted_classes = [1 if i in predicted_classes else 0 for i in range(80)]
    target_classes = [1 if i in target_classes else 0 for i in range(80)]
    preds.append(predicted_classes)
    targets.append(target_classes)
    

precision = precision_score(targets, preds, average='micro' )
recall = recall_score(targets, preds, average='micro')
accuracy = accuracy_score(targets, preds)
f1 = 2 * (precision * recall) / (precision + recall)
print(f'Precision: {precision}, \nRecall: {recall}, \nAccuracy: {accuracy}, \nF1: {f1}')
average_precision = average_precision_score(targets, preds, average='micro')
print(f'Average precision: {average_precision}')

Precision: 0.13432369038311182, 
Recall: 0.4976825028968714, 
Accuracy: 0.048513302034428794, 
F1: 0.21155030168698438
Average precision: 0.07250391506044937
