# Contrastive Loss

In [3]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"

import numpy as np
import pandas as pd
from tqdm import tqdm
import tensorflow as tf

import matplotlib.pyplot as plt
from ipywidgets import interact

import tensorflow_addons as tfa
# from tensorflow_addons.losses import TripletSemiHardLoss, TripletHardLoss

from beeid.utils import sensitivity_map

from code.models import simple_cnnv2, ContrastiveLearning
from code.data_utils import load_tf_pair_dataset, load_tf_dataset
from code.viz import show_sensitivity_maps
from code.evaluation import cmc_evaluation, plot_cmc
from code.evaluation import get_interactive_plot_query_gallery

# IMAGE_FOLDER = "/mnt/storage/work/jchan/normalized_uncensored_dataset/images/"
# DATASET_CSV = "/mnt/storage/work/jchan/body_dataset2/dataset3.csv"

### Prepare dataset

Select the dates for Training, Validation and Testing datasets.

In [4]:
# Load the training, validation and test tables from their CSV files.
train_df, valid_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ("data/train.csv", "data/valid.csv", "data/test.csv")
)

## Evaluation

In [None]:
def cmc_evaluation(model, df, iterations=100, gallery_size=10):
    """
    Estimate a Cumulative Matching Characteristic (CMC) curve for `model`.

    For every iteration one query row is sampled per ``track_tag_id`` (only
    ids that appear in more than one ``global_track_id`` are eligible), a
    gallery is built for each query with ``get_query_gallery``, all images are
    embedded with ``model.predict`` and the position of the matching gallery
    image in the distance ranking is recorded.

    NOTE(review): this definition shadows the ``cmc_evaluation`` imported from
    ``code.evaluation`` at the top of the notebook. ``get_query_gallery`` and
    ``filename2image`` are not imported in the visible cells -- confirm they
    are in scope before running.

    Parameters
    ----------
    model: keras model
        Must expose ``predict``; assumed to return one embedding per image.
    df: a dataframe with the image to evaluate
        Must contain the columns ``track_tag_id`` and ``global_track_id``
        (plus whatever ``get_query_gallery`` reads -- not visible here).
    iterations: int
        Number of random query/gallery draws to average over.
    gallery_size: int
        Passed to ``get_query_gallery`` as ``limit``; also the number of
        ranks reported.

    Returns
    -------
    numpy.ndarray of shape ``(gallery_size,)``
        Entry ``k - 1`` is the fraction of queries whose match is ranked in
        the top ``k``, averaged over all iterations.

    Raises
    ------
    ValueError
        If no ``track_tag_id`` in `df` spans more than one
        ``global_track_id``, i.e. there is nothing to query.
    """
    cdf = df.copy()

    # Only identities observed in at least two distinct tracks can be queried.
    query_df = cdf.groupby("track_tag_id").filter(
        lambda group: len(group["global_track_id"].unique()) > 1
    )
    if query_df.empty:
        raise ValueError(
            "cmc_evaluation needs at least one track_tag_id that appears in "
            "more than one global_track_id."
        )

    dfGroupedbyTagId = cdf.groupby("track_tag_id")

    # Each query contributes itself plus `gallery_size + 1` gallery images.
    # Assumes get_query_gallery returns exactly that many filenames, query
    # first and the matching image second -- TODO confirm against its source.
    query_gallery_size = gallery_size + 2
    rank_thresholds = np.arange(1, gallery_size + 1)

    ranks = np.zeros((iterations, gallery_size))

    for it in tqdm(range(iterations)):
        queries = query_df.groupby("track_tag_id").sample()
        queries_and_galleries = [
            get_query_gallery(query_data, query_df, dfGroupedbyTagId, limit=gallery_size)
            for _, query_data in queries.iterrows()
        ]
        filenames = np.array(queries_and_galleries).ravel()

        images = filename2image(filenames)
        predictions = model.predict(images.batch(32))

        # One row per query: slot 0 is the query, the rest is its gallery.
        embeddings = predictions.reshape(len(queries), query_gallery_size, -1)
        queries_emb = embeddings[:, :1, :]
        galleries_emb = embeddings[:, 1:, :]

        # Calculate distance. The dot product is a cosine similarity only if
        # the embeddings are L2-normalised -- presumably done by the model.
        similarity = tf.matmul(queries_emb, galleries_emb, transpose_b=True).numpy()
        distance = 1.0 - similarity

        # Calculate rank: argsort orders gallery indices by distance, and the
        # argmin of that ordering is the position at which gallery index 0
        # (the matching image) ends up.
        match_rank = np.argmin(np.argsort(distance), axis=2).reshape(-1)

        # Fraction of queries whose match is within the top k, for each k.
        ranks[it] = np.mean(match_rank[:, None] < rank_thresholds, axis=0)
    return np.mean(ranks, axis=0)

In [None]:
# Settings shared by every CMC evaluation in the cells below.
ITERATIONS, GALLERY_SIZE = 100, 10

### Evaluation on ids shared with the training set (validation set)

In [None]:
# Keep only the validation rows whose identity also occurs in the training set.
train_ids = train_df["track_tag_id"].unique()
valid_with_shared_ids = valid_df.loc[valid_df["track_tag_id"].isin(train_ids)]

valid_with_shared_ids_ranks_means = cmc_evaluation(
    model,
    valid_with_shared_ids,
    iterations=ITERATIONS,
    gallery_size=GALLERY_SIZE,
)

plot_cmc(valid_with_shared_ids_ranks_means)

### Evaluation on the whole validation set

In [None]:
# CMC curve over every identity in the validation set.
valid_ranks_means = cmc_evaluation(
    model,
    valid_df,
    iterations=ITERATIONS,
    gallery_size=GALLERY_SIZE,
)

plot_cmc(valid_ranks_means)

### Evaluation on test set

In [None]:
# CMC curve over every identity in the test set.
test_ranks_means = cmc_evaluation(
    model,
    test_df,
    iterations=ITERATIONS,
    gallery_size=GALLERY_SIZE,
)

plot_cmc(test_ranks_means)

In [None]:
# Report how many test identities were already seen during training.
train_ids = train_df["track_tag_id"].unique()
test_ids = test_df["track_tag_id"].unique()

intersection = set(train_ids).intersection(test_ids)

print("Test set has {} Ids.".format(len(test_ids)))
print("Intersection of train and test set {}".format(len(intersection)))

In [None]:
# Evaluate only on test identities that never appear in the training set.
test_disjoint_train = test_df.loc[~test_df["track_tag_id"].isin(train_ids)]

test_disjoint_train_ranks_means = cmc_evaluation(
    model,
    test_disjoint_train,
    iterations=ITERATIONS,
    gallery_size=GALLERY_SIZE,
)

plot_cmc(test_disjoint_train_ranks_means)

#### Saving results

In [None]:
# Collect the four CMC curves as columns of one table and write it to disk.
metric_dict = {
    "valid_cmc_only_train_ids": valid_with_shared_ids_ranks_means,
    "valid_cmc": valid_ranks_means,
    "test_cmc": test_ranks_means,
    "test_cmc_no_ids_overlap": test_disjoint_train_ranks_means,
}

metric_df = pd.DataFrame(metric_dict)
metric_df.to_csv("results/contrastive_lossT1.csv")

In [None]:
# Display the table of CMC results as the cell output.
metric_df

#### Interactive Query Plotting

In [17]:
# Build the interactive query/gallery widget for the validation set
# (the recorded output shows a `query_id` slider).
get_interactive_plot_query_gallery(model, valid_df)

interactive(children=(IntSlider(value=17, description='query_id', max=34), Checkbox(value=False, description='…