Task 1: Implement a program which,

*   for each unique label l, computes the corresponding k latent semantics (of your choice) associated with the
even-numbered Caltech101 images, and
*   for the odd-numbered images, predicts the most likely labels using distances/similarities computed under the
label-specific latent semantics.

The system should also output per-label precision, recall, and F1-score values as well as output an overall accuracy
value.




In [2]:
# Mount Google Drive at /content/drive (Colab-only import); the paths used below live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from PIL import Image, ImageDraw, ImageFont
import torch
from torchvision.models import resnet50
from torchvision import transforms as T
from torchvision.datasets import Caltech101
from torch.utils.data import Dataset, DataLoader
import os
from tqdm import tqdm

import glob
import numpy as np
import cv2
import json
import pandas as pd

import torchvision
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict

In [4]:
def load_json(file_path):
    """Read a JSON file stored under the project's Google Drive folder.

    Args:
        file_path: File name (or relative path) inside
            ``/content/drive/MyDrive/CSE515_Phase3/``.

    Returns:
        The deserialized JSON content.
    """
    full_path = f"/content/drive/MyDrive/CSE515_Phase3/{file_path}"
    with open(full_path, "r") as handle:
        return json.load(handle)

In [5]:
# Caltech101 dataset handle; download=False, so the data must already exist under this Drive folder.
caltech_dataset = Caltech101("/content/drive/MyDrive/CSE515_Phase3/data", download=False)

# Pre-computed feature descriptors; presumably those of the even-numbered images, keyed by image id — TODO confirm.
features = load_json("feature_descriptors.json")

In [6]:
# Pre-computed descriptors of the odd-numbered images. Downstream code iterates this with .items() and
# passes each value directly as a feature vector; presumably keys are image ids — TODO confirm.
odd_image_features = load_json("feature_descriptors_odd_images.json")

In [7]:
# Ground-truth labels at the odd dataset indices (1, 3, 5, ...). The upper bound comes from the
# dataset itself instead of a hard-coded image count.
actual_labels = [caltech_dataset.y[i] for i in range(1, len(caltech_dataset.y), 2)]

In [8]:
# One row per entry of `features` (the JSON keys become the index after the transpose),
# restricted to the label column and the two feature-space columns.
data = pd.DataFrame(features).T[['label','avg_pool','layer_3']]

In [9]:
def get_svd_latent_features(k, feature_vector, data):
    """Compute a rank-k SVD of a feature matrix through the eigendecomposition of A^T A.

    Args:
        k: Number of latent semantics (leading singular triplets) to keep.
        feature_vector: Name of the column of ``data`` that holds one feature vector per row.
        data: DataFrame whose ``feature_vector`` column contains equal-length numeric vectors.

    Returns:
        A tuple ``(svd_weights, latent_space)``:

        * ``svd_weights`` maps ``2 * row_position`` to the first ``k`` latent coordinates of
          that row (its row of U) as a list of floats.
        * ``latent_space`` is ``{"V": ..., "sigma": ..., "k": k}`` where ``V`` holds the first
          ``k`` right singular vectors as columns and ``sigma`` holds *all* singular values in
          descending order. Singular values that are numerically zero are reported as exactly
          ``0.0`` and the matching latent coordinates are ``0.0`` (never NaN/inf).
    """
    feature_vectors = np.vstack(data[feature_vector].to_numpy()).astype(float)

    # A^T A is symmetric, so eigh applies; its eigenvectors are the columns of V.
    gram = feature_vectors.T @ feature_vectors
    eigenvalues, eigenvectors = np.linalg.eigh(gram)

    # eigh returns ascending eigenvalues; SVD convention is descending.
    order = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[order]
    eigenvectors = eigenvectors[:, order]

    # For rank-deficient input (fewer rows than feature dimensions) round-off leaves tiny or
    # slightly negative eigenvalues. Treat them as exactly zero so the square root cannot
    # produce NaN and the division below cannot blow up.
    tolerance = max(eigenvalues[0], 0.0) * max(feature_vectors.shape) * np.finfo(float).eps
    eigenvalues = np.where(eigenvalues > tolerance, eigenvalues, 0.0)
    singular_values = np.sqrt(eigenvalues)

    right_singular_vectors = eigenvectors[:, :k]
    leading_sigma = singular_values[:k]

    # U = A V Sigma^-1, computed only for the k retained components.
    projected = feature_vectors @ right_singular_vectors
    latent_weights = np.divide(
        projected,
        leading_sigma,
        out=np.zeros_like(projected),
        where=leading_sigma > 0,
    )

    # NOTE(review): keys are 2 * row position, i.e. they only equal image ids when `data`
    # holds every even-numbered image in order — confirm for per-label subsets.
    svd_weights = {row * 2: latent_weights[row].tolist() for row in range(len(latent_weights))}

    # V and sigma are returned so that new images can later be projected into the same space.
    latent_space = {
        "V": right_singular_vectors.tolist(),
        "sigma": singular_values.tolist(),
        "k": k,
    }
    return svd_weights, latent_space

In [10]:
def _ordered_labels(actual_labels, predicted_labels):
    """Return every label seen in either sequence, in a deterministic ascending order."""
    labels = set(actual_labels) | set(predicted_labels)
    try:
        return sorted(labels)
    except TypeError:
        # Labels of mixed, non-comparable types: fall back to ordering by their text form.
        return sorted(labels, key=str)


def calculate_per_label_metrics(actual_labels, predicted_labels):
    """Compute per-label precision, recall and F1 score.

    Args:
        actual_labels: Sequence of ground-truth labels.
        predicted_labels: Sequence of predicted labels, aligned with ``actual_labels``.

    Returns:
        Three lists ``(precision, recall, f1_score)``. Entry ``i`` of each list belongs to
        the ``i``-th label of the ascending list of labels that occur in either input.
        A metric whose denominator is zero is reported as ``0.0``.
    """
    unique_labels = _ordered_labels(actual_labels, predicted_labels)
    label_to_index = {label: i for i, label in enumerate(unique_labels)}
    label_count = len(unique_labels)

    true_positives = [0] * label_count
    false_positives = [0] * label_count
    false_negatives = [0] * label_count

    for actual, predicted in zip(actual_labels, predicted_labels):
        if actual == predicted:
            true_positives[label_to_index[actual]] += 1
        else:
            # A miss counts against the predicted label (FP) and the true label (FN).
            false_positives[label_to_index[predicted]] += 1
            false_negatives[label_to_index[actual]] += 1

    def _ratio(numerator, denominator):
        # Explicit zero guard instead of an additive epsilon, so exact results stay exact.
        return numerator / denominator if denominator else 0.0

    precision = [_ratio(tp, tp + fp) for tp, fp in zip(true_positives, false_positives)]
    recall = [_ratio(tp, tp + fn) for tp, fn in zip(true_positives, false_negatives)]
    f1_score = [_ratio(2 * (p * r), p + r) for p, r in zip(precision, recall)]

    return precision, recall, f1_score


def calculate_accuracy(actual_labels, predicted_labels):
    """Return the fraction of positions where prediction and ground truth agree.

    Returns ``0.0`` for empty input instead of raising ``ZeroDivisionError``.
    """
    total = len(actual_labels)
    if total == 0:
        return 0.0
    correct_predictions = sum(1 for a, p in zip(actual_labels, predicted_labels) if a == p)
    return correct_predictions / total


def calculate_classification_metrics(actual_labels, predicted_labels):
    """Print per-label precision/recall/F1 and the overall accuracy.

    The label printed on each line is taken from the same ordered label list that
    ``calculate_per_label_metrics`` uses for indexing, so a metric is never attributed
    to the wrong label when some label is absent from both inputs.
    """
    unique_labels = _ordered_labels(actual_labels, predicted_labels)
    per_label_precision, per_label_recall, per_label_f1_score = calculate_per_label_metrics(actual_labels, predicted_labels)
    for label, precision, recall, f1 in zip(unique_labels, per_label_precision, per_label_recall, per_label_f1_score):
        print(f"Label {label}: Precision={precision:.2f}, Recall={recall:.2f}, F1 Score={f1:.2f}")

    overall_accuracy = calculate_accuracy(actual_labels, predicted_labels)

    print(f"Over all accuracy for the model is = {round(overall_accuracy*100,2)} %")


In [11]:
def transform_odd_images_to_latent_space_svd(latent_space, image_feature_vector):
    """
    Project a new image's feature vector into a stored SVD latent space.

    Args:
        latent_space: Dict with "V" (right singular vectors as columns) and "sigma"
            (singular values; only the first ``V.shape[1]`` entries are used).
        image_feature_vector: Flat feature vector with as many entries as V has rows.

    Returns:
        1-D float array ``x V Sigma^-1`` with one entry per column of V. Components whose
        singular value is zero, negative or NaN are returned as 0.0 instead of inf/NaN.
    """
    right_vectors = np.asarray(latent_space["V"], dtype=float)
    sigma = np.asarray(latent_space["sigma"], dtype=float)[: right_vectors.shape[1]]

    # Cast to float up front so integer-valued descriptors do not break the division.
    projected = np.asarray(image_feature_vector, dtype=float).reshape(1, -1) @ right_vectors
    scaled = np.divide(projected, sigma, out=np.zeros_like(projected), where=sigma > 0)

    # reshape(-1) instead of squeeze(): the result stays 1-D even when only one component is kept.
    return scaled.reshape(-1)

In [12]:
def get_label_for_odd_images_comb( odd_image_features, latent_feature_space, inherent_dim, mean_label_latent_vector,actual_labels):
    """
    Predict a label for every image by projecting it into the shared latent space and
    picking the label whose mean latent vector has the highest cosine similarity.

    Args:
        odd_image_features: Dict whose values are the feature vectors to classify.
        latent_feature_space: Dict with "V" and "sigma" describing the latent space.
        inherent_dim: Unused; kept so existing callers keep working.
        mean_label_latent_vector: One mean latent vector per label; the position in this
            sequence is the predicted label value.
        actual_labels: Unused; kept so existing callers keep working.

    Returns:
        List of predicted label indices (ints), one per entry of ``odd_image_features``.
    """
    # Convert the loop-invariant inputs to arrays once instead of once per image.
    latent_space = dict(latent_feature_space)
    latent_space["V"] = np.asarray(latent_feature_space["V"])
    label_means = np.asarray(mean_label_latent_vector)

    predicted_labels = []
    for odd_feature in odd_image_features.values():
        latent_vector = transform_odd_images_to_latent_space_svd(latent_space, odd_feature)
        similarities = cosine_similarity([latent_vector], label_means)[0]
        # argmax finds the best label directly; ties resolve to the lowest label index.
        predicted_labels.append(int(np.argmax(similarities)))
    return predicted_labels

In [13]:
def get_label_for_odd_images_label_wise( odd_image_features, latent_feature_space, inherent_dim, mean_label_latent_vector,actual_labels):
    """
    Predict a label for every image by projecting it into each label's own latent space and
    picking the label whose mean latent vector has the highest cosine similarity.

    Args:
        odd_image_features: Dict whose values are the feature vectors to classify.
        latent_feature_space: Sequence of per-label dicts with "V" and "sigma".
        inherent_dim: Sequence with one entry per label; only its length is used here.
        mean_label_latent_vector: Sequence with one mean latent vector per label.
        actual_labels: Unused; kept so existing callers keep working.

    Returns:
        List of predicted label indices (ints), one per entry of ``odd_image_features``.
    """
    label_count = len(inherent_dim)

    # Loop-invariant work hoisted out of the per-image loop: array conversion of every
    # label's V matrix and rounding of every label's mean vector.
    latent_spaces = [
        {**latent_feature_space[idx], "V": np.asarray(latent_feature_space[idx]["V"])}
        for idx in range(label_count)
    ]
    # NOTE(review): both sides are rounded to 3 decimals before comparison, as before;
    # this discards precision when latent coordinates are small — confirm it is intended.
    rounded_means = [
        [np.array(mean_label_latent_vector[idx]).round(3)] for idx in range(label_count)
    ]

    predicted_labels = []
    for odd_feature in odd_image_features.values():
        similarities = []
        for idx in range(label_count):
            latent_vector = transform_odd_images_to_latent_space_svd(latent_spaces[idx], odd_feature)
            similarities.append(
                cosine_similarity([np.array(latent_vector).round(3)], rounded_means[idx])[0][0]
            )
        # argmax finds the best label directly; ties resolve to the lowest label index.
        predicted_labels.append(int(np.argmax(similarities)))
    return predicted_labels

In [14]:
def calculate_latent_semantics(k, semantics, data, odd_image_features, actual_labels):
    """Compute SVD latent semantics, predict labels for the odd images and save them.

    Args:
        k: Number of latent semantics to keep.
        semantics: Mode selector. ``1``, ``"1"``, ``"combined"`` or ``"comb"`` selects one
            latent space shared by all labels; any other value selects one latent space
            per label.
        data: DataFrame with a 'label' column and a 'layer_3' feature column.
        odd_image_features: Dict whose values are the feature vectors to classify.
        actual_labels: Ground-truth labels; forwarded to the prediction helpers.

    Returns:
        List of predicted labels. The same list is written to
        ``svd_layer_3_{k}_task_1_{comb|sep}_predicted_labels.json`` in the project folder,
        which is the file-name pattern ``predict_label_classes`` reads saved results from.
    """
    feature_space = "layer_3"
    label_count = 101
    inherent_dim = [k for _ in range(label_count)]

    # The interactive driver passes an int (1 or 2), so compare on a normalized text form
    # rather than against the literal string "combined" only.
    use_combined = str(semantics).strip().lower() in {"1", "combined", "comb"}
    semantics_tag = "comb" if use_combined else "sep"

    if use_combined:
        weights, latent_feature_space = get_svd_latent_features(k, feature_space, data)

        # Bucket every image's latent coordinates by its ground-truth label.
        label_wise_latent_features = {i: [] for i in range(label_count)}
        for image_id in weights:
            corr_label = caltech_dataset.y[int(image_id)]
            label_wise_latent_features[corr_label].append(weights[image_id][:inherent_dim[corr_label]])

        mean_label_latent_vector = [
            np.mean(label_wise_latent_features[i], axis=0) for i in range(label_count)
        ]

        pred_labels = get_label_for_odd_images_comb(odd_image_features, latent_feature_space, inherent_dim, mean_label_latent_vector, actual_labels)

    else:
        # NOTE(review): groups are taken in groupby key order and then addressed by position
        # 0..100; this presumably relies on 'label' holding the integers 0..100 with every
        # label present — confirm.
        data_gb = data.groupby('label')
        label_groups = [data_gb.get_group(x) for x in data_gb.groups]

        label_latent_weights = []
        latent_feature_space = []

        # Separate loop variable so the function argument k is not shadowed.
        for idx, label_k in enumerate(inherent_dim):
            weights, latent_feature = get_svd_latent_features(label_k, feature_space, label_groups[idx])
            latent_feature_space.append(latent_feature)
            label_latent_weights.append(weights)

        mean_label_latent_vector = [
            np.mean(list(label_latent_weights[i].values()), axis=0) for i in range(label_count)
        ]

        pred_labels = get_label_for_odd_images_label_wise(odd_image_features, latent_feature_space, inherent_dim, mean_label_latent_vector, actual_labels)

    with open(f"/content/drive/MyDrive/CSE515_Phase3/svd_{feature_space}_{k}_task_1_{semantics_tag}_predicted_labels.json","w") as f:
        json.dump(pred_labels, f)

    return pred_labels

In [18]:
def predict_label_classes(k, choice, data, odd_image_features, actual_labels, semantics):
    """Obtain predicted labels (saved or freshly computed) and print the evaluation metrics.

    Args:
        k: Number of latent semantics.
        choice: ``1`` loads previously saved predictions; any other value recomputes them.
        data: DataFrame of image features, forwarded when recomputing.
        odd_image_features: Dict of feature vectors to classify, forwarded when recomputing.
        actual_labels: Ground-truth labels the predictions are scored against.
        semantics: ``1`` (or ``"1"``/``"combined"``/``"comb"``) for the combined latent
            space; any other value for label-wise latent spaces.
    """
    use_combined = str(semantics).strip().lower() in {"1", "combined", "comb"}

    if choice == 1:
        feature_space = "layer_3"
        semantics_tag = "comb" if use_combined else "sep"
        pred_labels = load_json(f"svd_{feature_space}_{k}_task_1_{semantics_tag}_predicted_labels.json")
    else:
        # calculate_latent_semantics selects its branch by comparing against the string
        # "combined", so pass a string mode rather than the raw menu number.
        mode = "combined" if use_combined else "sep"
        pred_labels = calculate_latent_semantics(k, mode, data, odd_image_features, actual_labels)

    calculate_classification_metrics(actual_labels, pred_labels)

In [20]:
# Interactive driver: read the number of latent semantics, whether to reuse saved predictions
# (1) or recompute them (2), and which latent-semantics mode to evaluate, then print the metrics.
k = int(input('Enter the value of k for latent semantics : '))
choice = int(input('Choose one: 1. saved data 2. calculate latent semantics (LONGER TIME) : '))
semantics = int(input("Compute under 1. combined latent semantics 2. label wise latent semantics : "))
predict_label_classes(k, choice, data, odd_image_features, actual_labels,semantics)

Enter the value of k for latent semantics : 5
Choose one: 1. saved data 2. calculate latent semantics (LONGER TIME) : 1
Compute under 1. combined latent semantics 2. label wise latent semantics2
Label 0: Precision=0.00, Recall=0.00, F1 Score=0.00
Label 1: Precision=0.33, Recall=0.00, F1 Score=0.01
Label 2: Precision=0.00, Recall=0.00, F1 Score=0.00
Label 3: Precision=0.00, Recall=0.00, F1 Score=0.00
Label 4: Precision=0.02, Recall=0.04, F1 Score=0.02
Label 5: Precision=0.00, Recall=0.00, F1 Score=0.00
Label 6: Precision=0.00, Recall=0.00, F1 Score=0.00
Label 7: Precision=0.00, Recall=0.00, F1 Score=0.00
Label 8: Precision=0.00, Recall=0.00, F1 Score=0.00
Label 9: Precision=0.00, Recall=0.00, F1 Score=0.00
Label 10: Precision=0.00, Recall=0.00, F1 Score=0.00
Label 11: Precision=0.00, Recall=0.06, F1 Score=0.01
Label 12: Precision=0.00, Recall=0.00, F1 Score=0.00
Label 13: Precision=0.00, Recall=0.00, F1 Score=0.00
Label 14: Precision=0.00, Recall=0.00, F1 Score=0.00
Label 15: Precision=