### Implement a program which computes and prints the **inherent dimensionality** associated with each unique label of the even-numbered Caltech101 images.


In [None]:
#Drive Mounting code
# Colab-only cell: mounts Google Drive at /content/drive so that the project
# files referenced by the path constants further down are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:

#Imports needed to run the code
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
import numpy as np
from torchvision import models, transforms
from scipy.signal import convolve2d
import json
import matplotlib.pyplot as plt
import json
from scipy.special import softmax
import time
from sklearn.metrics import euclidean_distances

In [None]:
# dataset
# NOTE(review): `dataset` is not referenced by the other cells visible in this section;
# the feature descriptors are read from the JSON files below instead.
dataset = torchvision.datasets.Caltech101('/content/drive/MyDrive/CSE515_Phase3/data', download = True) # Caltech101 Dataset will be downloaded in the mentioned path, if already downloaded it will not download again

# JSON file paths

fd_json_file_path = '/content/drive/MyDrive/CSE515_Phase3/feature_descriptors.json'     # path of the file holding the per-image feature descriptors (read below into fd_data)
label_image_mapping_json_file_path = '/content/drive/MyDrive/CSE515_Phase3/label_image_map.json' # path of the file used to store the label to image mapping
inherent_dimensionality_for_labels_info_json_file_path = '/content/drive/MyDrive/CSE515_Phase3/task0/inherent_dimensionality_for_labels_info_json_file_path.json' # path of the file where the per-label inherent dimensionality results are written


Files already downloaded and verified


In [None]:
# Function to save the data to a JSON file. The file and any missing parent folders are created if needed
def save_data_to_json(data, json_file):
    """Write ``data`` to ``json_file`` as JSON indented by 4 spaces.

    Parameters
    ----------
    data : any JSON-serializable object
        The content to store.
    json_file : str or path-like
        Destination path. An existing file is overwritten.

    Raises
    ------
    TypeError
        Propagated from ``json.dump`` when ``data`` is not JSON-serializable.
    OSError
        Propagated when the folder or file cannot be created or written.
    """
    import os  # local import: the notebook's import cell does not bring in ``os``

    # Opening a file in 'w' mode creates the file but not its folder, so a
    # destination such as ".../task0/result.json" would fail on a fresh drive.
    parent_dir = os.path.dirname(json_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(json_file, 'w') as file:
        json.dump(data, file, indent=4)


# Reads previously saved JSON content; a missing file is treated as "nothing stored yet"
def load_data_from_json(json_file):
    """Return the parsed content of ``json_file``.

    When the file does not exist an empty dictionary is returned instead of
    raising, so callers always get a usable object back. Any other error
    (for example malformed JSON) is propagated unchanged.
    """
    try:
        handle = open(json_file, 'r')
    except FileNotFoundError:
        # Nothing has been saved at this path yet
        return {}
    with handle:
        return json.load(handle)


In [None]:
#Loading feature descriptors and Label to image mapping values from files saved during phase 2
# NOTE: load_data_from_json returns an empty dict when a file is missing, so a wrong
# path does not fail here; it shows up later as "no labels / no features".
# fd_data is indexed below as fd_data[str(image_id)]['layer_3'];
# labelsWithImages maps each label to the image ids that belong to it.
fd_data = load_data_from_json(fd_json_file_path)
labelsWithImages = load_data_from_json(label_image_mapping_json_file_path)

In [None]:
#Normalized stress: how far the reduced-space distances are from the original distances
def stress_function(original_distances, reduced_distances):
    """Return sqrt( sum((reduced - original)^2) / sum(original^2) ).

    Both arguments are arrays of pairwise distances with the same number of
    elements; they are flattened before being compared element by element.
    A value of 0 means the reduced distances reproduce the original ones exactly.
    """
    flat_original = original_distances.ravel()
    flat_reduced = reduced_distances.ravel()
    squared_error = ((flat_reduced - flat_original) ** 2).sum()
    normalizer = (flat_original ** 2).sum()
    return np.sqrt(squared_error / normalizer)


In [None]:
#Applying MultiDimensional Scaling when number of dimensions is given
def MDS_with_dimension(data, num_dimensions, max_iterations=300, stress_threshold=0.05, random_state=None):
    """Embed ``data`` into ``num_dimensions`` dimensions with iterative MDS.

    Starting from a random configuration, the embedding is repeatedly updated
    with the Guttman transform until the normalized stress (see
    ``stress_function``) drops below ``stress_threshold`` or
    ``max_iterations`` updates have been performed.

    Parameters
    ----------
    data : array-like, one feature vector per row
        The objects to embed.
    num_dimensions : int
        Number of dimensions of the embedding.
    max_iterations : int, default 300
        Upper bound on the number of stress evaluations / updates.
    stress_threshold : float, default 0.05
        The optimization stops as soon as the stress is below this value.
    random_state : int or None, default None
        Seed for the random starting configuration. ``None`` draws from
        NumPy's global random state, so results differ between runs.

    Returns
    -------
    tuple
        ``(X, stress, iterations, num_dimensions)`` where ``X`` is the
        embedding (one row per input row), ``stress`` is the stress of exactly
        that ``X``, ``iterations`` is the number of loop iterations used and
        ``num_dimensions`` is echoed back.

    Raises
    ------
    ValueError
        If ``data`` is empty or not two-dimensional, or ``max_iterations < 1``.
    """
    data = np.asarray(data, dtype=float)
    if data.ndim != 2 or data.shape[0] == 0:
        raise ValueError("data must be a non-empty 2-D collection of feature vectors")
    if max_iterations < 1:
        raise ValueError("max_iterations must be at least 1")

    num_images = data.shape[0]

    #Initializing data in random position
    if random_state is None:
        X = np.random.rand(num_images, num_dimensions)
    else:
        X = np.random.RandomState(random_state).rand(num_images, num_dimensions)

    #Computing original distances using L2-metric
    original_distances = euclidean_distances(data)
    #Replacing the 0 values in original distances (diagonal and duplicate rows) with 1e-5
    #so the distance ratio below stays finite
    original_distances[original_distances == 0] = 1e-5

    # Optimization loop
    for iteration in range(max_iterations):
        #Computing reduced distances using L2-metric
        reduced_distances = euclidean_distances(X)
        # Compute the current stress value
        current_stress = stress_function(original_distances, reduced_distances)
        #Stopping as soon as the loss is below the threshold; X and the stress match here
        if current_stress < stress_threshold:
            return X, current_stress, iteration + 1, num_dimensions
        #Replacing the 0 values in reduced distances with 1e-5 to avoid division by zero.
        reduced_distances[reduced_distances == 0] = 1e-5
        #Weight of the original distances with respect to the reduced distances
        ratio = original_distances / reduced_distances
        B = -ratio
        # Guttman Transformation
        B[np.arange(len(B)), np.arange(len(B))] += ratio.sum(axis=1)
        X = 1.0 / num_images * np.dot(B, X)

    # The budget ran out right after an update of X, so the stress measured inside
    # the loop belongs to the previous configuration; measure the returned one.
    final_stress = stress_function(original_distances, euclidean_distances(X))
    return X, final_stress, max_iterations, num_dimensions


In [None]:
#Function to compute intrinsic dimensionality on a data i.e. when number of dimensions is not given
def MDS_scratch(data, num_dimensions=None, max_dimensions=1000, step=20):
    """Run MDS for a given dimensionality, or search for the inherent one.

    With ``num_dimensions`` given, this is a plain call to
    ``MDS_with_dimension``. Otherwise the dimensionalities
    ``1, 1 + step, 1 + 2*step, ...`` (below ``max_dimensions``) are tried
    until one reaches a stress below 0.05. The dimensionalities between the
    best earlier attempt and that one are then tried one by one, and the
    first that also gets below 0.05 is reported as the inherent dimensionality.

    Parameters
    ----------
    data : array-like, one feature vector per row
    num_dimensions : int or None
        Fixed target dimensionality; ``None`` triggers the search.
    max_dimensions : int, default 1000
        Exclusive upper bound of the coarse search.
    step : int, default 20
        Spacing of the coarse search.

    Returns
    -------
    tuple
        ``(reduced_features, stress, iterations, dimensionality)``, all taken
        from the same MDS run. If no dimensionality reaches the threshold, the
        coarse run with the lowest stress is returned.

    Raises
    ------
    ValueError
        If ``step < 1`` or the search range is empty.
    """
    #When the number of dimensions is given we apply MDS for exactly that many dimensions
    if num_dimensions is not None:
        return MDS_with_dimension(data, num_dimensions)

    if step < 1:
        raise ValueError("step must be a positive integer")

    stress_threshold = 0.05  # same cut-off MDS_with_dimension uses to stop early
    best_result = None  # coarse run with the lowest stress so far (always above the threshold)

    # Coarse search: dimensions 1, 1 + step, 1 + 2 * step, ...
    for coarse_dim in range(1, max_dimensions, step):
        coarse_result = MDS_with_dimension(data, coarse_dim)
        coarse_stress = coarse_result[1]

        if coarse_stress < stress_threshold:
            # Fine search between the best earlier attempt and this one. When the very
            # first dimension already converges the range is empty and it is returned.
            first_candidate = 1 if best_result is None else best_result[3] + 1
            for dim in range(first_candidate, coarse_dim):
                fine_result = MDS_with_dimension(data, dim)
                # The first dimension where the stress becomes less than the threshold
                if fine_result[1] < stress_threshold:
                    return fine_result
            # No smaller dimension converged; the coarse run itself is a valid answer
            return coarse_result

        # Remember the best run so that features, stress and dimension stay in sync
        if best_result is None or coarse_stress < best_result[1]:
            best_result = coarse_result

    if best_result is None:
        raise ValueError("no dimensionality was evaluated; max_dimensions must be greater than 1")
    return best_result


##### WE ARE USING MULTIDIMENSIONAL SCALING (MDS) TO FIND THE INHERENT DIMENSIONALITY OF EACH LABEL IN THE LAYER 3 VECTOR SPACE

In [None]:
# Applying MDS on each label with the layer 3 vector space
print("Applying MDS on each label using the layer 3 feature space")
inherent_dimensionality_data = {}
# Applying MDS on each label in the dataset
for label, imageids in labelsWithImages.items():
    # Retrieving layer 3 feature vectors for the image ids of the label
    # (ids without a stored feature descriptor are ignored)
    values = np.array([fd_data[str(image_id)]['layer_3'] for image_id in imageids if str(image_id) in fd_data])

    # A label without any stored descriptor gives an empty array, which MDS cannot
    # process; skip it instead of aborting the run and losing the results so far.
    if len(values) == 0:
        print("Skipping label", label, "because none of its images has a layer 3 feature descriptor")
        continue

    # Applying MDS on the layer 3 vectors of the label
    reduced_features, best_stress, best_iteration, inherent_dimensionality = MDS_scratch(values)
    # Printing inherent dimensionality for each label
    print("The inherent dimensionality for label", label, "is: ", inherent_dimensionality, "Stress is:", best_stress)

    # Saving information related to inherent dimensionality for each label in a dictionary for future reference.
    # int()/float() turn possible NumPy scalars into plain Python numbers for JSON.
    inherent_dimensionality_data[label] = {
        'inherent_dimensionality': int(inherent_dimensionality),
        'stress': float(best_stress),
        'reduced_features': reduced_features.flatten().tolist(),
    }

# Saving information related to inherent dimensionality for each label in a json file for future reference
save_data_to_json(inherent_dimensionality_data, inherent_dimensionality_for_labels_info_json_file_path)
print("Data saved successfully at ", inherent_dimensionality_for_labels_info_json_file_path)


Applying MDS on each label using the layer 3 feature space
The inherent dimensionality for label 0 is:  42 Stress is: 0.049983253360575804
The inherent dimensionality for label 1 is:  36 Stress is: 0.04999230155012195
The inherent dimensionality for label 2 is:  30 Stress is: 0.049988214148081876
The inherent dimensionality for label 3 is:  40 Stress is: 0.049986138798274335
The inherent dimensionality for label 4 is:  17 Stress is: 0.04999294415955246
The inherent dimensionality for label 5 is:  46 Stress is: 0.049992344582400504
The inherent dimensionality for label 6 is:  14 Stress is: 0.049971113117689946
The inherent dimensionality for label 7 is:  14 Stress is: 0.04998132089434046
The inherent dimensionality for label 8 is:  15 Stress is: 0.049990933533091295
The inherent dimensionality for label 9 is:  16 Stress is: 0.04999733652943044
The inherent dimensionality for label 10 is:  15 Stress is: 0.04996041250462308
The inherent dimensionality for label 11 is:  13 Stress is: 0.049

In [None]:
# Code to calculate inherent dimensionality for input label
# Surrounding whitespace is removed so that an input such as "39 " still matches a label
query_label = input("Enter any label to check the inherent dimensionality").strip()

if query_label not in labelsWithImages:
    # Unknown label: report it instead of failing with a bare KeyError
    print("Label", query_label, "was not found in the label to image mapping")
else:
    #Retrieving imageIds for the input label
    imageids = labelsWithImages[query_label]

    # Retrieving layer 3 for images under the input label
    # (ids without a stored feature descriptor are ignored)
    values = np.array([fd_data[str(image_id)]['layer_3'] for image_id in imageids if str(image_id) in fd_data])

    if len(values) == 0:
        # MDS cannot run on an empty set of vectors
        print("No layer 3 feature descriptors are available for label", query_label)
    else:
        # Applying MDS to find the inherent dimensionality for the input label
        reduced_features, best_stress, best_iteration, inherent_dimensionality = MDS_scratch(values)

        # Displaying the inherent dimensionality, the stress reached and the reduced features, followed by the shape of the reduced features
        print("The inherent dimensionality for label", query_label, "is: ", inherent_dimensionality, "Stress is:", best_stress, "reduced features are: ", reduced_features)
        print("The shape of features at the inherent dimensionality is ", reduced_features.shape)

Enter any label to check the inherent dimensionality39
The inherent dimensionality for label 39 is:  1 Stress is: 0.5687719933219711 reduced features are:  [[-6.67817634e-02  7.22349884e-02  6.11608371e-02  7.28054967e-02
   4.46713117e-02 -2.39850366e-02  7.43768526e-02 -2.77798946e-02
  -3.35790555e-02  2.50124106e-02  5.11170190e-02  1.10453348e-02
   6.26379896e-02 -1.07418214e-01 -8.14807776e-03  1.18954112e-02
   7.58910198e-04  1.31861637e-03  4.66593277e-03 -4.35772669e-04
  -5.07868081e-03]
 [ 2.53660142e-02 -3.21956620e-04  8.73952197e-03 -1.03851583e-01
   4.19899630e-02  3.10743729e-02  3.84347681e-02 -2.61882052e-02
  -1.56955549e-02  6.53339417e-02 -8.08655055e-02 -3.69019368e-02
   3.01026726e-02  4.72780188e-02  7.93402856e-02  2.74781242e-03
   6.93270060e-02 -7.83247961e-02  8.13084524e-02  5.57350262e-02
  -4.12234689e-02]
 [-9.75903167e-02 -3.89100593e-02  1.85886360e-02 -7.85034221e-02
  -2.55189497e-02  3.70022091e-02  4.79350956e-02  4.97613344e-02
   7.27188069e