 Implement a program which computes and

---

prints the **inherent dimensionality** associated with the even-numbered Caltech101 images.

In [None]:
# Mount Google Drive at /content/drive (Google Colab only); the dataset and
# JSON paths used later in this notebook all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#Imports needed to run the code
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
import numpy as np
from torchvision import models, transforms
from scipy.signal import convolve2d
import json
import matplotlib.pyplot as plt
import json
from scipy.special import softmax
import time
from sklearn.metrics import euclidean_distances


In [None]:
# dataset
dataset = torchvision.datasets.Caltech101('/content/drive/MyDrive/CSE515_Phase3/data', download = True) # Caltech101 is downloaded into the given path; an existing download is reused instead of being fetched again

# JSON file paths
fd_json_file_path = '/content/drive/MyDrive/CSE515_Phase3/feature_descriptors.json'     # path of the file holding the stored feature descriptors
label_image_mapping_json_file_path = '/content/drive/MyDrive/CSE515_Phase3/label_image_map.json' # path of the file used to store the label to image mapping
inherent_dimensionality_for_even_images_info_json_file_path = '/content/drive/MyDrive/CSE515_Phase3/task0/inherent_dimensionality_for_even_images_info_json_file_path.json' # path of the file where the inherent dimensionality results (dimensionality, stress, reduced features) are written


Files already downloaded and verified


In [None]:
# Function to save the data to a JSON file. The file and any missing parent directories are created as needed
def save_data_to_json(data, json_file):
    """Serialize ``data`` to ``json_file`` as JSON indented by 4 spaces.

    Args:
        data: JSON-serializable object to store.
        json_file: Destination path. An existing file is overwritten.

    Raises:
        TypeError: If ``data`` contains a value ``json`` cannot serialize.
        OSError: If the directory or the file cannot be created or written.
    """
    import os  # local import so this block does not rely on the import cell

    # open(..., 'w') creates the file but not its directory, so a path such as
    # ".../task0/result.json" fails when "task0" is missing. Create it first.
    parent_dir = os.path.dirname(json_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(json_file, 'w') as file:
        json.dump(data, file, indent=4)


# Function to load the existing data from a JSON file
def load_data_from_json(json_file):
    """Return the parsed content of ``json_file``.

    A missing file is not an error: an empty dictionary is returned instead.
    Any other failure (e.g. malformed JSON) propagates to the caller.
    """
    try:
        with open(json_file, 'r') as handle:
            return json.load(handle)
    except FileNotFoundError:
        # Nothing stored yet, so start from an empty dictionary
        return {}


In [None]:
# Loading the feature descriptors from the JSON file (saved during phase 2, per the original note).
# load_data_from_json returns an empty dict when the file does not exist.
fd_data = load_data_from_json(fd_json_file_path)

In [None]:
#Stress function: normalized difference between the original distances and the distances of the MDS embedding
def stress_function(original_distances, reduced_distances):
    """Return sqrt(sum((reduced - original)^2) / sum(original^2)).

    Both arguments are flattened before comparison, so they only need to
    contain the same number of elements in the same order.
    """
    flat_original = original_distances.ravel()
    residual = reduced_distances.ravel() - flat_original
    squared_error = (residual ** 2).sum()
    normalizer = (flat_original ** 2).sum()
    return np.sqrt(squared_error / normalizer)


In [None]:
#Applying MultiDimensional Scaling when number of dimensions is given
def MDS_with_dimension(data, num_dimensions, max_iterations=300, stress_threshold=0.05, random_state=None):
    """Embed ``data`` into ``num_dimensions`` dimensions with iterative MDS.

    Starting from a random configuration, the embedding is refined with the
    Guttman transformation. The loop stops as soon as the normalized stress
    (see ``stress_function``) falls below ``stress_threshold`` or after
    ``max_iterations`` stress evaluations.

    Args:
        data: Sequence of feature vectors accepted by ``euclidean_distances``.
        num_dimensions: Number of dimensions of the embedding.
        max_iterations: Maximum number of stress evaluations (default 300).
        stress_threshold: Stress below which the loop stops (default 0.05).
        random_state: Optional seed for the random start. ``None`` keeps the
            previous behaviour of drawing from the global NumPy generator.

    Returns:
        Tuple ``(X, stress, iterations, num_dimensions)`` where ``stress`` is
        the stress of the returned embedding ``X`` and ``iterations`` is the
        number of stress evaluations performed.

    Raises:
        ValueError: If ``max_iterations`` is smaller than 1.
    """
    if max_iterations < 1:
        raise ValueError("max_iterations must be at least 1")

    #Computing original distances using L2-metric
    original_distances = euclidean_distances(data)
    # Zero entries (the diagonal and any coinciding vectors) are replaced with 1e-5,
    # which keeps the stress denominator (sum of squared original distances) non-zero.
    original_distances[original_distances == 0] = 1e-5
    num_images = original_distances.shape[0]

    #Initializing the embedding in a random position
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    X = rng.rand(num_images, num_dimensions)

    # Optimization loop
    for iteration in range(max_iterations):

        #Computing reduced distances using L2-metric
        reduced_distances = euclidean_distances(X)
        # Compute the current stress value
        current_stress = stress_function(original_distances, reduced_distances)

        is_last_iteration = iteration + 1 == max_iterations
        # Displaying the stress reached when the iteration budget is exhausted
        if is_last_iteration:
            print("For dimensions %d, the minimum stress we achieved is %s " % (num_dimensions, current_stress))
        # Stopping the MDS when the current stress is below the threshold
        if current_stress < stress_threshold:
            print("breaking at iteration %d for dimensions %d with stress %s " % (iteration + 1, num_dimensions, current_stress))
            break
        # Do not transform X after the last evaluation: the returned embedding
        # must be the one the returned stress was measured on.
        if is_last_iteration:
            break

        #Replacing the 0 values in reduced distances with 1e-5 to avoid division by zero.
        reduced_distances[reduced_distances == 0] = 1e-5
        #Weight of the original distances relative to the reduced distances, used by the Guttman transformation
        ratio = original_distances / reduced_distances
        B = -ratio
        # Guttman Transformation
        B[np.arange(len(B)), np.arange(len(B))] += ratio.sum(axis=1)
        X = 1.0 / num_images * np.dot(B, X)

    return X, current_stress, iteration + 1, num_dimensions


In [None]:
#Function to compute intrinsic dimensionality on a data i.e. when number of dimensions is not given
def MDS_scratch(data, num_dimensions=None):
    """Run MDS for a fixed dimension, or search for the inherent dimensionality.

    With ``num_dimensions`` given, this simply delegates to
    ``MDS_with_dimension``. Otherwise a coarse sweep over the dimensions
    1, 21, 41, ... (below 1000) is performed. When the first coarse dimension
    with stress below 0.05 is found, every dimension between the best failing
    coarse dimension and the passing one is tried in increasing order, and the
    first one with stress below 0.05 is reported. If none of them passes (the
    start configuration is random), the passing coarse result is reported.
    If no dimension ever reaches the threshold, the run with the lowest stress
    is reported.

    Args:
        data: Sequence of feature vectors.
        num_dimensions: Optional fixed target dimensionality.

    Returns:
        Tuple ``(reduced_features, best_stress, best_iteration,
        inherent_dimensionality)``; all four values come from the same MDS run.
    """
    #When the number of dimensions is given we apply MDS for that dimension only
    if num_dimensions is not None:
        return MDS_with_dimension(data, num_dimensions)

    # Same threshold MDS_with_dimension uses by default to stop early
    stress_threshold = 0.05
    # Best run so far as (features, stress, iterations, dimension)
    best_result = None

    # Applying MDS for dimensions with an offset of 20
    for coarse_dim in range(1, 1000, 20):
        candidate = MDS_with_dimension(data, coarse_dim)
        candidate_stress = candidate[1]

        if candidate_stress < stress_threshold:
            # The threshold was crossed somewhere after the best failing
            # dimension: look for the first dimension in between that passes.
            if best_result is not None:
                for dim in range(best_result[3] + 1, coarse_dim):
                    refined = MDS_with_dimension(data, dim)
                    if refined[1] < stress_threshold:
                        candidate = refined
                        break
            # Either a refined dimension, or the coarse one as a fallback
            best_result = candidate
            break

        # Updating the local best whenever we have a better stress
        if best_result is None or candidate_stress < best_result[1]:
            best_result = candidate

    reduced_features, best_stress, best_iteration, inherent_dimensionality = best_result
    return reduced_features, best_stress, best_iteration, inherent_dimensionality


##### WE ARE USING MULTIDIMENSIONAL SCALING (MDS) TO FIND THE INHERENT DIMENSIONALITY OF THE LAYER 3 VECTOR SPACE

In [None]:
# Applying MDS on the layer 3 vector space of the images stored in fd_data
# (presumably only the even numbered Caltech101 images; verify against the phase 2 export)
print("Applying MDS on even numbered images in the data set using the layer 3 feature space")
# Retrieving the layer 3 feature vector of every entry in fd_data
data = [entry["layer_3"] for entry in fd_data.values()]
# load_data_from_json returns {} for a missing file; fail here with a clear
# message instead of an obscure error inside the MDS code
if not data:
    raise ValueError("No feature descriptors were loaded from %s" % fd_json_file_path)
# Searching for the inherent dimensionality of the whole set of layer 3 vectors
reduced_features, best_stress, best_iteration, inherent_dimensionality = MDS_scratch(data)
# Displaying inherent dimensionality for the even numbered images in the dataset
print("The inherent dimensionality for the even numbered images in the dataset is: ", inherent_dimensionality, "Stress is:", best_stress)

# Collecting the results in a dictionary for future reference.
# reduced_features is stored flattened (row by row) as a plain list so that it is JSON serializable.
inherent_dimensionality_data = {
    'inherent_dimensionality': inherent_dimensionality,
    'stress': best_stress,
    'reduced_features': reduced_features.flatten().tolist(),
}

# Writing the results to the JSON file
save_data_to_json(inherent_dimensionality_data, inherent_dimensionality_for_even_images_info_json_file_path)
print("Data saved successfully at ", inherent_dimensionality_for_even_images_info_json_file_path)


Applying MDS on even numbered images in the data set using the layer 3 feature space
For dimensions 1, the minimum stress we achieved is 0.546624376850295 
For dimensions 21, the minimum stress we achieved is 0.08900753505841864 
For dimensions 41, the minimum stress we achieved is 0.06264350330027543 
For dimensions 61, the minimum stress we achieved is 0.05166135903758063 
breaking at iteration 202 for dimensions 81 with stress 0.049976522222918145 
For dimensions 62, the minimum stress we achieved is 0.05128292662286178 
For dimensions 63, the minimum stress we achieved is 0.051007878160138444 
For dimensions 64, the minimum stress we achieved is 0.05063536726396708 
For dimensions 65, the minimum stress we achieved is 0.05018859143556216 
breaking at iteration 295 for dimensions 66 with stress 0.049992907389770545 
The inherent dimensionality for the even numbered images in the dataset is:  66 Stress is: 0.049992907389770545
Data saved successfully at  /content/drive/MyDrive/CSE515