In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import glob
import os
import time
import itertools

# UMAP
# import umap

# ISOMAP
from sklearn.manifold import Isomap

from mpl_toolkits.mplot3d import Axes3D

In [2]:
# Locate and load data
data_directory = '/home/haqqeez/batch5/organized/ZHA029/PAL/dPAL/2021_12_13/'
data_files = glob.glob(f'{data_directory}*/**/*deconv.csv')

if len(data_files) > 1:
    print('Error: Too many files found')
elif len(data_files) == 0:
    print('Error: File not found')
else:
    data_file = data_files[0]
    data = pd.read_csv(data_file, index_col=0)
    print(f'Loaded data from {data_file}')

Loaded data from /home/haqqeez/batch5/organized/ZHA029/PAL/dPAL/2021_12_13/15_12_06/Miniscope_2/ZHA029_2021_12_13_1512_deconv.csv


In [None]:
list_of_n_neighbors = [10,20,40,80]
list_of_dist_metrics = ['cosine','euclidean']
list_of_n_components = [2,3,6]

# Generate all combinations of parameters
parameter_combinations = list(itertools.product(list_of_n_neighbors, list_of_dist_metrics, list_of_n_components))
print(f'Number of combinations: {len(list(parameter_combinations))}')

troubleshoot_frames = None # number of frames to use for troubleshooting; None will use all frames
downsample_factor = 15 # downsample factor for troubleshooting 1 = no downsampling, 2 = every other frame, 3 = every third frame, etc.

# downsample for troubleshooting
data_to_use = data.iloc[0:troubleshoot_frames, :].values # use only the first 5000 frames for troubleshooting
data_to_use = data_to_use[::downsample_factor, :] # downsample data

print(f'Data Shape: {data_to_use.shape[1]} cells by {data_to_use.shape[0]} frames')

Number of combinations: 24
Data Shape: 340 cells by 6076 frames


In [None]:
results = []

for n_neighbors, dist_metric, n_components in parameter_combinations:
    print(f'Running ISOMAP with n_neighbors={n_neighbors}, dist_metric={dist_metric}, n_components={n_components}')

    analysis_start_time = time.time()

    # Get embeddings
    reducer = Isomap(n_neighbors=n_neighbors, n_components=n_components, metric=dist_metric)
    fit = reducer.fit(data_to_use)
    embeddings = fit.transform(data_to_use)

    analysis_end_time = time.time()
    time_taken = analysis_end_time - analysis_start_time
    print(f'Analysis took {time_taken:.2f} seconds ({time_taken/60:.2f} minutes)')
    results.append((n_neighbors, dist_metric, n_components, time_taken))

    # Make plots based on the number of components
    if n_components == 2:
        fig, ax = plt.subplots()
        ax.scatter(embeddings[:, 0], embeddings[:, 1], c='r', marker='o')
        ax.set_xlabel('ISOMAP 1')
        ax.set_ylabel('ISOMAP 2')
        plt.title(f'ISOMAP n_neighbors={n_neighbors}, dist_metric={dist_metric}, n_components={n_components}')
        plt.show()
    elif n_components == 3:
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(embeddings[:, 0], embeddings[:, 1], embeddings[:, 2], c='r', marker='o')
        ax.set_xlabel('ISOMAP 1')
        ax.set_ylabel('ISOMAP 2')
        ax.set_zlabel('ISOMAP 3')
        plt.title(f'ISOMAP n_neighbors={n_neighbors}, dist_metric={dist_metric}, n_components={n_components}')
        plt.show()
    else:
        fig, axes = plt.subplots(n_components - 1, n_components - 1, figsize=(15, 15))
        for i in range(n_components):
            for j in range(i + 1, n_components):
                ax = axes[i, j - 1] if n_components > 2 else axes
                ax.scatter(embeddings[:, i], embeddings[:, j], c='r', marker='o')
                ax.set_xlabel(f'ISOMAP {i + 1}')
                ax.set_ylabel(f'ISOMAP {j + 1}')
        plt.suptitle(f'ISOMAP n_neighbors={n_neighbors}, dist_metric={dist_metric}, n_components={n_components}')
        plt.show()

Running ISOMAP with n_neighbors=10, dist_metric=cosine, n_components=2
Analysis took 30.57 seconds (0.51 minutes)
Running ISOMAP with n_neighbors=10, dist_metric=cosine, n_components=3
Analysis took 30.84 seconds (0.51 minutes)
Running ISOMAP with n_neighbors=10, dist_metric=cosine, n_components=6
Analysis took 31.24 seconds (0.52 minutes)
Running ISOMAP with n_neighbors=10, dist_metric=euclidean, n_components=2
Analysis took 28.86 seconds (0.48 minutes)
Running ISOMAP with n_neighbors=10, dist_metric=euclidean, n_components=3
Analysis took 28.60 seconds (0.48 minutes)
Running ISOMAP with n_neighbors=10, dist_metric=euclidean, n_components=6
Analysis took 28.78 seconds (0.48 minutes)
Running ISOMAP with n_neighbors=20, dist_metric=cosine, n_components=2
Analysis took 39.35 seconds (0.66 minutes)
Running ISOMAP with n_neighbors=20, dist_metric=cosine, n_components=3
Analysis took 39.30 seconds (0.66 minutes)
Running ISOMAP with n_neighbors=20, dist_metric=cosine, n_components=6
Analysis

In [17]:
# Print summary
print("\nSummary of analysis times:")
for n_neighbors, dist_metric, n_components, time_taken in results:
    print(f'n_neighbors={n_neighbors}, dist_metric={dist_metric}, n_components={n_components}: {time_taken:.2f} seconds ({time_taken/60:.2f} minutes)')

# print best results
fast_result = min(results, key=lambda x: x[3])
print(f'\nFastest run: n_neighbors={fast_result[0]}, dist_metric={fast_result[1]}, n_components={fast_result[2]}: {fast_result[3]:.2f} seconds ({fast_result[3]/60:.2f} minutes)')

# print slowest results
slow_result = max(results, key=lambda x: x[3])
print(f'\nSlowest run: n_neighbors={slow_result[0]}, dist_metric={slow_result[1]}, n_components={slow_result[2]}: {slow_result[3]:.2f} seconds ({slow_result[3]/60:.2f} minutes)')


Summary of analysis times:
n_neighbors=10, dist_metric=cosine, n_components=2: 30.57 seconds (0.51 minutes)
n_neighbors=10, dist_metric=cosine, n_components=3: 30.84 seconds (0.51 minutes)
n_neighbors=10, dist_metric=cosine, n_components=6: 31.24 seconds (0.52 minutes)
n_neighbors=10, dist_metric=euclidean, n_components=2: 28.86 seconds (0.48 minutes)
n_neighbors=10, dist_metric=euclidean, n_components=3: 28.60 seconds (0.48 minutes)
n_neighbors=10, dist_metric=euclidean, n_components=6: 28.78 seconds (0.48 minutes)
n_neighbors=20, dist_metric=cosine, n_components=2: 39.35 seconds (0.66 minutes)
n_neighbors=20, dist_metric=cosine, n_components=3: 39.30 seconds (0.66 minutes)
n_neighbors=20, dist_metric=cosine, n_components=6: 39.43 seconds (0.66 minutes)
n_neighbors=20, dist_metric=euclidean, n_components=2: 34.10 seconds (0.57 minutes)
n_neighbors=20, dist_metric=euclidean, n_components=3: 34.13 seconds (0.57 minutes)
n_neighbors=20, dist_metric=euclidean, n_components=6: 34.43 secon