In [None]:
import time
import numpy as np
import matplotlib.pyplot as plt
import umap
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from matplotlib.cm import hsv, twilight_shifted
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from collections import defaultdict
%matplotlib inline

# Load data

In [None]:
dataset_name = 'COIL-20'
version = 'd1'

# The COIL datasets are read from data.npy / labels.npy; every other dataset
# is read from the train_* files in the same `prepared` folder.
if dataset_name in ['COIL-20', 'COIL-100']:
    data_file = f'data/{dataset_name}/prepared/data.npy'
    labels_file = f'data/{dataset_name}/prepared/labels.npy'
else:
    data_file = f'data/{dataset_name}/prepared/train_data.npy'
    labels_file = f'data/{dataset_name}/prepared/train_labels.npy'

data = np.load(data_file)

# Labels are optional: when the file is missing, fall back to a constant
# label of 1.0 per entry along the first axis of `data`.
try:
    labels = np.load(labels_file)
except FileNotFoundError:
    labels = np.ones(data.shape[0])

In [None]:
# Arrays with more than two axes are flattened so that each entry along the
# first axis becomes a single row.
if data.ndim > 2:
    data = data.reshape(len(data), -1)

In [None]:
# Display the array dimensions (after the optional flattening in the previous cell).
data.shape

In [None]:
# Number of output dimensions used for both the PCA and the UMAP embeddings below.
latent_dim = 16

# PCA and UMAP

In [None]:
# Cast to float32 up front; the in-place `data /= 255.0` in the PCA cell
# requires a floating-point array.
data = data.astype('float32')

In [None]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.datasets import fetch_openml

# Scale the already-loaded `data` array before PCA.
# NOTE(review): dividing by 255 assumes `data` holds raw 0-255 intensities --
# confirm for the prepared .npy files.
# NOTE: the division is in place, so re-running this cell without reloading
# `data` divides by 255 again. The UMAP cell further down also consumes this
# scaled array, which is why the in-place update is kept.
data /= 255.0

# Project onto the first `latent_dim` principal components.
# scikit-learn's default svd_solver='auto' may select the randomized solver
# for large inputs, which makes the output depend on the RNG; pin the seed
# (same value the UMAP cell uses) so X_pca is reproducible across runs.
n_components = latent_dim
pca = PCA(n_components=n_components, random_state=42)
X_pca = pca.fit_transform(data)

# X_pca now holds the `dataset_name` data reduced to `latent_dim` dimensions


In [None]:
# Display the dimensions of the PCA output.
X_pca.shape

In [None]:
# Write the PCA embedding into the dataset's folder; `version` tags the file name.
np.save(f'data/{dataset_name}/PCA_latent_output_{version}.npy', X_pca)

In [None]:
# Embed `data` into `latent_dim` dimensions with UMAP, using a fixed seed.
# `data` is used as the earlier cells left it (cast to float32 and divided
# by 255 in place by the PCA cell).
n_components_umap = latent_dim
umap_model = umap.UMAP(
    n_components=n_components_umap,
    random_state=42,
)
X_umap = umap_model.fit_transform(data)

# X_umap now holds the UMAP embedding of `data`; no t-SNE embedding is computed in this cell

In [None]:
# Display the dimensions of the UMAP output.
X_umap.shape

In [None]:
# Write the UMAP embedding into the dataset's folder; `version` tags the file name.
np.save(f'data/{dataset_name}/UMAP_latent_output_{version}.npy', X_umap)