In [11]:
import numpy as np
import os
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection
import umap
from tqdm import tqdm

## Silence every warning for the rest of the kernel session.
## NOTE(review): this is a blanket filter, so deprecation and convergence
## warnings from the embedding libraries are hidden as well.
import warnings
warnings.filterwarnings("ignore")

## Build the list of dataset names: one entry per sub-directory of the
## npy root. Entries are kept only if they are real directories, so stray
## files (OS metadata, archives, docs) can never be mistaken for a dataset.
## The explicit exclusions from the original filter are retained.
NPY_ROOT = "./labeled-datasets/npy/"
NON_DATASET_ENTRIES = (".DS_Store", "README.md", ".gitignore")

## os.listdir returns entries in arbitrary order; sorting makes the
## processing order identical across machines and runs.
datasets = sorted(
	entry
	for entry in os.listdir(NPY_ROOT)
	if entry not in NON_DATASET_ENTRIES
	and not entry.endswith(".zip")
	and os.path.isdir(os.path.join(NPY_ROOT, entry))
)

### Dataset (Embedding) Generation

In [12]:
## Fixed seed shared by every stochastic projection below, so that re-running
## this cell regenerates the same embedding files.
## NOTE(review): per the UMAP docs, setting random_state may disable some
## parallelism and slow UMAP down; confirm this trade-off is acceptable.
SEED = 0

## For each dataset, project the raw data to 2-D with four techniques and
## save every embedding as its own .npy file under
## ./labeled-datasets_embedding/<dataset>/.
for dataset in tqdm(datasets):
	raw = np.load(f"./labeled-datasets/npy/{dataset}/data.npy")

	umap_emb = umap.UMAP(n_components=2, random_state=SEED).fit_transform(raw)
	tsne_emb = TSNE(n_components=2, random_state=SEED).fit_transform(raw)
	## PCA is seeded too: its solver can be randomized depending on input size.
	pca_emb = PCA(n_components=2, random_state=SEED).fit_transform(raw)
	random_emb = SparseRandomProjection(n_components=2, random_state=SEED).fit_transform(raw)

	## Create the output directory if it does not exist yet; exist_ok makes
	## this a no-op on re-runs and avoids a separate existence check.
	out_dir = f"./labeled-datasets_embedding/{dataset}"
	os.makedirs(out_dir, exist_ok=True)

	np.save(f"{out_dir}/umap.npy", umap_emb)
	np.save(f"{out_dir}/tsne.npy", tsne_emb)
	np.save(f"{out_dir}/pca.npy", pca_emb)
	np.save(f"{out_dir}/random.npy", random_emb)


  1%|          | 1/96 [00:02<04:13,  2.67s/it]