<a href="https://colab.research.google.com/github/avionerman/computational_and_statistical/blob/main/CI%26SL3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries

In [3]:

# Core scientific computing
import numpy as np
from numpy.linalg import eigh, eig, svd, norm, inv
from scipy.sparse import csr_matrix, diags, eye as sparse_eye
from scipy.sparse.linalg import eigsh, eigs
from scipy.spatial.distance import cdist, pdist, squareform
from scipy.sparse.csgraph import shortest_path

# Machine Learning
from sklearn.neighbors import NearestNeighbors, kneighbors_graph
from sklearn.cluster import KMeans, SpectralClustering
from sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import (
    adjusted_rand_score,
    normalized_mutual_info_score,
    homogeneity_score,
    completeness_score,
    v_measure_score,
    confusion_matrix,
    accuracy_score
)
from sklearn.decomposition import PCA

# visualization
import matplotlib.pyplot as plt
import seaborn as sns

# utilities
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Tuple
from enum import Enum, auto
import warnings
import time

# Seed NumPy's global random generator with a fixed value for reproducibility
# in the rest of the code.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Suppress all warnings from this point on (no category or module restriction).
warnings.filterwarnings(action='ignore')

# Configuration attributes

In [4]:
@dataclass
class Config:
    """Central collection of the notebook's tunable settings.

    Fields are grouped into data options, graph options, per-method enable
    flags, the list of cluster counts, and t-SNE options. Every field has a
    default, so ``Config()`` yields a ready-to-use configuration, and each
    instance receives its own ``n_clusters_list`` (built by a default factory).
    """

    # data
    sample_fraction: float = 0.25      # Fraction of the data to use (0.25 = 25%)
    normalize_method: str = 'minmax'   # Options: 'minmax', 'standard'
    random_seed: int = 42              # Same value as the notebook-level RANDOM_SEED

    # graph
    n_neighbors: int = 15             # k for k-NN graph
    sigma: Optional[float] = None     # Gaussian kernel bandwidth (None = auto-estimate)

    # enable/disable methods
    enable_laplacian_eigenmaps: bool = True
    enable_lpp: bool = True
    enable_isomap: bool = True
    enable_lle: bool = True
    enable_tsne: bool = True

    # spectral settings
    # A default factory is required here: a bare list default is rejected by
    # dataclasses because it would be shared between instances.
    n_clusters_list: List[int] = field(default_factory=lambda: [2, 3, 5, 7, 10])

    # tSNE only
    tsne_perplexity: float = 30.0
    tsne_n_iter: int = 1000


# Instantiate the configuration with its default values.
config = Config()

# Emit the summary in one call; sep="\n" places each entry on its own line.
print(
    "Configuration set!",
    f"   Sample fraction: {config.sample_fraction * 100:.0f}%",
    f"   n_neighbors: {config.n_neighbors}",
    f"   Methods enabled: LE={config.enable_laplacian_eigenmaps}, LPP={config.enable_lpp}, "
    f"Isomap={config.enable_isomap}, LLE={config.enable_lle}, t-SNE={config.enable_tsne}",
    sep="\n",
)

Configuration set!
   Sample fraction: 25%
   n_neighbors: 15
   Methods enabled: LE=True, LPP=True, Isomap=True, LLE=True, t-SNE=True
