In [28]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import pickle
import torch
from tqdm import tqdm

from sklearn.svm import SVC
from sklearn.manifold import MDS
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score, adjusted_mutual_info_score
from scipy.stats import wasserstein_distance

from pathlib import Path
import pathlib
import os
from datetime import datetime
import uuid
import json
import shutil

from kcm.koopman_category_model import KoopmanCategoryModel
from kcm.basic_feature_extract import BasicFeatureExtractor
from kcm.discovery import (
    CategoryDiscoveryTrainer,
    train_test_split_indices,
    prep_data_for_discovery,
    check_histograms,
    sup_con_loss,
    BaselineModel,
    HASHHead,
    cluster_acc,
    split_cluster_acc_v1,
    split_cluster_acc_v2,
    create_hash_ids
)

from kcm.utils import load_koopman_model

# Apply matplotlib's 'dark_background' style sheet to figures created after this point.
plt.style.use('dark_background')


In [5]:
######## Koopman Category Model Version ########
KCM_name = "KCM_20250814_202425_aa4a3b40"
################################################

# Root directory holding saved experiments. Set the KCM_EXPERIMENTS_DIR
# environment variable to point somewhere else; when it is unset, the original
# hard-coded location is used so existing behavior is unchanged.
DEFAULT_EXPERIMENTS_DIR = r"C:\Users\peterdb1\Documents\Masters in ACM\(i-j) 625.801-802 - ACM Master's Research\Technical Work\koopman-category-discovery\experiments"

# NOTE(review): presumably the saved model contains pickled PosixPath objects
# (i.e. it was written on a POSIX machine) and this alias lets them be
# reconstructed on Windows — confirm against load_koopman_model.
# The alias is only applied on Windows: on any other OS, WindowsPath cannot be
# instantiated, so patching unconditionally would break path handling there.
if os.name == 'nt':
    pathlib.PosixPath = pathlib.WindowsPath


print(f'Loading in Koopman Category Model at: {KCM_name}')

full_path = Path(os.environ.get("KCM_EXPERIMENTS_DIR", DEFAULT_EXPERIMENTS_DIR))
koopman_dir = full_path / KCM_name
# load_koopman_model returns the model together with its parameters.
KCM, KCM_params = load_koopman_model(koopman_dir)

Loading in Koopman Category Model at: KCM_20250814_202425_aa4a3b40


In [14]:
# plt.scatter(KCM.X_transformed[:,0],KCM.X_transformed[:,1])
# KCM.X_transformed.mean(axis=1)

# embedding = MDS(n_components=self.MDS_dimension, dissimilarity='precomputed', random_state=self.seed)
# self.X_transformed = embedding.fit_transform(metric_matrix)

In [79]:
# Count the entries of KCM.all_eigs whose absolute value exceeds 5.
eig_magnitudes = np.abs(np.array(KCM.all_eigs))
(eig_magnitudes > 5).sum()

np.int64(757)

In [76]:
# Average every column per (target, sample) group, keep only the columns whose
# name contains 'eig_', and count the groups whose largest absolute mean
# among those columns exceeds 5.
grouped_means = KCM.df.groupby(['target','sample']).agg('mean')
eig_columns = [name for name in KCM.df.columns if 'eig_' in name]
largest_abs_mean = abs(grouped_means[eig_columns].values).max(axis=1)
(largest_abs_mean > 5).sum()

np.int64(4)

In [123]:
# Largest absolute value within each entry of KCM.all_eigs.
maxes = np.array([np.max(abs(eigs)) for eigs in KCM.all_eigs])

# Show the modes stored at the position of the entry with the largest such value.
worst_index = maxes.argmax()
KCM.all_modes[worst_index]

array([[-2.34326231e-001, -3.00162491e+116, -8.18988425e+098,
         1.08202457e+083, -7.46825854e+084, -1.76200401e+085,
        -7.46825854e+084, -7.46825854e+084, -7.46825854e+084,
         7.46825854e+084,  7.46825854e+084,  1.19173630e+085,
         7.46825854e+084,  7.46825854e+084, -7.46825854e+084],
       [-2.10830690e-001, -1.66756939e+116, -9.61754301e+098,
         3.63744116e+082, -4.19527147e+084,  1.48759949e+085,
        -4.19527147e+084, -4.19527147e+084, -4.19527147e+084,
         4.19527147e+084,  4.19527147e+084,  2.00199490e+083,
         4.19527147e+084,  4.19527147e+084, -4.19527147e+084],
       [-3.17240662e-001,  6.67027757e+116,  1.48902849e+099,
        -2.55952315e+083,  1.65659332e+085,  2.40992309e+085,
         1.65659332e+085,  1.65659332e+085,  1.65659332e+085,
        -1.65659332e+085, -1.65659332e+085, -1.58958568e+085,
        -1.65659332e+085, -1.65659332e+085,  1.65659332e+085],
       [-2.34326231e-001, -3.00162491e+116, -8.18988425e+098,
     