In [None]:
import torch
from sklearn import metrics
from sklearn.cluster import KMeans
from ssnmf.ssnmf import SSNMF_T
from torch.utils.data import random_split

from utils.logging import log
from utils.process import prepare_df, data_tensors

In [None]:
# Load the prepared dataset through the project helper `prepare_df`.
df = prepare_df()
# Keep only the rows whose 'Label' value is something other than 'BENIGN'.
df = df[df['Label'] != 'BENIGN']

In [None]:


# Place tensors on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def get_Y_torch(X, y):
    """Build a one-hot label matrix with one row per class and one column per sample.

    Args:
        X: Tensor whose second dimension (``X.shape[1]``) is the number of
            samples. Only that size is read; the values are not used.
        y: Tensor of labels, one per sample.
            NOTE(review): assumed to be 1-D (or an ``(n, 1)`` column) with at
            least ``X.shape[1]`` entries -- confirm against ``data_tensors``.

    Returns:
        A float tensor of shape ``(num_classes, X.shape[1])`` allocated on the
        module-level ``device``. ``Y[c, i]`` is 1 when ``y[i]`` equals the
        ``c``-th entry of ``torch.unique(y)`` (sorted order) and 0 otherwise.

    Raises:
        IndexError: if ``y`` holds fewer than ``X.shape[1]`` labels.
    """
    # `class_idx` gives, for every label, its row position inside `y_unique`,
    # which replaces the per-sample comparison/nonzero lookup.
    y_unique, class_idx = torch.unique(y, return_inverse=True)
    sample_size = X.shape[1]
    Y = torch.zeros(y_unique.shape[0], sample_size, device=device)

    # Only the first `sample_size` labels are used, as in the per-sample loop.
    rows = class_idx.reshape(-1)[:sample_size].to(Y.device)
    if rows.shape[0] != sample_size:
        # Fail loudly instead of letting a short index broadcast silently.
        raise IndexError(
            f"y has {rows.shape[0]} labels but X has {sample_size} samples"
        )
    cols = torch.arange(sample_size, device=Y.device)
    # One vectorised assignment sets the single 1 in every column.
    Y[rows, cols] = 1
    return Y


def get_L_torch(Y, fraction_known=0.5):
    """Build a 0/1 mask shaped like ``Y`` that flags a random subset of columns.

    Args:
        Y: 2-D tensor; only its shape is read.
        fraction_known: Fraction of the columns to flag. The number of flagged
            columns is ``int(fraction_known * Y.shape[1])``.

    Returns:
        A float tensor with the shape of ``Y`` on the module-level ``device``.
        Every entry of a flagged column is 1; all other entries are 0. The
        flagged columns are drawn with ``torch.randperm`` and therefore depend
        on torch's global RNG state.
    """
    L = torch.zeros(Y.shape, device=device)
    _, n = L.shape  # two-way unpacking keeps the 2-D requirement explicit
    num_samples = int(fraction_known * n)
    # Same randperm call (dtype and device) as before, so the columns drawn for
    # a given seed are unchanged.
    labeled_data = torch.randperm(n, dtype=torch.int32, device=device)[:num_samples]
    # Flag all chosen columns in one indexed assignment; the index is cast to
    # int64, the dtype advanced indexing accepts on every torch version.
    L[:, labeled_data.long()] = 1
    return L


def split_L_idx(L, test_size=0.25):
    """Randomly partition the column indices of ``L`` into train and test parts.

    Args:
        L: Tensor whose second dimension gives the number of columns to split.
        test_size: Fraction of the columns assigned to the test part; the test
            part holds ``int(test_size * L.shape[1])`` indices.

    Returns:
        A ``(train_idx, test_idx)`` tuple holding the two objects produced by
        ``torch.utils.data.random_split`` over ``range(L.shape[1])``.
    """
    n_cols = L.shape[1]
    n_test = int(test_size * n_cols)
    lengths = [n_cols - n_test, n_test]
    train_part, test_part = torch.utils.data.random_split(range(n_cols), lengths)
    return train_part, test_part


def get_L_train(L, test_idx):
    """Return a copy of ``L`` in which the ``test_idx`` columns are set to zero.

    Args:
        L: 2-D mask tensor; it is cloned, so the input is left unmodified.
        test_idx: Column selector used to index the second dimension of ``L``.

    Returns:
        The cloned tensor with every entry of the selected columns set to 0.
    """
    # Scalar zero created on the module-level `device`, as float32.
    zero = torch.tensor(0, dtype=torch.float32, device=device)
    masked = L.clone()
    masked[:, test_idx] = zero
    return masked


In [None]:
# Seed torch's global RNG so the random column mask drawn by get_L_torch is
# repeatable from run to run (KMeans below is already pinned via random_state).
# NOTE(review): SSNMF_T may also draw its initial factors from torch's RNG -- confirm.
torch.manual_seed(0)

X, y = data_tensors(df)
# get_Y_torch reads the sample count from shape[1], hence the transpose.
Y = get_Y_torch(X.T, y)
fraction_known = 0.1  # fraction of columns flagged in the mask L
k = 10  # passed to SSNMF_T and reused as the KMeans cluster count
L = get_L_torch(Y, fraction_known=fraction_known)
snmf = SSNMF_T(X.T, k, L=L, Y=Y, lam=0.01 * torch.linalg.norm(X), modelNum=3)
snmf.mult(numiters=10000)

# NOTE(review): scikit-learn converts its inputs to NumPy arrays; if `snmf.S` or
# `y` live on a CUDA device these calls will presumably fail and need an explicit
# move to the CPU first -- confirm where data_tensors/SSNMF_T place their tensors.
kmeans = KMeans(n_clusters=k, random_state=0).fit(snmf.S.T)
rand_score = metrics.rand_score(y, kmeans.labels_)
adjusted_rand_score = metrics.adjusted_rand_score(y, kmeans.labels_)
# This value used to be bound only to the misspelled name below and was never
# displayed; the old name stays bound in case another cell still refers to it.
adujsted_rand_score = adjusted_rand_score
print(rand_score)
print(adjusted_rand_score)