In [1]:
import pandas as pd
from sklearn.decomposition import NMF
from utils.process import prepare_df, data_arrays, data_tensors, LABELS
from sklearn import metrics
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import torch
from ssnmf.ssnmf import SSNMF_T
from sklearn.model_selection import KFold, StratifiedKFold

In [2]:
df = prepare_df()
df = df[df['Label'] != 'BENIGN']
df = df.sample(1000)
df.groupby('Label')['Label'].count()

Label
Bot                           3
DDoS                        247
DoS GoldenEye                22
DoS Hulk                    416
DoS Slowhttptest             11
DoS slowloris                12
FTP-Patator                  12
PortScan                    260
SSH-Patator                  11
Web Attack   Brute Force      4
Web Attack   XSS              2
Name: Label, dtype: int64

In [5]:
from ssnmf import SSNMF
import random

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


def get_Y_torch(X, y):
    y_unique = torch.unique(y)
    sample_size = X.shape[1]
    Y = torch.zeros(y_unique.shape[0], sample_size, device=device)
    for i in range(sample_size):
        j = ((y_unique == y[i]).nonzero(as_tuple=True)[0])
        Y[j, i] = 1
    return Y


def get_L_torch(Y, fraction_known=0.5):
    L = torch.zeros(Y.shape, device=device)
    m, n = L.shape
    num_samples = int(fraction_known * n)
    print(num_samples)
    labeled_data = torch.randperm(n, dtype=torch.int32, device=device)[:num_samples]
    for i in labeled_data:
        L[:,i] = torch.tensor(1)
    return L

#model = SSNMF(M_s,10, modelNum=1)

In [10]:
import torch


X, y = data_tensors(df)
Y = get_Y_torch(X.T, y)
L = get_L_torch(Y, fraction_known=0.5)
snmf = SSNMF_T(X.T, 11, Y=Y, L=L, lam=1 * np.linalg.norm(X), modelNum=3, str='torch')
snmf.mult(numiters=1000)
y_pred = torch.argmax(snmf.S, axis=0)

499


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.replace([np.inf, -np.inf], np.nan, inplace=True)


In [11]:
metrics.rand_score(y, y_pred)

0.7958585174360757

In [6]:
Y

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 1.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [12]:
skf = StratifiedKFold(n_splits=5)
fraction_known = 0.1
for k in [5, 10, 20, 30]:
    for lam in (0, 0.001, 0.5, 0.1, 0.5, 1, 10, 100, 500, 1000, 5000):
        for train_index, test_index in skf.split(X, y):
            X_train_T, X_test_T = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            Y_train = get_Y_torch(X_train_T.T, y_train)
            L_train = get_L_torch(Y_train, fraction_known=fraction_known)
            #print(f"norm X {np.linalg.norm(X_train_T)}")
            snmf = SSNMF(X_train_T.T, k, L=L_train, Y=Y_train, lam=lam * np.linalg.norm(X_train_T), modelNum=5)
            snmf.mult(numiters=1000)
            y_pred = np.argmax(snmf.S, axis=0)
            score = metrics.rand_score(y_train, y_pred)
            print(f'lambda {lam} score {score}')



lambda 0 score 0.7323717146433041
lambda 0 score 0.8102503128911139
lambda 0 score 0.8070025031289111
lambda 0 score 0.7240894868585732
lambda 0 score 0.8081289111389236
lambda 0.001 score 0.7485419274092616
lambda 0.001 score 0.7476752190237798
lambda 0.001 score 0.7254036295369212




lambda 0.001 score 0.7879192740926158
lambda 0.001 score 0.8094430538172716
lambda 0.5 score 0.9117021276595745




lambda 0.5 score 0.9126595744680851
lambda 0.5 score 0.9855725907384231
lambda 0.5 score 0.9944117647058823
lambda 0.5 score 0.9114580725907384
lambda 0.1 score 0.8400281602002503
lambda 0.1 score 0.8659730913642053




lambda 0.1 score 0.8658291614518148
lambda 0.1 score 0.9183416770963705
lambda 0.1 score 0.8846433041301627




lambda 0.5 score 0.9126533166458073
lambda 0.5 score 0.920247183979975
lambda 0.5 score 0.9104599499374217
lambda 0.5 score 0.916927409261577
lambda 0.5 score 0.9943178973717146
lambda 1 score 0.9181727158948686




lambda 1 score 0.907337296620776
lambda 1 score 0.9489987484355444
lambda 1 score 0.91180225281602
lambda 1 score 0.9161389236545682
lambda 10 score 0.8900750938673342
lambda 10 score 0.8893617021276595
lambda 10 score 0.9108385481852316
lambda 10 score 0.8623623279098873
lambda 10 score 0.939014392991239
lambda 100 score 0.8952127659574468




lambda 100 score 0.867787859824781
lambda 100 score 0.8730663329161452
lambda 100 score 0.8946120150187735
lambda 100 score 0.9166927409261577
lambda 500 score 0.8405788485607009
lambda 500 score 0.9431257822277848
lambda 500 score 0.8584981226533166




lambda 500 score 0.8683041301627034
lambda 500 score 0.8649687108886107
lambda 1000 score 0.914142678347935
lambda 1000 score 0.8506351689612015
lambda 1000 score 0.9643554443053818
lambda 1000 score 0.9505569461827285
lambda 1000 score 0.8517709637046308




lambda 5000 score 0.8578066332916146
lambda 5000 score 0.9124687108886108
lambda 5000 score 0.849715269086358
lambda 5000 score 0.9346871088861076
lambda 5000 score 0.8750907384230288
lambda 0 score 0.8078785982478097
lambda 0 score 0.8054599499374218
lambda 0 score 0.8159511889862328
lambda 0 score 0.8055913642052566
lambda 0 score 0.805366082603254




lambda 0.001 score 0.7995932415519399
lambda 0.001 score 0.8192396745932415
lambda 0.001 score 0.7846057571964956
lambda 0.001 score 0.805985607008761
lambda 0.001 score 0.8051032540675844




lambda 0.5 score 0.8647809762202754
lambda 0.5 score 0.8325125156445556
lambda 0.5 score 0.8705694618272841
lambda 0.5 score 0.8798654568210262
lambda 0.5 score 0.8615237797246558
lambda 0.1 score 0.8329130162703379




lambda 0.1 score 0.8387484355444306
lambda 0.1 score 0.8436670838548185
lambda 0.1 score 0.8705193992490613
lambda 0.1 score 0.83229662077597




lambda 0.5 score 0.8408948685857321
lambda 0.5 score 0.8191458072590738
lambda 0.5 score 0.8463204005006257
lambda 0.5 score 0.83159887359199
lambda 0.5 score 0.9235951188986232
lambda 1 score 0.8306633291614518




lambda 1 score 0.8637077596996245
lambda 1 score 0.8316989987484356
lambda 1 score 0.8115425531914894
lambda 1 score 0.8568022528160201
lambda 10 score 0.8383041301627033
lambda 10 score 0.8440331664580726
lambda 10 score 0.8714643304130163




lambda 10 score 0.8468053817271589
lambda 10 score 0.782543804755945
lambda 100 score 0.8398998748435544
lambda 100 score 0.8456101376720901
lambda 100 score 0.8479411764705882
lambda 100 score 0.8638297872340426




lambda 100 score 0.8273466833541927
lambda 500 score 0.8566958698372966
lambda 500 score 0.8312515644555695
lambda 500 score 0.8129349186483104




lambda 500 score 0.851886733416771
lambda 500 score 0.8437202753441803
lambda 1000 score 0.9076908635794744




lambda 1000 score 0.8570025031289111
lambda 1000 score 0.8152690863579475
lambda 1000 score 0.8221339173967459
lambda 1000 score 0.838279098873592




lambda 5000 score 0.852174593241552
lambda 5000 score 0.8337515644555694
lambda 5000 score 0.8505256570713392
lambda 5000 score 0.8504630788485606
lambda 5000 score 0.8913297872340425
lambda 0 score 0.784427409261577




lambda 0 score 0.8055475594493117
lambda 0 score 0.7695588235294117
lambda 0 score 0.767625156445557
lambda 0 score 0.7705600750938674
lambda 0.001 score 0.79090112640801




lambda 0.001 score 0.7851783479349187
lambda 0.001 score 0.761232790988736
lambda 0.001 score 0.8085544430538173
lambda 0.001 score 0.7783416770963705




lambda 0.5 score 0.793526282853567
lambda 0.5 score 0.7842334167709637
lambda 0.5 score 0.8006727158948685
lambda 0.5 score 0.7863172715894868
lambda 0.5 score 0.7848560700876095
lambda 0.1 score 0.7953191489361702




lambda 0.1 score 0.8033886107634544
lambda 0.1 score 0.7823779724655819
lambda 0.1 score 0.8342740926157697
lambda 0.1 score 0.8110763454317897




lambda 0.5 score 0.7875125156445557
lambda 0.5 score 0.8102847309136421
lambda 0.5 score 0.7964674593241552
lambda 0.5 score 0.7760951188986233
lambda 0.5 score 0.7926126408010012




lambda 1 score 0.7952972465581978
lambda 1 score 0.7890738423028786
lambda 1 score 0.7862077596996245
lambda 1 score 0.7830506883604506
lambda 1 score 0.7992928660826033




lambda 10 score 0.7937140175219024
lambda 10 score 0.8116520650813517
lambda 10 score 0.8017740926157697
lambda 10 score 0.7872371714643304
lambda 10 score 0.7913610763454317
lambda 100 score 0.8071057571964956




lambda 100 score 0.8049655819774718
lambda 100 score 0.7870212765957447
lambda 100 score 0.798854818523154
lambda 100 score 0.8072090112640801
lambda 500 score 0.8094461827284105
lambda 500 score 0.8205538172715895




lambda 500 score 0.7993742177722153
lambda 500 score 0.8045588235294118
lambda 500 score 0.7974342928660826




lambda 1000 score 0.793854818523154
lambda 1000 score 0.7804974968710888
lambda 1000 score 0.8092334167709637
lambda 1000 score 0.8006789737171465
lambda 1000 score 0.8071370463078849
lambda 5000 score 0.7817365456821026




lambda 5000 score 0.7800938673341677
lambda 5000 score 0.808116395494368
lambda 5000 score 0.7953754693366708
lambda 5000 score 0.7916958698372967




lambda 0 score 0.7646652065081352
lambda 0 score 0.7532916145181476
lambda 0 score 0.756842928660826
lambda 0 score 0.7367772215269086
lambda 0 score 0.7896182728410513
lambda 0.001 score 0.7686420525657072




lambda 0.001 score 0.7515550688360451
lambda 0.001 score 0.7484637046307885
lambda 0.001 score 0.7551032540675845
lambda 0.001 score 0.7529161451814769




lambda 0.5 score 0.7736076345431789
lambda 0.5 score 0.7946401752190237
lambda 0.5 score 0.7847121401752191
lambda 0.5 score 0.7815926157697122
lambda 0.5 score 0.7776533166458073
lambda 0.1 score 0.7614236545682103




lambda 0.1 score 0.769874843554443
lambda 0.1 score 0.7679192740926157
lambda 0.1 score 0.7847747183979975
lambda 0.1 score 0.7871433041301628




lambda 0.5 score 0.7649655819774719
lambda 0.5 score 0.7684198998748436
lambda 0.5 score 0.7696464330413016
lambda 0.5 score 0.7667897371714644
lambda 0.5 score 0.7618272841051315
lambda 1 score 0.7825782227784731




lambda 1 score 0.785409887359199
lambda 1 score 0.7725187734668335
lambda 1 score 0.7610575719649562
lambda 1 score 0.7868272841051314
lambda 10 score 0.7831445556946183




lambda 10 score 0.7812515644555694
lambda 10 score 0.7956289111389236
lambda 10 score 0.7790362953692115
lambda 10 score 0.787787859824781




lambda 100 score 0.7821182728410513
lambda 100 score 0.7725312891113892
lambda 100 score 0.7843147684605757
lambda 100 score 0.7804036295369211
lambda 100 score 0.771229662077597




lambda 500 score 0.7850438047559449
lambda 500 score 0.8060012515644556
lambda 500 score 0.7727878598247809
lambda 500 score 0.7741864831038798
lambda 500 score 0.7825719649561953
lambda 1000 score 0.7824405506883605




lambda 1000 score 0.7892459324155194
lambda 1000 score 0.8099530663329162
lambda 1000 score 0.7778097622027534
lambda 1000 score 0.7871057571964957




lambda 5000 score 0.7698247809762203
lambda 5000 score 0.7758260325406758
lambda 5000 score 0.7714549436795995
lambda 5000 score 0.8030632040050063
lambda 5000 score 0.8086983729662077
