# Flash Evaluation on CamFlow Dataset:

This notebook is dedicated to evaluating Flash on the 5G CamFlow datasets, which are graph-level in nature. We employ Flash in graph-level detection mode to analyze this dataset effectively. Upon completion of the notebook execution, the results will be presented.

## Dataset Access: 
- This dataset will be publicly available upon publication.

## Data Parsing and Execution:
- Utilize the parser included in this notebook to process the downloaded files.
- To obtain the evaluation results, execute all cells within this notebook.

## Model Training and Execution Flexibility:
- By default, the notebook operates using pre-trained model weights.
- Additionally, this notebook offers the flexibility to set parameters for training Graph Neural Networks (GNNs) and word2vec models from scratch.
- You can then utilize these freshly trained models to conduct the evaluation. 

Follow these guidelines for a thorough and efficient analysis of the 5G CamFlow datasets using Flash.


In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import torch
from torch_geometric.data import Data
import torch.nn.functional as F
import json
import warnings
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
warnings.filterwarnings('ignore')
from torch_geometric.loader import NeighborLoader
import multiprocessing
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
%matplotlib inline

In [2]:
from pprint import pprint
import gzip
from sklearn.manifold import TSNE
import json
import copy
import os
from tqdm import tqdm, trange

In [3]:
# Run-time switches for this notebook.
# When True, the cell guarded by `if Train_Gnn:` retrains the GNN models.
Train_Gnn = False
# When True, the cells guarded by `if Train_Word2vec:` rebuild the corpus and retrain word2vec.
Train_Word2vec = False
# NOTE(review): not referenced by any visible cell — confirm whether a parser cell was removed.
Parse_data = False
# Passed as `disable=` to the tqdm progress bars in prepare_graph and the evaluation loops.
Disable_tqdm = True

In [4]:
from tqdm import tqdm 
def show(str):
    """Print ``str`` followed by a space and the current local time.

    The timestamp is formatted as ``YYYY-MM-DD HH:MM:SS``.
    """
    # Imported locally: none of the notebook's import cells bring in ``time``,
    # so the original body raised NameError on its first call.
    import time

    print(str + ' ' + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())))
def prepare_graph(df):
    """Convert an event table into node features, node labels and an edge list.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per event. The columns ``actorID``, ``actor_type``,
        ``objectID``, ``object`` and ``action`` are read.

    Returns
    -------
    features : list[list]
        For each node, the actions of every row it took part in, in row order.
    feat_labels : list[int]
        For each node, the integer code of its type, or ``-1`` when the type is
        not in the mapping below. If a node occurs in several rows, the last
        row seen decides its label.
    edge_index : list[list[int]]
        ``[sources, targets]`` expressed as positional node indices, one entry
        per row of ``df``.
    node_ids : list
        Node identifiers in first-appearance order; position ``i`` matches
        index ``i`` in the other three outputs.
    """
    # Integer class code for every known node type.
    type_codes = {
        'address': 0,
        'argv': 1,
        'block': 2,
        'file': 3,
        'iattr': 4,
        'link': 5,
        'path': 6,
        'pipe': 7,
        'process_memory': 8,
        'socket': 9,
        'task': 10,
        'xattr': 11,
    }

    actions_by_node = {}
    label_by_node = {}
    edge_pairs = []

    def record(node, action, type_name):
        # Append the action to the node's history and (re)assign its label.
        actions_by_node.setdefault(node, []).append(action)
        label_by_node[node] = type_codes.get(type_name, -1)

    for row in df.itertuples():
        record(row.actorID, row.action, row.actor_type)
        record(row.objectID, row.action, row.object)
        edge_pairs.append((row.actorID, row.objectID))

    features = [actions_by_node[node] for node in tqdm(actions_by_node, disable=Disable_tqdm)]
    feat_labels = [label_by_node[node] for node in tqdm(actions_by_node, disable=Disable_tqdm)]

    # Dict insertion order defines each node's positional index.
    position_of = {node: position for position, node in enumerate(actions_by_node)}
    sources = []
    targets = []
    for src, dst in tqdm(edge_pairs, disable=Disable_tqdm):
        sources.append(position_of[src])
        targets.append(position_of[dst])

    return features, feat_labels, [sources, targets], list(actions_by_node)


In [5]:
from torch_geometric.nn import GCNConv
from torch_geometric.nn import SAGEConv, GATConv
import torch.nn.functional as F
import torch.nn as nn

class GCN(torch.nn.Module):
    """Node classifier: two ``SAGEConv`` layers followed by a linear head.

    NOTE(review): ``forward`` returns softmax probabilities, and the training
    cell of this notebook passes that output to ``CrossEntropyLoss``, which is
    documented to expect unnormalized logits. Confirm this is intended before
    retraining; the saved weights were presumably produced this way.
    """

    def __init__(self,in_channel,out_channel):
        super().__init__()
        # Attribute names and creation order determine the state_dict keys,
        # so they are kept exactly as the saved checkpoints expect.
        self.conv1 = SAGEConv(in_channel, 32, normalize=True)
        self.conv2 = SAGEConv(32, 20, normalize=True)
        self.linear = nn.Linear(20, out_channel)

    def forward(self, x, edge_index):
        """Return per-node class probabilities (each row sums to 1)."""
        hidden = F.relu(self.conv1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=0.5, training=self.training)
        hidden = self.conv2(hidden, edge_index)
        scores = self.linear(hidden)
        return scores.softmax(dim=1)

In [6]:
from gensim.models.callbacks import CallbackAny2Vec
import gensim
from gensim.models import Word2Vec
from multiprocessing import Pool
from itertools import compress
from tqdm import tqdm


class EpochSaver(CallbackAny2Vec):
    '''Callback to save model after each epoch.

    Parameters
    ----------
    path : str, optional
        File the model is written to at the end of every epoch. The default is
        the location this notebook later loads the word2vec model from.
    '''

    def __init__(self, path='trained_weights/5gcamflow/5gcamflow.model'):
        self.epoch = 0
        self.path = path

    def on_epoch_end(self, model):
        # Create the target directory on demand so a fresh checkout does not
        # fail at the end of the first epoch.
        parent = os.path.dirname(self.path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        model.save(self.path)
        self.epoch += 1

In [7]:
class EpochLogger(CallbackAny2Vec):
    '''Callback that prints one line when an epoch starts and one when it ends.'''

    def __init__(self):
        # Zero-based index of the epoch currently being reported.
        self.epoch = 0

    def on_epoch_begin(self, model):
        message = "Epoch #{} start".format(self.epoch)
        print(message)

    def on_epoch_end(self, model):
        message = "Epoch #{} end".format(self.epoch)
        print(message)
        # Advance the counter only after the end-of-epoch line is printed.
        self.epoch = self.epoch + 1

In [8]:
# Callback instances passed to Word2Vec(..., callbacks=[saver, logger]) in the training cell.
logger = EpochLogger()
saver = EpochSaver()
# Dataset sub-directory name; graph files are read from 5gcamflow/{date}/{i}.txt.
date = '30-04-2024'

In [9]:
if Train_Word2vec:
    # Build the word2vec corpus from graph files 0..99.
    comb_data = []
    for i in tqdm(range(100)):
        # The context manager closes each file; the original left 100 handles open.
        with open(f"5gcamflow/{date}/{i}.txt") as f:
            data = f.read().split('\n')
        data = [line.split('\t') for line in data]
        # extend() grows the list in place; `comb_data = comb_data + data`
        # re-copied everything accumulated so far on every iteration.
        comb_data.extend(data)

    df = pd.DataFrame (comb_data, columns = ['actorID', 'actor_type','objectID','object','action','timestamp'])
    # NOTE(review): timestamps are still strings here, so this sort is lexicographic — confirm that matches numeric order for this data.
    df.sort_values(by='timestamp', ascending=True,inplace=True)
    # Short rows (e.g. the empty line after a trailing newline) are padded with missing values and dropped here.
    df = df.dropna()
    phrases,labels,edges,mapp = prepare_graph(df)
    


In [10]:
if Train_Word2vec:
    # Train the embedding model on the per-node action sequences. `saver`
    # writes the model to disk after every epoch and `logger` prints progress.
    word2vec = Word2Vec(
        sentences=phrases,
        vector_size=30,
        window=5,
        min_count=1,
        workers=32,
        epochs=300,
        callbacks=[saver, logger],
    )

In [11]:
from sklearn.utils import class_weight
import torch.nn.functional as F
from torch.nn import CrossEntropyLoss

# 30 input features matches the word2vec vector_size used in this notebook.
# NOTE(review): 14 output classes, but prepare_graph's active type mapping has 12 entries — confirm against the saved weights.
model = GCN(30,14).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

In [12]:
from collections import Counter
import math

class PositionalEncoder:
    """Precomputed sinusoidal positional-encoding table.

    Row ``p`` of ``self.pe`` holds ``sin(p * w_k)`` in even columns and
    ``cos(p * w_k)`` in odd columns, where ``w_k = 10000 ** (-2k / d_model)``.

    Parameters
    ----------
    d_model : int
        Width of the vectors to be encoded. Odd widths are supported.
    max_len : int, optional
        Number of positions precomputed.
    device : str or torch.device, optional
        Device that holds the table.
    """

    def __init__(self, d_model, max_len=100000, device='cuda' if torch.cuda.is_available() else 'cpu'):
        self.device = device
        position = torch.arange(max_len, device=device).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2, device=device) * (-math.log(10000.0) / d_model))
        angles = position * div_term
        self.pe = torch.zeros(max_len, d_model, device=device)
        self.pe[:, 0::2] = torch.sin(angles)
        # An odd d_model has one fewer odd column than even column, so the
        # cosine half uses only the first d_model // 2 frequencies. The
        # original assigned all of them and failed with a shape mismatch.
        self.pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])

    def embed(self, x):
        """Add the encoding of positions ``0..x.size(0)-1`` to the rows of ``x``.

        ``x`` must have at most ``max_len`` rows and be on the table's device.
        """
        return x + self.pe[:x.size(0)]

def infer(document):
    """Embed one sequence of tokens as a single 30-element vector.

    Tokens missing from the word2vec vocabulary are skipped. The remaining
    word vectors get a positional encoding added (when the document has fewer
    than 100000 tokens) and are then averaged.

    Parameters
    ----------
    document : sequence
        Tokens to look up in the module-level ``w2vmodel``.

    Returns
    -------
    numpy.ndarray
        The mean vector, or ``np.zeros(30)`` when no token is in the vocabulary.
    """
    word_embeddings = [w2vmodel.wv[word] for word in document if word in  w2vmodel.wv]

    if not word_embeddings:
        return np.zeros(30)

    # Stack into one ndarray first: building a tensor straight from a list of
    # ndarrays takes a slow element-by-element path. The warning about it is
    # hidden by the notebook's warnings.filterwarnings('ignore').
    stacked = np.asarray(word_embeddings)
    output_embedding = torch.tensor(stacked, dtype=torch.float, device=device)
    # 100000 equals the encoder's default table length; longer documents are
    # averaged without positional information.
    if len(document) < 100000:
        output_embedding = encoder.embed(output_embedding)

    output_embedding = output_embedding.detach().cpu().numpy()
    return np.mean(output_embedding, axis=0)

# Width 30 matches the word vectors that infer() adds the encoding to.
encoder = PositionalEncoder(30)
# Loaded unconditionally, whether or not word2vec was retrained in this session.
# NOTE(review): gensim model files are presumably pickle-based — only load files from a trusted source.
w2vmodel = Word2Vec.load("trained_weights/5gcamflow/5gcamflow.model")

In [13]:
# Graph-file index ranges for each split (files are named {i}.txt).
# Training iterates over graphs [0, train_stop).
train_stop = 250
# Validation iterates over graphs [val_start, val_stop) = [250, 300).
val_start = train_stop
val_size = 50
val_stop = val_start + val_size
# NOTE(review): the `+ 1` starts the benign test range at 301, so graph 300 falls in no split — confirm this is intentional.
ben_start = val_stop + 1
ben_size = 55
ben_stop = ben_start + ben_size
# The attack test loop iterates over graphs [mal_start, mal_stop) = [401, 404).
mal_start = 401
mal_size = 3
mal_stop = mal_start + mal_size


In [14]:
from torch_geometric import utils

################################## Training Main Model #####################################
if Train_Gnn:
    # One loss object serves every graph; it holds no per-graph state.
    criterion = CrossEntropyLoss()

    for i in trange(train_stop):
        # The context manager closes the file; the original leaked one handle per graph.
        with open(f"5gcamflow/{date}/{i}.txt") as f:
            data = f.read().split('\n')

        data = [line.split('\t') for line in data]
        df = pd.DataFrame (data, columns = ['actorID', 'actor_type','objectID','object','action','timestamp'])
        df.sort_values(by='timestamp', ascending=True,inplace=True)
        df = df.dropna()
        phrases,labels,edges,mapp = prepare_graph(df)

        nodes = [infer(x) for x in tqdm(phrases, desc='Inferring Phrases', disable=Disable_tqdm)]
        nodes = np.array(nodes)

        graph = Data(x=torch.tensor(nodes,dtype=torch.float).to(device),y=torch.tensor(labels,dtype=torch.long).to(device), edge_index=torch.tensor(edges,dtype=torch.long).to(device))
        graph.n_id = torch.arange(graph.num_nodes)
        # True = node that no model so far has classified correctly. Only these
        # nodes seed the next model's mini-batches.
        mask = torch.tensor([True]*graph.num_nodes, dtype=torch.bool)

        for m_n in range(20):
            # Training pass over the still-misclassified nodes.
            loader = NeighborLoader(graph, num_neighbors=[-1,-1], batch_size=5000,input_nodes=mask)
            total_loss = 0
            model.train()
            for subg in loader:
                subg.to(device)
                optimizer.zero_grad()
                out = model(subg.x, subg.edge_index)
                loss = criterion(out, subg.y)
                loss.backward()
                optimizer.step()
                total_loss += loss.item() * subg.batch_size

            # Evaluation pass: clear the mask for every node now predicted correctly.
            loader = NeighborLoader(graph, num_neighbors=[-1,-1], batch_size=5000,input_nodes=mask)
            model.eval()
            # No gradients are needed here; skipping autograd saves memory.
            with torch.no_grad():
                for subg in loader:
                    subg.to(device)
                    out = model(subg.x, subg.edge_index)
                    # `ranked` instead of `sorted`, so the builtin is not shadowed.
                    ranked, indices = out.sort(dim=1,descending=True)
                    pred = indices[:,0]
                    cond = (pred == subg.y)
                    # `mask` lives on the CPU, so the node ids are moved there
                    # before indexing (a no-op when already on the CPU).
                    mask[subg.n_id[cond].cpu()] = False

            print(f'Model# {m_n}. {mask.sum().item()} nodes still misclassified out of {mask.size()} ')
            torch.save(model.state_dict(), f'trained_weights/5gcamflow/5gcamflow{m_n}.pth')

### Validation

In [15]:
from torch.utils.data import Dataset, DataLoader
class PhraseDataset(Dataset):
    """Thin ``Dataset`` wrapper that exposes a sequence of phrases by position."""

    def __init__(self, phrases):
        # Kept by reference; no copy is made.
        self.phrases = phrases

    def __len__(self):
        count = len(self.phrases)
        return count

    def __getitem__(self, idx):
        item = self.phrases[idx]
        return item

def batch_infer(phrases):
    """Apply ``infer`` to every phrase and return the results as a list, in order."""
    embeddings = []
    for phrase in phrases:
        embeddings.append(infer(phrase))
    return embeddings

In [16]:
from tqdm import tqdm

# Per-graph anomaly score on the validation split: the number of nodes that
# none of the 20 saved models classifies correctly.
scores = []
for i in trange(val_start,val_stop):
    print(f"Graph #: {i}")
    # The context manager closes the file; the original leaked one handle per graph.
    with open(f"5gcamflow/{date}/{i}.txt") as f:
        data = f.read().split('\n')

    data = [line.split('\t') for line in data]
    df = pd.DataFrame (data, columns = ['actorID', 'actor_type','objectID','object','action','timestamp'])
    df.sort_values(by='timestamp', ascending=True,inplace=True)
    df = df.dropna()

    phrases,labels,edges,mapp = prepare_graph(df)
    nodes = [infer(x) for x in tqdm(phrases, disable=Disable_tqdm)]
    nodes = np.array(nodes)

    graph = Data(x=torch.tensor(nodes,dtype=torch.float).to(device),y=torch.tensor(labels,dtype=torch.long).to(device), edge_index=torch.tensor(edges,dtype=torch.long).to(device))
    # True = node not yet classified correctly by any model.
    flag = torch.tensor([True]*graph.num_nodes, dtype=torch.bool).to(device)
    for m_n in tqdm(range(20), disable=Disable_tqdm):
        # map_location lets checkpoints saved on a GPU load on a CPU-only machine.
        model.load_state_dict(torch.load(f'trained_weights/5gcamflow/5gcamflow{m_n}.pth', map_location=device))
        model.eval()
        # Inference only: skip autograd bookkeeping for the full-graph forward pass.
        with torch.no_grad():
            out = model(graph.x, graph.edge_index)

        # `ranked` instead of `sorted`, so the builtin is not shadowed.
        ranked, indices = out.sort(dim=1,descending=True)
        pred = indices[:,0]
        cond = (pred == graph.y)
        # Same effect as the original n_id / logical_and construction: every
        # correctly predicted node is cleared.
        flag[cond] = False

    n_flagged = flag.sum().item()
    print(n_flagged, (n_flagged / len(flag))*100)
    scores.append(n_flagged)

  0%|          | 0/50 [00:00<?, ?it/s]

Graph #: 250


  2%|▏         | 1/50 [00:03<02:40,  3.27s/it]

### Testing

In [54]:
from math import ceil
# Rank the validation scores and take the value at the 90% position as the
# graph-level anomaly threshold.
scores.sort()
print(scores)
print(len(scores))
avg = sum(scores) / len(scores)
print(avg)

# ceil(len * 0.9) equals len(scores) whenever there are fewer than 10 scores,
# which indexed past the end of the list; clamp to the last element. For 10 or
# more scores the chosen index is unchanged.
thresh_index = min(ceil(len(scores)*0.9), len(scores) - 1)
thresh = scores[thresh_index]
print(thresh)

[444, 457, 486, 499, 504, 515, 516, 518, 521, 522, 523, 541, 544, 547, 556, 561, 562, 569, 574, 607, 612, 618, 628, 628, 640, 645, 670, 682, 686, 687, 693, 697, 698, 719, 726, 733, 734, 736, 760, 761, 769, 776, 778, 828, 834, 836, 840, 882, 899, 945]
50
654.12
836


In [55]:
# Benign test split: a graph counts as correctly classified when its number of
# still-misclassified nodes does not exceed `thresh`.
correct_benign = 0

for i in trange(ben_start ,ben_stop):
    print(f"Graph #: {i}")
    # The context manager closes the file; the original leaked one handle per graph.
    with open(f"5gcamflow/{date}/{i}.txt") as f:
        data = f.read().split('\n')

    data = [line.split('\t') for line in data]
    df = pd.DataFrame (data, columns = ['actorID', 'actor_type','objectID','object','action','timestamp'])
    df.sort_values(by='timestamp', ascending=True,inplace=True)
    df = df.dropna()

    phrases,labels,edges,mapp = prepare_graph(df)

    nodes = [infer(x) for x in tqdm(phrases, disable=Disable_tqdm)]
    nodes = np.array(nodes)

    graph = Data(x=torch.tensor(nodes,dtype=torch.float).to(device),y=torch.tensor(labels,dtype=torch.long).to(device), edge_index=torch.tensor(edges,dtype=torch.long).to(device))
    # True = node not yet classified correctly by any model.
    flag = torch.tensor([True]*graph.num_nodes, dtype=torch.bool).to(device)

    for m_n in range(20):
        # map_location lets checkpoints saved on a GPU load on a CPU-only machine.
        model.load_state_dict(torch.load(f'trained_weights/5gcamflow/5gcamflow{m_n}.pth', map_location=device))
        model.eval()
        # Inference only: skip autograd bookkeeping for the full-graph forward pass.
        with torch.no_grad():
            out = model(graph.x, graph.edge_index)

        # `ranked` instead of `sorted`, so the builtin is not shadowed.
        ranked, indices = out.sort(dim=1,descending=True)
        pred = indices[:,0]
        cond = (pred == graph.y)
        # Same effect as the original n_id / logical_and construction.
        flag[cond] = False

    n_flagged = flag.sum().item()
    if n_flagged <= thresh:
        correct_benign = correct_benign + 1

    print(n_flagged, (n_flagged / len(flag))*100, correct_benign, i)
print(correct_benign)

  0%|          | 0/55 [00:00<?, ?it/s]

Graph #: 301


  2%|▏         | 1/55 [00:14<12:48, 14.23s/it]

688 0.5557395455536798 1 301
Graph #: 302


  4%|▎         | 2/55 [00:28<12:36, 14.27s/it]

980 0.7886054558622355 1 302
Graph #: 303


  5%|▌         | 3/55 [00:43<12:34, 14.52s/it]

452 0.3654001616814875 2 303
Graph #: 304


  7%|▋         | 4/55 [00:58<12:26, 14.63s/it]

509 0.40886818218330795 3 304
Graph #: 305


  9%|▉         | 5/55 [01:12<12:05, 14.50s/it]

821 0.6548198248496546 4 305
Graph #: 306


 11%|█         | 6/55 [01:26<11:45, 14.39s/it]

643 0.5137177827844622 5 306
Graph #: 307


 13%|█▎        | 7/55 [01:40<11:29, 14.37s/it]

442 0.3584345654183629 6 307
Graph #: 308


 15%|█▍        | 8/55 [01:55<11:19, 14.46s/it]

520 0.4203005148681307 7 308
Graph #: 309


 16%|█▋        | 9/55 [02:09<10:59, 14.33s/it]

914 0.7419855012461135 7 309
Graph #: 310


 18%|█▊        | 10/55 [02:24<10:46, 14.37s/it]

480 0.39535784002833396 8 310
Graph #: 311


 20%|██        | 11/55 [02:38<10:30, 14.32s/it]

600 0.4774637127578304 9 311
Graph #: 312


 22%|██▏       | 12/55 [02:52<10:12, 14.23s/it]

654 0.5266760620092612 10 312
Graph #: 313


 24%|██▎       | 13/55 [03:06<09:59, 14.27s/it]

715 0.5787739705513328 11 313
Graph #: 314


 25%|██▌       | 14/55 [03:21<09:48, 14.35s/it]

505 0.40334174627008723 12 314
Graph #: 315


 27%|██▋       | 15/55 [03:35<09:29, 14.23s/it]

680 0.5539534353259364 13 315
Graph #: 316


 29%|██▉       | 16/55 [03:49<09:19, 14.35s/it]

841 0.67985966273787 13 316
Graph #: 317


 31%|███       | 17/55 [04:04<09:04, 14.32s/it]

512 0.4106479736287004 14 317
Graph #: 318


 33%|███▎      | 18/55 [04:18<08:46, 14.23s/it]

626 0.49999201290714207 15 318
Graph #: 319


 35%|███▍      | 19/55 [04:32<08:33, 14.28s/it]

660 0.5257372269750992 16 319
Graph #: 320


 36%|███▋      | 20/55 [04:46<08:21, 14.34s/it]

787 0.6360418316711656 17 320
Graph #: 321


 38%|███▊      | 21/55 [05:00<08:04, 14.25s/it]

541 0.4419573564251287 18 321
Graph #: 322


 40%|████      | 22/55 [05:15<07:52, 14.31s/it]

466 0.37222231097336933 19 322
Graph #: 323


 42%|████▏     | 23/55 [05:29<07:39, 14.35s/it]

747 0.6029299003188183 20 323
Graph #: 324


 44%|████▎     | 24/55 [05:44<07:26, 14.41s/it]

680 0.5518360722256035 21 324
Graph #: 325


 45%|████▌     | 25/55 [05:58<07:10, 14.35s/it]

484 0.38752241865231873 22 325
Graph #: 326


 47%|████▋     | 26/55 [06:13<06:58, 14.42s/it]

754 0.6105559784281017 23 326
Graph #: 327


 49%|████▉     | 27/55 [06:27<06:43, 14.40s/it]

813 0.6507488013575276 24 327
Graph #: 328


 51%|█████     | 28/55 [06:41<06:28, 14.40s/it]

498 0.40494389331598635 25 328
Graph #: 329


 53%|█████▎    | 29/55 [06:55<06:11, 14.28s/it]

652 0.532444836428373 26 329
Graph #: 330


 55%|█████▍    | 30/55 [07:10<06:00, 14.44s/it]

798 0.6343099693178386 27 330
Graph #: 331


 56%|█████▋    | 31/55 [07:24<05:43, 14.33s/it]

738 0.5942507448264756 28 331
Graph #: 332


 58%|█████▊    | 32/55 [07:38<05:27, 14.22s/it]

548 0.44532208651275423 29 332
Graph #: 333


 60%|██████    | 33/55 [07:53<05:15, 14.33s/it]

648 0.508139644302249 30 333
Graph #: 334


 62%|██████▏   | 34/55 [08:07<05:02, 14.39s/it]

867 0.702098196571299 30 334
Graph #: 335


 64%|██████▎   | 35/55 [08:21<04:45, 14.28s/it]

587 0.4793127944670809 31 335
Graph #: 336


 65%|██████▌   | 36/55 [08:36<04:33, 14.38s/it]

640 0.517464424320828 32 336
Graph #: 337


 67%|██████▋   | 37/55 [08:50<04:17, 14.33s/it]

817 0.6536679814700729 33 337
Graph #: 338


 69%|██████▉   | 38/55 [09:05<04:03, 14.34s/it]

750 0.6016026695115788 34 338
Graph #: 339


 71%|███████   | 39/55 [09:19<03:49, 14.37s/it]

584 0.4729395949239977 35 339
Graph #: 340


 73%|███████▎  | 40/55 [09:34<03:38, 14.58s/it]

545 0.4394417074527701 36 340
Graph #: 341


 75%|███████▍  | 41/55 [09:48<03:21, 14.43s/it]

806 0.6470465456063452 37 341
Graph #: 342


 76%|███████▋  | 42/55 [10:03<03:07, 14.44s/it]

528 0.4263669178032414 38 342
Graph #: 343


 78%|███████▊  | 43/55 [10:17<02:52, 14.35s/it]

652 0.521453992881993 39 343
Graph #: 344


 80%|████████  | 44/55 [10:31<02:38, 14.41s/it]

814 0.6503519410688463 40 344
Graph #: 345


 82%|████████▏ | 45/55 [10:45<02:22, 14.25s/it]

802 0.6483322824206561 41 345
Graph #: 346


 84%|████████▎ | 46/55 [10:59<02:07, 14.12s/it]

472 0.38624572429256476 42 346
Graph #: 347


 85%|████████▌ | 47/55 [11:14<01:53, 14.23s/it]

610 0.4844999722008213 43 347
Graph #: 348


 87%|████████▋ | 48/55 [11:28<01:39, 14.27s/it]

835 0.6738272581282935 44 348
Graph #: 349


 89%|████████▉ | 49/55 [11:42<01:24, 14.16s/it]

545 0.443890599293033 45 349
Graph #: 350


 91%|█████████ | 50/55 [11:56<01:10, 14.17s/it]

473 0.3856533685557974 46 350
Graph #: 351


 93%|█████████▎| 51/55 [12:10<00:56, 14.05s/it]

702 0.5716938261953043 47 351
Graph #: 352


 95%|█████████▍| 52/55 [12:24<00:42, 14.13s/it]

802 0.6408310027966441 48 352
Graph #: 353


 96%|█████████▋| 53/55 [12:38<00:28, 14.17s/it]

582 0.4686707306270686 49 353
Graph #: 354


 98%|█████████▊| 54/55 [12:52<00:14, 14.13s/it]

592 0.49187410681644456 50 354
Graph #: 355


100%|██████████| 55/55 [13:06<00:00, 14.30s/it]

766 0.6231492629592268 51 355
51





In [56]:
# Attack test split: a graph counts as detected when its number of
# still-misclassified nodes exceeds `thresh`.
correct_attack = 0

for i in trange(mal_start,mal_stop):
    print(f"Graph #: {i}")
    # The context manager closes the file; the original leaked one handle per graph.
    with open(f"5gcamflow/{date}/{i}.txt") as f:
        data = f.read().split('\n')

    data = [line.split('\t') for line in data]
    df = pd.DataFrame (data, columns = ['actorID', 'actor_type','objectID','object','action','timestamp'])
    df.sort_values(by='timestamp', ascending=True,inplace=True)
    df = df.dropna()

    phrases,labels,edges,mapp = prepare_graph(df)

    nodes = [infer(x) for x in phrases]
    nodes = np.array(nodes)

    graph = Data(x=torch.tensor(nodes,dtype=torch.float).to(device),y=torch.tensor(labels,dtype=torch.long).to(device), edge_index=torch.tensor(edges,dtype=torch.long).to(device))
    # True = node not yet classified correctly by any model.
    flag = torch.tensor([True]*graph.num_nodes, dtype=torch.bool).to(device)

    for m_n in range(20):
        # map_location lets checkpoints saved on a GPU load on a CPU-only machine.
        model.load_state_dict(torch.load(f'trained_weights/5gcamflow/5gcamflow{m_n}.pth', map_location=device))
        model.eval()
        # Inference only: skip autograd bookkeeping for the full-graph forward pass.
        with torch.no_grad():
            out = model(graph.x, graph.edge_index)

        # `ranked` instead of `sorted`, so the builtin is not shadowed.
        ranked, indices = out.sort(dim=1,descending=True)
        pred = indices[:,0]
        cond = (pred == graph.y)
        # Same effect as the original n_id / logical_and construction.
        flag[cond] = False

    n_flagged = flag.sum().item()
    if n_flagged > thresh:
        correct_attack = correct_attack + 1

    print(n_flagged, (n_flagged / len(flag))*100, correct_attack, i)

  0%|          | 0/3 [00:00<?, ?it/s]

Graph #: 401


 33%|███▎      | 1/3 [00:13<00:27, 13.54s/it]

1155 0.9822430860291868 1 401
Graph #: 402


 67%|██████▋   | 2/3 [00:27<00:13, 13.85s/it]

1311 1.0594966784657904 2 402
Graph #: 403


100%|██████████| 3/3 [00:38<00:00, 12.91s/it]

1105 1.1380137797505638 3 403





In [57]:
# Confusion-matrix counts, with attack graphs as the positive class.
TP = correct_attack
TN = correct_benign
FP = ben_size - correct_benign
FN = mal_size - correct_attack

# Each ratio falls back to 0 when its denominator is not positive.
negatives = FP + TN
positives = TP + FN
FPR = FP / negatives if negatives > 0 else 0
TPR = TP / positives if positives > 0 else 0

print(f"Number of True Positives (TP): {TP}")
print(f"Number of False Positives (FP): {FP}")
print(f"Number of False Negatives (FN): {FN}")
print(f"Number of True Negatives (TN): {TN}\n")

flagged = TP + FP
total = TP + TN + FP + FN
precision = TP / flagged if flagged > 0 else 0
recall = TPR
accuracy = (TP + TN) / total if total > 0 else 0
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")

# Harmonic mean of precision and recall.
pr_sum = precision + recall
fscore = (2 * precision * recall) / pr_sum if pr_sum > 0 else 0
print(f"Fscore: {fscore}\n")

Number of True Positives (TP): 3
Number of False Positives (FP): 4
Number of False Negatives (FN): 0
Number of True Negatives (TN): 51

Precision: 0.42857142857142855
Recall: 1.0
Fscore: 0.6

