In [1]:
import time
import numpy as np
import pandas as pd
import math
from dataclasses import dataclass
import lightgbm as lgb

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score, roc_curve, confusion_matrix
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ParameterGrid
from imblearn.over_sampling import SMOTE

## Functions

In [2]:
def prepare_data(df, known_classes, target, random_state=42, offline_ratio=0.8, offline_size=None, online_size=None, stratify=False, resample_smote=False, scaling=False):
    if random_state is not None:
        np.random.seed(random_state)
        
    if offline_size is None and online_size is None:
        offline_size = math.floor(offline_ratio * df.shape[0])
        online_size = df.shape[0] - offline_size
        

    # Move target column to last position
    df[target] = df.pop(target)

    # Shuffle the dataframe
    df = shuffle(df, random_state=random_state)
    
    df.reset_index(inplace=True, drop=True) # Reset index to keep track of the order of the samples

    # Create the offline dataframe using known classes
    known_df = df.loc[df[target].isin(known_classes)]
    unknown_df = df.loc[df.index.difference(known_df.index)]

    if math.floor(offline_size) > known_df.shape[0]:
        print(f"Not enough samples of known classes: {known_df.shape[0]} need {math.floor(offline_size)}", flush=True)
        offline_size = known_df.shape[0]

    X_all = known_df.drop([target], axis=1)
    y_all = known_df[target]
    
    # Check if stratify is required
    if stratify:
        offline_df_x, rest_df_x, offline_df_y, rest_df_y = train_test_split(X_all, y_all, train_size=offline_size, stratify=known_df[target])
        
    else:
        offline_df_x = X_all.iloc[:offline_size, :] # Take offline_size first samples
        rest_df_x = X_all.iloc[offline_size:, :]

        offline_df_y = y_all.iloc[:offline_size]
        rest_df_y = y_all.iloc[offline_size:]


    # Check if SMOTE is enabled
    if resample_smote:
        oversampler = SMOTE(random_state=random_state)
        offline_df_x, offline_df_y = oversampler.fit_resample(offline_df_x, offline_df_y)
    
    offline_df = pd.concat([offline_df_x, offline_df_y], axis=1)
    rest_df = pd.concat([rest_df_x, rest_df_y], axis=1)

    rest_df = pd.concat([rest_df, unknown_df])

    # Split into X and y
    X_offline = offline_df.drop(target, axis=1)
    X_online = rest_df.drop(target, axis=1)
    y_offline = offline_df[target]
    y_online = rest_df[target]

    # Check if scaling is enabled
    if scaling:
        scaler = StandardScaler()
        X_offline = scaler.fit_transform(X_offline)
        X_online = scaler.transform(X_online)

    return X_offline, X_online, y_offline, y_online

def predict_proba_from_models(models, X):
    all_preds = []
    all_proba = []
    for model in models:
        # Compute the predictions of the model
        proba = model.clf.predict_proba(X)
        y_pred = (proba[:,1] >= model.min_confidence).astype(bool)
        
        proba = [row[1] if index else row[0] for row, index in zip(proba, y_pred)]
        
        all_preds.append(np.array(y_pred))
        all_proba.append(np.array(proba))
    
    
    all_preds = np.array(all_preds)
    all_proba = np.array(all_proba)

    return all_preds, all_proba
    
def one_class_predict(models, X, meta_clf, meta_classifier_mode):
    all_preds, all_proba = predict_proba_from_models(models, X)

    if meta_clf is None:
        y_pred = np.full(all_preds.shape[1], -1)
        for i in range(all_preds.shape[1]):
            true_indices = np.where(all_preds[:, i])[0]  # Indices where the label is True
            if len(true_indices) > 0:
                # Select the index with the highest confidence
                best_index = true_indices[np.argmax(all_proba[true_indices, i])]
                y_pred[i] = known_labels[best_index]
    else:
        all_proba_t = np.transpose(all_proba)
        
        if meta_classifier_mode == 1:
            y_pred = meta_clf.predict(all_proba_t)
        else:
            y_pred = meta_clf.predict(np.hstack((X, all_proba_t)))

        # Add unknown predictions
        indices = np.where(~np.transpose(all_preds).any(axis=1))[0] # Find the indices of the rows where all columns are False
        y_pred[indices] = -1
        
    return y_pred

def one_class_test(clf_type, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label, use_min_conf=0, resample_smote=0, count_wrong_labels=True, filter_unknown=0, tpr_ratio=100, meta_classifier=0, **kwargs):
    if isinstance(y_offline, pd.Series):
        y_offline = y_offline.to_numpy()
        
    benign_label = benign_label
    starting_time = time.time()
    models = []
    
    if resample_smote > 0:
        X_offline_original = X_offline.copy()
        y_offline_original = y_offline.copy()

    
    for i, label in enumerate(known_labels):
        # Check if SMOTE resampling is enabled
        if resample_smote > 0:
            oversample = SMOTE(random_state=42)
            y = (y_offline_original == label).astype(int)
            
            if label == 0:
                y = ~y.astype(bool)
            else:
                y[y == 1] = label
                
            X_offline, y_offline = oversample.fit_resample(X_offline_original, y)
            
        

        clf = clf_type(**kwargs)
        clf.fit(X_offline, y_offline == label)        
            
        # use_min_conf legend
        # 0 : Don't use
        # 1 : Old method, use the mean average confidence accross all classes
        # 2 : Use the average confidence of the class that the binary classifier is targeting
        # 3 : Use the ROC curve to find the threshold with minimum FPR, maximum TPR, but heavily favors minimizing the FPR
        if use_min_conf == 0:
            min_confidence = 0.5
        elif use_min_conf == 1:
            min_confidence = np.mean(np.amax(clf.predict_proba(X_offline), axis=1))
        elif use_min_conf == 2:
            min_confidence = np.mean(np.amax(clf.predict_proba(X_offline)[y_offline==label], axis=1))
        elif use_min_conf == 3:
            fpr, tpr, thresholds = roc_curve(y_offline==label, clf.predict_proba(X_offline)[:, 1])
            youden_j = tpr + (tpr_ratio * (1 - fpr))
            min_confidence = thresholds[np.argmax(youden_j)]

        print(f'Finished training label {label}')

        models.append(Model(label, clf, min_confidence))

    # Building the meta-classifier if needed
    # meta_classifier legend
    # 0 : Don't use
    # 1 : Use the probabilities
    # 2 : Use the probabilities and inputs
    meta_clf = None
    if meta_classifier > 0:
        _, all_proba = predict_proba_from_models(models, X_offline)
        
        # Invert rows and columns
        all_proba = np.transpose(all_proba)
        meta_clf = clf_type(**kwargs)
        
        if meta_classifier == 1:    
            meta_clf.fit(all_proba, y_offline)
            
        elif meta_classifier == 2:
            meta_clf.fit(np.hstack((X_offline, all_proba)), y_offline)
    
    # Predicting the final values
    final_labels = one_class_predict(models, X_online, meta_clf, meta_classifier)

    # Building the filter classifier if needed
    # filter_unknown legend
    # 0 : Don't use
    # 1 : Build a classifier on outputs that have been misclassified as attacks but are actually benign
    # 2 : Build a meta-classifier using the probabilities of the binary classifiers 
    if filter_unknown == 1:
        # Constructing a classifier on the unknown outputs
        offline_labels = one_class_predict(models, X_offline, meta_clf, meta_classifier)
        unknown_indexes = np.where(offline_labels == -1)[0]
    
        filter_clf = clf_type(**kwargs)
        filter_clf.fit(X_offline[unknown_indexes], y_offline[unknown_indexes] == benign_label)
        
    
        # Filtering the values
        filter_labels = filter_clf.predict(X_online[final_labels == -1])
        indices = np.where(final_labels == -1)[0]
    
        # Replace -1 with the benign label at those indices where corresponding filter_labels value is True
        final_labels[indices[filter_labels]] = benign_label

    
    elif filter_unknown == 2:
        # Constructing a classifier on the unknown outputs
        _, all_proba = predict_proba_from_models(models, X_offline)
        offline_labels = one_class_predict(models, X_offline, meta_clf, meta_classifier)

        # Invert rows and columns
        all_proba = np.transpose(all_proba)

        # Get the indexes where the label is unknown
        unknown_indexes = np.where(offline_labels == -1)[0]
        unknown_proba = all_proba[unknown_indexes]
        

        filter_clf = clf_type(**kwargs)
        X_offline_unknown = X_offline[unknown_indexes]
        filter_clf.fit(np.hstack((X_offline_unknown, unknown_proba)), y_offline[unknown_indexes] == benign_label)

        # Filtering the values
        _, online_proba = predict_proba_from_models(models, X_online)

        online_proba = np.transpose(online_proba)
        online_unknown_indexes = np.where(final_labels == -1)[0]

        online_proba = pd.DataFrame(online_proba[online_unknown_indexes])

        X_online_unknown = X_online[final_labels == -1]
        filter_labels = filter_clf.predict(np.hstack((X_online_unknown, online_proba)))
        indices = np.where(final_labels == -1)[0]
    
        # Replace -1 with the benign label at those indices where corresponding filter_labels value is True
        final_labels[indices[filter_labels]] = benign_label
        
        

    # Replace -1 with unknown_label if it exists, for now only one unknown label is supported
    if len(unknown_labels) > 0:
        final_labels[final_labels == -1] = unknown_labels[0]
    
    # Printing the global evaluation metrics
    print(f'\nGlobal Metrics: Micro F1: {f1_score(y_online, final_labels, average="micro", labels=known_labels+unknown_labels)}, Macro F1: {f1_score(y_online, final_labels, average="macro", labels=known_labels+unknown_labels)}, Weighted F1: {f1_score(y_online, final_labels, average="weighted", labels=known_labels+unknown_labels)}')
    compute_ids_metrics(y_online, final_labels, benign_label)
    
    # Printing the evaluation metrics for each label
    for i, label in enumerate(known_labels):
        print(f'Label: {label}, F1: {f1_score(y_online == label, final_labels == label)}, Precision: {precision_score(y_online == label, final_labels == label)}, Recall: {recall_score(y_online == label, final_labels == label)}, Accuracy: {accuracy_score(y_online == label, final_labels == label)}')
        print(f'Label: {label}, Negative F1: {f1_score(y_online == label, final_labels == label, pos_label=0)}, Negative Precision: {precision_score(y_online == label, final_labels == label, pos_label=0)}, Negative Recall: {recall_score(y_online == label, final_labels == label, pos_label=0)}')
        print(f'Confidence Threshold: {models[i].min_confidence}')
        print('\n')
    
    for unknown_label in unknown_labels:
        print(f'Unknown Label: {unknown_label}, F1: {f1_score(y_online == unknown_label, final_labels == unknown_label)}, Precision: {precision_score(y_online == unknown_label, final_labels == unknown_label)}, Recall: {recall_score(y_online == unknown_label, final_labels == unknown_label)}, Accuracy: {accuracy_score(y_online == unknown_label, final_labels == unknown_label)}')

    
    print(f'Time taken: {time.time()-starting_time}')
    
    return models


def test_normal_classifier(clf_type, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label, **kwargs):
    starting_time = time.time()

    clf = clf_type(**kwargs)
    clf.fit(X_offline, y_offline)

    y_pred = clf.predict(X_online)

    # Printing the global evaluation metrics
    print(f'\nGlobal Metrics: Micro F1: {f1_score(y_online, y_pred, average="micro", labels=known_labels+unknown_labels)}, Macro F1: {f1_score(y_online, y_pred, average="macro", labels=known_labels+unknown_labels)}, Weighted F1: {f1_score(y_online, y_pred, average="weighted", labels=known_labels+unknown_labels)}')
    compute_ids_metrics(y_online, y_pred, benign_label)

    # Printing the evaluation metrics for each label

    for label in known_labels:
        print(f'Label: {label}, F1: {f1_score(y_online == label, y_pred == label)}, Precision: {precision_score(y_online == label, y_pred == label)}, Recall: {recall_score(y_online == label, y_pred == label)}, Accuracy: {accuracy_score(y_online == label, y_pred == label)}')
        print(f'Label: {label}, Negative F1: {f1_score(y_online == label, y_pred == label, pos_label=0)}, Negative Precision: {precision_score(y_online == label, y_pred == label, pos_label=0)}, Negative Recall: {recall_score(y_online == label, y_pred == label, pos_label=0)}')
        print('\n')

    for unknown_label in unknown_labels:
        print(f'Unknown Label: {unknown_label}, F1: {f1_score(y_online == unknown_label, y_pred == unknown_label)}, Precision: {precision_score(y_online == unknown_label, y_pred == unknown_label)}, Recall: {recall_score(y_online == unknown_label, y_pred == unknown_label)}, Accuracy: {accuracy_score(y_online == unknown_label, y_pred == unknown_label)}')
    
    print(f'Time taken: {time.time()-starting_time}')\
    
    return clf
    
def compute_ids_metrics(y_true, y_pred, benign_label):
    print(f"Detection Rate (TPR/Recall): {recall_score(y_true != benign_label, y_pred != benign_label, average='binary')}")
    tn, fp, fn, tp = confusion_matrix(y_true != benign_label, y_pred != benign_label).ravel()
    print(f'False Alarm Rate (Micro FPR): {fp / (fp + tn)}\n')
    
@dataclass
class Model:
    label: int
    clf: object
    min_confidence: float

In [3]:
avail_params = {
    'use_min_conf': [0,3],
    'resample_smote': [0],
    'filter_unknown': [0],
    'meta_classifier': [0,2]
}

n_jobs = 22
grid = ParameterGrid(avail_params)

# NSLKDD

In [4]:
df = pd.read_csv('./data/NSLKDD.csv')

In [5]:
df['attack'].value_counts()

attack
0    77207
1    53387
2    14077
4     3738
3      108
Name: count, dtype: int64

In [6]:
df.shape

(148517, 124)

In [7]:
target = 'attack'
benign_label = 0

## No Unknown Class

In [8]:
known_labels = [0,1,2,3,4]
unknown_labels = []

X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=True,
                                     scaling=True)

### Baseline

In [9]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.9987880420145435, Macro F1: 0.9353048682231746, Weighted F1: 0.998808171805184
Detection Rate (TPR/Recall): 0.99922871967466
False Alarm Rate (Micro FPR): 0.0007123429607563787

Label: 0, F1: 0.9992876570392436, Precision: 0.9992876570392436, Recall: 0.9992876570392436, Accuracy: 0.9992593590088877
Label: 0, Negative F1: 0.99922871967466, Negative Precision: 0.99922871967466, Negative Recall: 0.99922871967466


Label: 1, F1: 0.9999063495036523, Precision: 0.9999063495036523, Recall: 0.9999063495036523, Accuracy: 0.999932669000808
Label: 1, Negative F1: 0.9999474403447913, Negative Precision: 0.9999474403447913, Negative Recall: 0.9999474403447913


Label: 2, F1: 0.9984005686866891, Precision: 0.9989331436699858, Recall: 0.9978685612788633, Accuracy: 0.9996970105036359
Label: 2, Negative F1: 0.9998326546549897, Negative Precision: 0.999776885319054, Negative Recall: 0.9998884302130983


Label: 3, F1: 0.6956521739130435, Precision: 0.64, Recall: 0.76190476190

### Proposed Approach

In [10]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 3
Finished training label 4

Global Metrics: Micro F1: 0.998501052631579, Macro F1: 0.9372213547261492, Weighted F1: 0.9985302705847962
Detection Rate (TPR/Recall): 0.9997896508203618
False Alarm Rate (Micro FPR): 0.0020722704312912837

Label: 0, F1: 0.9988656619672662, Precision: 0.9998053591124375, Recall: 0.9979277295687087, Accuracy: 0.9988217075141395
Label: 0, Negative F1: 0.9987742093650405, Negative Precision: 0.9977608284934574, Negative Recall: 0.9997896508203618
Confidence Threshold: 0.5


Label: 1, F1: 0.9999063407324155, Precision: 1.0, Recall: 0.9998126990073047, Accuracy: 0.999932669000808
Label: 1, Negative F1: 0.9999474431071635, Negative Precision: 0.9998948917384907, Negative Recall: 1.0
Confidence Thresho

## Unknown Class 2

In [11]:
unknown_labels = [2]
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=True,
                                     scaling=True)

### Baseline

In [12]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.5256531106921627, Macro F1: 0.5277394982004907, Weighted F1: 0.36419308167200926
Detection Rate (TPR/Recall): 0.535214664737096
False Alarm Rate (Micro FPR): 0.0005571651437486071

Label: 0, F1: 0.650446007687287, Precision: 0.48210062352182326, Recall: 0.9994428348562514, Accuracy: 0.675464583894425
Label: 0, Negative F1: 0.6971410618912975, Negative Precision: 0.9995495495495496, Negative Recall: 0.535214664737096


Label: 1, F1: 0.750953216728197, Precision: 0.6013375981389939, Recall: 0.999677731227844, Accuracy: 0.8614664691624023
Label: 1, Negative F1: 0.9040457036259765, Negative Precision: 0.9998968380873782, Negative Recall: 0.8249638267086561


Label: 3, F1: 0.5142857142857142, Precision: 0.4090909090909091, Recall: 0.6923076923076923, Accuracy: 0.9994276865068678
Label: 3, Negative F1: 0.9997136745658801, Negative Precision: 0.9998652381914965, Negative Recall: 0.999562156882557


Label: 4, F1: 0.7230125523012553, Precision: 0.5676741130091985, R

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Proposed Approach

In [13]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 3
Finished training label 4

Global Metrics: Micro F1: 0.56258416374899, Macro F1: 0.5908175238823052, Weighted F1: 0.4413062375202837
Detection Rate (TPR/Recall): 0.5884708152436083
False Alarm Rate (Micro FPR): 0.0012257633162469357

Label: 0, F1: 0.6772706664651654, Precision: 0.5123470904310049, Recall: 0.9987742366837531, Accuracy: 0.7124293024508483
Label: 0, Negative F1: 0.7406800242865816, Negative Precision: 0.9990990990990991, Negative Recall: 0.5884708152436083
Confidence Threshold: 0.5


Label: 1, F1: 0.7477548068229763, Precision: 0.597303803562831, Recall: 0.9995165968417661, Accuracy: 0.8591098841906813
Label: 1, Negative F1: 0.902258448746993, Negative Precision: 0.999844712459237, Negative Recall: 0.8220274065877947
Confidence Thresh

## Unknown Class 3

In [14]:
unknown_labels = [3]
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=True,
                                     scaling=True)

### Baseline

In [15]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.9955898195529221, Macro F1: 0.7853774026246274, Weighted F1: 0.9938927995333247
Detection Rate (TPR/Recall): 0.9984622911861326
False Alarm Rate (Micro FPR): 0.0007793726050529324

Label: 0, F1: 0.9988962472406181, Precision: 0.9985720776270527, Recall: 0.9992206273949471, Accuracy: 0.9988553730137355
Label: 0, Negative F1: 0.998811355055237, Negative Precision: 0.9991606630761698, Negative Recall: 0.9984622911861326


Label: 1, F1: 0.9999060680067631, Precision: 1.0, Recall: 0.9998121536583076, Accuracy: 0.999932669000808
Label: 1, Negative F1: 0.9999475285969147, Negative Precision: 0.9998950627000367, Negative Recall: 1.0


Label: 2, F1: 0.999109210760734, Precision: 0.9992872416250891, Recall: 0.9989312433202707, Accuracy: 0.9998316725020199
Label: 2, Negative F1: 0.9999070545589739, Negative Precision: 0.9998884675440554, Negative Recall: 0.9999256422649366


Label: 4, F1: 0.928975487115022, Precision: 0.8735224586288416, Recall: 0.9919463087248322, Ac

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Proposed Approach

In [16]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 4

Global Metrics: Micro F1: 0.994983840560194, Macro F1: 0.8050230160065557, Weighted F1: 0.9940218034431653
Detection Rate (TPR/Recall): 0.9988816663171874
False Alarm Rate (Micro FPR): 0.0017535883613690978

Label: 0, F1: 0.998603125101517, Precision: 0.9989600935915768, Recall: 0.9982464116386309, Accuracy: 0.9985523835173714
Label: 0, Negative F1: 0.9984978165938865, Negative Precision: 0.9981142617684035, Negative Recall: 0.9988816663171874
Confidence Threshold: 0.5


Label: 1, F1: 0.9997651369251727, Precision: 1.0, Recall: 0.9995303841457688, Accuracy: 0.9998316725020199
Label: 1, Negative F1: 0.9998688318161547, Negative Precision: 0.9997376980379813, Negative Recall: 1.0
Confidence Threshold: 0.5


Label: 2, F1: 0.

## Unknown Class 4

In [17]:
unknown_labels = [4]
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=True,
                                     scaling=True)

### Baseline

In [18]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.8738890385133316, Macro F1: 0.5843633713463525, Weighted F1: 0.8268170632350896
Detection Rate (TPR/Recall): 0.8198272056505014
False Alarm Rate (Micro FPR): 0.00021665342673503286

Label: 0, F1: 0.9063768495482519, Precision: 0.8289323992575295, Recall: 0.999783346573265, Accuracy: 0.9037166711554
Label: 0, Negative F1: 0.9009009009009009, Negative Precision: 0.999769284011382, Negative Recall: 0.8198272056505014


Label: 1, F1: 0.9997911227154047, Precision: 0.9997911227154047, Recall: 0.9997911227154047, Accuracy: 0.9998653380016159
Label: 1, Negative F1: 0.9999006408664116, Negative Precision: 0.9999006408664116, Negative Recall: 0.9999006408664116


Label: 2, F1: 0.9644359464627151, Precision: 0.9323475046210721, Recall: 0.9988118811881188, Accuracy: 0.9937382170751414
Label: 2, Negative F1: 0.9965668721621322, Negative Precision: 0.9998888847735101, Negative Recall: 0.9932668604437249


Label: 3, F1: 0.05121293800539083, Precision: 0.02627939142461964

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Proposed Approach

In [19]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 3

Global Metrics: Micro F1: 0.9002491246970105, Macro F1: 0.6606129231806944, Weighted F1: 0.8812656640301098
Detection Rate (TPR/Recall): 0.862142902188308
False Alarm Rate (Micro FPR): 0.0006499602802050986

Label: 0, F1: 0.9265173579726156, Precision: 0.8635796305541688, Recall: 0.9993500397197949, Accuracy: 0.9261042283867492
Label: 0, Negative F1: 0.9256864271930122, Negative Precision: 0.9993421052631579, Negative Recall: 0.862142902188308
Confidence Threshold: 0.5


Label: 1, F1: 0.9995822454308094, Precision: 0.9995822454308094, Recall: 0.9995822454308094, Accuracy: 0.9997306760032318
Label: 1, Negative F1: 0.9998012817328233, Negative Precision: 0.9998012817328233, Negative Recall: 0.9998012817328233
Confidence Thr

# NDSec1

In [8]:
df = pd.read_pickle('./data/nds1_clean_complete.pkl')

In [9]:
df['Label'] = df['Label'].str.strip()

In [10]:
df['Label'].value_counts()

Label
DOS UDP-Flood                       915613
DOS SYN-Flood                       436838
DOS HTTP-Flood                       16735
NORMAL                                9958
BRUTEFORCE HTTP-Bruteforce            2126
PROBE Port-Scan                       2000
BRUTEFORCE SSH-Bruteforce              970
PROBE Vulnerability-Scan                98
BOTNET C&C Communication                65
BRUTEFORCE FTP-Bruteforce               60
MISC SSL-Proxy (MitM)                   46
WEBATTACK SQL-Injection                 26
MALWARE Communication                   25
BOTNET C&C Command Execution            15
BOTNET C&C Malicious Download           12
SPOOFING ARP-Spoofing                   11
EXPLOIT Malware Infection                8
PROBE IP-Scan/Host-Scan                  4
MALWARE Malicious Download               3
MISC Malicious FTP-Upload (SFTP)         3
WEBATTACK XSS Stored                     2
DOS DNS-Amplification                    2
MALWARE Command Execution                2
WEBAT

In [11]:
# Categorize the labels
label_to_category = {
    "DOS UDP-Flood": "DOS",
    "DOS SYN-Flood": "DOS",
    "DOS HTTP-Flood": "DOS",
    "NORMAL": "Normal",
    "BRUTEFORCE HTTP-Bruteforce": "Bruteforce",
    "PROBE Port-Scan": "Probe",
    "BRUTEFORCE SSH-Bruteforce": "Bruteforce",
    "PROBE Vulnerability-Scan": "Probe",
    "BOTNET C&C Communication": "Botnet",
    "BRUTEFORCE FTP-Bruteforce": "Bruteforce",
    "MISC SSL-Proxy (MitM)": "Misc",
    "WEBATTACK SQL-Injection": "Webattack",
    "MALWARE Communication": "Malware",
    "BOTNET C&C Command Execution": "Botnet",
    "BOTNET C&C Malicious Download": "Botnet",
    "SPOOFING ARP-Spoofing": "Spoofing",
    "EXPLOIT Malware Infection": "Exploit",
    "PROBE IP-Scan/Host-Scan": "Probe",
    "MALWARE Malicious Download": "Malware",
    "MISC Malicious FTP-Upload (SFTP)": "Misc",
    "WEBATTACK XSS Stored": "Webattack",
    "DOS DNS-Amplification": "DOS",
    "MALWARE Command Execution": "Malware",
    "WEBATTACK XSS Infection": "Webattack",
    "SPOOFING DNS-Spoofing": "Spoofing",
    "WEBATTACK XSS Reflected": "Webattack",
    "DOS DHCP-Starvation": "DOS"
}

# Adding the new 'Category' column to the DataFrame
df['Label'] = df['Label'].map(label_to_category)

In [12]:
df['Label'].value_counts()

Label
DOS           1369189
Normal           9958
Bruteforce       3156
Probe            2102
Botnet             92
Misc               49
Malware            30
Webattack          30
Spoofing           12
Exploit             8
Name: count, dtype: int64

In [13]:
df['iflags'] = df['iflags'].str.strip()
df['riflags'] = df['riflags'].str.strip()
df['uflags'] = df['uflags'].str.strip()
df['ruflags'] = df['ruflags'].str.strip()

In [14]:
df = pd.get_dummies(df, columns = ['iflags', 'uflags', 'riflags', 'ruflags'])

In [15]:
df.columns

Index(['duration', 'protocol', 'srcip', 'srcport', 'dstip', 'dstport', 'pkt',
       'rpkt', 'oct', 'roct', 'entropy', 'rentropy', 'Label', 'dstport_cat',
       'srcport_cat', 'iflags_0', 'iflags_A', 'iflags_AF', 'iflags_AP',
       'iflags_APF', 'iflags_AR', 'iflags_AS', 'iflags_R', 'iflags_S',
       'uflags_0', 'uflags_A', 'uflags_AF', 'uflags_AP', 'uflags_APF',
       'uflags_APR', 'uflags_APRF', 'uflags_APRS', 'uflags_APRSF',
       'uflags_APS', 'uflags_APSF', 'uflags_AR', 'uflags_ARF', 'uflags_ARS',
       'uflags_ARSF', 'uflags_ASF', 'uflags_R', 'uflags_S', 'riflags_0',
       'riflags_A', 'riflags_AF', 'riflags_AP', 'riflags_AR', 'riflags_AS',
       'riflags_R', 'ruflags_0', 'ruflags_A', 'ruflags_AF', 'ruflags_AP',
       'ruflags_APF', 'ruflags_APR', 'ruflags_APRF', 'ruflags_APRS',
       'ruflags_APRSF', 'ruflags_APS', 'ruflags_APSF', 'ruflags_AR',
       'ruflags_ARF', 'ruflags_ARS', 'ruflags_ARSF', 'ruflags_AS',
       'ruflags_ASF', 'ruflags_R'],
      dtype='object')

In [16]:
for col, dtype in zip(df.columns, df.dtypes):
    if dtype == object:
        print(col)

srcip
dstip
Label


In [17]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

df['Label'] = le.fit_transform(df['Label'])

df.drop(['srcip', 'dstip'], axis=1, inplace=True)

In [18]:
df['Label'].value_counts()

Label
2    1369189
6       9958
1       3156
7       2102
0         92
5         49
4         30
9         30
8         12
3          8
Name: count, dtype: int64

In [19]:
target = 'Label'
benign_label = 6

## No Unknown Class

In [26]:
unknown_labels = []
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=True,
                                     scaling=True)

### Baseline

In [33]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.999826668496277, Macro F1: 0.773626134183051, Weighted F1: 0.9998741359193165
Detection Rate (TPR/Recall): 0.9999563531611223
False Alarm Rate (Micro FPR): 0.017570281124497992

Label: 0, F1: 0.9444444444444444, Precision: 0.9444444444444444, Recall: 0.9444444444444444, Accuracy: 0.9999927778540115
Label: 0, Negative F1: 0.9999963886922733, Negative Precision: 0.9999963886922733, Negative Recall: 0.9999963886922733


Label: 1, F1: 1.0, Precision: 1.0, Recall: 1.0, Accuracy: 1.0
Label: 1, Negative F1: 1.0, Negative Precision: 1.0, Negative Recall: 1.0


Label: 2, F1: 0.9999963482192091, Precision: 0.9999963482192091, Recall: 0.9999963482192091, Accuracy: 0.9999927778540115
Label: 2, Negative F1: 0.9996760609005507, Negative Precision: 0.9996760609005507, Negative Recall: 0.9996760609005507


Label: 3, F1: 0.5, Precision: 0.5, Recall: 0.5, Accuracy: 0.9999927778540115
Label: 3, Negative F1: 0.9999963889009259, Negative Precision: 0.9999963889009259, Negative 

### Proposed Approach

In [34]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 3
Finished training label 4
Finished training label 5
Finished training label 6
Finished training label 7
Finished training label 8
Finished training label 9

Global Metrics: Micro F1: 0.9997887404052934, Macro F1: 0.7856211853533358, Weighted F1: 0.9998351862963447
Detection Rate (TPR/Recall): 0.9999745393439881
False Alarm Rate (Micro FPR): 0.02961847389558233

Label: 0, F1: 0.9444444444444444, Precision: 0.9444444444444444, Recall: 0.9444444444444444, Accuracy: 0.9999927778540115
Label: 0, Negative F1: 0.9999963886922733, Negative Precision: 0.9999963886922733, Negative Recall: 0.9999963886922733
Confidence Threshold: 0.5


Label: 1, F1: 1.0, Precision: 1.0, Recall: 1.0, Accuracy: 1.0
Label: 1, Negative F1: 1.0, Negative 

## Unknown Class 1

In [35]:
unknown_labels = [1]
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=True,
                                     scaling=True)

### Baseline

In [36]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.988466232856431, Macro F1: 0.6410593650475082, Weighted F1: 0.9859515524650544
Detection Rate (TPR/Recall): 0.9962138983753587
False Alarm Rate (Micro FPR): 0.013684744044602128

Label: 0, F1: 1.0, Precision: 1.0, Recall: 1.0, Accuracy: 1.0
Label: 0, Negative F1: 1.0, Negative Precision: 1.0, Negative Recall: 1.0


Label: 2, F1: 0.9999926290650706, Precision: 0.9999926290650706, Recall: 0.9999926290650706, Accuracy: 0.999985555708023
Label: 2, Negative F1: 0.9996422182468694, Negative Precision: 0.9996422182468694, Negative Recall: 0.9996422182468694


Label: 3, F1: 1.0, Precision: 1.0, Recall: 1.0, Accuracy: 1.0
Label: 3, Negative F1: 1.0, Negative Precision: 1.0, Negative Recall: 1.0


Label: 4, F1: 0.7272727272727273, Precision: 0.8, Recall: 0.6666666666666666, Accuracy: 0.9999891667810173
Label: 4, Negative F1: 0.9999945832829278, Negative Precision: 0.9999927777236107, Negative Recall: 0.999996388848765


Label: 5, F1: 0.7368421052631579, Precision: 0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Proposed Approach

In [37]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 2
Finished training label 3
Finished training label 4
Finished training label 5
Finished training label 6
Finished training label 7
Finished training label 8
Finished training label 9

Global Metrics: Micro F1: 0.9960964300932379, Macro F1: 0.7827722761787095, Weighted F1: 0.9961619824641554
Detection Rate (TPR/Recall): 0.9962357202867399
False Alarm Rate (Micro FPR): 0.02027369488089204

Label: 0, F1: 0.9714285714285714, Precision: 1.0, Recall: 0.9444444444444444, Accuracy: 0.9999963889270058
Label: 0, Negative F1: 0.999998194349397, Negative Precision: 0.9999963887053147, Negative Recall: 1.0
Confidence Threshold: 0.5


Label: 2, F1: 0.9999889435568577, Precision: 0.9999926290107395, Recall: 0.9999852581301413, Accuracy: 0.9999783335620346
Label: 2, Negative F1: 0.999463423

## Unknown Class 7

In [52]:
unknown_labels = [7]
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=True,
                                     scaling=True)

### Baseline

In [39]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.9922542484273777, Macro F1: 0.6083037059603142, Weighted F1: 0.9911270964625793
Detection Rate (TPR/Recall): 0.997490416299928
False Alarm Rate (Micro FPR): 0.01818181818181818

Label: 0, F1: 1.0, Precision: 1.0, Recall: 1.0, Accuracy: 1.0
Label: 0, Negative F1: 1.0, Negative Precision: 1.0, Negative Recall: 1.0


Label: 1, F1: 0.993660855784469, Precision: 0.9874015748031496, Recall: 1.0, Accuracy: 0.9999711114160461
Label: 1, Negative F1: 0.9999855227202809, Negative Precision: 1.0, Negative Recall: 0.999971045859739


Label: 2, F1: 0.9998971348378588, Precision: 0.9997942908360241, Recall: 1.0, Accuracy: 0.9997977799123231
Label: 2, Negative F1: 0.9940740740740741, Negative Precision: 1.0, Negative Recall: 0.9882179675994109


Label: 3, F1: 0.5, Precision: 0.5, Recall: 0.5, Accuracy: 0.9999927778540115
Label: 3, Negative F1: 0.9999963889009259, Negative Precision: 0.9999963889009259, Negative Recall: 0.9999963889009259


Label: 4, F1: 0.00732600732600732

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Proposed Approach

In [56]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 3
Finished training label 4
Finished training label 5
Finished training label 6
Finished training label 8
Finished training label 9

Global Metrics: Micro F1: 0.9955222694871554, Macro F1: 0.6306382385953373, Weighted F1: 0.9965485292575981
Detection Rate (TPR/Recall): 0.999563550660857
False Alarm Rate (Micro FPR): 0.02878787878787879

Label: 0, F1: 0.9444444444444444, Precision: 0.9444444444444444, Recall: 0.9444444444444444, Accuracy: 0.9999927778540115
Label: 0, Negative F1: 0.9999963886922733, Negative Precision: 0.9999963886922733, Negative Recall: 0.9999963886922733
Confidence Threshold: 0.5


Label: 1, F1: 0.991304347826087, Precision: 0.9827586206896551, Recall: 1.0, Accuracy: 0.9999602781970635
Label: 1, Negative F

# CICI IDS 2017 - Improved

In [27]:
df = pd.read_pickle('./data/dataset_cicids2017_87_cols_no_time_no_id.pkl')

In [28]:
df['Label'].value_counts()

Label
BENIGN                        1597081
Portscan                       159040
DoS Hulk                       158449
DDoS                            95144
Infiltration - Portscan         67023
DoS GoldenEye                    7567
DoS Slowloris                    3998
FTP-Patator                      3972
SSH-Patator                      2961
DoS Slowhttptest                 1741
Botnet                            736
Web Attack - Brute Force           73
Infiltration                       36
Web Attack - XSS                   18
Web Attack - SQL Injection         13
Heartbleed                         11
Name: count, dtype: int64

In [29]:
# Categorize the labels
label_to_category = {
    "BENIGN": "BENIGN",
    "Portscan": "Portscan",
    "DoS Hulk": "DoS",
    "DDoS": "DDoS",
    "Infiltration - Portscan": "Infiltration",
    "DoS GoldenEye": "DoS",
    "DoS Slowloris": "DoS",
    "FTP-Patator": "FTP-Patator",
    "SSH-Patator": "SSH-Patator",
    "DoS Slowhttptest": "DoS",
    "Botnet": "Botnet",
    "Web Attack - Brute Force": "Web Attack",
    "Infiltration": "Infiltration",
    "Web Attack - XSS": "Web Attack",
    "Web Attack - SQL Injection": "Web Attack",
    "Heartbleed": "Heartbleed"
}

# Adding the new 'Category' column to the DataFrame
df['Label'] = df['Label'].map(label_to_category)

In [30]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

df['Label'] = le.fit_transform(df['Label'])

df.drop(['Src IP', 'Dst IP'], axis=1, inplace=True)

In [31]:
df['Label'].value_counts()

Label
0    1597081
3     171755
7     159040
2      95144
6      67059
4       3972
8       2961
1        736
9        104
5         11
Name: count, dtype: int64

In [32]:
df.replace([np.inf, -np.inf], np.nan, inplace=True)
df.dropna(inplace=True)

In [33]:
target = 'Label'
benign_label = 0

## No Unknown Class

In [48]:
unknown_labels = []
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=True,
                                     scaling=True)

### Baseline

In [49]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.9591559970636744, Macro F1: 0.6127086638991359, Weighted F1: 0.9683080434338532
Detection Rate (TPR/Recall): 0.995267382882703
False Alarm Rate (Micro FPR): 0.043748591178901494

Label: 0, F1: 0.9768954731246762, Precision: 0.9984505550543287, Recall: 0.9562514088210985, Accuracy: 0.9655649090025074
Label: 0, Negative F1: 0.9324266177763643, Negative Precision: 0.877050045752094, Negative Recall: 0.995267382882703


Label: 1, F1: 0.29822732012513037, Precision: 0.17610837438423646, Recall: 0.9727891156462585, Accuracy: 0.9983959844794219
Label: 1, Negative F1: 0.9991970746314954, Negative Precision: 0.9999904479893017, Negative Recall: 0.9984049591702927


Label: 2, F1: 0.9213778867242937, Precision: 0.9050998681942614, Recall: 0.9382521414682853, Accuracy: 0.9927378376059413
Label: 2, Negative F1: 0.9961931011045879, Negative Precision: 0.9970613686269214, Negative Recall: 0.9953263444873585


Label: 3, F1: 0.9352100089365505, Precision: 0.8986448410036227

### Proposed Approach

In [50]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 3
Finished training label 4
Finished training label 5
Finished training label 6
Finished training label 7
Finished training label 8
Finished training label 9

Global Metrics: Micro F1: 0.9977292314074117, Macro F1: 0.9935099508889607, Weighted F1: 0.9977358079534938
Detection Rate (TPR/Recall): 0.9999500778785095
False Alarm Rate (Micro FPR): 0.0016342324742655346

Label: 0, F1: 0.9991743888205666, Precision: 0.9999843210546285, Recall: 0.9983657675257345, Accuracy: 0.9987439581287598
Label: 0, Negative F1: 0.997375876989877, Negative Precision: 0.9948148957515918, Negative Recall: 0.9999500778785095
Confidence Threshold: 0.5


Label: 1, F1: 0.9966101694915255, Precision: 0.9932432432432432, Recall: 1.0, Accuracy: 0.99999761

## Unknown Class 6

In [51]:
unknown_labels = [6]
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=True,
                                     scaling=True)

### Baseline

In [52]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.8154714804610412, Macro F1: 0.4847972342267187, Weighted F1: 0.7668703462870066
Detection Rate (TPR/Recall): 0.8499290461270426
False Alarm Rate (Micro FPR): 0.032947847604481544

Label: 0, F1: 0.946184795651867, Precision: 0.9261989815307441, Recall: 0.9670521523955184, Accuracy: 0.9273164081492569
Label: 0, Negative F1: 0.888073286746139, Negative Precision: 0.9298021795600916, Negative Recall: 0.8499290461270426


Label: 1, F1: 0.21422594142259413, Precision: 0.11996251171508904, Recall: 1.0, Accuracy: 0.9977620050909022
Label: 1, Negative F1: 0.9988794067419378, Negative Precision: 1.0, Negative Recall: 0.9977613221312023


Label: 2, F1: 0.492003367003367, Precision: 0.32723491892108053, Recall: 0.9909778988798063, Accuracy: 0.9194512503217565
Label: 2, Negative F1: 0.9562577011089597, Negative Precision: 0.9995968167464464, Negative Recall: 0.916520492138829


Label: 3, F1: 0.9784049244845101, Precision: 0.9812103629739576, Recall: 0.9756154826591534, 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Proposed Approach

In [53]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 3
Finished training label 4
Finished training label 5
Finished training label 7
Finished training label 8
Finished training label 9

Global Metrics: Micro F1: 0.8404707654466933, Macro F1: 0.7814249490209024, Weighted F1: 0.7830486874912842
Detection Rate (TPR/Recall): 0.8111924465738412
False Alarm Rate (Micro FPR): 0.0003715380231291437

Label: 0, F1: 0.9535825196782657, Precision: 0.9115918144467581, Recall: 0.9996284619768708, Accuracy: 0.9356987596884444
Label: 0, Negative F1: 0.8953973922045293, Negative Precision: 0.9991087883848304, Negative Recall: 0.8111924465738412
Confidence Threshold: 0.5


Label: 1, F1: 0.9961089494163424, Precision: 0.9922480620155039, Recall: 1.0, Accuracy: 0.9999976166188401
Label: 1, Negati

## Unknown Class 8

In [54]:
unknown_labels = [8]
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=True,
                                     scaling=True)

### Baseline

In [55]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.9846629422363742, Macro F1: 0.7462520772950112, Weighted F1: 0.9813948540539289
Detection Rate (TPR/Recall): 0.9680371118652047
False Alarm Rate (Micro FPR): 0.007726456975536034

Label: 0, F1: 0.9910176486773319, Precision: 0.9897649294160137, Recall: 0.9922735430244639, Accuracy: 0.9863837434337849
Label: 0, Negative F1: 0.9718736307287846, Negative Precision: 0.9757406803285981, Negative Recall: 0.9680371118652047


Label: 1, F1: 0.6389496717724289, Precision: 0.4694533762057878, Recall: 1.0, Accuracy: 0.999606742108625
Label: 1, Negative F1: 0.999803263911328, Negative Precision: 1.0, Negative Recall: 0.9996066052176069


Label: 2, F1: 0.9776223776223776, Precision: 0.9585555386256285, Recall: 0.9974631361978754, Accuracy: 0.9979407586778908
Label: 2, Negative F1: 0.9989207204191203, Negative Precision: 0.9998799648897303, Negative Recall: 0.9979633147053171


Label: 3, F1: 0.9821742478171808, Precision: 0.9748240858230477, Recall: 0.9896360921626607, A

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Proposed Approach

In [56]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 3
Finished training label 4
Finished training label 5
Finished training label 6
Finished training label 7
Finished training label 9

Global Metrics: Micro F1: 0.9904521750736465, Macro F1: 0.8903917546524068, Weighted F1: 0.9872803363588348
Detection Rate (TPR/Recall): 0.9708518859967439
False Alarm Rate (Micro FPR): 0.001350713138755077

Label: 0, F1: 0.9946672980401811, Precision: 0.990716938463797, Recall: 0.9986492868612449, Accuracy: 0.9918941206753549
Label: 0, Negative F1: 0.9831116143032361, Negative Precision: 0.9956849294400467, Negative Recall: 0.9708518859967439
Confidence Threshold: 0.5


Label: 1, F1: 0.9965870307167235, Precision: 0.9931972789115646, Recall: 1.0, Accuracy: 0.9999976166188401
Label: 1, Negative

## Unknown Class 9

In [57]:
unknown_labels = [9]
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=True,
                                     scaling=True)

### Baseline

In [58]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.9614678767887276, Macro F1: 0.604041470699076, Weighted F1: 0.9676283343974376
Detection Rate (TPR/Recall): 0.9953602075434045
False Alarm Rate (Micro FPR): 0.032897868183070715

Label: 0, F1: 0.9825486787038524, Precision: 0.9984966587027296, Recall: 0.9671021318169293, Accuracy: 0.973851925295301
Label: 0, Negative F1: 0.9478765304231735, Negative Precision: 0.9047169896881037, Negative Recall: 0.9953602075434045


Label: 1, F1: 0.18985695708712613, Precision: 0.10496046010064701, Recall: 0.9931972789115646, Accuracy: 0.9970303070748286
Label: 1, Negative F1: 0.9985124270838557, Negative Precision: 0.9999976086909735, Negative Recall: 0.997031650473863


Label: 2, F1: 0.8555723264772804, Precision: 0.7486595174262735, Recall: 0.998107752956636, Accuracy: 0.9847201433842105
Label: 2, Negative F1: 0.9919333631119024, Negative Precision: 0.9999086776524069, Negative Recall: 0.9840842647679299


Label: 3, F1: 0.9712615114749306, Precision: 0.9752245640814888,

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Proposed Approach

In [59]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 3
Finished training label 4
Finished training label 5
Finished training label 6
Finished training label 7
Finished training label 8

Global Metrics: Micro F1: 0.9977977558082999, Macro F1: 0.8973637150033481, Weighted F1: 0.9978688024860733
Detection Rate (TPR/Recall): 0.9982937537417681
False Alarm Rate (Micro FPR): 0.0010865753150129012

Label: 0, F1: 0.9991887591460359, Precision: 0.9994642454319874, Recall: 0.9989134246849871, Accuracy: 0.9987654085591984
Label: 0, Negative F1: 0.9974179527056666, Negative Precision: 0.996543686999482, Negative Recall: 0.9982937537417681
Confidence Threshold: 0.5


Label: 1, F1: 0.9966101694915255, Precision: 0.9932432432432432, Recall: 1.0, Accuracy: 0.9999976166188401
Label: 1, Negativ

# NF-UNSW-NB15-v2

In [35]:
df = pd.read_csv('./data/NF-UNSW-NB15-v2.csv', dtype={'Attack': 'category'})

In [36]:
df.drop(['Label', 'IPV4_SRC_ADDR', 'IPV4_DST_ADDR'], axis=1, inplace=True)

In [37]:
df['Attack'].value_counts()

Attack
Benign            2295222
Exploits            31551
Fuzzers             22310
Generic             16560
Reconnaissance      12779
DoS                  5794
Analysis             2299
Backdoor             2169
Shellcode            1427
Worms                 164
Name: count, dtype: int64

In [38]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

df['Attack'] = le.fit_transform(df['Attack'])

In [39]:
df['Attack'].value_counts()

Attack
2    2295222
4      31551
5      22310
6      16560
7      12779
3       5794
0       2299
1       2169
8       1427
9        164
Name: count, dtype: int64

In [40]:
target = 'Attack'
benign_label = 2

## No Unknown Class

In [21]:
unknown_labels = []
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=True,
                                     scaling=True)

### Baseline

In [22]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.9874533265000889, Macro F1: 0.6683945521506386, Weighted F1: 0.9894876179630785
Detection Rate (TPR/Recall): 0.9943190784282784
False Alarm Rate (Micro FPR): 0.0038079138383248666

Label: 0, F1: 0.19146792072556265, Precision: 0.11323003575685339, Recall: 0.6195652173913043, Accuracy: 0.9949650144857809
Label: 0, Negative F1: 0.9974746441472492, Negative Precision: 0.9996319957605911, Negative Recall: 0.9953265842397847


Label: 1, F1: 0.16930488644184447, Precision: 0.12070657507360157, Recall: 0.2834101382488479, Accuracy: 0.9974751859095711
Label: 1, Negative F1: 0.9987356715553335, Negative Precision: 0.9993480575889451, Negative Recall: 0.9981240355847


Label: 2, F1: 0.9979747896225427, Precision: 0.9997638848807618, Recall: 0.9961920861616751, Accuracy: 0.9961176015312045
Label: 2, Negative F1: 0.9532045786899299, Negative Precision: 0.9153551886107211, Negative Recall: 0.9943190784282784


Label: 3, F1: 0.4016136261766024, Precision: 0.4179104477611

### Proposed Approach

In [23]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 3
Finished training label 4
Finished training label 5
Finished training label 6
Finished training label 7
Finished training label 8
Finished training label 9

Global Metrics: Micro F1: 0.9897450984512577, Macro F1: 0.6584294117639793, Weighted F1: 0.9887656547154944
Detection Rate (TPR/Recall): 0.9960023144495292
False Alarm Rate (Micro FPR): 0.003973475309556382

Label: 0, F1: 0.13043478260869565, Precision: 0.12450592885375494, Recall: 0.13695652173913042, Accuracy: 0.9982428800033469
Label: 0, Negative F1: 0.9991205514561156, Negative Precision: 0.999168671696517, Negative Recall: 0.999072435850459
Confidence Threshold: 0.5


Label: 1, F1: 0.16393442622950818, Precision: 0.391304347826087, Recall: 0.10368663594470046, Acc

## Unknown Class 6

In [24]:
unknown_labels = [6]
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=True,
                                     scaling=True)

### Baseline

In [25]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.9545617136103587, Macro F1: 0.4655295980030081, Weighted F1: 0.9461498536998572
Detection Rate (TPR/Recall): 0.830043997485858
False Alarm Rate (Micro FPR): 0.0036707116205586744

Label: 0, F1: 0.1520353114271702, Precision: 0.08537592949600661, Recall: 0.6935123042505593, Accuracy: 0.9927665226804447
Label: 0, Negative F1: 0.9963677691506168, Negative Precision: 0.999711228774261, Negative Recall: 0.9930465988844408


Label: 1, F1: 0.12722063037249284, Precision: 0.08390022675736962, Recall: 0.26303317535545023, Accuracy: 0.9968141741013063
Label: 1, Negative F1: 0.9984041745034656, Negative Precision: 0.9993476418616749, Negative Recall: 0.9974624868884687


Label: 2, F1: 0.9921382665357493, Precision: 0.9879823557516028, Recall: 0.9963292883794413, Accuracy: 0.9852611101233122
Label: 2, Negative F1: 0.8823116752964757, Negative Precision: 0.9416042780748664, Negative Recall: 0.830043997485858


Label: 3, F1: 0.25568672617333715, Precision: 0.189177673625

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Proposed Approach

In [26]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 3
Finished training label 4
Finished training label 5
Finished training label 7
Finished training label 8
Finished training label 9

Global Metrics: Micro F1: 0.9580403928418278, Macro F1: 0.472419874320215, Weighted F1: 0.9590750586298448
Detection Rate (TPR/Recall): 0.9945631678189818
False Alarm Rate (Micro FPR): 0.0038992907324615954

Label: 0, F1: 0.11931243680485339, Precision: 0.1088560885608856, Recall: 0.1319910514541387, Accuracy: 0.9981780339082323
Label: 0, Negative F1: 0.9990880736576832, Negative Precision: 0.9991874566765722, Negative Recall: 0.9989887104068609
Confidence Threshold: 0.5


Label: 1, F1: 0.144, Precision: 0.22167487684729065, Recall: 0.1066350710900474, Accuracy: 0.9988808819068935
Label: 1, Neg

## Unknown Class 3

In [27]:
unknown_labels = [3]
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=True,
                                     scaling=True)

### Baseline

In [28]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.978182426708224, Macro F1: 0.5819286031522931, Weighted F1: 0.9768008066574982
Detection Rate (TPR/Recall): 0.9883691206543967
False Alarm Rate (Micro FPR): 0.0036780961892547677

Label: 0, F1: 0.142301532830016, Precision: 0.07941777323799795, Recall: 0.6835164835164835, Accuracy: 0.9921578061101756
Label: 0, Negative F1: 0.9960608948461711, Negative Precision: 0.9996962915938153, Negative Recall: 0.9924518425460637


Label: 1, F1: 0.12160694896851248, Precision: 0.07926397735314933, Recall: 0.26107226107226106, Accuracy: 0.9966154521969229
Label: 1, Negative F1: 0.9983044595438598, Negative Precision: 0.9993349306187873, Negative Recall: 0.9972761114344697


Label: 2, F1: 0.9978573797020601, Precision: 0.9993975956785764, Recall: 0.9963219038107453, Accuracy: 0.9959314304839402
Label: 2, Negative F1: 0.959766667356184, Negative Precision: 0.9327731092436975, Negative Recall: 0.9883691206543967


Label: 4, F1: 0.7261582323592303, Precision: 0.6546716360364

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Proposed Approach

In [29]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 4
Finished training label 5
Finished training label 6
Finished training label 7
Finished training label 8
Finished training label 9

Global Metrics: Micro F1: 0.9821903337482089, Macro F1: 0.6375351318269535, Weighted F1: 0.9827805634729739
Detection Rate (TPR/Recall): 0.9919478527607362
False Alarm Rate (Micro FPR): 0.003854081652855474

Label: 0, F1: 0.09704321455648218, Precision: 0.07407407407407407, Recall: 0.14065934065934066, Accuracy: 0.9975086548618882
Label: 0, Negative F1: 0.9987526065913902, Negative Precision: 0.9991806215959648, Negative Recall: 0.998324958123953
Confidence Threshold: 0.5


Label: 1, F1: 0.1248630887185104, Precision: 0.11776859504132231, Recall: 0.13286713286713286, Accuracy: 0.998328644193659

## Unknown Class 8

In [30]:
unknown_labels = [8]
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=True,
                                     scaling=True)

### Baseline

In [31]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.9847025969815189, Macro F1: 0.5614423778362223, Weighted F1: 0.9856571178584113
Detection Rate (TPR/Recall): 0.9944300775810623
False Alarm Rate (Micro FPR): 0.0037973826665531163

Label: 0, F1: 0.2152579930357708, Precision: 0.1259259259259259, Recall: 0.7407407407407407, Accuracy: 0.9948144042003535
Label: 0, Negative F1: 0.997398607063742, Negative Precision: 0.9997496607798382, Negative Recall: 0.995058585080277


Label: 1, F1: 0.17107309486780714, Precision: 0.12895662368112543, Recall: 0.2540415704387991, Accuracy: 0.9977701310518664
Label: 1, Negative F1: 0.9988835638819301, Negative Precision: 0.999323137790705, Negative Recall: 0.9984443765153197


Label: 2, F1: 0.9979754273095277, Precision: 0.9997545582047686, Recall: 0.9962026173334468, Accuracy: 0.9961280605788037
Label: 2, Negative F1: 0.9557632100948785, Negative Precision: 0.9199907982516679, Negative Recall: 0.9944300775810623


Label: 3, F1: 0.346646571213263, Precision: 0.3070761014686248

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Proposed Approach

In [32]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 3
Finished training label 4
Finished training label 5
Finished training label 6
Finished training label 7
Finished training label 9

Global Metrics: Micro F1: 0.9849473386953383, Macro F1: 0.5999470022720875, Weighted F1: 0.9867864306217933
Detection Rate (TPR/Recall): 0.9950268549830913
False Alarm Rate (Micro FPR): 0.00410746221724348

Label: 0, F1: 0.12040816326530612, Precision: 0.11324376199616124, Recall: 0.12854030501089325, Accuracy: 0.9981968601939107
Label: 0, Negative F1: 0.9990975050516684, Negative Precision: 0.9991623633081623, Negative Recall: 0.9990326552148678
Confidence Threshold: 0.5


Label: 1, F1: 0.1657848324514991, Precision: 0.35074626865671643, Recall: 0.10854503464203233, Accuracy: 0.999010574097122

# NF-CSE-CIC-IDS2018-v2

In [43]:
df = pd.read_csv('./data/NF-CSE-CIC-IDS2018-v2.csv', dtype={'Attack': 'category'})

In [44]:
df.drop(['Label', 'IPV4_SRC_ADDR', 'IPV4_DST_ADDR'], axis=1, inplace=True)

In [45]:
df['Attack'].value_counts()

Attack
Benign                      16635567
DDOS attack-HOIC             1080858
DoS attacks-Hulk              432648
DDoS attacks-LOIC-HTTP        307300
Bot                           143097
Infilteration                 116361
SSH-Bruteforce                 94979
DoS attacks-GoldenEye          27723
FTP-BruteForce                 25933
DoS attacks-SlowHTTPTest       14116
DoS attacks-Slowloris           9512
Brute Force -Web                2143
DDOS attack-LOIC-UDP            2112
Brute Force -XSS                 927
SQL Injection                    432
Name: count, dtype: int64

In [46]:
# Categorize the labels
label_to_category = {
    "Benign": "Benign",
    "DDOS attack-HOIC": "DDoS",
    "DoS attacks-Hulk": "DoS",
    "DDoS attacks-LOIC-HTTP ": "DDoS",
    "Bot": "Bot",
    "Infilteration": "Infilteration",
    "SSH-Bruteforce": "SSH-Bruteforce",
    "DoS attacks-GoldenEye": "DoS",
    "FTP-BruteForce": "FTP-BruteForce",
    "DoS attacks-SlowHTTPTest": "DoS",
    "DoS attacks-Slowloris": "DoS",
    "Brute Force -Web ": "Web Attack",
    "DDOS attack-LOIC-UDP": "DDoS",
    "Brute Force -XSS": "Web Attack",
    "SQL Injection": "Web Attack",
}

# Adding the new 'Category' column to the DataFrame
df['Attack'] = df['Attack'].map(label_to_category)

In [47]:
df['Attack'].value_counts()

Attack
Benign            16635567
DDoS               1082970
DoS                 483999
Bot                 143097
Infilteration       116361
SSH-Bruteforce       94979
FTP-BruteForce       25933
Web Attack            1359
Name: count, dtype: int64

In [48]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

df['Attack'] = le.fit_transform(df['Attack'])

In [49]:
df['Attack'].value_counts()

Attack
0    16635567
2     1082970
3      483999
8      309443
1      143097
5      116361
6       94979
4       25933
7        1359
Name: count, dtype: int64

In [50]:
target = 'Attack'
benign_label = 0

## No Unknown Class

In [12]:
unknown_labels = []
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=False,
                                     scaling=True)

  temp **= 2
  new_unnormalized_variance -= correction**2 / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  upper_bound = n_samples * eps * var + (n_samples * mean * eps) ** 2


### Baseline

In [13]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.9660474305999193, Macro F1: 0.6295978850615538, Weighted F1: 0.9687753588284137
Detection Rate (TPR/Recall): 0.9524852312079853
False Alarm Rate (Micro FPR): 0.0021060294297099527

Label: 0, F1: 0.9957313891204856, Precision: 0.9935781606588049, Recall: 0.9978939705702901, Accuracy: 0.9924668050901596
Label: 0, Negative F1: 0.9679726913920279, Negative Precision: 0.9839721302175782, Negative Recall: 0.9524852312079853


Label: 1, F1: 0.364831541218638, Precision: 0.22949829096057936, Recall: 0.8891645410391698, Accuracy: 0.9765514554843914
Label: 1, Negative F1: 0.9880552451986955, Negative Precision: 0.9991351907828568, Negative Recall: 0.9772183472382105


Label: 2, F1: 0.8394980269400052, Precision: 0.9409825539063083, Recall: 0.7577726068127464, Accuracy: 0.9833915625888192
Label: 2, Negative F1: 0.9912426862497239, Negative Precision: 0.9854438522228471, Negative Recall: 0.9971101706049271


Label: 3, F1: 0.9747175177763413, Precision: 0.97585296401760

### Proposed Approach

In [14]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 3
Finished training label 4
Finished training label 5
Finished training label 6
Finished training label 7
Finished training label 8

Global Metrics: Micro F1: 0.9952559849356019, Macro F1: 0.8307669222124815, Weighted F1: 0.9942364781199347
Detection Rate (TPR/Recall): 0.965836927736987
False Alarm Rate (Micro FPR): 0.0005452172663756036

Label: 0, F1: 0.9974140062985886, Precision: 0.995381546961756, Recall: 0.9994547827336244, Accuracy: 0.9954368411497795
Label: 0, Negative F1: 0.9806180245739574, Negative Precision: 0.9958585704077276, Negative Recall: 0.965836927736987
Confidence Threshold: 0.5


Label: 1, F1: 0.9999825293942941, Precision: 0.9999650593990217, Recall: 1.0, Accuracy: 0.9999997353616626
Label: 1, Negative 

## Unknown Class 5

In [12]:
unknown_labels = [5]
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=False,
                                     scaling=True)

  temp **= 2
  new_unnormalized_variance -= correction**2 / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  upper_bound = n_samples * eps * var + (n_samples * mean * eps) ** 2


### Baseline

In [13]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.9580654090700027, Macro F1: 0.681504441853215, Weighted F1: 0.9429809362341894
Detection Rate (TPR/Recall): 0.7608832819383672
False Alarm Rate (Micro FPR): 0.002418756085030002

Label: 0, F1: 0.97948904170733, Precision: 0.9620413926914106, Recall: 0.99758124391497, Accuracy: 0.9641256269943806
Label: 0, Negative F1: 0.8570545820354432, Negative Precision: 0.9810542325008751, Negative Recall: 0.7608832819383672


Label: 1, F1: 0.9370975382941765, Precision: 0.9181996355259086, Recall: 0.9567896811178789, Accuracy: 0.9990512715607469
Label: 1, Negative F1: 0.9995220312910181, Negative Precision: 0.9996783707531804, Negative Recall: 0.999365740720992


Label: 2, F1: 0.9491895890779474, Precision: 0.9633459608887068, Recall: 0.9354432477216239, Accuracy: 0.9944018406125636
Label: 2, Negative F1: 0.9970377328475853, Negative Precision: 0.996184277353451, Negative Recall: 0.9978926519481197


Label: 3, F1: 0.9050461145300359, Precision: 0.9339576923510385, Reca

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unknown Label: 5, F1: 0.0, Precision: 0.0, Recall: 0.0, Accuracy: 0.9692064184323778
Time taken: 191.68124914169312


### Proposed Approach

In [14]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 3
Finished training label 4
Finished training label 6
Finished training label 7
Finished training label 8

Global Metrics: Micro F1: 0.9560742702200892, Macro F1: 0.6646795160854394, Weighted F1: 0.9452819690445292
Detection Rate (TPR/Recall): 0.7854948239933046
False Alarm Rate (Micro FPR): 0.009484248344116748

Label: 0, F1: 0.9778886001691749, Precision: 0.9655793394917794, Recall: 0.9905157516558832, Accuracy: 0.9615374640554978
Label: 0, Negative F1: 0.8523573654718998, Negative Precision: 0.9316618736189916, Negative Recall: 0.7854948239933046
Confidence Threshold: 0.5


Label: 1, F1: 0.9999462587105673, Precision: 0.9998925231970767, Recall: 1.0, Accuracy: 0.999999206084988
Label: 1, Negative F1: 0.999999600088567, Ne

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Label: 6, F1: 0.0, Precision: 0.0, Recall: 0.0, Accuracy: 0.9950975748013492
Label: 6, Negative F1: 0.9975427641933058, Negative Precision: 0.9950975748013492, Negative Recall: 1.0
Confidence Threshold: inf


Label: 7, F1: 0.0035087719298245615, Precision: 0.003278688524590164, Recall: 0.0037735849056603774, Accuracy: 0.9998496854244084
Label: 7, Negative F1: 0.9999248370432693, Negative Precision: 0.9999301298394019, Negative Recall: 0.9999195443031677
Confidence Threshold: 0.09909130181423327


Label: 8, F1: 0.9966173818369196, Precision: 0.999849914951806, Recall: 0.9934056830420015, Accuracy: 0.9998922921967152
Label: 8, Negative F1: 0.9999452748312764, Negative Precision: 0.9998929755381878, Negative Recall: 0.9999975795956688
Confidence Threshold: 0.784388228365029


Unknown Label: 5, F1: 0.006693351614734219, Precision: 0.023528192371475954, Recall: 0.00390165089677813, Accuracy: 0.9643402486859384
Time taken: 236.85258054733276
--------------------------------------------------

## Unknown Class 4

In [12]:
unknown_labels = [4]
known_labels = [l for l in np.unique(df[target]) if l not in unknown_labels]


X_offline, X_online, y_offline, y_online = prepare_data(df=df,
                                     offline_ratio=0.8,
                                     known_classes=known_labels,
                                     target=target,
                                     random_state=42,
                                     stratify=True,
                                     resample_smote=False,
                                     scaling=True)

  temp **= 2
  new_unnormalized_variance -= correction**2 / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count
  upper_bound = n_samples * eps * var + (n_samples * mean * eps) ** 2


### Baseline

In [13]:
# With resampling
clf = test_normal_classifier(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, random_state=42, n_jobs=n_jobs, verbose=0)


Global Metrics: Micro F1: 0.9462421620740448, Macro F1: 0.46711000098795213, Weighted F1: 0.9491448851527422
Detection Rate (TPR/Recall): 0.9469782090568607
False Alarm Rate (Micro FPR): 0.03388245121677745

Label: 0, F1: 0.9790172161209743, Precision: 0.9922660196230042, Recall: 0.9661175487832225, Accuracy: 0.9637374025535482
Label: 0, Negative F1: 0.8665800094446635, Negative Precision: 0.7987650664584511, Negative Recall: 0.9469782090568607


Label: 1, F1: 0.7863274362005155, Precision: 0.8766472240224671, Recall: 0.7128803316702972, Accuracy: 0.9970818330544927
Label: 1, Negative F1: 0.9985308845322138, Negative Precision: 0.9978240476813673, Negative Recall: 0.9992387235086447


Label: 2, F1: 0.9832364056652234, Precision: 0.9715219273002479, Recall: 0.9952368351415718, Accuracy: 0.9980654937542706
Label: 2, Negative F1: 0.9989735193262018, Negative Precision: 0.9997116423150569, Negative Recall: 0.9982364854985731


Label: 3, F1: 0.7672033219885116, Precision: 0.698318547115622

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unknown Label: 4, F1: 0.0, Precision: 0.0, Recall: 0.0, Accuracy: 0.9931371339985635
Time taken: 179.24500107765198


### Proposed Approach

In [14]:
count = 1
for params in list(grid):
    print('---------------------------------------------------------------------------')
    print(f'{count}/{len(grid)}')
    print(params)
    models = one_class_test(lgb.LGBMClassifier, known_labels, unknown_labels, X_online, y_online, X_offline, y_offline, benign_label=benign_label, **params, random_state=42, n_jobs=n_jobs, verbose=0)
    count+=1

---------------------------------------------------------------------------
1/4
{'filter_unknown': 0, 'meta_classifier': 0, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 3
Finished training label 5
Finished training label 6
Finished training label 7
Finished training label 8

Global Metrics: Micro F1: 0.9858159143969077, Macro F1: 0.7018454860399498, Weighted F1: 0.9832104033776541
Detection Rate (TPR/Recall): 0.9673689138576779
False Alarm Rate (Micro FPR): 0.001676125219186768

Label: 0, F1: 0.9968494584327685, Precision: 0.9953793907690268, Recall: 0.9983238747808132, Accuracy: 0.9944743515169864
Label: 0, Negative F1: 0.9775496423855871, Negative Precision: 0.9879469369659142, Negative Recall: 0.9673689138576779
Confidence Threshold: 0.5


Label: 1, F1: 0.9999473009292603, Precision: 0.9998946074126119, Recall: 1.0, Accuracy: 0.999999206084988
Label: 1, Negative F1: 0.9999996000297044, N

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Label: 7, F1: 0.0, Precision: 0.0, Recall: 0.0, Accuracy: 0.9999285476489266
Label: 7, Negative F1: 0.9999642725480581, Negative Precision: 0.9999285476489266, Negative Recall: 1.0
Confidence Threshold: inf


Label: 8, F1: 0.996781736558658, Precision: 0.9997058294791548, Recall: 0.9938746994215897, Accuracy: 0.9998954678567629
Label: 8, Negative F1: 0.9999468710905697, Negative Precision: 0.9998985892063946, Negative Recall: 0.9999951576377235
Confidence Threshold: 0.8122935636151236


Unknown Label: 4, F1: 0.0, Precision: 0.0, Recall: 0.0, Accuracy: 0.5313707577812934
Time taken: 232.91973686218262
---------------------------------------------------------------------------
3/4
{'filter_unknown': 0, 'meta_classifier': 2, 'resample_smote': 0, 'use_min_conf': 0}
Finished training label 0
Finished training label 1
Finished training label 2
Finished training label 3
Finished training label 5
Finished training label 6
Finished training label 7
Finished training label 8

Global Metrics: Mic