In [None]:
import pandas as pd
import numpy as np
import socket
import struct
import pennylane as qml
import base64
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.ensemble import (RandomForestClassifier, AdaBoostClassifier, 
                            ExtraTreesClassifier, GradientBoostingClassifier)
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier

from pathlib import Path
import json
from collections import defaultdict
from imblearn.under_sampling import RandomUnderSampler
from sklearn.metrics import precision_recall_fscore_support, accuracy_score, roc_auc_score

In [None]:
# Load the flow records from a CSV file located relative to the notebook's
# working directory.
df = pd.read_csv(r"TestbedThuJun17Flows.csv")
# Show (rows, columns) as a quick sanity check on the load.
df.shape


In [None]:
# Print the column names, dtypes and non-null counts of the freshly loaded frame.
df.info()

In [None]:
# Drop payload columns
# These four columns hold the payload dumps (Base64 / UTF variants); the
# feature selection further down only keeps int64 columns, so they are
# removed up front.
payload_columns = [
    "sourcePayloadAsBase64", "sourcePayloadAsUTF",
    "destinationPayloadAsBase64", "destinationPayloadAsUTF"
]
# Reassign instead of mutating in place, and tolerate columns that are already
# gone, so this cell can be re-run without raising KeyError.
df = df.drop(columns=payload_columns, errors="ignore")

In [None]:
# Convert labels
# Encode the string labels as integers: Normal -> 0, Attack -> 1.
label_map = {"Normal": 0, "Attack": 1}

# Only encode when the column is not already 0/1. Mapping an already encoded
# column through the string dictionary would silently turn every label into
# NaN, which is what happens when this cell is executed twice.
if not df["Label"].isin(list(label_map.values())).all():
    encoded_labels = df["Label"].map(label_map)
    if encoded_labels.isna().any():
        # Fail loudly instead of carrying NaN labels into model training.
        unknown = sorted(
            df.loc[encoded_labels.isna(), "Label"].astype(str).unique()
        )
        raise ValueError(
            f"Unexpected values in 'Label' column (reload the data?): {unknown}"
        )
    df["Label"] = encoded_labels.astype("int64")

In [None]:
# Re-inspect dtypes and non-null counts after the payload drop and label encoding.
df.info()

In [None]:
# Select numeric features
# Target vector: the encoded Label column.
y = df["Label"]

# Feature matrix: every int64 column except the target. Label is expected to
# be int64 here (assuming the label encoding left no missing values), so it
# shows up in numeric_cols and has to be removed explicitly.
numeric_cols = df.select_dtypes(include=["int64"]).columns
X = df.loc[:, numeric_cols].drop(columns="Label")


In [None]:
import pennylane as qml
n_features = X.shape[1]
if n_features < 1:
    # log2(0) is -inf and cannot be turned into a qubit count.
    raise ValueError("X has no feature columns to embed.")

# N qubits hold 2**N amplitudes, so N = ceil(log2(n_features)) is the smallest
# register that fits one amplitude per feature. Keep at least one wire:
# log2(1) == 0 would otherwise ask for a zero-qubit device.
N = max(1, int(np.ceil(np.log2(n_features))))
wires = range(N)
dev = qml.device('default.qubit', wires=wires)

@qml.qnode(dev)
def circuit(f=None):
    """Amplitude-embed ``f`` on ``wires`` and return the resulting state vector.

    ``f`` is padded with zeros up to ``2**N`` entries and normalized by the
    embedding itself (``pad_with=0``, ``normalize=True``).
    """
    qml.AmplitudeEmbedding(f, wires=wires, pad_with=0, normalize=True)
    return qml.state()

# Raw feature values; despite the name, normalization happens inside the
# embedding, not here.
# NOTE(review): a row consisting only of zeros cannot be normalized -- confirm
# the data contains none.
X_norm = X.values
X_quantum = circuit(X_norm)

# Keep only the real part of the returned state amplitudes.
X_real = np.real(np.array(X_quantum))

# Create column names based on index
column_names = [f'feature_{i}' for i in range(X_real.shape[1])]
# Reuse X's row index so the embedded features stay label-aligned with y.
X_real = pd.DataFrame(X_real, columns=column_names, index=X.index)




In [None]:
# import pennylane as qml
# from pennylane import numpy as np

# N = X.shape[1]
# wires = range(N)
# dev = qml.device("default.qubit", wires)

# @qml.qnode(dev)
# def circuit(val_list):
#     qml.AngleEmbedding(val_list, wires, rotation="Y")
#     return [qml.expval(qml.PauliZ(w)) for w in wires]

# # Function to process DataFrame through quantum circuit
# def quantum_transform(df):
#     # Convert DataFrame to numpy array
#     values = df.values
#     # Process each row through quantum circuit
#     quantum_features = np.array([circuit(row) for row in values])
#     # Remove tensor properties and convert to regular numpy array
#     quantum_features = np.array(quantum_features).astype(float)
#     return quantum_features
# # Transform your data
# X_real = quantum_transform(X)

# quantum_cols = [f'quantum_state_{i}' for i in range(len(X_real[0]))]
# X_real = pd.DataFrame(X_real, columns=quantum_cols)
# X_real.head()


In [None]:
# Show (rows, columns) of the embedded feature frame.
X_real.shape

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_real,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
import time
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score, cohen_kappa_score
from sklearn.model_selection import train_test_split
import xgboost as xgb
import lightgbm as lgb
import seaborn as sns
import matplotlib.pyplot as plt
# Create a dictionary of models to evaluate
# The four SVM entries differ only in their kernel, so they are generated from
# a display-name -> kernel table; the remaining classifiers are added after.
svm_kernels = {
    "SVM (Linear)": "linear",
    "SVM (Poly)": "poly",
    "SVM (RBF)": "rbf",
    "SVM (Sigmoid)": "sigmoid",
}

models = {
    label: SVC(kernel=kernel, random_state=42)
    for label, kernel in svm_kernels.items()
}
models.update(
    {
        "KNN": KNeighborsClassifier(),
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "AdaBoost": AdaBoostClassifier(random_state=42),
        "Extra Trees": ExtraTreesClassifier(n_estimators=100, random_state=42),
        "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    }
)

# Function to calculate metrics
def evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model : estimator exposing ``fit`` and ``predict``; ``predict_proba`` or
        ``decision_function`` is used for ROC AUC when available.
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features and labels used for every metric.

    Returns
    -------
    dict
        Keys: "Model" (class name of the estimator), "Accuracy", "Precision",
        "Recall", "F1 Score", "ROC AUC" (``None`` if the model exposes no
        continuous score), "Cohen’s Kappa" and "Running Time (s)".

    Notes
    -----
    The positive-class score is taken from column 1 of ``predict_proba``, so
    the function is written for binary labels.
    """
    # Monotonic clock: the measured span covers fitting, prediction and
    # metric computation, not fitting alone.
    start_time = time.perf_counter()

    # Train the model
    model.fit(X_train, y_train)

    # Predict
    y_pred = model.predict(X_test)

    # ROC AUC needs a continuous score. Estimators without predict_proba
    # (e.g. SVC with probability=False) still expose decision_function, whose
    # non-thresholded values are valid input for roc_auc_score.
    if hasattr(model, 'predict_proba'):
        y_score = model.predict_proba(X_test)[:, 1]
    elif hasattr(model, 'decision_function'):
        y_score = model.decision_function(X_test)
    else:
        y_score = None

    # Get performance metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_score) if y_score is not None else None
    cohen_kappa = cohen_kappa_score(y_test, y_pred)

    # Running time
    runtime = time.perf_counter() - start_time

    # Return all metrics
    return {
        "Model": model.__class__.__name__,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1,
        "ROC AUC": roc_auc,
        "Cohen’s Kappa": cohen_kappa,
        "Running Time (s)": runtime,
    }

# Evaluating all models and storing results
results = []

for name, model in models.items():
    print(f"Evaluating model: {name}")
    result = evaluate_model(model, X_train, X_test, y_train, y_test)
    # evaluate_model labels its row with the estimator's class name, which is
    # "SVC" for all four SVM kernels. Use the unique dictionary key instead so
    # every row of the results table can be told apart.
    result["Model"] = name
    results.append(result)

# Convert results into a DataFrame (one row per evaluated model)
results_df = pd.DataFrame(results)

# Display all the results
print(results_df)

In [None]:
# Render the metrics table (one row per evaluated model) as rich notebook output.
results_df
