In [1]:
import pandas as pd
import os
import json

# Load dateset
df = pd.read_csv(os.getcwd() + '/../../data/edge-iiot/Edge-IIoTset dataset/Selected dataset for ML and DL/ML-EdgeIIoT-dataset.csv', low_memory=False)
attack_df = df[df['Attack_label'] == 1]
attack_df = attack_df.drop(columns=['Attack_label', 'Attack_type'])
attack_df_train = attack_df.sample(frac=0.8, random_state=42)
attack_df_test = attack_df.drop(attack_df_train.index)

normal_df = df[df['Attack_label'] == 0]
normal_df = normal_df.drop(columns=['Attack_label', 'Attack_type'])
normal_df_train = normal_df.sample(frac=0.8, random_state=42)
normal_df_test = normal_df.drop(normal_df_train.index)

print("Attack Training set size: ", attack_df_train.shape)
print("Attack Test set size: ", attack_df_test.shape)

print("Normal Training set size: ", normal_df_train.shape)
print("Normal Test set size: ", normal_df_test.shape)

Attack Training set size:  (106799, 61)
Attack Test set size:  (26700, 61)
Normal Training set size:  (19441, 61)
Normal Test set size:  (4860, 61)


In [2]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma

embeddings = HuggingFaceEmbeddings()
vector_store = Chroma(
    collection_name="edge-iiotset",
    embedding_function=embeddings, 
    persist_directory="./chroma_db_binary")
retriever = vector_store.as_retriever(
    search_type="mmr", 
    search_kwargs={"k": 5, "fetch_k": 5})

  from tqdm.autonotebook import tqdm, trange


In [3]:
from statistics import mode
from sklearn.metrics import classification_report

sample_size = 10 # attack_df_test.shape[0]

y_pred = []
y_true = []
for i in range(sample_size):
    query_document = str(attack_df_test.iloc[i].to_list())
    similar_documents = retriever.invoke(query_document, filter={"source": "edge-iiotset"})
    y_true.append(1)
    if mode([doc.metadata["label"] for doc in similar_documents]) == "attack":
        y_pred.append(1)
    else:
        y_pred.append(0)

print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00        10

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10



In [4]:
from statistics import mode
from sklearn.metrics import classification_report

sample_size = 10 # normal_df_test.shape[0]

y_pred = []
y_true = []
for i in range(sample_size):
    query_document = str(normal_df_test.iloc[i].to_list())
    similar_documents = retriever.invoke(query_document, filter={"source": "edge-iiotset"})
    y_true.append(1)
    if mode([doc.metadata["label"] for doc in similar_documents]) == "normal":
        y_pred.append(1)
    else:
        y_pred.append(0)

print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00        10

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10

