In [12]:
import json

# Load json dataset from json
with open('edge-iiotset-ddos-test.json', 'r') as f:
    ddos_json_test = json.load(f)

with open('edge-iiotset-normal-test.json', 'r') as f:
    normal_json_test = json.load(f)

In [15]:
from langchain_community.embeddings.ollama import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from statistics import mode
from sklearn.metrics import classification_report
from tqdm import tqdm

embeddings = OllamaEmbeddings(model="all-minilm")
vector_store = Chroma(
    collection_name="edge-iiotset",
    embedding_function=embeddings, 
    persist_directory="./chroma_langchain_db")
retriever = vector_store.as_retriever(
    search_type="mmr", 
    search_kwargs={"k": 5, "fetch_k": 5})

In [9]:
sample_size = len(ddos_json_test)
# sample_size = 100
y_pred = []
y_true = []
for i in tqdm(range(sample_size), ncols=100, desc="Test ddos samples"):
    query_document = ddos_json_test[i]
    similar_documents = retriever.invoke(query_document, filter={"source": "edge-iiotset"})
    y_true.append(1)
    if mode([doc.metadata["label"] for doc in similar_documents]) == "ddos":
        y_pred.append(1)
    else:
        y_pred.append(0)

print(classification_report(y_true, y_pred))

Test ddos samples: 100%|██████████████████████████████████████| 9879/9879 [2:13:54<00:00,  1.23it/s]

              precision    recall  f1-score   support

           1       1.00      1.00      1.00      9879

    accuracy                           1.00      9879
   macro avg       1.00      1.00      1.00      9879
weighted avg       1.00      1.00      1.00      9879






In [16]:
sample_size = len(normal_json_test)
# sample_size = 100
y_pred = []
y_true = []
for i in tqdm(range(sample_size), ncols=100, desc="Test normal samples"):
    query_document = normal_json_test[i]
    similar_documents = retriever.invoke(query_document, filter={"source": "edge-iiotset"})
    y_true.append(1)
    if mode([doc.metadata["label"] for doc in similar_documents]) == "normal":
        y_pred.append(1)
    else:
        y_pred.append(0)

print(classification_report(y_true, y_pred))

Test normal samples: 100%|██████████████████████████████████████| 4860/4860 [38:49<00:00,  2.09it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.63      0.78      4860

    accuracy                           0.63      4860
   macro avg       0.50      0.32      0.39      4860
weighted avg       1.00      0.63      0.78      4860




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
