# Load Dataset

In [2]:
################################################################################
# Load dataset and split it into training and test set
################################################################################

import pandas as pd
import os
from tabulate import tabulate

dataset_name = "cic-iot"
sample_size = 100000

# Load dateset
df = pd.read_csv(os.getcwd() + f'/data/sample-{sample_size}-2.csv')

# Split dataset according to attack type
normal_df = df[df['label'] == 'BenignTraffic']
attack_df = df[df['label'] != 'BenignTraffic']

# Drop columns
normal_df = normal_df.drop(columns=['label'])
attack_df = attack_df.drop(columns=['label'])

# Split dataset into training and test set
normal_df_train = normal_df.sample(frac=0.8, random_state=42)
normal_df_test = normal_df.drop(normal_df_train.index)
attack_df_train = attack_df.sample(frac=0.8, random_state=42)
attack_df_test = attack_df.drop(attack_df_train.index)

# Print dataset sizes in a table
data = [
    ["Normal", normal_df.shape[0], normal_df_train.shape[0], normal_df_test.shape[0]],
    ["Attack", attack_df.shape[0], attack_df_train.shape[0], attack_df_test.shape[0]]
]
print(tabulate(data, headers=["Atack type", "Total", "Train", "Test"], tablefmt="grid"))

+--------------+---------+---------+--------+
| Atack type   |   Total |   Train |   Test |
| Normal       |   10800 |    8640 |   2160 |
+--------------+---------+---------+--------+
| Attack       |   89200 |   71360 |  17840 |
+--------------+---------+---------+--------+


# Feature Importance

In [9]:
################################################################################
# Generate Feature Importance
################################################################################

import os
import dotenv
import time
import numpy as np
import json
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_chroma import Chroma
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

dotenv.load_dotenv(os.getcwd() + '/../.env')

template = """
You are provided with network data entries categorized as either normal or attack, along with their corresponding feature names.
Carefully analyze the differences between normal and attack entries by comparing corresponding fields.
Output top 10 important features that can be used to filter an entry as either normal or attack.
Output only in the Python list structure.

Normal Entries:
```{normal_entries}```

Attack Entries:
```{attack_entries}```

Example output:
['feature1', 'feature2', 'feature3', ..., 'feature10']
"""

prompt = PromptTemplate(template=template, input_variables=["normal_entries", "attack_entries"])
llm = ChatOpenAI(model="gpt-4o", temperature=0.0)
model_name = "gpt-4o"
# llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.0)
# model_name = "gemini-1.5-pro"
# llm = ChatAnthropic(model='claude-3-opus-20240229')
# model_name = "claude-3-opus-20240229"
chain = prompt | llm
train_set_size = sample_size
embeddings = HuggingFaceEmbeddings()
vector_store = Chroma(
    collection_name=dataset_name,
    embedding_function=embeddings, 
    persist_directory=f"./vector-stores/chroma-db-{train_set_size}-2")

normal_vectors = vector_store._collection.get(include=['embeddings'], where={'label': 'normal'})['embeddings']
normal_mean_vector = np.mean(normal_vectors, axis=0).tolist()
normal_documents = vector_store._collection.query(query_embeddings=[normal_mean_vector], n_results=10)['documents'][0]

attack_vectors = vector_store._collection.get(include=['embeddings'], where={'label': 'attack'})['embeddings']
attack_mean_vector = np.mean(attack_vectors, axis=0).tolist()
attack_documents = vector_store._collection.query(query_embeddings=[attack_mean_vector], n_results=10)['documents'][0]

normal_entries = {}
for i, feature_name in enumerate(normal_df_train.columns.to_list()):
    normal_entries[feature_name] = [json.loads(doc)[i] for doc in normal_documents]

attack_entries = {}
for i, feature_name in enumerate(attack_df_train.columns.to_list()):
    attack_entries[feature_name] = [json.loads(doc)[i] for doc in attack_documents]

completions = []
for i in range(10):
    completion = chain.invoke({
        "normal_entries": json.dumps(normal_entries),
        "attack_entries": json.dumps(attack_entries)
    })
    completions.append(completion.content)
    print(completion.content)
    time.sleep(10)

with open(f"results/feature-importance-{sample_size}-llm-{model_name}.txt", "a") as f:
    f.write("\n".join(completions))



```python
['flow_duration', 'Header_Length', 'Rate', 'Srate', 'psh_flag_number', 'ack_flag_number', 'syn_count', 'fin_count', 'rst_count', 'Tot sum']
```
```python
['flow_duration', 'Header_Length', 'Rate', 'Srate', 'psh_flag_number', 'ack_flag_number', 'syn_count', 'fin_count', 'rst_count', 'Tot sum']
```
```python
['flow_duration', 'Header_Length', 'Rate', 'Srate', 'psh_flag_number', 'ack_flag_number', 'syn_count', 'fin_count', 'rst_count', 'Tot sum']
```
```python
['flow_duration', 'Header_Length', 'Rate', 'Srate', 'psh_flag_number', 'ack_flag_number', 'syn_count', 'fin_count', 'rst_count', 'Tot sum']
```
```python
['flow_duration', 'Header_Length', 'Rate', 'Srate', 'psh_flag_number', 'ack_flag_number', 'syn_count', 'fin_count', 'rst_count', 'Tot sum']
```
```python
['flow_duration', 'Header_Length', 'Rate', 'Srate', 'psh_flag_number', 'ack_flag_number', 'syn_count', 'fin_count', 'rst_count', 'Tot sum']
```
```python
['flow_duration', 'Header_Length', 'Rate', 'Srate', 'psh_flag_numb

# Prediction

In [67]:
################################################################################
# Generate Rules with transposed data
################################################################################

import os
import dotenv
import json
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_anthropic import ChatAnthropic
from langchain_chroma import Chroma
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
import numpy as np
import uuid
import tiktoken     # https://github.com/openai/tiktoken

dotenv.load_dotenv(os.getcwd() + '/../.env')

template = """
You are provided with network data entries categorized as either normal or attack, along with their corresponding feature names.
Carefully analyze the differences between normal and attack entries by comparing corresponding fields.
Generate 5 simple and deterministic rules for top 5 important features to filter an entry as either normal or attack. 
Output only in the JSON format with the structure: 
{{'feature1': 'rule', 'feature2': 'rule', ..., 'feature5': 'rule'}}.

Normal Entries:
```{normal_entries}```

Attack Entries:
```{attack_entries}```
"""
prompt = PromptTemplate(template=template, input_variables=["normal_entries", "attack_entries"])
llm = ChatOpenAI(model="gpt-4o", temperature=0.0)
model_name = "gpt-4o"
# llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.0)
# model_name = "gemini-1.5-pro"
# llm = ChatAnthropic(model='claude-3-opus-20240229')
# model_name = "claude-3-opus-20240229"
chain = prompt | llm
train_set_size = sample_size
embeddings = HuggingFaceEmbeddings()
vector_store = Chroma(
    collection_name=dataset_name,
    embedding_function=embeddings, 
    persist_directory=f"./vector-stores/chroma-db-{train_set_size}-2")

normal_vectors = vector_store._collection.get(include=['embeddings'], where={'label': 'normal'})['embeddings']
normal_mean_vector = np.mean(normal_vectors, axis=0).tolist()
normal_documents = vector_store._collection.query(query_embeddings=[normal_mean_vector], n_results=10)['documents'][0]

attack_vectors = vector_store._collection.get(include=['embeddings'], where={'label': 'attack'})['embeddings']
attack_mean_vector = np.mean(attack_vectors, axis=0).tolist()
attack_documents = vector_store._collection.query(query_embeddings=[attack_mean_vector], n_results=10)['documents'][0]

normal_entries = {}
for i, feature_name in enumerate(normal_df_train.columns.to_list()):
    normal_entries[feature_name] = [json.loads(doc.replace("'", "\""))[i] for doc in normal_documents]

attack_entries = {}
for i, feature_name in enumerate(attack_df_train.columns.to_list()):
    attack_entries[feature_name] = [json.loads(doc.replace("'", "\""))[i] for doc in attack_documents]

# prompt_text = prompt.invoke({
#     "normal_entries": json.dumps(normal_entries),
#     "attack_entries": json.dumps(attack_entries)
# }).text

# print(prompt_text)

completion = chain.invoke({
    "normal_entries": json.dumps(normal_entries),
    "attack_entries": json.dumps(attack_entries)
})

print(completion.content)

id = str(uuid.uuid4())
with open(f"results/llm/generated-rules-{sample_size}-llm-{model_name}.txt", "a") as f:
    f.write(f"{id}\n")
    f.write(f"{completion.content}\n")

encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
num_tokens_prompt = len(encoding.encode(prompt.invoke({"normal_entries": json.dumps(normal_entries),"attack_entries": json.dumps(attack_entries)}).text))
num_tokens_completion = len(encoding.encode(str(completion.content)))

print(f"Prompt tokens: {num_tokens_prompt}")
print(f"Completion tokens: {num_tokens_completion}")
print(f"Total tokens: {num_tokens_prompt + num_tokens_completion}")
print(f"Percentage of tokens used: {(num_tokens_prompt + num_tokens_completion) / 128000}")



```json
{
    "flow_duration": "if flow_duration < 1.0 then attack else normal",
    "Header_Length": "if Header_Length < 1000 then attack else normal",
    "Rate": "if Rate < 10.0 then attack else normal",
    "ack_flag_number": "if ack_flag_number == 0 then attack else normal",
    "HTTPS": "if HTTPS == 0 then attack else normal"
}
```
Prompt tokens: 5353
Completion tokens: 94
Total tokens: 5447
Percentage of tokens used: 0.0425546875


In [66]:
################################################################################
# Evaluate generated rules
################################################################################

from statistics import mode
from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm

datasets = {"normal": normal_df_test, "attack": attack_df_test}
y_pred = []
y_true = []
for attack_type, dataset in datasets.items():
    test_set_size = dataset.shape[0]
    for i in tqdm(range(test_set_size), ncols=100, desc=f"Predicting {attack_type} entries..."):
        predicted_attack_types = []
        predicted_attack_types.append("attack" if dataset.iloc[i]['flow_duration'] < 1 else "normal")
        predicted_attack_types.append("attack" if dataset.iloc[i]['Header_Length'] < 1000 else "normal")
        predicted_attack_types.append("attack" if dataset.iloc[i]['Rate'] < 10 else "normal")
        predicted_attack_types.append("attack" if dataset.iloc[i]['ack_flag_number'] == 0 else "normal")
        predicted_attack_types.append("attack" if dataset.iloc[i]['HTTPS'] == 0 else "normal")
        # predicted_attack_types.append("attack" if dataset.iloc[i]['Max'] == 54 else "normal")
        # predicted_attack_types.append("attack" if dataset.iloc[i]['Protocol Type'] == 6 else "normal")
        # predicted_attack_types.append("attack" if dataset.iloc[i]['Duration'] == 64 else "normal")
        # predicted_attack_types.append("attack" if dataset.iloc[i]['syn_flag_number'] > 0 else "normal")
        # predicted_attack_types.append("attack" if dataset.iloc[i]['Srate'] < 10 else "normal")
        # predicted_attack_types.append("attack" if dataset.iloc[i]['Drate'] < 10 else "normal")
        # predicted_attack_types.append("attack" if dataset.iloc[i]['ack_count'] < 1 else "normal")
        # predicted_attack_types.append("attack" if dataset.iloc[i]['Tot sum']< 1000 else "normal")
        y_true.append(attack_type)
        y_pred.append(mode(predicted_attack_types))

c_report = classification_report(y_true, y_pred, digits=4)
c_matrix = confusion_matrix(y_true, y_pred)

with open(f"results/llm/result-llm-{sample_size}-2.txt", "a") as f:
    f.write(f"{id}\n")
    f.write(f"Classication Report\n{c_report}\n\nConfusion Matrix\n{c_matrix}\n")

print(c_report)
print(c_matrix)

Predicting normal entries...: 100%|███████████████████████████| 1000/1000 [00:00<00:00, 5703.70it/s]
Predicting attack entries...: 100%|███████████████████████████| 1000/1000 [00:00<00:00, 6965.28it/s]

              precision    recall  f1-score   support

      attack     0.9369    0.9650    0.9507      1000
      normal     0.9639    0.9350    0.9492      1000

    accuracy                         0.9500      2000
   macro avg     0.9504    0.9500    0.9500      2000
weighted avg     0.9504    0.9500    0.9500      2000

[[965  35]
 [ 65 935]]





# Feedback Loop

In [3]:
################################################################################
# Prompt Template
################################################################################
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder

system_message = ("system",
"""
You are a good data analyst.
You are provided with network data entries categorized as either normal or attack, along with their corresponding feature names.
Carefully analyze the differences between normal and attack entries by comparing corresponding fields.
Your task is to generate {k} simple and deterministic rules for top {k} important features to filter attack entries.
Supported operators are '==', '!=', '>', '<', '>=', '<='.
Generate exactly {k} rules to filter attack entries and make a tool call for each rule.
"""
)
human_message = ("user",
"""
Analyze the following network data and generate rules for the top 5 important features to filter attack entries.

Normal Entries:
```{normal_entries}```

Attack Entries:
```{attack_entries}```
"""
)

prompt = ChatPromptTemplate.from_messages([
    system_message,
    human_message,
    MessagesPlaceholder("msgs")
])

# Invoke prompt
# prompt.invoke({"k": 5, "normal_entries": normal_entries, "attack_entries": attack_entries, "msgs": []})

In [4]:
################################################################################
# Tool
################################################################################

from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm
import operator
from typing import Annotated
from langchain_core.tools import tool

show_progress = True
operations = {'<': operator.lt, '>': operator.gt, '==': operator.eq, '<=': operator.le, '>=': operator.ge, '!=': operator.ne}

@tool
def evaluate_rule(
    feature_name: Annotated[str, "Feature name"],
    value: Annotated[str, "Value"], 
    op: Annotated[str, "Operator"]
) -> bool:
    """Evaluate the rule and return the macro f1-score."""
    try:
        value = float(value)
    except ValueError:
        value
    datasets = {"normal": normal_df_train, "attack": attack_df_train}
    y_pred = []
    y_true = []
    if op in operations:
        for attack_type, dataset in datasets.items():
            test_set_size = dataset.shape[0]
            for i in tqdm(range(test_set_size), ncols=100, desc=f"Predicting {attack_type} entries...", disable=not show_progress):
                y_true.append(attack_type)
                y_pred.append("attack" if operations[op](dataset.iloc[i][feature_name], value) else "normal")
        c_report = classification_report(y_true, y_pred, digits=4, output_dict=True)
        return c_report['macro avg']['f1-score']
    else:
        raise ValueError(f"Unsupported operator: {op}")

# Invoke tool
# print(evaluate_rule.invoke({"feature_name": "flow_duration", "value": "1", "op": "<"}))

In [5]:
################################################################################
# LLM
################################################################################

import os
import dotenv
from langchain_openai import ChatOpenAI
# from langchain_google_genai import ChatGoogleGenerativeAI
# from langchain_anthropic import ChatAnthropic

dotenv.load_dotenv(os.getcwd() + '/../.env')

model_name = "gpt-4o"
llm = ChatOpenAI(model=model_name, temperature=0.1)
# model_name = "gemini-1.5-pro"
# llm = ChatGoogleGenerativeAI(model=model_name, temperature=0.0)
# model_name = "claude-3-opus-20240229"
# llm = ChatAnthropic(model=model_name, temperature=0.0)

llm_with_tool = llm.bind_tools([evaluate_rule])

In [6]:
################################################################################
# Vector Store
################################################################################

import json
import numpy as np
from langchain_chroma import Chroma
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_openai import OpenAIEmbeddings

train_set_size = sample_size
n_results = 10
embeddings = HuggingFaceEmbeddings()
vector_store = Chroma(
    collection_name=dataset_name,
    embedding_function=embeddings, 
    persist_directory=f"./vector-stores/chroma-db-{train_set_size}-2")

normal_vectors = vector_store._collection.get(include=['embeddings'], where={'label': 'normal'})['embeddings']
normal_mean_vector = np.mean(normal_vectors, axis=0).tolist()
normal_documents = vector_store._collection.query(query_embeddings=[normal_mean_vector], n_results=n_results)['documents'][0]

attack_vectors = vector_store._collection.get(include=['embeddings'], where={'label': 'attack'})['embeddings']
attack_mean_vector = np.mean(attack_vectors, axis=0).tolist()
attack_documents = vector_store._collection.query(query_embeddings=[attack_mean_vector], n_results=n_results)['documents'][0]

normal_entries_dict = {}
for i, feature_name in enumerate(normal_df_train.columns.to_list()):
    normal_entries_dict[feature_name] = [json.loads(doc.replace("'", "\""))[i] for doc in normal_documents]

attack_entries_dict = {}
for i, feature_name in enumerate(attack_df_train.columns.to_list()):
    attack_entries_dict[feature_name] = [json.loads(doc.replace("'", "\""))[i] for doc in attack_documents]

In [13]:
################################################################################
# Chain
################################################################################

from langchain_core.messages import HumanMessage

chain = prompt | llm_with_tool

n_repetitions = 5
context_window = 128000
show_progress = False

def get_initial_state():
  n = 0
  k = 5
  mean_f1s = 0
  max_f1s = 0
  n_max = 0
  token_usage = {}
  normal_entries = json.dumps(normal_entries_dict)
  attack_entries = json.dumps(attack_entries_dict)
  msgs = []
  return locals()

state = get_initial_state()
train_f1_scores = []
while state["n"] < n_repetitions:
    ai_msg = chain.invoke(state)
    tool_msgs = []
    for tool_call in ai_msg.tool_calls:
        tool_msg = evaluate_rule.invoke(tool_call)
        tool_msgs.append(tool_msg)
    state["mean_f1s"] = sum(float(msg.content) for msg in tool_msgs) / len(tool_msgs)
    human_msg = HumanMessage(f"The current mean f1-score for the generated rules is {state['mean_f1s']}. "
                             "If this mean f1-score is greater than the previous rounds, keep the better performing "
                             "rules and revise or replace only the underperforming ones (those with a score less than mean). "
                             "Otherwise, revise or replace any rules that have a score less than mean. "
                             f"Based on the feedback, generate exactly {state['k']} rules to filter attack entries and "
                             "make a tool call for each rule, ensuring that a tool call is made for every entry every time.")    
    state["n"] += 1
    state["msgs"].extend([ai_msg, *tool_msgs, human_msg])
    train_f1_scores.append(state["mean_f1s"])
    state["max_f1s"] = state["mean_f1s"] if state["mean_f1s"] > state["max_f1s"] else state["max_f1s"]
    state["n_max"] = state["n"] if state["mean_f1s"] > state["max_f1s"] else state["n_max"]
    state["token_usage"] = {key: ai_msg.response_metadata["token_usage"][key] for key in ["completion_tokens", "prompt_tokens", "total_tokens"]} 
    print("Round:", state["n"], "Current mean f1-score:", state["mean_f1s"], "Token usage:", state["token_usage"])
print(train_f1_scores)


Round: 1 Current mean f1-score: 0.6411302584590476 Token usage: {'completion_tokens': 732, 'prompt_tokens': 5450, 'total_tokens': 6182}
Round: 2 Current mean f1-score: 0.6827872672551862 Token usage: {'completion_tokens': 389, 'prompt_tokens': 6357, 'total_tokens': 6746}
Round: 3 Current mean f1-score: 0.6616197951100998 Token usage: {'completion_tokens': 349, 'prompt_tokens': 6920, 'total_tokens': 7269}
Round: 4 Current mean f1-score: 0.6827898256690854 Token usage: {'completion_tokens': 317, 'prompt_tokens': 7443, 'total_tokens': 7760}
Round: 5 Current mean f1-score: 0.6827898256690854 Token usage: {'completion_tokens': 314, 'prompt_tokens': 7934, 'total_tokens': 8248}
[0.6411302584590476, 0.6827872672551862, 0.6616197951100998, 0.6827898256690854, 0.6827898256690854]


In [17]:
################################################################################
# Evaluate generated rules
################################################################################

from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm
import operator
from statistics import mode

operations = {'<': operator.lt, '>': operator.gt, '==': operator.eq, '<=': operator.le, '>=': operator.ge, '!=': operator.ne}

def evaluate_rules(tool_calls):
    datasets = {"normal": normal_df_test, "attack": attack_df_test}
    y_pred = []
    y_true = []
    for attack_type, dataset in datasets.items():
        test_set_size = dataset.shape[0]
        for i in tqdm(range(test_set_size), ncols=100, desc=f"Predicting {attack_type} entries...", disable=not show_progress):
            predicted_attack_types = []
            for tool_call in tool_calls:
                args = json.loads(tool_call["function"]["arguments"])
                op = args["op"]
                feature_name = args["feature_name"]
                value = args["value"]
                try:
                    value = float(value)
                except ValueError:
                    value
                predicted_attack_types.append("attack" if operations[op](dataset.iloc[i][feature_name], value) else "normal")
            y_true.append(attack_type)
            y_pred.append(mode(predicted_attack_types))
    c_report = classification_report(y_true, y_pred, digits=4, output_dict=True)
    c_matrix = confusion_matrix(y_true, y_pred)
    # print(c_report)
    # print(c_matrix)
    return c_report

# tool_calls = state["msgs"][-7].additional_kwargs["tool_calls"]
# for tool_call in tool_calls:
#     rule = json.loads(tool_call["function"]["arguments"])
#     print("attack if", rule["feature_name"], rule["op"], rule["value"], "else normal")

# evaluate_rules(tool_calls)

# test_f1_scores = []
# for i in range(20, 0, -1):
#     index = -7 * i
#     tool_calls = state["msgs"][index].additional_kwargs["tool_calls"]
#     for tool_call in tool_calls:
#         rule = json.loads(tool_call["function"]["arguments"])
#     test_f1_scores.append(evaluate_rules(tool_calls)['macro avg']['f1-score'])

# print(test_f1_scores)

for i in range(len(state["msgs"])):
    if state["msgs"][i].type != "ai":
        continue
    tool_calls = state["msgs"][i].additional_kwargs["tool_calls"]
    for tool_call in tool_calls:
        rule = json.loads(tool_call["function"]["arguments"])
        print("attack if", rule["feature_name"], rule["op"], rule["value"], "else normal")
    c_report = evaluate_rules(tool_calls)
    print(c_report["macro avg"]["f1-score"])

attack if flow_duration < 1 else normal
attack if Header_Length < 1000 else normal
attack if Duration == 64 else normal
attack if syn_flag_number == 1 else normal
attack if ack_flag_number == 0 else normal
0.8006668027669441
attack if flow_duration < 1 else normal
attack if Header_Length < 1000 else normal
attack if Duration <= 70 else normal
attack if syn_flag_number >= 0.5 else normal
attack if ack_flag_number == 0 else normal
0.9019476275688372
attack if flow_duration < 1 else normal
attack if Header_Length < 1000 else normal
attack if Duration <= 70 else normal
attack if psh_flag_number == 1 else normal
attack if ack_flag_number == 0 else normal
0.9045155019750267
attack if flow_duration < 1 else normal
attack if Header_Length < 1000 else normal
attack if Duration <= 70 else normal
attack if Rate > 50 else normal
attack if ack_flag_number == 0 else normal
0.9426194924959315
attack if flow_duration < 1 else normal
attack if Header_Length < 1000 else normal
attack if Duration <= 70 e

In [16]:
################################################################################
# Evaluate generated rules for efficiency
################################################################################

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
from tabulate import tabulate
from statistics import mode
import time
import warnings
import pandas as pd
import os

warnings.filterwarnings("ignore")

sample_size = 100000

# Load dateset
df = pd.read_csv(os.getcwd() + f'/data/sample-{sample_size}-2.csv')

# This dataset does not need categorical encoding as all features are numerical
# except for the label.

# Split dataset according to attack type
normal_df = df[df['label'] == 'BenignTraffic']
attack_df = df[df['label'] != 'BenignTraffic']
normal_df.loc[:, 'label'] = 'normal'
attack_df.loc[:, 'label'] = 'attack'

# Split dataset into training and test set
normal_df_train = normal_df.sample(frac=0.8, random_state=42)
normal_df_test = normal_df.drop(normal_df_train.index)
attack_df_train = attack_df.sample(frac=0.8, random_state=42)
attack_df_test = attack_df.drop(attack_df_train.index)

X_train = pd.concat([normal_df_train, attack_df_train]).drop(columns=['label'])
y_train = pd.concat([normal_df_train, attack_df_train])['label']
X_test = pd.concat([normal_df_test, attack_df_test]).drop(columns=['label'])
y_test = pd.concat([normal_df_test, attack_df_test])['label']

# Create instances of ML models
model_dt = DecisionTreeClassifier()
model_rf = RandomForestClassifier()

# Fit the models to the training data
model_dt.fit(X_train, y_train)
model_rf.fit(X_train, y_train)

# Predict the labels for the test data
y_true = y_test

elapsed_times_dt = []
elapsed_times_rf = []
elapsed_times_llm = []
y_pred_dt = []
y_pred_rf = []
y_pred_llm = []
for i in range(len(X_test)):
    # Predict using DT
    start = time.time()
    y_pred_dt.append(model_dt.predict([X_test.iloc[i]]))
    end = time.time()
    elapsed_times_dt.append(end - start)

    # Predict using RF
    start = time.time()
    y_pred_rf.append(model_rf.predict([X_test.iloc[i]]))
    end = time.time()
    elapsed_times_rf.append(end - start)
    
    # Predict using LLM
    start = time.time()
    row = X_test.iloc[i]
    conditions = [
        row['flow_duration'] < 1,
        row['Header_Length'] < 1000,
        row['Duration'] <= 70,
        row['Srate'] > 50,
        row['ack_flag_number'] == 0
    ]
    predicted_attack_types = ["attack" if condition else "normal" for condition in conditions]
    y_pred_llm.append(mode(predicted_attack_types))
    end = time.time()
    elapsed_times_llm.append(end - start)

print(f"DT time taken: {sum(elapsed_times_dt)/len(X_test)}")
print(classification_report(y_true, y_pred_dt, digits=4, output_dict=False))
print(confusion_matrix(y_true, y_pred_dt))
print("\n")

print(f"RF time taken: {sum(elapsed_times_rf)/len(X_test)}")
print(classification_report(y_true, y_pred_rf, digits=4, output_dict=False))
print(confusion_matrix(y_true, y_pred_rf))
print("\n")

print(f"LLM time taken: {sum(elapsed_times_llm)/len(X_test)}\n")
print(classification_report(y_true, y_pred_llm, digits=4, output_dict=False))
print(confusion_matrix(y_true, y_pred_llm))

DT time taken: 0.0002441995024681091
              precision    recall  f1-score   support

      attack     0.9960    0.9961    0.9961     17840
      normal     0.9680    0.9671    0.9676      2160

    accuracy                         0.9930     20000
   macro avg     0.9820    0.9816    0.9818     20000
weighted avg     0.9930    0.9930    0.9930     20000

[[17771    69]
 [   71  2089]]


RF time taken: 0.0034858589172363282
              precision    recall  f1-score   support

      attack     0.9991    0.9954    0.9972     17840
      normal     0.9632    0.9926    0.9777      2160

    accuracy                         0.9951     20000
   macro avg     0.9811    0.9940    0.9875     20000
weighted avg     0.9952    0.9951    0.9951     20000

[[17758    82]
 [   16  2144]]


LLM time taken: 0.0001223496913909912

              precision    recall  f1-score   support

      attack     0.9973    0.9760    0.9865     17840
      normal     0.8315    0.9778    0.8987      2160

   

# Explaination

In [None]:
# write function to use llm to explain the generated rules

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder

system_message = ("system",
"""
You are a good data analyst.
You are provided with network data entries categorized as either normal or attack, along with their corresponding feature names.
Carefully analyze the differences between normal and attack entries by comparing corresponding fields.
Your task is to generate {k} simple and deterministic rules for top {k} important features to filter attack entries.
Supported operators are '==', '!=', '>', '<', '>=', '<='.
Generate exactly {k} rules to filter attack entries and make a tool call for each rule.
"""
)
human_message = ("user",
"""
Analyze the following network data and generate rules for the top 5 important features to filter attack entries.

Normal Entries:
```{normal_entries}```

Attack Entries:
```{attack_entries}```
"""
)

prompt = ChatPromptTemplate.from_messages([
    system_message,
    human_message,
    MessagesPlaceholder("msgs")
])

import os
import dotenv
from langchain_openai import ChatOpenAI
# from langchain_google_genai import ChatGoogleGenerativeAI
# from langchain_anthropic import ChatAnthropic

dotenv.load_dotenv(os.getcwd() + '/../.env')

model_name = "gpt-4o"
llm = ChatOpenAI(model=model_name, temperature=0.0)
# model_name = "gemini-1.5-pro"
# llm = ChatGoogleGenerativeAI(model=model_name, temperature=0.0)
# model_name = "claude-3-opus-20240229"
# llm = ChatAnthropic(model=model_name, temperature=0.0)

llm_with_tool = llm.bind_tools([evaluate_rule])

# Other

In [68]:
################################################################################
# Generate Rules with Feedback Loop
################################################################################

import os
import dotenv
import json
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_anthropic import ChatAnthropic
from langchain_chroma import Chroma
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from statistics import mode
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import tiktoken     # https://github.com/openai/tiktoken

dotenv.load_dotenv(os.getcwd() + '/../.env')

template = """
You were provided with network data entries categorized as either normal or attack, along with their corresponding feature names.
You were asked to carefully analyze the differences between normal and attack entries by comparing corresponding fields to 
generate 5 simple and deterministic rules for the top 5 important features to filter an entry as either normal or attack.
The rules you generated were evaluated individually against a test set of network data entries, and the following F1-scores were obtained:

F1-scores 1st round:
```
"flow_duration": "if flow_duration < 1 then attack else normal" --> 0.9064
"Header_Length": "if Header_Length <= 1000 then attack else normal" --> 0.8295
"ack_flag_number": "if ack_flag_number == 0 then attack else normal" --> 0.8690
"HTTPS": "if HTTPS == 0 then attack else normal" --> 0.8160
"Max": "if Max == 54.0 then attack else normal" --> 0.6968

Overall F1-score --> 0.9735
```

F1-scores 2nd round:
```
"flow_duration": "if flow_duration < 1 then attack else normal" --> 0.9064
"Header_Length": "if Header_Length <= 100 then attack else normal" --> 0.8265
"ack_flag_number": "if ack_flag_number == 0 then attack else normal" --> 0.8690
"HTTPS": "if HTTPS == 0 then attack else normal" --> 0.8160
"Duration": "if Duration <= 70 then attack else normal" --> 0.3329

Overall F1-score --> 0.9332
```

F1-scores 3rd round:
```
"flow_duration": "if flow_duration < 1 then attack else normal" --> 0.9064
"Header_Length": "if Header_Length <= 100 then attack else normal" --> 0.8265
"ack_flag_number": "if ack_flag_number == 0 then attack else normal" --> 0.8690
"HTTPS": "if HTTPS == 0 then attack else normal" --> 0.8160
"AVG": "if AVG <= 60 then attack else normal" --> 0.9388

Overall F1-score --> 0.9700
```

Based on the feedback provided, drop underperforming rules that has the least f1-score.
Generate new rules to revise the rules to improve the F1-scores.
Output only in the JSON format with the structure: 
{{'feature1': 'rule', 'feature2': 'rule', ..., 'feature5': 'rule'}}.

Normal Entries:
```{normal_entries}```

Attack Entries:
```{attack_entries}```
"""

prompt_template = ChatPromptTemplate.from_messages([
    ("system", ""),
    ("user", "{}")
])
prompt = PromptTemplate(template=template, input_variables=["normal_entries", "attack_entries"])
llm = ChatOpenAI(model="gpt-4o", temperature=0.0)
model_name = "gpt-4o"
# llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.0)
# model_name = "gemini-1.5-pro"
# llm = ChatAnthropic(model='claude-3-opus-20240229')
# model_name = "claude-3-opus-20240229"
chain = prompt | llm
train_set_size = sample_size
embeddings = HuggingFaceEmbeddings()
vector_store = Chroma(
    collection_name="cic-iot",
    embedding_function=embeddings, 
    persist_directory=f"./vector-stores/chroma-db-{train_set_size}-2")

normal_vectors = vector_store._collection.get(include=['embeddings'], where={'label': 'normal'})['embeddings']
normal_mean_vector = np.mean(normal_vectors, axis=0).tolist()
normal_documents = vector_store._collection.query(query_embeddings=[normal_mean_vector], n_results=10)['documents'][0]

attack_vectors = vector_store._collection.get(include=['embeddings'], where={'label': 'attack'})['embeddings']
attack_mean_vector = np.mean(attack_vectors, axis=0).tolist()
attack_documents = vector_store._collection.query(query_embeddings=[attack_mean_vector], n_results=10)['documents'][0]

normal_entries = {}
for i, feature_name in enumerate(normal_df_train.columns.to_list()):
    normal_entries[feature_name] = [json.loads(doc)[i] for doc in normal_documents]

attack_entries = {}
for i, feature_name in enumerate(attack_df_train.columns.to_list()):
    attack_entries[feature_name] = [json.loads(doc)[i] for doc in attack_documents]

# print(prompt.invoke({
#     "normal_entries": json.dumps(normal_entries),
#     "attack_entries": json.dumps(attack_entries)
# }).text)

completion = chain.invoke({
    "normal_entries": json.dumps(normal_entries),
    "attack_entries": json.dumps(attack_entries)
})

print(completion.content)

with open(f"results/llm/generated-rules-{sample_size}-llm-{model_name}.txt", "a") as f:
    f.write(completion.content)

encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
num_tokens_prompt = len(encoding.encode(prompt.invoke({"normal_entries": json.dumps(normal_entries),"attack_entries": json.dumps(attack_entries)}).text))
num_tokens_completion = len(encoding.encode(str(completion.content)))

print(f"Prompt tokens: {num_tokens_prompt}")
print(f"Completion tokens: {num_tokens_completion}")
print(f"Total tokens: {num_tokens_prompt + num_tokens_completion}")
print(f"Percentage of tokens used: {(num_tokens_prompt + num_tokens_completion) / 128000}")

SyntaxError: invalid syntax (518245297.py, line 10)

In [None]:
################################################################################
# Generate Rules
################################################################################

import os
import dotenv
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_anthropic import ChatAnthropic
from langchain_chroma import Chroma
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from statistics import mode
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

dotenv.load_dotenv(os.getcwd() + '/../.env')

template = """
You are provided with network data entries categorized as either normal or attack, along with their corresponding feature names.
Carefully analyze the differences between normal and attack entries by comparing corresponding fields.
Generate 5 simple and deterministic rules for top 5 important features to filter an entry as either normal or attack. 
Output only in the JSON format with the structure: 
{{'feature1': 'rule', 'feature2': 'rule', ..., 'feature5': 'rule'}}.

Feature Names:
```{feature_names}```

Normal Entries:
```{normal_entries}```

Attack Entries:
```{attack_entries}```
"""
prompt = PromptTemplate(template=template, input_variables=["feature_names", "normal_entries", "attack_entries"])
llm = ChatOpenAI(model="gpt-4o", temperature=0.0)
model_name = "gpt-4o"
# llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.0)
# model_name = "gemini-1.5-pro"
# llm = ChatAnthropic(model='claude-3-opus-20240229')
# model_name = "claude-3-opus-20240229"
chain = prompt | llm
train_set_size = sample_size
embeddings = HuggingFaceEmbeddings()
vector_store = Chroma(
    collection_name="cic-iot",
    embedding_function=embeddings, 
    persist_directory=f"./vector-stores/chroma-db-{train_set_size}-2")

normal_vectors = vector_store._collection.get(include=['embeddings'], where={'label': 'normal'})['embeddings']
normal_mean_vector = np.mean(normal_vectors, axis=0).tolist()
normal_documents = vector_store._collection.query(query_embeddings=[normal_mean_vector], n_results=10)['documents'][0]

attack_vectors = vector_store._collection.get(include=['embeddings'], where={'label': 'attack'})['embeddings']
attack_mean_vector = np.mean(attack_vectors, axis=0).tolist()
attack_documents = vector_store._collection.query(query_embeddings=[attack_mean_vector], n_results=10)['documents'][0]

completion = chain.invoke({
    "feature_names": normal_df_train.columns.to_list(),
    "normal_entries": ",\n".join([f"{doc} --> normal" for doc in normal_documents]),
    "attack_entries": ",\n".join([f"{doc} --> attack" for doc in attack_documents])
    })

print(completion.content)

with open(f"results/generated-rules-{sample_size}-llm-{model_name}.txt", "a") as f:
    f.write(completion.content)

In [None]:
################################################################################
# Evaluate generated rules
################################################################################

from statistics import mode
from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm

datasets = {"normal": normal_df_test, "attack": attack_df_test}
y_pred = []
y_true = []
for attack_type, dataset in datasets.items():
    test_set_size = dataset.shape[0]
    for i in tqdm(range(test_set_size), ncols=100, desc=f"Predicting {attack_type} entries..."):
        predicted_attack_types = []
        predicted_attack_types.append("attack" if dataset.iloc[i]['flow_duration'] < 1 else "normal")
        predicted_attack_types.append("attack" if dataset.iloc[i]['Header_Length'] < 100 else "normal")
        predicted_attack_types.append("attack" if dataset.iloc[i]['Duration'] == 64 else "normal")
        predicted_attack_types.append("attack" if dataset.iloc[i]['Rate'] < 10 else "normal")
        predicted_attack_types.append("attack" if dataset.iloc[i]['ack_flag_number'] == 0 else "normal")
        # predicted_attack_types.append("attack" if dataset.iloc[i]['syn_flag_number'] > 0 else "normal")
        # predicted_attack_types.append("attack" if dataset.iloc[i]['Srate'] < 10 else "normal")
        # predicted_attack_types.append("attack" if dataset.iloc[i]['Drate'] < 10 else "normal")
        # predicted_attack_types.append("attack" if dataset.iloc[i]['ack_count'] < 1 else "normal")
        # predicted_attack_types.append("attack" if dataset.iloc[i]['Tot sum']< 1000 else "normal")
        # predicted_attack_types.append("attack" if dataset.iloc[i]['Max'] < 100 else "normal")
        # predicted_attack_types.append("attack" if dataset.iloc[i]['Protocol Type'] == 6 else "normal")
        y_true.append(attack_type)
        y_pred.append(mode(predicted_attack_types))

c_report = classification_report(y_true, y_pred)
c_matrix = confusion_matrix(y_true, y_pred)

with open(f"results/result-llm-{sample_size}-2.txt", "a") as f:
    f.write(f"Classication Report\n{c_report}\n\nConfusion Matrix\n{c_matrix}")

print(c_report)
print(c_matrix)

In [None]:
################################################################################
# Get a Summary
################################################################################

import dotenv
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_chroma import Chroma
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from statistics import mode
from sklearn.metrics import classification_report, confusion_matrix
import tiktoken     # https://github.com/openai/tiktoken

dotenv.load_dotenv(os.getcwd() + '/../.env')

template = """
Given normal and attack network data entries, output human understandable small summary on 
how attack and normal entries can be simply separated.

Feature Names:
```{feature_names}```

Normal Entries:
```{normal_entries}```

Attack Entries:
```{attack_entries}```
"""
prompt = PromptTemplate(template=template, input_variables=["feature_names", "normal_entries", "attack_entries"])
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)
# llm = ChatGoogleGenerativeAI(model="gemini-1.0-pro")
chain = prompt | llm
train_set_size = sample_size
embeddings = HuggingFaceEmbeddings()
vector_store = Chroma(
    collection_name="cic-iot",
    embedding_function=embeddings, 
    persist_directory=f"./vector-stores/chroma-db-{train_set_size}-2")
retriever = vector_store.as_retriever(
    search_type="mmr", 
    search_kwargs={"k": 5, "fetch_k": 5})

# retriever = vector_store.as_retriever(
#     search_type="mmr", 
#     search_kwargs={"k": 10, "fetch_k": 10})
# normal_documents = retriever.invoke(str(normal_df_test.iloc[0].to_list()), filter={"source": "cic-iot", "label": "normal"})
# attack_documents = retriever.invoke(str(attack_df_test.iloc[0].to_list()), filter={"source": "cic-iot", "label": "attack"})
# completion = chain.invoke({
#     "feature_names": normal_df_train.columns.to_list(),
#     "normal_entries": ",\n".join([f"{doc.page_content} --> {doc.metadata['label']}" for doc in normal_documents]),
#     "attack_entries": ",\n".join([f"{doc.page_content} --> {doc.metadata['label']}" for doc in attack_documents])
#     })
# print(completion)

normal_documents = retriever.invoke(str(normal_df_test.iloc[0].to_list()), filter={"source": "cic-iot", "label": "normal"})
attack_documents = retriever.invoke(str(attack_df_test.iloc[0].to_list()), filter={"source": "cic-iot", "label": "attack"})
completion = chain.invoke({
    "feature_names": normal_df_train.columns.to_list(),
    "normal_entries": ",\n".join([f"{doc.page_content} --> {doc.metadata['label']}" for doc in normal_documents]),
    "attack_entries": ",\n".join([f"{doc.page_content} --> {doc.metadata['label']}" for doc in attack_documents])
    })
print(completion)

In [4]:
################################################################################
# Generate Decision Tree
################################################################################

import os
import dotenv
import json
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
# from langchain_openai import OpenAIEmbeddings
# from langchain_google_genai import ChatGoogleGenerativeAI
# from langchain_anthropic import ChatAnthropic
from langchain_chroma import Chroma
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
import numpy as np
import uuid
import tiktoken     # https://github.com/openai/tiktoken

dotenv.load_dotenv(os.getcwd() + '/../.env')

template = """
You are provided with network data entries categorized as either normal or attack, along with their corresponding feature names.
Carefully analyze the differences between normal and attack entries by comparing corresponding fields.
Generate a decision tree with top 5 important features as nodes to filter an entry as either normal or attack. 

Normal Entries:
```{normal_entries}```

Attack Entries:
```{attack_entries}```
"""
prompt = PromptTemplate(template=template, input_variables=["normal_entries", "attack_entries"])
llm = ChatOpenAI(model="gpt-4o", temperature=0.0)
model_name = "gpt-4o"
# llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.0)
# model_name = "gemini-1.5-pro"
# llm = ChatAnthropic(model='claude-3-opus-20240229')
# model_name = "claude-3-opus-20240229"
chain = prompt | llm
train_set_size = sample_size
embeddings = HuggingFaceEmbeddings()
vector_store = Chroma(
    collection_name=dataset_name,
    embedding_function=embeddings, 
    persist_directory=f"./vector-stores/chroma-db-{train_set_size}-2")

normal_vectors = vector_store._collection.get(include=['embeddings'], where={'label': 'normal'})['embeddings']
normal_mean_vector = np.mean(normal_vectors, axis=0).tolist()
normal_documents = vector_store._collection.query(query_embeddings=[normal_mean_vector], n_results=10)['documents'][0]

attack_vectors = vector_store._collection.get(include=['embeddings'], where={'label': 'attack'})['embeddings']
attack_mean_vector = np.mean(attack_vectors, axis=0).tolist()
attack_documents = vector_store._collection.query(query_embeddings=[attack_mean_vector], n_results=10)['documents'][0]

normal_entries = {}
for i, feature_name in enumerate(normal_df_train.columns.to_list()):
    normal_entries[feature_name] = [json.loads(doc.replace("'", "\""))[i] for doc in normal_documents]

attack_entries = {}
for i, feature_name in enumerate(attack_df_train.columns.to_list()):
    attack_entries[feature_name] = [json.loads(doc.replace("'", "\""))[i] for doc in attack_documents]

# prompt_text = prompt.invoke({
#     "normal_entries": json.dumps(normal_entries),
#     "attack_entries": json.dumps(attack_entries)
# }).text

# print(prompt_text)

completion = chain.invoke({
    "normal_entries": json.dumps(normal_entries),
    "attack_entries": json.dumps(attack_entries)
})

print(completion.content)

id = str(uuid.uuid4())
with open(f"results/llm/generated-rules-{sample_size}-llm-{model_name}.txt", "a") as f:
    f.write(f"{id}\n")
    f.write(f"{completion.content}\n")

encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
num_tokens_prompt = len(encoding.encode(prompt.invoke({"normal_entries": json.dumps(normal_entries),"attack_entries": json.dumps(attack_entries)}).text))
num_tokens_completion = len(encoding.encode(str(completion.content)))

print(f"Prompt tokens: {num_tokens_prompt}")
print(f"Completion tokens: {num_tokens_completion}")
print(f"Total tokens: {num_tokens_prompt + num_tokens_completion}")
print(f"Percentage of tokens used: {(num_tokens_prompt + num_tokens_completion) / 128000}")



To generate a decision tree that can differentiate between normal and attack entries, we need to analyze the provided data and identify the most significant features that distinguish the two categories. Let's start by comparing the features of normal and attack entries.

### Feature Analysis

1. **Flow Duration**:
   - Normal: Ranges from approximately 41 to 82.
   - Attack: Ranges from 0 to 0.48.
   - **Observation**: Attack entries have significantly lower flow durations.

2. **Header Length**:
   - Normal: Ranges from approximately 337,755 to 4,609,454.
   - Attack: Ranges from 54 to 166.
   - **Observation**: Attack entries have much smaller header lengths.

3. **Protocol Type**:
   - Normal: Mostly around 6, with some variations.
   - Attack: Consistently 6.
   - **Observation**: Not a distinguishing feature.

4. **Rate**:
   - Normal: Ranges from approximately 19.9 to 83.9.
   - Attack: Ranges from approximately 2.38 to 178.43.
   - **Observation**: Attack entries have a wider ra

In [8]:
################################################################################
# Evaluate generated rules
################################################################################

from statistics import mode
from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm

def classify_traffic(record):
    # Step 1: Check Flow Duration
    if record["flow_duration"] < 1:
        return "attack"
    
    # Step 2: Check Header Length
    if record["Header_Length"] < 200:
        return "attack"
    
    # Step 3: Check Rate
    if record["Rate"] < 10:
        return "attack"
    
    # Step 4: Check IAT
    if record["ack_flag_number"] == 0:
        return "attack"
    
    # Step 5: Check Tot Sum
    if record["Weight"] > 100:
        return "attack"
    
    return "normal"

datasets = {"normal": normal_df_test, "attack": attack_df_test}
y_pred = []
y_true = []
for attack_type, dataset in datasets.items():
    test_set_size = dataset.shape[0]
    for i in tqdm(range(test_set_size), ncols=100, desc=f"Predicting {attack_type} entries..."):
        y_true.append(attack_type)
        y_pred.append(classify_traffic(dataset.iloc[i]))

c_report = classification_report(y_true, y_pred, digits=4)
c_matrix = confusion_matrix(y_true, y_pred)

print(c_report)
print(c_matrix)

Predicting normal entries...: 100%|██████████████████████████| 1000/1000 [00:00<00:00, 14100.92it/s]
Predicting attack entries...: 100%|██████████████████████████| 1000/1000 [00:00<00:00, 23512.39it/s]

              precision    recall  f1-score   support

      attack     0.6172    0.9980    0.7627      1000
      normal     0.9948    0.3810    0.5510      1000

    accuracy                         0.6895      2000
   macro avg     0.8060    0.6895    0.6568      2000
weighted avg     0.8060    0.6895    0.6568      2000

[[998   2]
 [619 381]]



