In [1]:
%%capture
%pip install -U transformers accelerate

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, TextStreamer
import torch


base_model = "/kaggle/input/llama-3.2/transformers/3b-instruct/1"

tokenizer = AutoTokenizer.from_pretrained(base_model)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    return_dict=True,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)

2025-04-22 12:30:02.804763: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745325002.981548      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745325003.032388      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
if model.config.pad_token_id is None:
    model.config.pad_token_id = model.config.eos_token_id

In [4]:
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

Device set to use cuda:0


In [5]:
import pandas as pd

pd = pd.read_csv('/kaggle/input/test-sample/sample_20_rows.csv')
pd.head(5)

Unnamed: 0,Source IP,Destination IP,Source Port,Destination Port,Protocol,Flow Duration,Total Fwd Packets,Total Backward Packets,Flow Bytes/s,Label
0,172.16.0.5,192.168.50.1,842,21484,17,-0.194471,0.325118,-0.145124,-0.416396,drdos_dns
1,172.16.0.5,192.168.50.1,634,45239,17,-0.194038,0.407954,-0.145124,-0.431972,drdos_dns
2,172.16.0.5,192.168.50.1,634,8272,17,-0.194129,0.842839,-0.145124,-0.411765,drdos_dns
3,172.16.0.5,192.168.50.1,634,57621,17,-0.194152,0.842839,-0.145124,-0.410605,drdos_dns
4,192.168.50.6,34.204.21.102,57025,443,6,2.902553,-0.948475,2.009327,-0.491065,benign


In [6]:
X = pd.drop('Label', axis=1)
X.head(5)

Unnamed: 0,Source IP,Destination IP,Source Port,Destination Port,Protocol,Flow Duration,Total Fwd Packets,Total Backward Packets,Flow Bytes/s
0,172.16.0.5,192.168.50.1,842,21484,17,-0.194471,0.325118,-0.145124,-0.416396
1,172.16.0.5,192.168.50.1,634,45239,17,-0.194038,0.407954,-0.145124,-0.431972
2,172.16.0.5,192.168.50.1,634,8272,17,-0.194129,0.842839,-0.145124,-0.411765
3,172.16.0.5,192.168.50.1,634,57621,17,-0.194152,0.842839,-0.145124,-0.410605
4,192.168.50.6,34.204.21.102,57025,443,6,2.902553,-0.948475,2.009327,-0.491065


In [7]:
system_message = """
You are a cybersecurity expert trained to analyze network flow logs and determine whether the traffic is benign or part of a Distributed Denial of Service (DDoS) attack.

Use the ReAct (Reasoning + Acting) approach:
1. First, REASON through the meaning of the network features such as IPs, ports, protocols, flow duration, packet counts, and byte rates.
2. Then, ACT by concluding whether the flow is BENIGN or DDOS based on patterns, anomalies, or suspicious behavior.

Be concise but clear in your reasoning, and always end with the final classification in the format: "Final Answer: BENIGN" or "Final Answer: DDOS".
"""

few_shot_example = """
Example:

Input:
Source IP : 172.16.0.5
Destination IP : 149.171.126.12
Source Port : 49855
Destination Port : 443
Protocol : 6
Flow Duration : 1147838
Total Fwd Packets : 12
Total Backward Packets : 15
Flow Bytes/s : 12.2

Analysis:
Let me analyze this network flow:
- Source IP 172.16.0.5 is a private IP address (likely internal network)
- Destination IP 149.171.126.12 is a public IP
- Source Port 49855 is a high ephemeral port (normal for client connections)
- Destination Port 443 indicates HTTPS traffic (normal web browsing)
- Protocol 6 is TCP (standard for web browsing)
- Flow Duration of 1147838 microseconds (~1.15 seconds) is reasonable for a web request
- Packet counts (12 forward, 15 backward) are balanced and typical for normal web traffic
- Flow Bytes/s at 12.2 is relatively low and consistent with regular web browsing

This appears to be normal HTTPS web traffic from an internal network to an external server with balanced packet exchange and reasonable data rates.

Final Answer: BENIGN
"""

In [8]:
from IPython.display import Markdown, display
import time

for i in range(20):
    # Create the prompt from your dataset
    prompt = f"""
Source IP : {X['Source IP'][i]}
Destination IP : {X['Destination IP'][i]}
Source Port : {X['Source Port'][i]}
Destination Port : {X['Destination Port'][i]}
Protocol : {X['Protocol'][i]}
Flow Duration : {X['Flow Duration'][i]}
Total Fwd Packets : {X['Total Fwd Packets'][i]}
Total Backward Packets : {X['Total Backward Packets'][i]}
Flow Bytes/s : {X['Flow Bytes/s'][i]}
"""
    
    # Format messages for the model
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": few_shot_example},
        {"role": "user", "content": prompt}
    ]
    
    # Apply chat template
    formatted_prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    
    # Generate response
    response = pipe(formatted_prompt, max_new_tokens=1024, do_sample=True)
    
    # Extract and display the response
    generated_text = response[0]["generated_text"]
    
    # Print the response index and the generated text
    print(f"\nResponse for index {i}:")
    print(generated_text)
    
    # Optional: Allow time to see output before next iteration
    time.sleep(0.5)


Response for index 0:
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a cybersecurity expert trained to analyze network flow logs and determine whether the traffic is benign or part of a Distributed Denial of Service (DDoS) attack.

Use the ReAct (Reasoning + Acting) approach:
1. First, REASON through the meaning of the network features such as IPs, ports, protocols, flow duration, packet counts, and byte rates.
2. Then, ACT by concluding whether the flow is BENIGN or DDOS based on patterns, anomalies, or suspicious behavior.

Be concise but clear in your reasoning, and always end with the final classification in the format: "Final Answer: BENIGN" or "Final Answer: DDOS".<|eot_id|><|start_header_id|>user<|end_header_id|>

Example:

Input:
Source IP : 172.16.0.5
Destination IP : 149.171.126.12
Source Port : 49855
Destination Port : 443
Protocol : 6
Flow Duration : 1147838
Total Fwd Packets : 12
Total Backward Packets : 15
Flow Bytes/s : 12.2

Analysis:
Let me analy

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset



Response for index 10:
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a cybersecurity expert trained to analyze network flow logs and determine whether the traffic is benign or part of a Distributed Denial of Service (DDoS) attack.

Use the ReAct (Reasoning + Acting) approach:
1. First, REASON through the meaning of the network features such as IPs, ports, protocols, flow duration, packet counts, and byte rates.
2. Then, ACT by concluding whether the flow is BENIGN or DDOS based on patterns, anomalies, or suspicious behavior.

Be concise but clear in your reasoning, and always end with the final classification in the format: "Final Answer: BENIGN" or "Final Answer: DDOS".<|eot_id|><|start_header_id|>user<|end_header_id|>

Example:

Input:
Source IP : 172.16.0.5
Destination IP : 149.171.126.12
Source Port : 49855
Destination Port : 443
Protocol : 6
Flow Duration : 1147838
Total Fwd Packets : 12
Total Backward Packets : 15
Flow Bytes/s : 12.2

Analysis:
Let me anal