In [None]:
import json
import os
import pandas as pd

In [None]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import jensenshannon
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from sklearn.preprocessing import MinMaxScaler

def distribution_similarity(real_traffic, gen_traffic, bins=10):

    # Normalization and Histogram
    real_hist, _ = np.histogram(real_traffic, bins=bins, density=True)
    gen_hist, _ = np.histogram(gen_traffic, bins=bins, density=True)

    # Prevent zeros by adding lower case numbers
    """Jensen-Shannon Divergence can have problems with histograms containing zeros (log(0) is undefined). 
    Therefore, a small number (1e-9) is added to the histograms to remove the zeros."""
    real_hist += 1e-9
    gen_hist += 1e-9

    # Similarity Calculation
    """Jensen-Shannon Divergence (JSD): Measures the similarity between two probability distributions 
    (0: exactly the same, 1: completely different)."""
    js = jensenshannon(real_hist, gen_hist)

    return js

# GPT 4.1 EXPERIMENT 1 RESULTS

In [19]:
real_traffic = []
with open(r'../../Datasets/Experiment_1_one_way_communication_10_minute_input_sample.json', 'r') as file:
    for line in file:
        real_traffic.append(json.loads(line))


real_traffic  = pd.DataFrame(real_traffic)
real_traffic.head(2)

Unnamed: 0,No.,Time,Source,Destination,Protocol,Length,Info,Info_clean
0,1,0.0,0x1de6,Broadcast,ZigBee,80,Link Status,Link Status
1,2,8.041278,0x1de6,0xd7a7,ZigBee HA,69,"ZCL: Read Attributes Response, Seq: 216","ZCL: Read Attributes Response,"


In [None]:
real_traffic = real_traffic.drop(columns=["No.", "Info_clean"])
real_traffic.head(2)

Unnamed: 0,Time,Source,Destination,Protocol,Length,Info
0,0.0,0x1de6,Broadcast,ZigBee,80,Link Status
1,8.041278,0x1de6,0xd7a7,ZigBee HA,69,"ZCL: Read Attributes Response, Seq: 216"


In [21]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import jensenshannon

N = [1,2,3,4,5,6,7,8,9,10]

num_pac = []
pack_size = []
time_mean = []
time_min = []
time_max = []
JS_metric = []

for n in N:
    file_path = fr"../../Generated_Traffic/GPT4.1_Exp1_Trial_{n}_one_way_communication_10_minute_generated_message.json"

    with open(file_path, 'r') as f:
        generated_traffic = json.load(f)

        
    generated_traffic

    data = json.loads(generated_traffic)  # Convert string to Python list
    generated_traffic = pd.DataFrame(data)

    #generated_traffic =  pd.DataFrame(generated_traffic)
    generated_traffic.head(2)
    generated_traffic = pd.DataFrame(generated_traffic[2:-2])
    #generated_traffic.head(2)

    print(f"\n ================== Generated Traffic Trial Number {n} ==================")      

    print("Number of Packet:", len(generated_traffic))
    print("Average Packet Size:", (generated_traffic['length'].astype(int)).mean())
    print("Time Interval Average:", (generated_traffic['time'].astype(float)).diff().mean())
    print("Time Interval min:", (generated_traffic['time'].astype(float)).diff().min())
    print("Time Interval max:", (generated_traffic['time'].astype(float)).diff().max())
    print("Number of source:", generated_traffic['src'].nunique())
    print("Number of destination:", generated_traffic['dst'].nunique())
    
    num_pac.append(len(generated_traffic))
    pack_size.append((generated_traffic['length'].astype(int)).mean())
    time_mean.append((generated_traffic['time'].astype(float)).diff().mean())
    time_min.append((generated_traffic['time'].astype(float)).diff().min())
    time_max.append((generated_traffic['time'].astype(float)).diff().max())


    """ JENSEN SHANNON FOR TIMESTAMPT"""

    generated_traffic['time'] = pd.to_numeric( generated_traffic['time'], errors='coerce')
    real_traffic['Time'] = pd.to_numeric(real_traffic['Time'], errors='coerce')

    js_packet = distribution_similarity(real_traffic['Time'], generated_traffic['time'] )
    print(f"\nJensen-Shannon for time: {js_packet:.4f}")
    JS_metric.append(js_packet)
    

print("Number of Packet:", np.mean(num_pac),  
    "Average Packet Size", np.mean(pack_size),
    "Time Interval Average", np.mean(time_mean),
    "Time Interval min",  np.mean(time_min),
    "Time Interval max",  np.mean(time_max),
     "Average JS:", np.mean(JS_metric)
    )

print(JS_metric)



Number of Packet: 117
Average Packet Size: 69.12820512820512
Time Interval Average: 4.9945910862068965
Time Interval min: 0.021898999999990565
Time Interval max: 15.611914999999996
Number of source: 1
Number of destination: 2

Jensen-Shannon for time: 0.0552

Number of Packet: 124
Average Packet Size: 69.29032258064517
Time Interval Average: 4.711373000000001
Time Interval min: 0.016797999999994317
Time Interval max: 20.221397000000003
Number of source: 1
Number of destination: 2

Jensen-Shannon for time: 0.0490

Number of Packet: 136
Average Packet Size: 69.76470588235294
Time Interval Average: 4.296511111111111
Time Interval min: 0.025000000000034106
Time Interval max: 9.12899999999999
Number of source: 1
Number of destination: 2

Jensen-Shannon for time: 0.1016

Number of Packet: 136
Average Packet Size: 69.41911764705883
Time Interval Average: 4.326704518518518
Time Interval min: 0.02727300000000099
Time Interval max: 13.893526999999992
Number of source: 1
Number of destination: 2