In [None]:
# Mount Google Drive at /content/drive so the dataset pickles stored there
# can be opened by the cells below (Colab-only).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


1. mon_standard.pkl → arrays and feature extraction



In [None]:
import pickle

# Labeling configuration for the monitored dataset.
USE_SUBLABEL = False   # True: each URL index is its own label; False: URLs are grouped per site
URL_PER_SITE = 10      # number of consecutive URL indices that share one site label
TOTAL_URLS   = 950     # number of URL indices read from the pickle

# Read the monitored traces from Drive.
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
print("Loading datafile...")
with open("/content/drive/MyDrive/기계학습/mon_standard.pkl", 'rb') as fi:
    data = pickle.load(fi)

X1 = []  # one timestamp sequence per trace (absolute value of every stored entry)
X2 = []  # one direction sequence per trace, encoded as +512 / -512
y = []   # one label per trace

# Every stored entry carries a sign and a magnitude: the magnitude is kept as the
# timestamp, the sign becomes the direction. The pickle holds no packet sizes, so
# a constant 512 is used. An entry equal to 0 is mapped to -512.
for url_index in range(TOTAL_URLS):
    # With sublabels off, URLs fetched from the same site get the same label.
    label = url_index if USE_SUBLABEL else url_index // URL_PER_SITE
    for sample in data[url_index]:
        X1.append([abs(c) for c in sample])
        X2.append([512 if c > 0 else -512 for c in sample])
        y.append(label)

size = len(y)
print(f'Total samples: {size}') # Output: 19000


Loading datafile...
Total samples: 19000


In [None]:
# Features
# One list per feature, one entry per trace. The numbering below matches the
# feature numbers printed later in this notebook and the column order of the
# DataFrame built from these lists.
num_total_packets = [] # feature 1
sum_packets = [] # feature 2
num_incoming_packets = [] # feature 3
frac_incoming_packets = [] # feature 4
num_outgoing_packets = [] # feature 5
frac_outgoing_packets = [] # feature 6
average_outgoing_ordering = [] # feature 7
std_dev_outgoing_ordering = [] # feature 8
average_incoming_ordering = [] # feature 9
std_dev_incoming_ordering = [] # feature 10
mean_of_the_sequence = [] # feature 11
alternative_packets_per_second_sum = [] # feature 12

In [None]:
import numpy as np

# Number of packets per chunk used for the "mean of the sequence" feature.
PACKETS_PER_CHUNK = 20

# Compute the count, fraction, ordering and chunk features for every trace in X2
# and append one value per trace to each feature list.
for size_seq in X2:
    total = len(size_seq)

    # Positions (0-based) of incoming (< 0) and outgoing (> 0) packets; the
    # per-direction counts are simply the lengths of these lists.
    incoming_ordering_list = [idx for idx, size in enumerate(size_seq) if size < 0]
    outgoing_ordering_list = [idx for idx, size in enumerate(size_seq) if size > 0]
    incoming = len(incoming_ordering_list)
    outgoing = len(outgoing_ordering_list)

    # Total number of packets
    num_total_packets.append(total)

    # Sum of packet sizes (absolute value gives the size regardless of direction)
    sum_packets.append(sum(abs(size) for size in size_seq))

    # Incoming packets: count and fraction of the total (0 for an empty trace)
    num_incoming_packets.append(incoming)
    frac_incoming_packets.append(incoming / total if total > 0 else 0)

    # Outgoing packets: count and fraction of the total (0 for an empty trace)
    num_outgoing_packets.append(outgoing)
    frac_outgoing_packets.append(outgoing / total if total > 0 else 0)

    # Mean / standard deviation of the incoming packet positions.
    # With no incoming packets the value is NaN, which is what np.mean / np.std
    # return for an empty list, but stored here without the RuntimeWarning.
    average_incoming_ordering.append(np.mean(incoming_ordering_list) if incoming else np.nan)
    std_dev_incoming_ordering.append(np.std(incoming_ordering_list) if incoming else np.nan)

    # Mean / standard deviation of the outgoing packet positions (NaN when none).
    average_outgoing_ordering.append(np.mean(outgoing_ordering_list) if outgoing else np.nan)
    std_dev_outgoing_ordering.append(np.std(outgoing_ordering_list) if outgoing else np.nan)

    # "Mean of the sequence": split the trace into chunks of PACKETS_PER_CHUNK
    # packets, count outgoing packets per chunk, drop chunks with none, and
    # average the remaining counts.
    chunks = [size_seq[i:i + PACKETS_PER_CHUNK] for i in range(0, total, PACKETS_PER_CHUNK)]
    outgoing_counts = [sum(1 for elem in chunk if elem > 0) for chunk in chunks]
    filtered_array = [element for element in outgoing_counts if element != 0]
    # A trace without any outgoing packet leaves nothing to average; store 0.0
    # instead of dividing by zero.
    average_outgoing_counts = sum(filtered_array) / len(filtered_array) if filtered_array else 0.0
    mean_of_the_sequence.append(average_outgoing_counts)

In [None]:
# 12. Sum of alternative number packets per second
# Each trace is cut into up to NUM_SUBSETS consecutive, equally sized subsets;
# for every subset the packet rate (packets / elapsed time) is computed and the
# rates are summed. Packets left over after NUM_SUBSETS full subsets are ignored.
NUM_SUBSETS = 20
MIN_TIME_INTERVAL = 0.001  # lower bound so a zero-length interval never divides by zero

alternative_packets_per_second_sum = []

for time_seq, size_seq in zip(X1, X2):
    n_packets = len(size_seq)
    # Traces shorter than NUM_SUBSETS packets would give a subset size of 0 and
    # make range() raise ValueError; fall back to one packet per subset.
    packets_per_subset = max(n_packets // NUM_SUBSETS, 1)
    # Only the part of the trace covered by full subsets is used.
    covered = min(n_packets, packets_per_subset * NUM_SUBSETS)
    subset_sums = []

    for start_index in range(0, covered, packets_per_subset):
        end_index = start_index + packets_per_subset

        time_interval = time_seq[end_index - 1] - time_seq[start_index]
        time_interval = max(time_interval, MIN_TIME_INTERVAL)

        subset_sums.append((end_index - start_index) / time_interval)

    alternative_packets_per_second_sum.append(sum(subset_sums))

In [None]:
# Print features 1-6 in full, one line per feature.
_features_1_to_6 = (
    num_total_packets,
    sum_packets,
    num_incoming_packets,
    frac_incoming_packets,
    num_outgoing_packets,
    frac_outgoing_packets,
)
for _number, _values in enumerate(_features_1_to_6, start=1):
    print(f"feature {_number}: {_values}")

feature 1: [1421, 518, 1358, 1446, 1406, 559, 1376, 1403, 564, 564, 1423, 567, 563, 1443, 1452, 552, 564, 1417, 560, 1178, 629, 5072, 5021, 5221, 1036, 1019, 5183, 1028, 4895, 4950, 5051, 4961, 787, 1016, 5113, 5101, 4948, 1034, 5092, 5028, 4675, 5570, 4696, 4715, 4780, 5723, 4672, 4701, 4743, 4766, 4686, 5545, 4786, 4733, 4296, 5030, 5283, 4978, 4650, 1268, 2230, 2228, 2227, 2219, 2233, 2261, 2245, 1128, 2304, 2253, 2168, 2403, 2316, 2257, 2229, 2366, 2216, 2242, 2242, 2201, 5760, 5657, 5663, 5405, 5649, 7268, 5571, 5803, 3984, 3618, 7522, 5573, 7432, 5640, 7540, 7741, 7457, 5523, 5623, 7892, 9900, 9810, 9840, 9798, 9775, 9813, 4387, 9485, 9755, 9904, 9769, 9864, 2621, 9785, 9771, 9809, 9777, 9767, 9841, 9788, 2499, 1775, 2748, 755, 1953, 2158, 1917, 1978, 2367, 2105, 1886, 1862, 2049, 1888, 2447, 2098, 2095, 1877, 691, 696, 9962, 9961, 9964, 9962, 9962, 9963, 9964, 9963, 9963, 9941, 9963, 9963, 9963, 9964, 9963, 9964, 9962, 9962, 9967, 5556, 1805, 1843, 1902, 1842, 1890, 1948, 1810, 

In [None]:
# Print features 7-12 in full, one line per feature.
_features_7_to_12 = (
    average_outgoing_ordering,
    std_dev_outgoing_ordering,
    average_incoming_ordering,
    std_dev_incoming_ordering,
    mean_of_the_sequence,
    alternative_packets_per_second_sum,
)
for _number, _values in enumerate(_features_7_to_12, start=7):
    print(f"feature {_number}: {_values}")

feature 7: [773.3223140495868, 226.1625, 786.1101694915254, 820.139344262295, 789.6086956521739, 249.5735294117647, 800.6782608695652, 806.7301587301587, 259.1714285714286, 258.2567567567568, 806.982905982906, 256.5970149253731, 253.32394366197184, 812.2844827586207, 874.5441176470588, 251.81428571428572, 261.43055555555554, 805.0956521739131, 247.51470588235293, 655.6767676767677, 273.01785714285717, 3762.3192019950125, 3713.169942929613, 3851.3823708206687, 438.4355828220859, 437.7672955974843, 3812.1993883792047, 429.26027397260276, 3596.1587628865977, 3656.317106152806, 3764.4322200392926, 3652.103355704698, 326.6942148760331, 440.57236842105266, 3821.4731043421907, 3703.6456639566395, 3613.031081081081, 448.42261904761904, 3767.769426751592, 3675.8987905792487, 2391.1774193548385, 2576.5717488789237, 2525.9664429530203, 2407.988344988345, 2427.77752293578, 2692.1533477321814, 2359.344186046512, 2570.5456790123458, 2451.021377672209, 2403.4125874125875, 2359.291469194313, 2588.0089

In [None]:
# Sanity check: every feature list should hold one value per trace.
for _feature_values in (
    num_total_packets,
    sum_packets,
    num_incoming_packets,
    frac_incoming_packets,
    num_outgoing_packets,
    frac_outgoing_packets,
    average_outgoing_ordering,
    std_dev_outgoing_ordering,
    average_incoming_ordering,
    std_dev_incoming_ordering,
    mean_of_the_sequence,
    alternative_packets_per_second_sum,
):
    print(len(_feature_values))

19000
19000
19000
19000
19000
19000
19000
19000
19000
19000
19000
19000


In [None]:
# Fresh, independent lists for features 13-18 (one entry per trace is appended later).
(
    incoming_packets_first30,  # feature 13: incoming packets among the first 30
    outgoing_packets_first30,  # feature 14: outgoing packets among the first 30
    transmission_time_Q1,      # feature 15: 25th percentile of the timestamps
    transmission_time_Q2,      # feature 16: 50th percentile of the timestamps
    transmission_time_Q3,      # feature 17: 75th percentile of the timestamps
    transmission_time_Q4,      # feature 18: 100th percentile of the timestamps
) = ([] for _ in range(6))

In [None]:
# Fresh, independent lists for features 19-22.
# Despite the "packets_per_second" names, features 20-22 are filled with the
# mean / standard deviation / median of each trace's timestamps.
(
    num_packets_per_second,   # feature 19: packet count divided by the last timestamp
    mean_packets_per_second,  # feature 20
    std_packets_per_second,   # feature 21
    med_packets_per_second,   # feature 22
) = ([] for _ in range(4))

In [None]:
# Not used: skipped because of insufficient RAM.
# The lists are still created (and left empty) for features 23-30.
(
    incoming_max_inter_arrival_times,            # feature 23
    incoming_mean_inter_arrival_times,           # feature 24
    incoming_std_inter_arrival_times,            # feature 25
    incoming_third_quartileinter_arrival_times,  # feature 26
    outgoing_max_inter_arrival_times,            # feature 27
    outgoing_mean_inter_arrival_times,           # feature 28
    outgoing_std_inter_arrival_times,            # feature 29
    outgoing_third_quartileinter_arrival_times,  # feature 30
) = ([] for _ in range(8))

In [None]:
# Features 13-14: how many of the first 30 packets of each trace are
# incoming (< 0) and how many are outgoing (> 0).
for size_seq in X2:
    leading_packets = size_seq[:30]
    incoming_packets_first30.append(len([s for s in leading_packets if s < 0]))
    outgoing_packets_first30.append(len([s for s in leading_packets if s > 0]))

print("incoming_packets_first30:", incoming_packets_first30)
print("outgoing_packets_first30:", outgoing_packets_first30)

incoming_packets_first30: [21, 22, 23, 21, 22, 22, 23, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 22, 22, 21, 20, 21, 21, 21, 21, 21, 20, 21, 21, 21, 20, 21, 22, 22, 21, 20, 20, 21, 20, 22, 22, 22, 22, 23, 22, 22, 22, 23, 21, 21, 22, 22, 22, 23, 22, 22, 21, 22, 22, 22, 22, 22, 22, 21, 22, 22, 21, 23, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 22, 21, 21, 22, 23, 22, 23, 23, 23, 22, 21, 22, 23, 22, 22, 22, 22, 22, 22, 22, 21, 22, 22, 22, 22, 22, 22, 23, 22, 24, 22, 22, 23, 21, 22, 23, 22, 22, 22, 22, 22, 22, 23, 22, 22, 22, 23, 21, 22, 22, 22, 22, 21, 21, 22, 23, 23, 22, 22, 22, 23, 22, 22, 22, 21, 22, 23, 23, 22, 21, 22, 22, 21, 21, 22, 22, 22, 22, 24, 22, 22, 22, 23, 21, 22, 23, 23, 22, 23, 22, 23, 22, 22, 22, 22, 21, 23, 21, 21, 22, 22, 22, 21, 21, 22, 22, 23, 22, 22, 22, 22, 22, 23, 23, 21, 22, 22, 21, 22, 22, 18, 19, 18, 18, 18, 18, 18, 18, 18, 19, 18, 18, 18, 18, 18, 18, 19, 18, 19, 18, 22, 21, 22, 22, 21, 22, 21, 20, 22, 22, 22, 21, 22, 22, 21, 21, 21, 22, 21, 20, 19, 18, 18, 1

In [None]:
# Features 15-18: 25th / 50th / 75th / 100th percentile of each trace's timestamps.
for time_seq in X1:
    q25, q50, q75, q100 = np.percentile(time_seq, [25, 50, 75, 100])

    transmission_time_Q1.append(q25)
    transmission_time_Q2.append(q50)
    transmission_time_Q3.append(q75)
    transmission_time_Q4.append(q100)

for _caption, _values in (
    ("transmission_time_Q1:", transmission_time_Q1),
    ("transmission_time_Q2:", transmission_time_Q2),
    ("transmission_time_Q3:", transmission_time_Q3),
    ("transmission_time_Q4:", transmission_time_Q4),
):
    print(_caption, _values)

transmission_time_Q1: [3.88, 3.69, 5.06, 5.32, 4.18, 4.26, 6.66, 2.79, 4.6, 5.21, 6.23, 5.79, 7.02, 4.61, 4.67, 5.16, 3.81, 5.02, 13.79, 10.41, 8.87, 4.7, 6.81, 5.59, 4.342499999999999, 5.52, 5.94, 4.4525, 7.43, 6.27, 7.09, 3.5, 6.34, 14.12, 6.29, 9.12, 5.73, 5.65, 7.29, 6.73, 4.07, 7.64, 4.6, 2.93, 4.14, 7.91, 3.48, 5.04, 6.73, 4.0, 4.09, 4.0, 4.11, 8.03, 8.7, 5.04, 4.16, 5.71, 7.3, 2.0, 5.21, 5.69, 7.44, 7.2, 12.14, 4.56, 6.18, 27.74, 7.35, 6.03, 8.63, 6.73, 8.6, 6.64, 6.09, 8.38, 8.03, 4.36, 8.24, 6.85, 2.84, 5.72, 3.67, 5.49, 5.76, 5.74, 3.92, 4.08, 5.24, 4.72, 4.99, 6.39, 5.29, 6.55, 10.3225, 9.32, 12.13, 4.95, 3.7, 8.96, 8.77, 9.82, 12.7, 7.96, 12.29, 6.66, 22.78, 18.89, 8.3, 12.69, 7.84, 6.76, 22.32, 5.61, 9.18, 6.67, 12.33, 8.42, 8.97, 8.36, 3.85, 5.75, 3.91, 7.04, 3.9, 5.64, 7.86, 2.68, 3.58, 3.7, 5.74, 2.28, 3.49, 2.57, 3.54, 2.94, 2.89, 2.94, 1.87, 2.31, 8.25, 5.9, 6.08, 7.98, 8.53, 5.62, 7.28, 6.16, 7.34, 5.29, 7.0, 5.32, 4.75, 7.35, 3.12, 6.83, 4.77, 5.59, 13.74, 3.33, 5.4

In [None]:
import statistics

# Features 19-22, one value per trace:
#   num_packets_per_second  - packet count divided by the last timestamp
#   mean_packets_per_second - mean of the timestamps
#   std_packets_per_second  - sample standard deviation of the timestamps
#   med_packets_per_second  - median of the timestamps
# NOTE: despite their names, the last three are statistics of the timestamp
# sequence itself; the names are kept because they become CSV column names.
for time_seq in X1:
    n_packets = len(time_seq)
    duration = time_seq[-1] if n_packets else 0.0

    # A trace whose last timestamp is 0 (or an empty trace) has no defined rate;
    # store 0.0 instead of raising ZeroDivisionError / IndexError.
    num_packets = n_packets / duration if duration > 0 else 0.0
    mean_packets = statistics.mean(time_seq) if n_packets else 0.0
    # statistics.stdev needs at least two data points.
    std_packets = statistics.stdev(time_seq) if n_packets > 1 else 0.0
    med_packets = statistics.median(time_seq) if n_packets else 0.0

    num_packets_per_second.append(num_packets)
    mean_packets_per_second.append(mean_packets)
    std_packets_per_second.append(std_packets)
    med_packets_per_second.append(med_packets)

print(f"Num Packets Per Second: {num_packets_per_second}")
print(f"Mean Packets Per Second: {mean_packets_per_second}")
print(f"Std Packets Per Second: {std_packets_per_second}")
print(f"Med Packets Per Second: {med_packets_per_second}")

Num Packets Per Second: [140.13806706114397, 50.98425196850393, 122.23222322232223, 108.23353293413174, 132.14285714285714, 52.04841713221602, 86.86868686868686, 158.5310734463277, 47.71573604060914, 45.66801619433198, 111.6078431372549, 46.285714285714285, 39.50877192982456, 135.2389878163074, 132.96703296703296, 47.018739352640544, 53.91969407265774, 133.05164319248826, 24.22145328719723, 40.94542926659715, 15.887850467289718, 97.05319556065825, 254.61460446247466, 282.98102981029814, 105.82226762002044, 93.31501831501832, 260.84549572219424, 103.00601202404809, 213.75545851528386, 251.65226232841889, 255.48811330298432, 310.644959298685, 34.746136865342166, 50.22244191794365, 122.32057416267943, 90.26720934347904, 276.1160714285714, 103.7111334002006, 249.60784313725492, 89.94633273703042, 591.0240202275601, 313.97970687711387, 363.1863882443929, 661.2903225806451, 541.9501133786848, 322.7862380146644, 621.2765957446809, 398.72773536895676, 281.1499703615886, 559.3896713615023, 577.

In [None]:
import pandas as pd
# Collect features 1-12 into a DataFrame, one column per feature.
# Note: this rebinds `data`, which previously held the loaded pickle.
data = dict(
    num_total_packets=num_total_packets,
    sum_packets=sum_packets,
    num_incoming_packets=num_incoming_packets,
    frac_incoming_packets=frac_incoming_packets,
    num_outgoing_packets=num_outgoing_packets,
    frac_outgoing_packets=frac_outgoing_packets,
    average_outgoing_ordering=average_outgoing_ordering,
    std_dev_outgoing_ordering=std_dev_outgoing_ordering,
    average_incoming_ordering=average_incoming_ordering,
    std_dev_incoming_ordering=std_dev_incoming_ordering,
    mean_of_the_sequence=mean_of_the_sequence,
    alternative_packets_per_second_sum=alternative_packets_per_second_sum,
)
df1 = pd.DataFrame(data)

print(df1.head())

   num_total_packets  sum_packets  num_incoming_packets  \
0               1421       727552                  1300   
1                518       265216                   438   
2               1358       695296                  1240   
3               1446       740352                  1324   
4               1406       719872                  1291   

   frac_incoming_packets  num_outgoing_packets  frac_outgoing_packets  \
0               0.914849                   121               0.085151   
1               0.845560                    80               0.154440   
2               0.913108                   118               0.086892   
3               0.915629                   122               0.084371   
4               0.918208                   115               0.081792   

   average_outgoing_ordering  std_dev_outgoing_ordering  \
0                 773.322314                 515.483953   
1                 226.162500                 139.231951   
2                 786.110169 

In [None]:
import pandas as pd
# Single-column DataFrame holding the label of every trace.
data = dict(label=y)
df2 = pd.DataFrame(data)

print(df2.head())

   label
0      0
1      0
2      0
3      0
4      0


In [None]:
# Write features 1-12 to the working directory, without the index column.
df1.to_csv(path_or_buf='mon_features.csv', index=False)

In [None]:
# Write the labels to the working directory, without the index column.
df2.to_csv(path_or_buf='mon_labels.csv', index=False)

In [None]:
import pandas as pd
# Features 1-22 in one DataFrame; column order follows the feature numbering.
data_modified = dict(
    num_total_packets=num_total_packets,                                    #1
    sum_packets=sum_packets,                                                #2
    num_incoming_packets=num_incoming_packets,                              #3
    frac_incoming_packets=frac_incoming_packets,                            #4
    num_outgoing_packets=num_outgoing_packets,                              #5
    frac_outgoing_packets=frac_outgoing_packets,                            #6
    average_outgoing_ordering=average_outgoing_ordering,                    #7
    std_dev_outgoing_ordering=std_dev_outgoing_ordering,                    #8
    average_incoming_ordering=average_incoming_ordering,                    #9
    std_dev_incoming_ordering=std_dev_incoming_ordering,                    #10
    mean_of_the_sequence=mean_of_the_sequence,                              #11
    alternative_packets_per_second_sum=alternative_packets_per_second_sum,  #12
    incoming_packets_first30=incoming_packets_first30,                      #13
    outgoing_packets_first30=outgoing_packets_first30,                      #14
    transmission_time_Q1=transmission_time_Q1,                              #15
    transmission_time_Q2=transmission_time_Q2,                              #16
    transmission_time_Q3=transmission_time_Q3,                              #17
    transmission_time_Q4=transmission_time_Q4,                              #18
    num_packets_per_second=num_packets_per_second,                          #19
    mean_packets_per_second=mean_packets_per_second,                        #20
    std_packets_per_second=std_packets_per_second,                          #21
    med_packets_per_second=med_packets_per_second,                          #22
)
df1_modified = pd.DataFrame(data_modified)

print(df1_modified.head())

   num_total_packets  sum_packets  num_incoming_packets  \
0               1421       727552                  1300   
1                518       265216                   438   
2               1358       695296                  1240   
3               1446       740352                  1324   
4               1406       719872                  1291   

   frac_incoming_packets  num_outgoing_packets  frac_outgoing_packets  \
0               0.914849                   121               0.085151   
1               0.845560                    80               0.154440   
2               0.913108                   118               0.086892   
3               0.915629                   122               0.084371   
4               0.918208                   115               0.081792   

   average_outgoing_ordering  std_dev_outgoing_ordering  \
0                 773.322314                 515.483953   
1                 226.162500                 139.231951   
2                 786.110169 

In [None]:
# Write features 1-22 to the working directory, without the index column.
df1_modified.to_csv(path_or_buf='mon_features_modified1.csv', index=False)

In [None]:
from pathlib import Path

# Reload the 22-feature table. The notebook writes 'mon_features_modified1.csv'
# to the working directory, while this cell originally read only a Drive copy
# that no visible cell creates. Prefer the freshly written local file and fall
# back to the Drive copy so the cell works both after a full run and when the
# feature computation is skipped.
_local_features_csv = Path('mon_features_modified1.csv')
_drive_features_csv = Path('/content/drive/MyDrive/ML_Project/mon_features_modified1.csv')
df1_modified1 = pd.read_csv(
    _local_features_csv if _local_features_csv.exists() else _drive_features_csv
)

In [None]:
import pandas as pd
# Features 23-30 (inter-arrival-time statistics) in one DataFrame.
# The original cell had '.' instead of ',' after the second entry (a syntax
# error) and printed df1_modified rather than the frame built here.
data_modified2 = {'incoming_max_inter_arrival_times': incoming_max_inter_arrival_times, #23
                  'incoming_mean_inter_arrival_times': incoming_mean_inter_arrival_times, #24
                  'incoming_std_inter_arrival_times': incoming_std_inter_arrival_times, #25
                  'incoming_third_quartileinter_arrival_times': incoming_third_quartileinter_arrival_times, #26
                  'outgoing_max_inter_arrival_times': outgoing_max_inter_arrival_times, #27
                  'outgoing_mean_inter_arrival_times': outgoing_mean_inter_arrival_times, #28
                  'outgoing_std_inter_arrival_times': outgoing_std_inter_arrival_times, #29
                  'outgoing_third_quartileinter_arrival_times': outgoing_third_quartileinter_arrival_times #30
                 }
df1_modified2 = pd.DataFrame(data_modified2)

print(df1_modified2.head())

2. unmon_standard10.pkl → arrays and feature extraction

In [None]:
import pickle

TOTAL_URLS = 10000  # number of traces read from the unmonitored dataset

# Read the unmonitored traces from Drive.
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
print("Loading datafile...")
with open('/content/drive/MyDrive/ML_Project/unmon_standard10.pkl의 사본', 'rb') as f:
    x = pickle.load(f)

size = len(x)
print(f'Total samples: {size}')

X1 = []  # one timestamp sequence per trace (absolute value of every stored entry)
X2 = []  # one direction sequence per trace, encoded as +512 / -512

# The pickle holds no packet sizes, so every packet is given the constant size
# 512 and only the sign of the stored entry is kept as the direction.
# An entry equal to 0 is mapped to -512.
for trace in (x[i] for i in range(TOTAL_URLS)):
    X1.append([abs(c) for c in trace])
    X2.append([512 if c > 0 else -512 for c in trace])

print(len(X1)) # Print the length of X1

Loading datafile...
Total samples: 10000
10000


In [None]:
# Features
# One list per feature, one entry per trace. The numbering below matches the
# feature numbers printed later in this notebook and the column order of the
# DataFrame built from these lists.
num_total_packets = [] # feature 1
sum_packets = [] # feature 2
num_incoming_packets = [] # feature 3
frac_incoming_packets = [] # feature 4
num_outgoing_packets = [] # feature 5
frac_outgoing_packets = [] # feature 6
average_outgoing_ordering = [] # feature 7
std_dev_outgoing_ordering = [] # feature 8
average_incoming_ordering = [] # feature 9
std_dev_incoming_ordering = [] # feature 10
mean_of_the_sequence = [] # feature 11
alternative_packets_per_second_sum = [] # feature 12

In [None]:
import numpy as np

# Number of packets per chunk used for the "mean of the sequence" feature.
PACKETS_PER_CHUNK = 20

# Compute the count, fraction, ordering and chunk features for every trace in X2
# and append one value per trace to each feature list.
for size_seq in X2:
    total = len(size_seq)

    # Positions (0-based) of incoming (< 0) and outgoing (> 0) packets; the
    # per-direction counts are simply the lengths of these lists.
    incoming_ordering_list = [idx for idx, size in enumerate(size_seq) if size < 0]
    outgoing_ordering_list = [idx for idx, size in enumerate(size_seq) if size > 0]
    incoming = len(incoming_ordering_list)
    outgoing = len(outgoing_ordering_list)

    # Total number of packets
    num_total_packets.append(total)

    # Sum of packet sizes (absolute value gives the size regardless of direction)
    sum_packets.append(sum(abs(size) for size in size_seq))

    # Incoming packets: count and fraction of the total (0 for an empty trace)
    num_incoming_packets.append(incoming)
    frac_incoming_packets.append(incoming / total if total > 0 else 0)

    # Outgoing packets: count and fraction of the total (0 for an empty trace)
    num_outgoing_packets.append(outgoing)
    frac_outgoing_packets.append(outgoing / total if total > 0 else 0)

    # Mean / standard deviation of the incoming packet positions.
    # With no incoming packets the value is NaN, which is what np.mean / np.std
    # return for an empty list, but stored here without the RuntimeWarning.
    average_incoming_ordering.append(np.mean(incoming_ordering_list) if incoming else np.nan)
    std_dev_incoming_ordering.append(np.std(incoming_ordering_list) if incoming else np.nan)

    # Mean / standard deviation of the outgoing packet positions (NaN when none).
    average_outgoing_ordering.append(np.mean(outgoing_ordering_list) if outgoing else np.nan)
    std_dev_outgoing_ordering.append(np.std(outgoing_ordering_list) if outgoing else np.nan)

    # "Mean of the sequence": split the trace into chunks of PACKETS_PER_CHUNK
    # packets, count outgoing packets per chunk, drop chunks with none, and
    # average the remaining counts.
    chunks = [size_seq[i:i + PACKETS_PER_CHUNK] for i in range(0, total, PACKETS_PER_CHUNK)]
    outgoing_counts = [sum(1 for elem in chunk if elem > 0) for chunk in chunks]
    filtered_array = [element for element in outgoing_counts if element != 0]
    # A trace without any outgoing packet leaves nothing to average; store 0.0
    # instead of dividing by zero.
    average_outgoing_counts = sum(filtered_array) / len(filtered_array) if filtered_array else 0.0
    mean_of_the_sequence.append(average_outgoing_counts)

In [None]:
# 12. Sum of alternative number packets per second
# Each trace is cut into up to NUM_SUBSETS consecutive, equally sized subsets;
# for every subset the packet rate (packets / elapsed time) is computed and the
# rates are summed. Packets left over after NUM_SUBSETS full subsets are ignored.
NUM_SUBSETS = 20
MIN_TIME_INTERVAL = 0.001  # lower bound so a zero-length interval never divides by zero

alternative_packets_per_second_sum = []

for time_seq, size_seq in zip(X1, X2):
    n_packets = len(size_seq)
    # Traces shorter than NUM_SUBSETS packets would give a subset size of 0 and
    # make range() raise ValueError; fall back to one packet per subset.
    packets_per_subset = max(n_packets // NUM_SUBSETS, 1)
    # Only the part of the trace covered by full subsets is used.
    covered = min(n_packets, packets_per_subset * NUM_SUBSETS)
    subset_sums = []

    for start_index in range(0, covered, packets_per_subset):
        end_index = start_index + packets_per_subset

        time_interval = time_seq[end_index - 1] - time_seq[start_index]
        time_interval = max(time_interval, MIN_TIME_INTERVAL)

        subset_sums.append((end_index - start_index) / time_interval)

    alternative_packets_per_second_sum.append(sum(subset_sums))

In [None]:
# Print features 1-6 in full, one line per feature.
_features_1_to_6 = (
    num_total_packets,
    sum_packets,
    num_incoming_packets,
    frac_incoming_packets,
    num_outgoing_packets,
    frac_outgoing_packets,
)
for _number, _values in enumerate(_features_1_to_6, start=1):
    print(f"feature {_number}: {_values}")

feature 1: [130, 9927, 359, 9948, 2508, 5430, 1328, 4463, 4470, 9971, 9981, 9410, 1519, 9956, 6935, 1054, 3706, 9948, 597, 55, 4820, 3663, 4872, 304, 6096, 3235, 94, 5061, 9956, 3140, 166, 441, 348, 486, 4356, 1110, 7959, 7103, 2672, 8943, 9974, 9845, 3111, 9952, 57, 3447, 2584, 9464, 9848, 382, 7153, 7815, 4311, 4822, 9950, 1214, 9976, 3863, 9989, 2509, 9977, 7571, 3058, 4375, 9984, 7487, 1688, 79, 4413, 3756, 9985, 2049, 5331, 4420, 8069, 9894, 5434, 4112, 86, 1784, 6124, 361, 9961, 9954, 5112, 390, 6702, 129, 1488, 9967, 5140, 9984, 1841, 3425, 8193, 780, 6637, 373, 7746, 2797, 1610, 547, 1022, 8137, 2968, 3299, 7723, 9980, 87, 3133, 125, 8494, 6627, 1449, 1985, 4513, 7224, 3311, 5258, 9971, 963, 160, 6927, 3539, 9969, 80, 3579, 8010, 8761, 199, 161, 572, 5020, 9928, 235, 5451, 1519, 2236, 100, 1232, 1479, 9964, 6112, 1883, 363, 4027, 3219, 9924, 2979, 488, 2069, 4414, 4631, 5020, 4199, 457, 9958, 3925, 2976, 9965, 2200, 1504, 5378, 6227, 441, 9951, 5005, 1887, 3572, 9975, 2420, 778

In [None]:
# Print features 7-12 in full, one line per feature.
_features_7_to_12 = (
    average_outgoing_ordering,
    std_dev_outgoing_ordering,
    average_incoming_ordering,
    std_dev_incoming_ordering,
    mean_of_the_sequence,
    alternative_packets_per_second_sum,
)
for _number, _values in enumerate(_features_7_to_12, start=7):
    print(f"feature {_number}: {_values}")

feature 7: [36.758620689655174, 3845.2466124661246, 155.90540540540542, 4667.994454713494, 1227.269624573379, 2483.4398340248963, 585.4, 1604.1443298969073, 2020.4520123839009, 4250.128504672897, 4167.504132231405, 4193.98687664042, 727.8636363636364, 3924.425113464448, 2636.0591133004928, 363.7464788732394, 1590.139534883721, 4353.156193895871, 196.0, 21.25, 1899.6074498567336, 1433.3942857142856, 2145.5634218289088, 100.98214285714286, 2680.3672654690617, 1423.25, 30.52173913043478, 1813.5821596244132, 3894.833027522936, 1235.6623376623377, 61.2, 163.50632911392404, 191.2058823529412, 243.36764705882354, 1844.3111111111111, 472.1333333333333, 4635.222680412371, 3339.3333333333335, 1210.1192982456141, 4250.0301724137935, 4467.924657534247, 5789.548929663609, 1561.576923076923, 3583.1421487603307, 29.857142857142858, 1693.7814960629921, 845.0138888888889, 4967.5424688561725, 5314.505843071786, 156.42857142857142, 3718.358823529412, 3344.5412621359224, 2278.5449735449733, 2137.403755868

In [None]:
# Fresh, independent lists for features 13-18 (one entry per trace is appended later).
(
    incoming_packets_first30,  # feature 13: incoming packets among the first 30
    outgoing_packets_first30,  # feature 14: outgoing packets among the first 30
    transmission_time_Q1,      # feature 15: 25th percentile of the timestamps
    transmission_time_Q2,      # feature 16: 50th percentile of the timestamps
    transmission_time_Q3,      # feature 17: 75th percentile of the timestamps
    transmission_time_Q4,      # feature 18: 100th percentile of the timestamps
) = ([] for _ in range(6))

In [None]:
# Fresh, independent lists for features 19-24.
# NOTE(review): no visible cell appends to the min/max lists - confirm whether
# they are still needed.
(
    num_packets_per_second,   # feature 19
    mean_packets_per_second,  # feature 20
    std_packets_per_second,   # feature 21
    min_packets_per_second,   # feature 22
    max_packets_per_second,   # feature 23
    med_packets_per_second,   # feature 24
) = ([] for _ in range(6))

In [None]:
# Features 13-14: how many of the first 30 packets of each trace are
# incoming (< 0) and how many are outgoing (> 0).
for size_seq in X2:
    leading_packets = size_seq[:30]
    incoming_packets_first30.append(len([s for s in leading_packets if s < 0]))
    outgoing_packets_first30.append(len([s for s in leading_packets if s > 0]))

print("incoming_packets_first30:", incoming_packets_first30)
print("outgoing_packets_first30:", outgoing_packets_first30)

incoming_packets_first30: [17, 18, 17, 21, 27, 27, 18, 19, 20, 17, 20, 22, 26, 21, 21, 21, 20, 25, 18, 22, 21, 20, 22, 18, 20, 20, 19, 18, 20, 21, 18, 19, 18, 24, 16, 17, 19, 20, 16, 22, 21, 21, 22, 20, 19, 16, 20, 16, 20, 21, 20, 19, 21, 22, 21, 19, 24, 22, 19, 17, 20, 16, 22, 21, 21, 19, 25, 21, 19, 21, 19, 22, 20, 27, 21, 19, 27, 19, 17, 19, 24, 16, 16, 22, 20, 27, 22, 19, 20, 21, 19, 17, 21, 19, 21, 20, 19, 17, 21, 19, 21, 20, 20, 20, 16, 22, 21, 21, 21, 18, 21, 20, 21, 27, 20, 20, 21, 19, 25, 19, 21, 18, 17, 16, 21, 21, 21, 20, 22, 21, 22, 16, 19, 20, 18, 22, 27, 27, 18, 19, 20, 27, 27, 22, 20, 21, 22, 19, 20, 19, 20, 22, 21, 22, 21, 19, 26, 21, 18, 25, 19, 22, 20, 21, 18, 21, 19, 16, 16, 17, 17, 20, 17, 25, 19, 26, 17, 20, 22, 22, 26, 20, 20, 18, 25, 21, 20, 21, 20, 21, 21, 22, 20, 20, 21, 27, 21, 17, 20, 17, 16, 20, 21, 18, 27, 17, 25, 19, 21, 22, 20, 22, 19, 20, 20, 16, 20, 27, 18, 22, 21, 20, 17, 17, 21, 17, 21, 20, 26, 20, 21, 17, 17, 19, 20, 22, 22, 20, 20, 19, 27, 19, 20, 2

In [None]:
# Features 15-18: 25th / 50th / 75th / 100th percentile of each trace's timestamps.
for time_seq in X1:
    q25, q50, q75, q100 = np.percentile(time_seq, [25, 50, 75, 100])

    transmission_time_Q1.append(q25)
    transmission_time_Q2.append(q50)
    transmission_time_Q3.append(q75)
    transmission_time_Q4.append(q100)

for _caption, _values in (
    ("transmission_time_Q1:", transmission_time_Q1),
    ("transmission_time_Q2:", transmission_time_Q2),
    ("transmission_time_Q3:", transmission_time_Q3),
    ("transmission_time_Q4:", transmission_time_Q4),
):
    print(_caption, _values)

transmission_time_Q1: [2.92, 9.59, 7.525, 6.84, 7.4, 4.35, 3.32, 8.2, 7.29, 7.52, 9.24, 5.07, 6.66, 26.89, 15.27, 1.75, 5.7, 7.99, 6.45, 3.35, 9.51, 7.0, 13.035, 4.37, 7.0, 20.34, 2.71, 6.7, 6.62, 9.03, 2.62, 8.29, 4.22, 5.09, 14.43, 3.89, 7.62, 4.405, 7.23, 8.98, 11.56, 6.65, 8.87, 11.56, 2.98, 12.6, 11.69, 8.21, 9.86, 6.49, 10.34, 9.35, 5.68, 8.58, 12.79, 3.45, 8.86, 6.015, 5.55, 8.29, 10.95, 4.87, 6.31, 4.87, 6.58, 8.19, 6.41, 2.7, 23.65, 4.1, 15.35, 6.06, 5.7, 5.5, 6.44, 7.17, 5.6, 8.727500000000001, 2.98, 8.0, 6.22, 8.3, 6.56, 9.47, 6.28, 3.53, 8.88, 3.21, 5.047499999999999, 7.45, 7.41, 21.76, 6.08, 7.14, 8.28, 5.26, 8.32, 5.94, 4.35, 8.48, 6.51, 10.385000000000002, 9.26, 7.4, 8.21, 4.54, 7.05, 8.68, 1.52, 9.56, 3.23, 7.085, 6.59, 5.12, 9.8, 8.95, 7.97, 13.8, 9.32, 5.37, 4.64, 2.1375, 4.51, 3.37, 17.79, 4.59, 4.14, 6.61, 7.16, 4.62, 4.97, 4.58, 8.06, 6.07, 3.725, 8.42, 8.86, 4.49, 2.97, 10.16, 9.36, 9.19, 5.96, 14.25, 5.11, 5.16, 5.12, 8.97, 10.01, 8.59, 12.89, 7.33, 18.03, 4.11, 

In [None]:
import statistics

# Per-trace timestamp statistics, one value per trace:
#   num_packets_per_second  - packet count divided by the last timestamp
#   mean_packets_per_second - mean of the timestamps
#   std_packets_per_second  - sample standard deviation of the timestamps
#   med_packets_per_second  - median of the timestamps
# NOTE: despite their names, the last three are statistics of the timestamp
# sequence itself; the names are kept so other cells can keep using them.
for time_seq in X1:
    n_packets = len(time_seq)
    duration = time_seq[-1] if n_packets else 0.0

    # A trace whose last timestamp is 0 (or an empty trace) has no defined rate;
    # store 0.0 instead of raising ZeroDivisionError / IndexError.
    num_packets = n_packets / duration if duration > 0 else 0.0
    mean_packets = statistics.mean(time_seq) if n_packets else 0.0
    # statistics.stdev needs at least two data points.
    std_packets = statistics.stdev(time_seq) if n_packets > 1 else 0.0
    med_packets = statistics.median(time_seq) if n_packets else 0.0

    num_packets_per_second.append(num_packets)
    mean_packets_per_second.append(mean_packets)
    std_packets_per_second.append(std_packets)
    med_packets_per_second.append(med_packets)

print(f"Num Packets Per Second: {num_packets_per_second}")
print(f"Mean Packets Per Second: {mean_packets_per_second}")
print(f"Std Packets Per Second: {std_packets_per_second}")
print(f"Med Packets Per Second: {med_packets_per_second}")

Num Packets Per Second: [23.38129496402878, 835.6060606060605, 20.157215047725998, 1084.8418756815704, 46.79977607762642, 428.2334384858044, 52.28346456692913, 316.5248226950355, 106.75901600191068, 1024.7687564234327, 808.1781376518219, 375.4988028731046, 128.6198137171888, 251.16044399596367, 141.55950193917124, 30.972671172494856, 222.71634615384616, 729.3255131964809, 65.38882803943045, 11.247443762781186, 90.49943672549756, 251.23456790123456, 122.75132275132276, 54.97287522603978, 224.44771723122238, 89.78628920344157, 22.38095238095238, 605.3827751196172, 1036.0041623309053, 107.42387957577832, 30.74074074074074, 19.6524064171123, 12.798823096726737, 35.140997830802604, 87.76949425750554, 130.89622641509433, 193.1796116504854, 304.7190047190047, 70.98831030818279, 220.21669539522284, 652.7486910994764, 424.9028916702632, 80.20108275328693, 646.6536712150747, 1.046639735585751, 121.03230337078652, 78.70849832470302, 203.921568627451, 411.53363978269954, 13.531703861140631, 161.50

In [None]:
import pandas as pd
# Collect the first twelve per-trace feature lists into a column mapping
# (keyword order defines the column order) and wrap it in a DataFrame.
data = dict(
    # packet counts and direction shares
    num_total_packets=num_total_packets,
    sum_packets=sum_packets,
    num_incoming_packets=num_incoming_packets,
    frac_incoming_packets=frac_incoming_packets,
    num_outgoing_packets=num_outgoing_packets,
    frac_outgoing_packets=frac_outgoing_packets,
    # packet-ordering statistics
    average_outgoing_ordering=average_outgoing_ordering,
    std_dev_outgoing_ordering=std_dev_outgoing_ordering,
    average_incoming_ordering=average_incoming_ordering,
    std_dev_incoming_ordering=std_dev_incoming_ordering,
    # sequence-level aggregates
    mean_of_the_sequence=mean_of_the_sequence,
    alternative_packets_per_second_sum=alternative_packets_per_second_sum,
)
df1 = pd.DataFrame(data)

# Quick sanity check of the first rows.
print(df1.head())

   num_total_packets  sum_packets  num_incoming_packets  \
0                130        66560                   101   
1               9927      5082624                  9189   
2                359       183808                   285   
3               9948      5093376                  9407   
4               2508      1284096                  2215   

   frac_incoming_packets  num_outgoing_packets  frac_outgoing_packets  \
0               0.776923                    29               0.223077   
1               0.925657                   738               0.074343   
2               0.793872                    74               0.206128   
3               0.945617                   541               0.054383   
4               0.883174                   293               0.116826   

   average_outgoing_ordering  std_dev_outgoing_ordering  \
0                  36.758621                  27.053784   
1                3845.246612                2885.461055   
2                 155.905405 

In [None]:
import pandas as pd

# Label table that accompanies the feature table df1: every row gets the
# constant label -1 (presumably "unmonitored", going by the CSV file names
# these frames are written to -- confirm).
#
# The label count is taken from df1 instead of a hard-coded 10000 so the
# label rows always line up one-to-one with the feature rows.
#
# NOTE(review): this rebinds `y` and `data`, names that the loading cell at
# the top of the notebook also assigns; re-run that cell before reusing them.
y = [-1] * len(df1)
data = {'label': y}
df2 = pd.DataFrame(data)

print(df2.head())

   label
0     -1
1     -1
2     -1
3     -1
4     -1


In [None]:
# Write the feature table to unmon_features.csv (relative to the current
# working directory), omitting the row index.
df1.to_csv(path_or_buf='unmon_features.csv', index=False)

In [None]:
# Write the label table to unmon_labels.csv (relative to the current
# working directory), omitting the row index.
df2.to_csv(path_or_buf='unmon_labels.csv', index=False)

In [None]:
import pandas as pd
# Column mapping for the extended feature table: 22 per-trace feature lists,
# keyword order defines the column order.
data_modified = dict(
    # 1-6: packet counts and direction shares
    num_total_packets=num_total_packets,
    sum_packets=sum_packets,
    num_incoming_packets=num_incoming_packets,
    frac_incoming_packets=frac_incoming_packets,
    num_outgoing_packets=num_outgoing_packets,
    frac_outgoing_packets=frac_outgoing_packets,
    # 7-10: packet-ordering statistics
    average_outgoing_ordering=average_outgoing_ordering,
    std_dev_outgoing_ordering=std_dev_outgoing_ordering,
    average_incoming_ordering=average_incoming_ordering,
    std_dev_incoming_ordering=std_dev_incoming_ordering,
    # 11-12: sequence-level aggregates
    mean_of_the_sequence=mean_of_the_sequence,
    alternative_packets_per_second_sum=alternative_packets_per_second_sum,
    # 13-14: direction counts over the first 30 packets
    incoming_packets_first30=incoming_packets_first30,
    outgoing_packets_first30=outgoing_packets_first30,
    # 15-18: transmission-time quartile features
    transmission_time_Q1=transmission_time_Q1,
    transmission_time_Q2=transmission_time_Q2,
    transmission_time_Q3=transmission_time_Q3,
    transmission_time_Q4=transmission_time_Q4,
    # 19-22: timing features computed from the timestamp sequences
    num_packets_per_second=num_packets_per_second,
    mean_packets_per_second=mean_packets_per_second,
    std_packets_per_second=std_packets_per_second,
    med_packets_per_second=med_packets_per_second,
)
df1_modified = pd.DataFrame(data_modified)

# Quick sanity check of the first rows.
print(df1_modified.head())

   num_total_packets  sum_packets  num_incoming_packets  \
0                130        66560                   101   
1               9927      5082624                  9189   
2                359       183808                   285   
3               9948      5093376                  9407   
4               2508      1284096                  2215   

   frac_incoming_packets  num_outgoing_packets  frac_outgoing_packets  \
0               0.776923                    29               0.223077   
1               0.925657                   738               0.074343   
2               0.793872                    74               0.206128   
3               0.945617                   541               0.054383   
4               0.883174                   293               0.116826   

   average_outgoing_ordering  std_dev_outgoing_ordering  \
0                  36.758621                  27.053784   
1                3845.246612                2885.461055   
2                 155.905405 

In [None]:
# Write the extended feature table to unmon_features_modified.csv (relative
# to the current working directory), omitting the row index.
df1_modified.to_csv(path_or_buf='unmon_features_modified.csv', index=False)