In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


1. mon_standard.pkl > array code



In [2]:
import pickle

USE_SUBLABEL = False
URL_PER_SITE = 10
TOTAL_URLS   = 950

# Load the pickle file
print("Loading datafile...")
with open("/content/drive/MyDrive/mon_standard.pkl", 'rb') as fi: # Path to mon_standard.pkl in Colab
    data = pickle.load(fi)

X1 = [] # Array to store instances (timestamps) - 19,000 instances, e.g., [[0.0, 0.5, 3.4, ...], [0.0, 4.5, ...], [0.0, 1.5, ...], ... [... ,45.8]]
X2 = [] # Array to store instances (direction*size) - size information
y = [] # Array to store the site of each instance - 19,000 instances, e.g., [0, 0, 0, 0, 0, 0, ..., 94, 94, 94, 94, 94]

# Differentiate instances and sites, and store them in the respective x and y arrays
# x array (direction*timestamp), y array (site label)
for i in range(TOTAL_URLS):
    if USE_SUBLABEL:
        label = i
    else:
        label = i // URL_PER_SITE # Calculate which site's URL the current URL being processed belongs to and set that value as the label. Thus, URLs fetched from the same site are labeled identically.
    for sample in data[i]:
        size_seq = []
        time_seq = []
        for c in sample:
            dr = 1 if c > 0 else -1
            time_seq.append(abs(c))
            size_seq.append(dr * 512)
        X1.append(time_seq)
        X2.append(size_seq)
        y.append(label)
size = len(y)

print(f'Total samples: {size}') # Output: 19000


Loading datafile...
Total samples: 19000


In [3]:
# Features
num_total_packets = [] # feature 1
sum_packets = [] # feature 2
num_incoming_packets = [] # feature 3
frac_incoming_packets = [] # feature 4
num_outgoing_packets=[] # feature 5
frac_outgoing_packets=[] # feature 6
average_incoming_ordering=[] # feature 7
std_dev_incoming_ordering=[] # feature 8
average_outgoing_ordering=[] # feature 9
std_dev_outgoing_ordering=[] # feature 10
mean_of_the_sequence=[] # feature 11
alternative_packets_per_second_sum = [] # feature 12

In [4]:
import numpy as np

for size_seq in X2:
    # 1. Total number of pacekts
    total = len(size_seq)
    num_total_packets.append(total)

    # 2. Sum of packets (absolute value gives the size regardless of direction)
    sum_pckts = sum(abs(size) for size in size_seq)
    sum_packets.append(sum_pckts)

    # 3. Number of incoming packets
    incoming = sum(1 for size in size_seq if size < 0)
    num_incoming_packets.append(incoming)

    # 4. Number of incoming packets as a fraction of the total number of packets
    frac_incoming = incoming / total if total > 0 else 0
    frac_incoming_packets.append(frac_incoming)

    # 5. Number of outgoing packets
    outgoing = sum(1 for size in size_seq if size > 0)
    num_outgoing_packets.append(outgoing)

    # 6. Number of outgoing packets as a fraction of the total number of packets
    frac_outgoing = outgoing / total if total > 0 else 0
    frac_outgoing_packets.append(frac_outgoing)

    # 7. Average of the incoming packet ordering list
    incoming_ordering_list = []
    for idx, size in enumerate(size_seq):
        if size < 0:
            incoming_ordering_list.append(idx)

    average_incoming_ordering.append(np.mean(incoming_ordering_list))

    # 8. Standard deviation of the incoming packet ordering list
    std_dev_incoming_ordering.append(np.std(incoming_ordering_list))

    # 9. Average of the outgoing packet ordering list
    outgoing_ordering_list = []
    for idx, size in enumerate(size_seq):
        if size > 0:
            outgoing_ordering_list.append(idx)
    average_outgoing_ordering.append(np.mean(outgoing_ordering_list))

    # 10. Standard deviation of the outgoing packet ordering list
    std_dev_outgoing_ordering.append(np.std(outgoing_ordering_list))

    # 11. mean of the sequence
    chunks = [size_seq[i:i + 20] for i in range(0, len(size_seq), 20)]
    outgoing_counts = [sum(1 for elem in chunk if elem > 0) for chunk in chunks]
    filtered_array = [element for element in outgoing_counts if element != 0]
    average_outgoing_counts = sum(filtered_array) / len(filtered_array)
    mean_of_the_sequence.append(average_outgoing_counts)

In [5]:
# 12. Sum of alternative number packets per second
alternative_packets_per_second_sum = []

for time_seq, size_seq in zip(X1, X2):
    packets_per_subset = len(size_seq) // 20 # 20 even-sized subsets
    subset_sums = []

    for i in range(0, len(size_seq), packets_per_subset):
        start_index = i
        end_index = min(i + packets_per_subset, len(size_seq))

        time_interval = time_seq[end_index - 1] - time_seq[start_index]
        time_interval = max(time_interval, 0.001) # Ensure that the time interval does not become zero.

        packets_per_second = (end_index - start_index) / time_interval
        subset_sums.append(packets_per_second)

        if (end_index == packets_per_subset*20):
            break

    alternative_packets_per_second_sum.append(sum(subset_sums))

In [6]:
import pandas as pd

data = {
    'feature 1': num_total_packets,
    'feature 2': sum_packets,
    'feature 3': num_incoming_packets,
    'feature 4': frac_incoming_packets,
    'feature 5': num_outgoing_packets,
    'feature 6': frac_outgoing_packets
}

df = pd.DataFrame(data)
print(df.head())


   feature 1  feature 2  feature 3  feature 4  feature 5  feature 6
0       1421     727552       1300   0.914849        121   0.085151
1        518     265216        438   0.845560         80   0.154440
2       1358     695296       1240   0.913108        118   0.086892
3       1446     740352       1324   0.915629        122   0.084371
4       1406     719872       1291   0.918208        115   0.081792


In [7]:
data1 = {
    'feature 7': average_outgoing_ordering,
    'feature 8': std_dev_outgoing_ordering,
    'feature 9': average_incoming_ordering,
    'feature 10': std_dev_incoming_ordering,
    'feature 11': mean_of_the_sequence,
    'feature 12': alternative_packets_per_second_sum
}

df = pd.DataFrame(data1)
print(df.head())

    feature 7   feature 8   feature 9  feature 10  feature 11     feature 12
0  773.322314  515.483953  704.106154  398.486335    2.630435   25281.847992
1  226.162500  139.231951  264.406393  150.591146    3.809524   34200.937813
2  786.110169  472.735508  668.259677  381.878642    2.744186  107531.177098
3  820.139344  513.916038  713.503021  406.205288    2.595745   23241.614790
4  789.608696  503.993490  694.740511  395.028889    2.500000   55080.330639


In [8]:
print(len(num_total_packets))
print(len(sum_packets))
print(len(num_incoming_packets))
print(len(frac_incoming_packets))
print(len(num_outgoing_packets))
print(len(frac_outgoing_packets))
print(len(average_outgoing_ordering))
print(len(std_dev_outgoing_ordering))
print(len(average_incoming_ordering))
print(len(std_dev_incoming_ordering))
print(len(mean_of_the_sequence))
print(len(alternative_packets_per_second_sum))

19000
19000
19000
19000
19000
19000
19000
19000
19000
19000
19000
19000


In [9]:
incoming_packets_first30 =[] # feature 13
outgoing_packets_first30 = [] # feature 14
transmission_time_Q1 = [] # feature 15
transmission_time_Q2 = [] # feature 16
transmission_time_Q3 = [] # feature 17
transmission_time_Q4 = [] # feature 18

In [10]:
num_packets_per_second = [] # feature 19
mean_packets_per_second = [] # feature 20
std_packets_per_second = [] # feature 21
med_packets_per_second = [] # feature 22

In [11]:
# RAM 부족으로 이용X
incoming_max_inter_arrival_times = [] # feature 23
incoming_mean_inter_arrival_times = [] # feature 24
incoming_std_inter_arrival_times = [] # feature 25
incoming_third_quartileinter_arrival_times = [] # feature 26
outgoing_max_inter_arrival_times = [] # feature 27
outgoing_mean_inter_arrival_times = [] # feature 28
outgoing_std_inter_arrival_times = [] # feature 29
outgoing_third_quartileinter_arrival_times = [] # feature 30

In [12]:
for size_seq in X2:
    incoming_packets = sum(1 for size in size_seq[:30] if size < 0)
    incoming_packets_first30.append(incoming_packets)

    outgoing_packets = sum(1 for size in size_seq[:30] if size > 0)
    outgoing_packets_first30.append(outgoing_packets)

packets_first30 = {
    'incoming_packets_first30': incoming_packets_first30,
    'outgoing_packets_first30': outgoing_packets_first30
}

df = pd.DataFrame(packets_first30)
print(df.head(30))
print(len(incoming_packets_first30))
print(len(outgoing_packets_first30))

    incoming_packets_first30  outgoing_packets_first30
0                         21                         9
1                         22                         8
2                         23                         7
3                         21                         9
4                         22                         8
5                         22                         8
6                         23                         7
7                         22                         8
8                         22                         8
9                         22                         8
10                        22                         8
11                        22                         8
12                        22                         8
13                        22                         8
14                        22                         8
15                        22                         8
16                        23                         7
17        

In [13]:
for i in range(len(X1)):
    Q1 = np.percentile(X1[i], 25)
    Q2 = np.percentile(X1[i], 50)
    Q3 = np.percentile(X1[i], 75)
    Q4 = np.percentile(X1[i], 100)

    transmission_time_Q1.append(Q1)
    transmission_time_Q2.append(Q2)
    transmission_time_Q3.append(Q3)
    transmission_time_Q4.append(Q4)

transmission_time = {
    'transmission_time_Q1': transmission_time_Q1,
    'transmission_time_Q2': transmission_time_Q2,
    'transmission_time_Q3': transmission_time_Q3,
    'transmission_time_Q4': transmission_time_Q4,
}

df = pd.DataFrame(transmission_time)
print(df.head())

   transmission_time_Q1  transmission_time_Q2  transmission_time_Q3  \
0                  3.88                  4.09                  4.69   
1                  3.69                  4.80                  5.28   
2                  5.06                  5.15                  5.79   
3                  5.32                  5.57                  6.40   
4                  4.18                  4.31                  5.14   

   transmission_time_Q4  
0                 10.14  
1                 10.16  
2                 11.11  
3                 13.36  
4                 10.64  


In [14]:
import statistics

for i in range(len(X1)):
    num_packets = len(X1[i]) / X1[i][-1]
    mean_packets = statistics.mean(X1[i])
    std_packets = statistics.stdev(X1[i])
    med_packets = statistics.median(X1[i])

    num_packets_per_second.append(num_packets)
    mean_packets_per_second.append(mean_packets)
    std_packets_per_second.append(std_packets)
    med_packets_per_second.append(med_packets)

data_per_second = {
    'Num Packets Per Second': num_packets_per_second,
    'Mean Packets Per Second': mean_packets_per_second,
    'Std Packets Per Second': std_packets_per_second,
    'Med Packets Per Second': med_packets_per_second,
}

df = pd.DataFrame(data_per_second)
print(df.head())

   Num Packets Per Second  Mean Packets Per Second  Std Packets Per Second  \
0              140.138067                 4.315749                1.128739   
1               50.984252                 4.611815                1.371935   
2              122.232223                 5.488969                1.179699   
3              108.233533                 5.995512                1.709479   
4              132.142857                 4.698869                1.297546   

   Med Packets Per Second  
0                    4.09  
1                    4.80  
2                    5.15  
3                    5.57  
4                    4.31  


In [15]:
data = {'num_total_packets': num_total_packets,
        'sum_packets': sum_packets,
        'num_incoming_packets': num_incoming_packets,
        'frac_incoming_packets': frac_incoming_packets,
        'num_outgoing_packets': num_outgoing_packets,
        'frac_outgoing_packets': frac_outgoing_packets,
        'average_outgoing_ordering': average_outgoing_ordering,
        'std_dev_outgoing_ordering': std_dev_outgoing_ordering,
        'average_incoming_ordering': average_incoming_ordering,
        'std_dev_incoming_ordering': std_dev_incoming_ordering,
        'mean_of_the_sequence': mean_of_the_sequence,
        'alternative_packets_per_second_sum': alternative_packets_per_second_sum}
df1 = pd.DataFrame(data)

print(df1.head())

   num_total_packets  sum_packets  num_incoming_packets  \
0               1421       727552                  1300   
1                518       265216                   438   
2               1358       695296                  1240   
3               1446       740352                  1324   
4               1406       719872                  1291   

   frac_incoming_packets  num_outgoing_packets  frac_outgoing_packets  \
0               0.914849                   121               0.085151   
1               0.845560                    80               0.154440   
2               0.913108                   118               0.086892   
3               0.915629                   122               0.084371   
4               0.918208                   115               0.081792   

   average_outgoing_ordering  std_dev_outgoing_ordering  \
0                 773.322314                 515.483953   
1                 226.162500                 139.231951   
2                 786.110169 

In [16]:
data = {'label': y}
df2 = pd.DataFrame(data)

print(df2.head())

   label
0      0
1      0
2      0
3      0
4      0


In [17]:
df1.to_csv('mon_features.csv', index=False)

In [18]:
df2.to_csv('mon_labels.csv', index=False)

In [19]:
import pandas as pd
data_modified = {'num_total_packets': num_total_packets, #1
                 'sum_packets': sum_packets, #2
                 'num_incoming_packets': num_incoming_packets, #3
                 'frac_incoming_packets': frac_incoming_packets, #4
                 'num_outgoing_packets': num_outgoing_packets, #5
                 'frac_outgoing_packets': frac_outgoing_packets, #6
                 'average_outgoing_ordering': average_outgoing_ordering, #7
                 'std_dev_outgoing_ordering': std_dev_outgoing_ordering, #8
                 'average_incoming_ordering': average_incoming_ordering, #9
                 'std_dev_incoming_ordering': std_dev_incoming_ordering, #10
                 'mean_of_the_sequence': mean_of_the_sequence, #11
                 'alternative_packets_per_second_sum': alternative_packets_per_second_sum, #12
                 'incoming_packets_first30': incoming_packets_first30, #13
                 'outgoing_packets_first30': outgoing_packets_first30, #14
                 'transmission_time_Q1': transmission_time_Q1, #15
                 'transmission_time_Q2': transmission_time_Q2, #16
                 'transmission_time_Q3': transmission_time_Q3, #17
                 'transmission_time_Q4': transmission_time_Q4, #18
                 'num_packets_per_second': num_packets_per_second, #19
                 'mean_packets_per_second': mean_packets_per_second, #20
                 'std_packets_per_second': std_packets_per_second, #21
                 'med_packets_per_second': med_packets_per_second #22
                 }

df1_modified = pd.DataFrame(data_modified)

print(df1_modified.head())

   num_total_packets  sum_packets  num_incoming_packets  \
0               1421       727552                  1300   
1                518       265216                   438   
2               1358       695296                  1240   
3               1446       740352                  1324   
4               1406       719872                  1291   

   frac_incoming_packets  num_outgoing_packets  frac_outgoing_packets  \
0               0.914849                   121               0.085151   
1               0.845560                    80               0.154440   
2               0.913108                   118               0.086892   
3               0.915629                   122               0.084371   
4               0.918208                   115               0.081792   

   average_outgoing_ordering  std_dev_outgoing_ordering  \
0                 773.322314                 515.483953   
1                 226.162500                 139.231951   
2                 786.110169 

In [20]:
df1_modified.to_csv('mon_features_modified1.csv', index=False)

In [22]:
df1_modified1=pd.read_csv('/content/drive/MyDrive/mon_features_modified1.csv')

In [23]:
import pandas as pd
data_modified2 = {'incoming_max_inter_arrival_times': incoming_max_inter_arrival_times, #23
                  'incoming_mean_inter_arrival_times': incoming_mean_inter_arrival_times, #24
                  'incoming_std_inter_arrival_times': incoming_std_inter_arrival_times, #25
                  'incoming_third_quartileinter_arrival_times': incoming_third_quartileinter_arrival_times, #26
                  'outgoing_max_inter_arrival_times': outgoing_max_inter_arrival_times, #27
                  'outgoing_mean_inter_arrival_times': outgoing_mean_inter_arrival_times, #28
                  'outgoing_std_inter_arrival_times': outgoing_std_inter_arrival_times, #29
                  'outgoing_third_quartileinter_arrival_times': outgoing_third_quartileinter_arrival_times #30
                 }
df1_modified2 = pd.DataFrame(data_modified2)

print(df1_modified.head())

   num_total_packets  sum_packets  num_incoming_packets  \
0               1421       727552                  1300   
1                518       265216                   438   
2               1358       695296                  1240   
3               1446       740352                  1324   
4               1406       719872                  1291   

   frac_incoming_packets  num_outgoing_packets  frac_outgoing_packets  \
0               0.914849                   121               0.085151   
1               0.845560                    80               0.154440   
2               0.913108                   118               0.086892   
3               0.915629                   122               0.084371   
4               0.918208                   115               0.081792   

   average_outgoing_ordering  std_dev_outgoing_ordering  \
0                 773.322314                 515.483953   
1                 226.162500                 139.231951   
2                 786.110169 

2. unmon_standard10.pkl > array code

In [25]:
import pickle

TOTAL_URLS = 10000  # total number in the dataset

# Load 10,000 unmon pickle file
print("Loading datafile...")
with open('/content/drive/MyDrive/unmon_standard.pkl', 'rb') as f:  # Path to unmon_standard.pkl in Colab
    x = pickle.load(f)

size = len(x)
print(f'Total samples: {size}')

X1 = [] # Array to store instances (timestamps) - 10,000 instances, e.g., [[0.0, 0.5, 3.4, ...], [0.0, 4.5, ...], [0.0, 1.5, ...], ... [... ,45.8]]
X2 = [] # Array to store instances (direction*size) - size information

for i in range(TOTAL_URLS):
    size_seq = []
    time_seq = []
    for c in x[i]:
        dr = 1 if c > 0 else -1
        time_seq.append(abs(c))
        size_seq.append(dr * 512) # In the pickle file, there is no size information, so the conversion code is set to multiply by 512 uniformly.
    X1.append(time_seq)
    X2.append(size_seq)

print(len(X1)) # Print the length of X1

Loading datafile...
Total samples: 10000
10000


In [26]:
# Features
num_total_packets = [] # feature 1
sum_packets = [] # feature 2
num_incoming_packets = [] # feature 3
frac_incoming_packets = [] # feature 4
num_outgoing_packets=[] # feature 5
frac_outgoing_packets=[] # feature 6
average_incoming_ordering=[] # feature 7
std_dev_incoming_ordering=[] # feature 8
average_outgoing_ordering=[] # feature 9
std_dev_outgoing_ordering=[] # feature 10
alternative_packets_per_second_sum = [] # feature 11
mean_of_the_sequence=[] # feature 12

In [27]:
import numpy as np

for size_seq in X2:
    # 1. Total number of pacekts
    total = len(size_seq)
    num_total_packets.append(total)

    # 2. Sum of packets (absolute value gives the size regardless of direction)
    sum_pckts = sum(abs(size) for size in size_seq)
    sum_packets.append(sum_pckts)

    # 3. Number of incoming packets
    incoming = sum(1 for size in size_seq if size < 0)
    num_incoming_packets.append(incoming)

    # 4. Number of incoming packets as a fraction of the total number of packets
    frac_incoming = incoming / total if total > 0 else 0
    frac_incoming_packets.append(frac_incoming)

    # 5. Number of outgoing packets
    outgoing = sum(1 for size in size_seq if size > 0)
    num_outgoing_packets.append(outgoing)

    # 6. Number of outgoing packets as a fraction of the total number of packets
    frac_outgoing = outgoing / total if total > 0 else 0
    frac_outgoing_packets.append(frac_outgoing)

    # 7. Average of the incoming packet ordering list
    incoming_ordering_list = []
    for idx, size in enumerate(size_seq):
        if size < 0:
            incoming_ordering_list.append(idx)

    average_incoming_ordering.append(np.mean(incoming_ordering_list))

    # 8. Standard deviation of the incoming packet ordering list
    std_dev_incoming_ordering.append(np.std(incoming_ordering_list))

    # 9. Average of the outgoing packet ordering list
    outgoing_ordering_list = []
    for idx, size in enumerate(size_seq):
        if size > 0:
            outgoing_ordering_list.append(idx)
    average_outgoing_ordering.append(np.mean(outgoing_ordering_list))

    # 10. Standard deviation of the outgoing packet ordering list
    std_dev_outgoing_ordering.append(np.std(outgoing_ordering_list))

    # 11. mean of the sequence
    chunks = [size_seq[i:i + 20] for i in range(0, len(size_seq), 20)]
    outgoing_counts = [sum(1 for elem in chunk if elem > 0) for chunk in chunks]
    filtered_array = [element for element in outgoing_counts if element != 0]
    average_outgoing_counts = sum(filtered_array) / len(filtered_array)
    mean_of_the_sequence.append(average_outgoing_counts)

In [28]:
# 12. Sum of alternative number packets per second
alternative_packets_per_second_sum = []

for time_seq, size_seq in zip(X1, X2):
    packets_per_subset = len(size_seq) // 20 # 20 even-sized subsets
    subset_sums = []

    for i in range(0, len(size_seq), packets_per_subset):
        start_index = i
        end_index = min(i + packets_per_subset, len(size_seq))

        time_interval = time_seq[end_index - 1] - time_seq[start_index]
        time_interval = max(time_interval, 0.001) # Ensure that the time interval does not become zero.

        packets_per_second = (end_index - start_index) / time_interval
        subset_sums.append(packets_per_second)

        if (end_index == packets_per_subset*20):
            break

    alternative_packets_per_second_sum.append(sum(subset_sums))

In [29]:
data = {'feature 1': num_total_packets,
        'feature 2': sum_packets,
        'feature 3': num_incoming_packets,
        'feature 4': frac_incoming_packets,
        'feature 5': num_outgoing_packets,
        'feature 6': frac_outgoing_packets}
df1 = pd.DataFrame(data)

print(df1.head())

   feature 1  feature 2  feature 3  feature 4  feature 5  feature 6
0        130      66560        101   0.776923         29   0.223077
1       9927    5082624       9189   0.925657        738   0.074343
2        359     183808        285   0.793872         74   0.206128
3       9948    5093376       9407   0.945617        541   0.054383
4       2508    1284096       2215   0.883174        293   0.116826


In [30]:
data = {'feature 7': average_outgoing_ordering,
        'feature 8': std_dev_outgoing_ordering,
        'feature 9': average_incoming_ordering,
        'feature 10': std_dev_incoming_ordering,
        'feature 11': mean_of_the_sequence,
        'feature 12': alternative_packets_per_second_sum}
df2 = pd.DataFrame(data)

print(df2.head())

     feature 7    feature 8    feature 9   feature 10  feature 11  \
0    36.758621    27.053784    72.465347    36.304696    4.833333   
1  3845.246612  2885.461055  5052.770595  2845.096286    2.435644   
2   155.905405   111.145044   184.996491   100.730751    4.352941   
3  4667.994455  3120.454082  4991.069735  2855.784563    1.918440   
4  1227.269625   778.715376  1256.969752   716.374241    3.329545   

     feature 12  
0  25309.451476  
1  68180.017464  
2   1518.989991  
3  62061.265205  
4  36068.101946  


In [31]:
incoming_packets_first30 =[] # feature 13
outgoing_packets_first30 = [] # feature 14
transmission_time_Q1 = [] # feature 15
transmission_time_Q2 = [] # feature 16
transmission_time_Q3 = [] # feature 17
transmission_time_Q4 = [] # feature 18

In [32]:
num_packets_per_second = [] # feature 19
mean_packets_per_second = [] # feature 20
std_packets_per_second = [] # feature 21
min_packets_per_second = [] # feature 22
max_packets_per_second = [] # feature 23
med_packets_per_second = [] # feature 24

In [33]:
for size_seq in X2:
    incoming_packets = sum(1 for size in size_seq[:30] if size < 0)
    incoming_packets_first30.append(incoming_packets)

    outgoing_packets = sum(1 for size in size_seq[:30] if size > 0)
    outgoing_packets_first30.append(outgoing_packets)


data = {'incoming_packets_first30': incoming_packets_first30,
        'incoming_packets_first30': outgoing_packets_first30}
df1 = pd.DataFrame(data)

print(df1.head())

   incoming_packets_first30
0                        13
1                        12
2                        13
3                         9
4                         3


In [34]:
for i in range(len(X1)):
    Q1 = np.percentile(X1[i], 25)
    Q2 = np.percentile(X1[i], 50)
    Q3 = np.percentile(X1[i], 75)
    Q4 = np.percentile(X1[i], 100)

    transmission_time_Q1.append(Q1)
    transmission_time_Q2.append(Q2)
    transmission_time_Q3.append(Q3)
    transmission_time_Q4.append(Q4)

data = {'transmission_time_Q1': transmission_time_Q1,
        'transmission_time_Q2': transmission_time_Q2,
        'transmission_time_Q3': transmission_time_Q3,
        'transmission_time_Q4': transmission_time_Q4,}
df1 = pd.DataFrame(data)

print(df1.head())

   transmission_time_Q1  transmission_time_Q2  transmission_time_Q3  \
0                 2.920                  4.18                  4.84   
1                 9.590                 10.23                 11.02   
2                 7.525                  8.68                 12.30   
3                 6.840                  7.64                  8.52   
4                 7.400                  8.08                 13.09   

   transmission_time_Q4  
0                  5.56  
1                 11.88  
2                 17.81  
3                  9.17  
4                 53.59  


In [35]:
import statistics

for i in range(len(X1)):
    num_packets = len(X1[i]) / X1[i][-1]
    mean_packets = statistics.mean(X1[i])
    std_packets = statistics.stdev(X1[i])
    med_packets = statistics.median(X1[i])

    num_packets_per_second.append(num_packets)
    mean_packets_per_second.append(mean_packets)
    std_packets_per_second.append(std_packets)
    med_packets_per_second.append(med_packets)

data = {'Num Packets Per Second': num_packets_per_second,
        'Mean Packets Per Second': mean_packets_per_second,
        'Std Packets Per Second': std_packets_per_second,
        'Med Packets Per Second': med_packets_per_second,}
df1 = pd.DataFrame(data)

print(df1.head())

   Num Packets Per Second  Mean Packets Per Second  Std Packets Per Second  \
0               23.381295                 3.799462                1.309629   
1              835.606061                 9.884457                1.613041   
2               20.157215                 9.328440                3.082876   
3             1084.841876                 7.584566                1.036050   
4               46.799776                10.352085                5.565770   

   Med Packets Per Second  
0                    4.18  
1                   10.23  
2                    8.68  
3                    7.64  
4                    8.08  


In [36]:
import pandas as pd
data = {'num_total_packets': num_total_packets,
        'sum_packets': sum_packets,
        'num_incoming_packets': num_incoming_packets,
        'frac_incoming_packets': frac_incoming_packets,
        'num_outgoing_packets': num_outgoing_packets,
        'frac_outgoing_packets': frac_outgoing_packets,
        'average_outgoing_ordering': average_outgoing_ordering,
        'std_dev_outgoing_ordering': std_dev_outgoing_ordering,
        'average_incoming_ordering': average_incoming_ordering,
        'std_dev_incoming_ordering': std_dev_incoming_ordering,
        'mean_of_the_sequence': mean_of_the_sequence,
        'alternative_packets_per_second_sum': alternative_packets_per_second_sum}
df1 = pd.DataFrame(data)

print(df1.head())

   num_total_packets  sum_packets  num_incoming_packets  \
0                130        66560                   101   
1               9927      5082624                  9189   
2                359       183808                   285   
3               9948      5093376                  9407   
4               2508      1284096                  2215   

   frac_incoming_packets  num_outgoing_packets  frac_outgoing_packets  \
0               0.776923                    29               0.223077   
1               0.925657                   738               0.074343   
2               0.793872                    74               0.206128   
3               0.945617                   541               0.054383   
4               0.883174                   293               0.116826   

   average_outgoing_ordering  std_dev_outgoing_ordering  \
0                  36.758621                  27.053784   
1                3845.246612                2885.461055   
2                 155.905405 

In [37]:
import pandas as pd

y = [-1] * 10000
data = {'label': y}
df2 = pd.DataFrame(data)

print(df2.head())

   label
0     -1
1     -1
2     -1
3     -1
4     -1


In [38]:
df1.to_csv('unmon_features.csv', index=False)

In [39]:
df2.to_csv('unmon_labels.csv', index=False)

In [40]:
import pandas as pd
data_modified = {'num_total_packets': num_total_packets, #1
                 'sum_packets': sum_packets, #2
                 'num_incoming_packets': num_incoming_packets, #3
                 'frac_incoming_packets': frac_incoming_packets, #4
                 'num_outgoing_packets': num_outgoing_packets, #5
                 'frac_outgoing_packets': frac_outgoing_packets, #6
                 'average_outgoing_ordering': average_outgoing_ordering, #7
                 'std_dev_outgoing_ordering': std_dev_outgoing_ordering, #8
                 'average_incoming_ordering': average_incoming_ordering, #9
                 'std_dev_incoming_ordering': std_dev_incoming_ordering, #10
                 'mean_of_the_sequence': mean_of_the_sequence, #11
                 'alternative_packets_per_second_sum': alternative_packets_per_second_sum, #12
                 'incoming_packets_first30': incoming_packets_first30, #13
                 'outgoing_packets_first30': outgoing_packets_first30, #14
                 'transmission_time_Q1': transmission_time_Q1, #15
                 'transmission_time_Q2': transmission_time_Q2, #16
                 'transmission_time_Q3': transmission_time_Q3, #17
                 'transmission_time_Q4': transmission_time_Q4, #18
                 'num_packets_per_second': num_packets_per_second, #19
                 'mean_packets_per_second': mean_packets_per_second, #20
                 'std_packets_per_second': std_packets_per_second, #21
                 'med_packets_per_second': med_packets_per_second #22
                 }
df1_modified = pd.DataFrame(data_modified)

print(df1_modified.head())

   num_total_packets  sum_packets  num_incoming_packets  \
0                130        66560                   101   
1               9927      5082624                  9189   
2                359       183808                   285   
3               9948      5093376                  9407   
4               2508      1284096                  2215   

   frac_incoming_packets  num_outgoing_packets  frac_outgoing_packets  \
0               0.776923                    29               0.223077   
1               0.925657                   738               0.074343   
2               0.793872                    74               0.206128   
3               0.945617                   541               0.054383   
4               0.883174                   293               0.116826   

   average_outgoing_ordering  std_dev_outgoing_ordering  \
0                  36.758621                  27.053784   
1                3845.246612                2885.461055   
2                 155.905405 

In [None]:
df1_modified.to_csv('unmon_features_modified.csv', index=False)