In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import glob

In [8]:
# Get all csv files in directory
csv_files = glob.glob('*.csv')

# Only use ISCXVPN files
csv_files = [f for f in csv_files if "VNAT_VPN" in f]

results = {}
for csv_file in csv_files:
    df = pd.read_csv(csv_file)
    key = "NonVPN" if "NonVPN" in csv_file else "VPN"
    
    result = df[df["file"].apply(lambda x: "nonvpn" in x)]

    print(result)
    if key not in results:
        results[key] = {
            "detected": (df.groupby(["name"]))["result"].sum(),
            "total" : df.groupby(["name"])["name"].count()
        }
    else:
        results[key]["detected"] += (df.groupby(["name"]))["result"].sum()
        results[key]["total"] += df.groupby(["name"])["name"].count()
results


                                      name  \
0                     VPN Detection Opcode   
1                     VPN Detection Opcode   
2                     VPN Detection Opcode   
3                     VPN Detection Opcode   
4                     VPN Detection Opcode   
..                                     ...   
75  VPN Detection ACK with MOD improvement   
76  VPN Detection ACK with MOD improvement   
77  VPN Detection ACK with MOD improvement   
78  VPN Detection ACK with MOD improvement   
79  VPN Detection ACK with MOD improvement   

                                                 file algorithm           ip1  \
0   ../datasets/VNAT_VPN-NonVPN_Network_Applicatio...    opcode  10.116.1.162   
1   ../datasets/VNAT_VPN-NonVPN_Network_Applicatio...    opcode  10.116.1.162   
2   ../datasets/VNAT_VPN-NonVPN_Network_Applicatio...    opcode  10.116.1.162   
3   ../datasets/VNAT_VPN-NonVPN_Network_Applicatio...    opcode  10.116.1.162   
4   ../datasets/VNAT_VPN-NonVPN_Network_Ap

{'NonVPN': {'detected': name
  VPN Detection ACK                             0
  VPN Detection ACK with MOD improvement        0
  VPN Detection Opcode                          0
  VPN Detection Opcode with XOR optimization    0
  Name: result, dtype: int64,
  'total': name
  VPN Detection ACK                             159
  VPN Detection ACK with MOD improvement        159
  VPN Detection Opcode                          159
  VPN Detection Opcode with XOR optimization    159
  Name: name, dtype: int64}}

In [4]:
for csv_file, data in results.items():
    print(f"Results for {csv_file}")
    for name, values in data["detected"].items():
        print(f"{name}: {values}/{data['total'][name]}")
    print("\n")


Results for NonVPN
VPN Detection ACK: 0/159
VPN Detection ACK with MOD improvement: 0/159
VPN Detection Opcode: 0/159
VPN Detection Opcode with XOR optimization: 0/159




In [6]:
from parse_log import parse

In [55]:
# Get all log files in directory
EXPERIMENTS_DIR = 'experiments'
logfile = open(f'{EXPERIMENTS_DIR}/experiments.log', 'r')
parsed_logs = parse(logfile)
print(len(parsed_logs))
    

1188


In [134]:
df = pd.DataFrame(parsed_logs)


def n_files_flagged_ge_1(df):
    return df[df["flagged"] >= 1].groupby(["algorithm"])["conversations"].count()

def print_results(section):

    print("Number of conversations flagged:")
    n_flagged = section.groupby(["algorithm"])["flagged"].sum()
    print(n_flagged)

    print("Number of total conversations:")
    print("_"*50)
    n_conversations = section.groupby(["algorithm"])["conversations"].sum()
    print(n_conversations)
    print("_"*50)

    print("% of conversations flagged:")
    percentage = ((n_flagged / n_conversations) * 100).round(2)
    print(percentage)
    print("_"*50)

    print("Number of files with conversations flagged >= 1:")
    n_files_ge_1 = section[section["flagged"] >= 1].groupby(["algorithm"])["conversations"].count()
    print(n_files_ge_1)
    print("_"*50)

    print("Total number of files:")
    n_files = section.groupby(["algorithm"])["conversations"].count()
    print(n_files)
    print("_"*50)

    print("% of files flagged (>= 1):")
    percentage_files = ((n_files_ge_1 / n_files) * 100).round(0)
    print(f"{percentage_files}%")
    

In [130]:
# ISCX: VPN Connections 
section = df[df["file"].apply(lambda x: "ISCX" in x and "NonVPN" not in x)]
print_results(section)

Number of conversations flagged:
algorithm
VPN Detection ACK                              33
VPN Detection ACK with MOD improvement          7
VPN Detection Opcode                          152
VPN Detection Opcode with XOR optimization     10
Name: flagged, dtype: int64
Number of total conversations:
__________________________________________________
algorithm
VPN Detection ACK                             17668
VPN Detection ACK with MOD improvement        17668
VPN Detection Opcode                          17668
VPN Detection Opcode with XOR optimization    17668
Name: conversations, dtype: int64
__________________________________________________
% of conversations flagged:
algorithm
VPN Detection ACK                             0.19
VPN Detection ACK with MOD improvement        0.04
VPN Detection Opcode                          0.86
VPN Detection Opcode with XOR optimization    0.06
dtype: float64
__________________________________________________
Number of files with conversations f

In [132]:
# ISCX: NonVPN Connections 
section = df[df["file"].apply(lambda x: "ISCX" in x and "NonVPN" in x)]
print_results(section)

Number of conversations flagged:
algorithm
VPN Detection ACK                             113
VPN Detection ACK with MOD improvement         46
VPN Detection Opcode                          679
VPN Detection Opcode with XOR optimization      2
Name: flagged, dtype: int64
Number of total conversations:
__________________________________________________
algorithm
VPN Detection ACK                             163107
VPN Detection ACK with MOD improvement        163107
VPN Detection Opcode                          163107
VPN Detection Opcode with XOR optimization    163107
Name: conversations, dtype: int64
__________________________________________________
% of conversations flagged:
algorithm
VPN Detection ACK                             0.07
VPN Detection ACK with MOD improvement        0.03
VPN Detection Opcode                          0.42
VPN Detection Opcode with XOR optimization    0.00
dtype: float64
__________________________________________________
Number of files with conversatio

In [179]:
def confusion_matrix(df, vpnfilter, nonvpnfilter):
    nonvpn = df[df["file"].apply(nonvpnfilter)]
    vpn = df[df["file"].apply(vpnfilter)]

    print(f"TOTAL conversations (vpn): {vpn['conversations'].sum()}")
    print(f"TOTAL conversations (nonvpn): {nonvpn['conversations'].sum()}")

    non_vpn_total = nonvpn.groupby(["algorithm"])["conversations"].count()["VPN Detection ACK"]
    vpn_total = vpn.groupby(["algorithm"])["conversations"].count()["VPN Detection ACK"]
    total = non_vpn_total + vpn_total

    non_vpn_flagged_ge_1 = n_files_flagged_ge_1(nonvpn)
    non_vpn_flagged_ge_1_inv = non_vpn_total - non_vpn_flagged_ge_1
    vpn_flagged_ge_1 = n_files_flagged_ge_1(vpn)
    vpn_flagged_ge_1_inv = vpn_total - vpn_flagged_ge_1

    percentage_non_vpn = ((non_vpn_flagged_ge_1 / total) * 100).round(2)
    percentage_non_vpn_inv = ((non_vpn_flagged_ge_1_inv / total) * 100).round(2)
    percentage_vpn = ((vpn_flagged_ge_1 / total) * 100).round(2)
    percentage_vpn_inv = ((vpn_flagged_ge_1_inv / total) * 100).round(2)

    print(f"vpn_flagged_ge_1:\n{vpn_flagged_ge_1}")
    print("-"*50)
    print(f"vpn_flagged_ge_1_inv:\n{vpn_flagged_ge_1_inv}")
    print("-"*50)

    print(f"percentage_vpn: \n{percentage_vpn}")
    print("-"*50)
    print(f"percentage_vpn_inv \n{percentage_vpn_inv}")
    print("-"*50)

    print(f"non_vpn_flagged_ge_1:\n{non_vpn_flagged_ge_1}")
    print("-"*50)
    print(f"non_vpn_flagged_ge_1_inv:\n{non_vpn_flagged_ge_1_inv}")
    print("-"*50)
    
    print(f"percentage_non_vpn: \n{percentage_non_vpn}")
    print("-"*50)
    print(f"percentage_non_vpn_inv \n{percentage_non_vpn_inv}")
    print("-"*50)

In [180]:
#confusion_matrix(df, vpnfilter=lambda x: "ISCX" in x and "NonVPN" not in x, nonvpnfilter=lambda x: "ISCX" in x and "NonVPN" in x)
confusion_matrix(df, vpnfilter=lambda x: "VNAT_VPN" in x and "nonvpn" not in x, nonvpnfilter=lambda x: "VNAT_VPN" in x and "nonvpn" in x)


TOTAL conversations (vpn): 1516
TOTAL conversations (nonvpn): 84676
vpn_flagged_ge_1:
Series([], Name: conversations, dtype: int64)
--------------------------------------------------
vpn_flagged_ge_1_inv:
Series([], Name: conversations, dtype: int64)
--------------------------------------------------
percentage_vpn: 
Series([], Name: conversations, dtype: float64)
--------------------------------------------------
percentage_vpn_inv 
Series([], Name: conversations, dtype: float64)
--------------------------------------------------
non_vpn_flagged_ge_1:
algorithm
VPN Detection ACK                              6
VPN Detection Opcode                          16
VPN Detection Opcode with XOR optimization     1
Name: conversations, dtype: int64
--------------------------------------------------
non_vpn_flagged_ge_1_inv:
algorithm
VPN Detection ACK                             74
VPN Detection Opcode                          64
VPN Detection Opcode with XOR optimization    79
Name: conversati