In [1]:
import multiprocessing
import os
import uuid
from itertools import repeat
from multiprocessing import Pool
from pathlib import Path
import shutil

import pandas as pd
from sklearn.cluster import KMeans

import clustering_utils as utils
import pcap_generic_parser as parser

# How to Run
1- Create your project folder in /data/input

2- Place the the pcap files within your project folder

3- Set the project_name below

4- Run the Notebook from the start

5- Check the parser output and clustering output in /data/output/[project_name]

6- Check for extra visualiztion tools

In [2]:
project_name = 'example_project'

In [3]:
input_folder_path = Path(f'../data/input/{project_name}')
output_folder_path = Path(f'../data/output/{project_name}')
json_folder_path = output_folder_path / f'jsonfiles_{uuid.uuid4()}'

output_folder_path.mkdir(parents=True, exist_ok=True)
json_folder_path.mkdir(parents=True, exist_ok=True)

sip_output_filename = 'sip_parser_output.csv'
gtp_output_filename = 'gtp_parser_output.csv'
diameter_output_filename = 'diameter_parser_output.csv'
all_output_csv_filename = 'all_parser_output.csv'
gtp_sip_output_csv_filename = 'gtp_sip_parser_output.csv'

num_processors = multiprocessing.cpu_count()
p = Pool(processes=num_processors)

In [4]:
# Fetch ALL files that end in PCAP
pcap_file_names = [x for x in os.listdir(input_folder_path) if x.endswith("pcap")]
len(pcap_file_names)

4

In [5]:
# Copy ALL files that end in json to the output directory
json_files = [x for x in os.listdir(input_folder_path) if x.endswith("json")]
for file in json_files:
    shutil.copy(input_folder_path / file, json_folder_path)

In [6]:
# Create raw file names of pcap files without extension ".pcap"
files = [file_name.split('.pcap')[0] for file_name in pcap_file_names]

In [7]:
# Convert PCAP to JSON
pcaps_list = p.starmap(parser.convert_pcap, zip(repeat(input_folder_path), repeat(json_folder_path), files))

In [8]:
# Fetch ALL files that end in json
json_files = [x for x in os.listdir(json_folder_path) if x.endswith("json")]
len(json_files)

6

# Sip Parser

In [9]:
# Read and parse json packets, generates a nest List
sip_parsed = p.starmap(parser.read_parse_sip, zip(repeat(json_folder_path), json_files))

In [10]:
# Get output from original parser and concatenate both
sip_df_out_final = pd.DataFrame(data=sip_parsed)
sip_df_out_final.fillna("*", inplace=True)
sip_df_out_final = sip_df_out_final.set_index(sip_df_out_final.columns[0])
sip_df_out_final.index.names = ['pcap']
sip_df_out_final = sip_df_out_final.add_prefix('sip ')

In [11]:
sip_df_out_final.to_csv(output_folder_path / sip_output_filename)
sip_df_out_final

Unnamed: 0_level_0,sip 1,sip 2,sip 3,sip 4,sip 5,sip 6,sip 7,sip 8,sip 9,sip 10,...,sip 63,sip 64,sip 65,sip 66,sip 67,sip 68,sip 69,sip 70,sip 71,sip 72
pcap,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
FAILED-FUNC_EFHT_ICBBL_005-20200713_135220_20200713_1355_RCA_PoC_17_tcID_1174-16_2,UE-INVITE-P-CSCF02A,P-CSCF02A-100-UE,P-CSCF-INVITE-S-CSCF,S-CSCF-100-P-CSCF,"S-CSCF-407&&SIP;cause=407;text=""CC_IMS_SESS_NE...","P-CSCF02A-407&&SIP;cause=407;text=""CC_IMS_SESS...",P-CSCF-ACK-S-CSCF,UE-ACK-P-CSCF02A,UE-INVITE-P-CSCF02A,P-CSCF02A-100-UE,...,*,*,*,*,*,*,*,*,*,*
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_48_00_647_+16692481447_357330090901661_iPhone-XR_12.3_054741,UE-REGISTER-P-CSCF,P-CSCF-401-UE,P-CSCF-200-UE,UE-INVITE-P-CSCF,P-CSCF-100-UE,SR-Signaling-100-S-CSCF,SR-Signaling-404-S-CSCF,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-UE,...,*,*,*,*,*,*,*,*,*,*
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_06_00_612_+16692481447_357330090901661_iPhone-XR_12.3_050557,UE-INVITE-P-CSCF,P-CSCF-100-UE,SR-Signaling-100-S-CSCF,SR-Signaling-404-S-CSCF,S-CSCF-ACK-SR-Signaling,Unmapped IP-INVITE-P-CSCF,S-CSCF-INVITE-SR-Signaling,P-CSCF-100-Unmapped IP,SR-Signaling-100-S-CSCF,SR-Signaling-404-S-CSCF,...,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP,*,*,*,*
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_36_00_637_+17014801708_354386060552127_iPhone-6_Plus_11.4.1_053554,Unmapped IP-200-Unmapped IP,UE-INVITE-P-CSCF,P-CSCF-100-UE,SR-Signaling-100-S-CSCF,SR-Signaling-404-S-CSCF,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,Unmapped IP-INVITE-P-CSCF,SR-Signaling-100-S-CSCF,P-CSCF-100-Unmapped IP,...,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP,*,*,*,*
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_24_00_627_+17014801700_356165090145724_iPhone-XS_12.4_052415,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP,UE-INVITE-P-CSCF,P-CSCF-100-UE,SR-Signaling-100-S-CSCF,SR-Signaling-404-S-CSCF,S-CSCF-ACK-SR-Signaling,Unmapped IP-200-Unmapped IP,S-CSCF-INVITE-SR-Signaling,SR-Signaling-100-S-CSCF,...,Unmapped IP-INVITE-P-CSCF,P-CSCF-100-Unmapped IP,SR-Signaling-100-S-CSCF,SR-Signaling-404-S-CSCF,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP
FAILED-FUNC_EFHT_ICBBL_005-20200713_142032_20200713_1420_RCA_PoC_17_tcID_1174-16_2,UE-INVITE-P-CSCF02A,P-CSCF02A-100-UE,P-CSCF-INVITE-S-CSCF,S-CSCF-100-P-CSCF,"S-CSCF-407&&SIP;cause=407;text=""CC_IMS_SESS_NE...",P-CSCF-ACK-S-CSCF,"P-CSCF02A-407&&SIP;cause=407;text=""CC_IMS_SESS...",UE-ACK-P-CSCF02A,UE-INVITE-P-CSCF02A,P-CSCF02A-100-UE,...,*,*,*,*,*,*,*,*,*,*


# GTPv2 Parser

In [12]:
# Get all possible gtpv2 causes
protocols_list_from_tshark = p.starmap(parser.tshark_aggregate_gtp_cause, zip(repeat(json_folder_path), pcap_file_names))
# Split each protocol to a new row
values = set()
for x in protocols_list_from_tshark:
    values.update(x)
try:
    values.remove('')
except KeyError as e:
    pass
gtp_causes = list(values)

In [13]:
# Read and parse json packets, generates a nest List
gtp_parse_output = p.starmap(parser.read_parse_gtp, zip(repeat(json_folder_path), json_files, repeat(gtp_causes)))

In [14]:
# Get output from original parser and concatenate both
gtp_df_out_final = pd.DataFrame(data=gtp_parse_output)
gtp_df_out_final.fillna("*", inplace=True)
gtp_df_out_final = gtp_df_out_final.set_index(gtp_df_out_final.columns[0])
gtp_df_out_final.index.names = ['pcap']
gtp_df_out_final = gtp_df_out_final.rename(columns={1: 'n Requests', 2: 'n Responses', 3: 'n unanswered requests'})
gtp_df_out_final = gtp_df_out_final.add_prefix('gtpv2 ')

for i in range(0, len(gtp_causes)):
    gtp_df_out_final = gtp_df_out_final.rename(columns={'gtpv2 ' + str(i + 4): 'gtpv2 cause = ' + str(gtp_causes[i])})

In [15]:
gtp_df_out_final.to_csv(output_folder_path / gtp_output_filename)
gtp_df_out_final

Unnamed: 0_level_0,gtpv2 n Requests,gtpv2 n Responses,gtpv2 n unanswered requests
pcap,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
FAILED-FUNC_EFHT_ICBBL_005-20200713_135220_20200713_1355_RCA_PoC_17_tcID_1174-16_2,0,0,0
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_48_00_647_+16692481447_357330090901661_iPhone-XR_12.3_054741,0,0,0
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_06_00_612_+16692481447_357330090901661_iPhone-XR_12.3_050557,0,0,0
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_36_00_637_+17014801708_354386060552127_iPhone-6_Plus_11.4.1_053554,0,0,0
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_24_00_627_+17014801700_356165090145724_iPhone-XS_12.4_052415,0,0,0
FAILED-FUNC_EFHT_ICBBL_005-20200713_142032_20200713_1420_RCA_PoC_17_tcID_1174-16_2,0,0,0


# Diameter Parser

In [16]:
# Get all possible diameter result codes
diameter_list_from_tshark = p.starmap(parser.tshark_aggregate_diameter_result_code, zip(repeat(json_folder_path), pcap_file_names))
# Split each protocol to a new row
values = set()
for x in diameter_list_from_tshark:
    values.update(x)
try:
    values.remove('')
except KeyError as e:
    pass
diameter_result_codes = list(values)

In [17]:
# Read and parse json packets, generates a nest List
diameter_parse_output = p.starmap(parser.read_parse_diameter, zip(repeat(json_folder_path), json_files, repeat(diameter_result_codes)))

In [18]:
# Get output from original parser and concatenate both
diameter_df_out_final = pd.DataFrame(data=diameter_parse_output)
diameter_df_out_final.fillna("*", inplace=True)
diameter_df_out_final = diameter_df_out_final.set_index(diameter_df_out_final.columns[0])
diameter_df_out_final.index.names = ['pcap']
diameter_df_out_final = diameter_df_out_final.rename(columns={1: 'n Requests', 2: 'n Responses', 3: 'n unanswered requests'})
diameter_df_out_final = diameter_df_out_final.add_prefix('diameter ')

for i in range(0, len(diameter_result_codes)):
    diameter_df_out_final = diameter_df_out_final.rename(columns={'diameter ' + str(i + 4): 'diameter result code = ' + str(diameter_result_codes[i])})

In [19]:
diameter_df_out_final.to_csv(output_folder_path / diameter_output_filename)
diameter_df_out_final

Unnamed: 0_level_0,diameter n Requests,diameter n Responses,diameter n unanswered requests,diameter 4
pcap,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
FAILED-FUNC_EFHT_ICBBL_005-20200713_135220_20200713_1355_RCA_PoC_17_tcID_1174-16_2,0,0,0,*
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_48_00_647_+16692481447_357330090901661_iPhone-XR_12.3_054741,0,0,0,3
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_06_00_612_+16692481447_357330090901661_iPhone-XR_12.3_050557,0,0,0,3
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_36_00_637_+17014801708_354386060552127_iPhone-6_Plus_11.4.1_053554,0,0,0,3
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_24_00_627_+17014801700_356165090145724_iPhone-XS_12.4_052415,0,0,0,3
FAILED-FUNC_EFHT_ICBBL_005-20200713_142032_20200713_1420_RCA_PoC_17_tcID_1174-16_2,0,0,0,*


# Merge sip, gtpv2, and diameter Parser Outputs

In [20]:
sip_gtp_df = pd.merge(gtp_df_out_final, sip_df_out_final, on='pcap')
sip_gtp_df.to_csv(output_folder_path / gtp_sip_output_csv_filename)

In [21]:
# all_parser_out_df = diameter_df_out_final.merge(gtp_df_out_final , on='pcap').merge(sip_df_out_final, on='pcap')
all_parser_out_df = diameter_df_out_final.join([gtp_df_out_final, sip_df_out_final])
all_parser_out_df.to_csv(output_folder_path / all_output_csv_filename)
all_parser_out_df

Unnamed: 0_level_0,diameter n Requests,diameter n Responses,diameter n unanswered requests,diameter 4,gtpv2 n Requests,gtpv2 n Responses,gtpv2 n unanswered requests,sip 1,sip 2,sip 3,...,sip 63,sip 64,sip 65,sip 66,sip 67,sip 68,sip 69,sip 70,sip 71,sip 72
pcap,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
FAILED-FUNC_EFHT_ICBBL_005-20200713_135220_20200713_1355_RCA_PoC_17_tcID_1174-16_2,0,0,0,*,0,0,0,UE-INVITE-P-CSCF02A,P-CSCF02A-100-UE,P-CSCF-INVITE-S-CSCF,...,*,*,*,*,*,*,*,*,*,*
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_48_00_647_+16692481447_357330090901661_iPhone-XR_12.3_054741,0,0,0,3,0,0,0,UE-REGISTER-P-CSCF,P-CSCF-401-UE,P-CSCF-200-UE,...,*,*,*,*,*,*,*,*,*,*
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_06_00_612_+16692481447_357330090901661_iPhone-XR_12.3_050557,0,0,0,3,0,0,0,UE-INVITE-P-CSCF,P-CSCF-100-UE,SR-Signaling-100-S-CSCF,...,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP,*,*,*,*
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_36_00_637_+17014801708_354386060552127_iPhone-6_Plus_11.4.1_053554,0,0,0,3,0,0,0,Unmapped IP-200-Unmapped IP,UE-INVITE-P-CSCF,P-CSCF-100-UE,...,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP,*,*,*,*
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_24_00_627_+17014801700_356165090145724_iPhone-XS_12.4_052415,0,0,0,3,0,0,0,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP,UE-INVITE-P-CSCF,...,Unmapped IP-INVITE-P-CSCF,P-CSCF-100-Unmapped IP,SR-Signaling-100-S-CSCF,SR-Signaling-404-S-CSCF,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP
FAILED-FUNC_EFHT_ICBBL_005-20200713_142032_20200713_1420_RCA_PoC_17_tcID_1174-16_2,0,0,0,*,0,0,0,UE-INVITE-P-CSCF02A,P-CSCF02A-100-UE,P-CSCF-INVITE-S-CSCF,...,*,*,*,*,*,*,*,*,*,*


# Cluster Parser Output

In [22]:
ignore_cols = ['pcap']
label = None
static_cols = ('gtpv2 n ', 'gtpv2 cause =', 'diameter n ', 'diameter result code =')
replace_none = None
use_encoder = True  # If True calls one_hot_encoder
number_of_clusters = -1  # specify number of cluster. If -1 calculate optimal_cluster_num

In [23]:
all_parser_out_df = all_parser_out_df.reset_index().drop_duplicates()
if replace_none is not None:
    utils.transform_data(all_parser_out_df, replace_none)
all_parser_out_df

Unnamed: 0,pcap,diameter n Requests,diameter n Responses,diameter n unanswered requests,diameter 4,gtpv2 n Requests,gtpv2 n Responses,gtpv2 n unanswered requests,sip 1,sip 2,...,sip 63,sip 64,sip 65,sip 66,sip 67,sip 68,sip 69,sip 70,sip 71,sip 72
0,FAILED-FUNC_EFHT_ICBBL_005-20200713_135220_202...,0,0,0,*,0,0,0,UE-INVITE-P-CSCF02A,P-CSCF02A-100-UE,...,*,*,*,*,*,*,*,*,*,*
1,timothy.vogel@verizonwireless.com_Scheduled_PS...,0,0,0,3,0,0,0,UE-REGISTER-P-CSCF,P-CSCF-401-UE,...,*,*,*,*,*,*,*,*,*,*
2,timothy.vogel@verizonwireless.com_Scheduled_PS...,0,0,0,3,0,0,0,UE-INVITE-P-CSCF,P-CSCF-100-UE,...,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP,*,*,*,*
3,timothy.vogel@verizonwireless.com_Scheduled_PS...,0,0,0,3,0,0,0,Unmapped IP-200-Unmapped IP,UE-INVITE-P-CSCF,...,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP,*,*,*,*
4,timothy.vogel@verizonwireless.com_Scheduled_PS...,0,0,0,3,0,0,0,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP,...,Unmapped IP-INVITE-P-CSCF,P-CSCF-100-Unmapped IP,SR-Signaling-100-S-CSCF,SR-Signaling-404-S-CSCF,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP
5,FAILED-FUNC_EFHT_ICBBL_005-20200713_142032_202...,0,0,0,*,0,0,0,UE-INVITE-P-CSCF02A,P-CSCF02A-100-UE,...,*,*,*,*,*,*,*,*,*,*


In [24]:
clustering_data = all_parser_out_df[[x for x in all_parser_out_df.columns if x not in ignore_cols]]
if use_encoder:
    df_static = clustering_data[[x for x in clustering_data.columns if x.startswith(static_cols)]]
    df_dynamic = clustering_data[[x for x in clustering_data.columns if not x in df_static]]

    clustering_data = utils.one_hot_encoder(df_dynamic)
    clustering_data = pd.concat([df_static, clustering_data], axis=1)
clustering_data

Unnamed: 0,diameter n Requests,diameter n Responses,diameter n unanswered requests,gtpv2 n Requests,gtpv2 n Responses,gtpv2 n unanswered requests,3.0,P-CSCF-100-UE,P-CSCF-100-Unmapped IP,P-CSCF-200-UE,...,UE-ACK-P-CSCF02A,UE-INVITE-P-CSCF,UE-INVITE-P-CSCF02A,UE-REGISTER-P-CSCF,Unmapped IP-200-S-CSCF,Unmapped IP-200-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-BYE-S-CSCF,Unmapped IP-INVITE-P-CSCF,Unmapped IP-REGISTER-P-CSCF
0,0,0,0,0,0,0,0,0,0,0,...,1,0,1,0,1,0,0,1,0,0
1,0,0,0,0,0,0,0,1,1,1,...,0,1,0,1,0,0,1,0,1,1
2,0,0,0,0,0,0,0,1,1,0,...,0,1,0,0,0,1,1,0,1,0
3,0,0,0,0,0,0,0,1,1,0,...,0,1,0,0,0,1,1,0,1,0
4,0,0,0,0,0,0,0,1,1,0,...,0,1,0,0,0,1,1,0,1,0
5,0,0,0,0,0,0,0,0,0,0,...,1,0,1,0,1,0,0,1,0,0


In [25]:
if number_of_clusters == -1:
    number_of_clusters = utils.optimal_cluster_num(clustering_data)
params = {'n_clusters': number_of_clusters, 'init': 'k-means++', 'max_iter': 120, 'n_init': 25, 'random_state': 1}
clusterer = KMeans(**params)
clusters, silhouette = utils.cluster(clusterer, clustering_data)
print('number of clusters {}'.format(number_of_clusters))

number of clusters 3


In [26]:
all_df, clusters_df, score, percent_mean, silhouette_mean = utils.score_fun(all_parser_out_df, clusters, silhouette, label)
print('Silhouette Mean {}'.format(silhouette_mean))
all_df.to_csv(output_folder_path / 'clustered_data.csv')
all_df

Silhouette Mean 0.8333333333333334


Unnamed: 0,Cluster,Silhouette,pcap,diameter n Requests,diameter n Responses,diameter n unanswered requests,diameter 4,gtpv2 n Requests,gtpv2 n Responses,gtpv2 n unanswered requests,...,sip 63,sip 64,sip 65,sip 66,sip 67,sip 68,sip 69,sip 70,sip 71,sip 72
0,1,1.0,FAILED-FUNC_EFHT_ICBBL_005-20200713_135220_202...,0,0,0,*,0,0,0,...,*,*,*,*,*,*,*,*,*,*
1,2,0.0,timothy.vogel@verizonwireless.com_Scheduled_PS...,0,0,0,3,0,0,0,...,*,*,*,*,*,*,*,*,*,*
2,0,1.0,timothy.vogel@verizonwireless.com_Scheduled_PS...,0,0,0,3,0,0,0,...,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP,*,*,*,*
3,0,1.0,timothy.vogel@verizonwireless.com_Scheduled_PS...,0,0,0,3,0,0,0,...,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP,*,*,*,*
4,0,1.0,timothy.vogel@verizonwireless.com_Scheduled_PS...,0,0,0,3,0,0,0,...,Unmapped IP-INVITE-P-CSCF,P-CSCF-100-Unmapped IP,SR-Signaling-100-S-CSCF,SR-Signaling-404-S-CSCF,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP
5,1,1.0,FAILED-FUNC_EFHT_ICBBL_005-20200713_142032_202...,0,0,0,*,0,0,0,...,*,*,*,*,*,*,*,*,*,*


In [27]:
clusters_df

Unnamed: 0,Cluster,Cluster_Size,Silhouette_Mean
0,0,3,1.0
1,1,2,1.0
2,2,1,0.0
