In [1]:
import os
import multiprocessing
from multiprocessing import Pool
from pathlib import Path
import uuid

import pandas as pd
from itertools import repeat
import pcap_generic_parser as parser

In [2]:
input_folder = '../data'
call_type_file = os.path.join(input_folder, "type.txt")
output_folder = f'../data/output/{uuid.uuid4()}'
output_folder_path = Path(output_folder)
pcap_folder_path = Path(input_folder)
os.makedirs(output_folder, exist_ok=True)

sip_output_filename = 'sip_parser_output.csv'
gtp_output_filename = 'gtp_parser_output.csv'
diameter_output_filename = 'diameter_parser_output.csv'
all_output_csv_filename = 'all_parser_output.csv'
gtp_sip_output_csv_filename = 'gtp_sip_parser_output.csv'

num_processors = multiprocessing.cpu_count()
p=Pool(processes = num_processors)

In [3]:
# Fetch ALL files that end in PCAP
pcap_file_names = [ x for x in os.listdir(input_folder) if x.endswith("pcap") ]
len(pcap_file_names)

4

In [4]:
# Create raw file names of pcap files without extension ".pcap"
files=[file_name.split('.pcap')[0] for file_name in pcap_file_names]

In [5]:
# Convert PCAP to JSON
pcaps_list = p.starmap(parser.convert_pcap, zip(repeat(pcap_folder_path), repeat(output_folder_path), files))

In [6]:
# Fetch ALL files that end in json
json_files = [ x for x in os.listdir(output_folder_path) if x.endswith("json") ]
len(json_files)

4

# Sip Parser

In [7]:
# Read and parse json packets, generates a nest List
sip_parsed = p.starmap(parser.read_parse_sip, zip(repeat(output_folder_path), json_files))

In [8]:
# Get output from original parser and concatenate both
sip_df_out_final=pd.DataFrame(data=sip_parsed)
sip_df_out_final.fillna("*", inplace = True) 
sip_df_out_final=sip_df_out_final.set_index(sip_df_out_final.columns[0])
sip_df_out_final.index.names = ['pcap']
sip_df_out_final=sip_df_out_final.add_prefix('sip ')

In [9]:
sip_df_out_final.to_csv(output_folder_path / sip_output_filename)
sip_df_out_final

Unnamed: 0_level_0,sip 1,sip 2,sip 3,sip 4,sip 5,sip 6,sip 7,sip 8,sip 9,sip 10,...,sip 63,sip 64,sip 65,sip 66,sip 67,sip 68,sip 69,sip 70,sip 71,sip 72
pcap,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_48_00_647_+16692481447_357330090901661_iPhone-XR_12.3_054741,UE-REGISTER-P-CSCF,P-CSCF-401-UE,P-CSCF-200-UE,UE-INVITE-P-CSCF,P-CSCF-100-UE,SR-Signaling-100-S-CSCF,SR-Signaling-404-S-CSCF,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-UE,...,*,*,*,*,*,*,*,*,*,*
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_06_00_612_+16692481447_357330090901661_iPhone-XR_12.3_050557,UE-INVITE-P-CSCF,P-CSCF-100-UE,SR-Signaling-100-S-CSCF,SR-Signaling-404-S-CSCF,S-CSCF-ACK-SR-Signaling,Unmapped IP-INVITE-P-CSCF,S-CSCF-INVITE-SR-Signaling,P-CSCF-100-Unmapped IP,SR-Signaling-100-S-CSCF,SR-Signaling-404-S-CSCF,...,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP,*,*,*,*
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_36_00_637_+17014801708_354386060552127_iPhone-6_Plus_11.4.1_053554,Unmapped IP-200-Unmapped IP,UE-INVITE-P-CSCF,P-CSCF-100-UE,SR-Signaling-100-S-CSCF,SR-Signaling-404-S-CSCF,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,Unmapped IP-INVITE-P-CSCF,SR-Signaling-100-S-CSCF,P-CSCF-100-Unmapped IP,...,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP,*,*,*,*
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_24_00_627_+17014801700_356165090145724_iPhone-XS_12.4_052415,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP,UE-INVITE-P-CSCF,P-CSCF-100-UE,SR-Signaling-100-S-CSCF,SR-Signaling-404-S-CSCF,S-CSCF-ACK-SR-Signaling,Unmapped IP-200-Unmapped IP,S-CSCF-INVITE-SR-Signaling,SR-Signaling-100-S-CSCF,...,Unmapped IP-INVITE-P-CSCF,P-CSCF-100-Unmapped IP,SR-Signaling-100-S-CSCF,SR-Signaling-404-S-CSCF,S-CSCF-ACK-SR-Signaling,S-CSCF-INVITE-SR-Signaling,P-CSCF-404-Unmapped IP,Unmapped IP-ACK-P-CSCF,Unmapped IP-200-Unmapped IP,SR-Signaling-404-Unmapped IP


# GTPv2 Parser

In [10]:
# Get all possible gtpv2 causes
protocols_list_from_tshark = p.starmap(parser.tshark_aggregate_gtp_cause, zip(repeat(output_folder_path), pcap_file_names))
# Split each protocol to a new row
values = set()
for x in protocols_list_from_tshark:
    values.update(x)
try:
    values.remove('')
except KeyError as e:
    pass
gtp_causes=list(values)

In [11]:
# Read and parse json packets, generates a nest List
gtp_parse_output = p.starmap(parser.read_parse_gtp, zip(repeat(output_folder_path), json_files, repeat(gtp_causes)))

In [12]:
# Get output from original parser and concatenate both
gtp_df_out_final=pd.DataFrame(data=gtp_parse_output)
gtp_df_out_final.fillna("*", inplace = True) 
gtp_df_out_final=gtp_df_out_final.set_index(gtp_df_out_final.columns[0])
gtp_df_out_final.index.names = ['pcap']
gtp_df_out_final = gtp_df_out_final.rename(columns={1: 'n Requests', 2: 'n Responses', 3: 'n unanswered requests'})
gtp_df_out_final=gtp_df_out_final.add_prefix('gtpv2 ')

for i in range(0,len(gtp_causes)):
    gtp_df_out_final = gtp_df_out_final.rename(columns={'gtpv2 '+str(i+4): 'gtpv2 cause = '+str(gtp_causes[i])})

In [13]:
gtp_df_out_final.to_csv(output_folder_path / gtp_output_filename)
gtp_df_out_final

Unnamed: 0_level_0,gtpv2 n Requests,gtpv2 n Responses,gtpv2 n unanswered requests
pcap,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_48_00_647_+16692481447_357330090901661_iPhone-XR_12.3_054741,0,0,0
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_06_00_612_+16692481447_357330090901661_iPhone-XR_12.3_050557,0,0,0
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_36_00_637_+17014801708_354386060552127_iPhone-6_Plus_11.4.1_053554,0,0,0
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_24_00_627_+17014801700_356165090145724_iPhone-XS_12.4_052415,0,0,0


# Diameter Parser

In [14]:
# Get all possible diameter result codes
diameter_list_from_tshark = p.starmap(parser.tshark_aggregate_diameter_result_code, zip(repeat(output_folder_path), pcap_file_names))
# Split each protocol to a new row
values = set()
for x in diameter_list_from_tshark:
    values.update(x)
try:
    values.remove('')
except KeyError as e:
    pass
diameter_result_codes=list(values)

In [15]:
# Read and parse json packets, generates a nest List
diameter_parse_output = p.starmap(parser.read_parse_diameter, zip(repeat(output_folder_path), json_files, repeat(diameter_result_codes)))

In [16]:
# Get output from original parser and concatenate both
diameter_df_out_final=pd.DataFrame(data=diameter_parse_output)
diameter_df_out_final.fillna("*", inplace = True) 
diameter_df_out_final=diameter_df_out_final.set_index(diameter_df_out_final.columns[0])
diameter_df_out_final.index.names = ['pcap']
diameter_df_out_final = diameter_df_out_final.rename(columns={1: 'n Requests', 2: 'n Responses', 3: 'n unanswered requests'})
diameter_df_out_final=diameter_df_out_final.add_prefix('diameter ')

for i in range(0,len(diameter_result_codes)):
    diameter_df_out_final = diameter_df_out_final.rename(columns={'diameter '+str(i+4): 'diameter result code = '+str(diameter_result_codes[i])})

In [17]:
diameter_df_out_final.to_csv(output_folder_path / diameter_output_filename)
diameter_df_out_final

Unnamed: 0_level_0,diameter n Requests,diameter n Responses,diameter n unanswered requests,diameter 4
pcap,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_48_00_647_+16692481447_357330090901661_iPhone-XR_12.3_054741,0,0,0,3
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_06_00_612_+16692481447_357330090901661_iPhone-XR_12.3_050557,0,0,0,3
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_36_00_637_+17014801708_354386060552127_iPhone-6_Plus_11.4.1_053554,0,0,0,3
timothy.vogel@verizonwireless.com_Scheduled_PS911_v4_Eric_4TX_19-09-12_10_24_00_627_+17014801700_356165090145724_iPhone-XS_12.4_052415,0,0,0,3


# Merge sip, gtpv2, and diameter Parser Outputs

In [18]:
pd.concat([diameter_df_out_final, gtp_df_out_final, sip_df_out_final], keys=['pcap', 'pcap', 'pcap'], axis=1).to_csv(output_folder_path / all_output_csv_filename)

In [19]:
pd.merge(gtp_df_out_final, sip_df_out_final, on='pcap').to_csv(output_folder_path / gtp_sip_output_csv_filename)

In [None]:
#pd.concat([gtp_df_out_final, sip_df_out_final], keys=['pcap', 'pcap'], axis=1)