# Testing logic to clean statistics data

In [5]:
import numpy as np
import pandas as pd
import re
import pcap_processor

In [6]:
data = []
with open("system_input/tcp_stats.txt") as file:
    data = list(file)

In [7]:
print(data[:5])



In [8]:
data = data[5:]

In [9]:
print(data[0])

192.168.1.146:59787        <-> 192.30.252.154:80              615    910519     395     27638    1010    938157   748.442405000        57.9930



In [10]:
data = [e.split(" ") for e in data]

In [11]:
print(data)



In [12]:
test_element = data[0]

In [13]:
test_element = [e for e in test_element if e != '']

In [12]:
test_element = [e for e in test_element if e != '<->'] 

In [13]:
'' in test_element

False

In [14]:
print(test_element)

['192.168.1.146:59787', '192.30.252.154:80', '615', '910519', '395', '27638', '1010', '938157', '748.442405000', '57.9930\n']


In [15]:
test_element = [e.split(":") for e in test_element]

In [16]:
print(test_element)

[['192.168.1.146', '59787'], ['192.30.252.154', '80'], ['615'], ['910519'], ['395'], ['27638'], ['1010'], ['938157'], ['748.442405000'], ['57.9930\n']]


In [17]:
_ = []
for e in test_element:
    _ += e
test_element = _

In [18]:
print(test_element)

['192.168.1.146', '59787', '192.30.252.154', '80', '615', '910519', '395', '27638', '1010', '938157', '748.442405000', '57.9930\n']


In [19]:
for i,e in enumerate(test_element):
    if re.search("\\n", e) is not None:
        test_element[i] = e.split("\n")[0]

In [26]:
print(len(test_element))

12


In [25]:
labels = ["src_ip", "src_port", "dst_ip, dst_port", "src_frames", "src_bytes", "dst_frames", "dst_bytes", "total_frames", "total_bytes", "relative_start", "duration"]
print(len(test_element))

12


In [65]:
omit = ['', '|', '->', '<-', '|\n']
labels = [i for i in labels if i not in omit]

# Working with the new pcap_processor class

In [1]:
import numpy as np
import pandas as pd
import re
import pcap_processor

In [2]:
stats = pcap_processor.processorV2("system_input/tcp_stats.txt")

In [3]:
clean_stats = stats.clean()

In [4]:
print(clean_stats)

            src_ip src_port           dst_ip dst_port  src_frames  src_bytes  \
0    192.168.1.146    59787   192.30.252.154       80         615     910519   
1    192.168.1.146    59635   23.222.154.141      443         133     143779   
2    192.168.1.146    59944     52.109.76.32      443         148     217138   
3    192.168.1.146    59664    18.233.104.54      443          88       8924   
4    192.168.1.146    59692   35.155.143.197      443          91      14952   
5    192.168.1.146    59771   192.30.252.154       80         100     141938   
6    192.168.1.146    59715    172.217.4.110      443          76      26011   
7    192.168.1.146    59648   35.155.143.197      443          83       8595   
8    192.168.1.146    59651    52.27.211.200      443          58      11883   
9    192.168.1.146    59666   35.155.143.197      443          80       8404   
10   192.168.1.146    59677     52.32.16.231      443          82       8728   
11   192.168.1.146    59650     52.32.16

In [5]:
clean_stats.describe()

Unnamed: 0,src_frames,src_bytes,dst_frames,dst_bytes,total_frames,total_bytes,relative_start,duration
count,293.0,293.0,293.0,293.0,293.0,293.0,293.0,293.0
mean,19.095563,10179.34471,21.208191,4006.750853,40.303754,14186.095563,1207.415411,230.837514
std,40.173226,55937.597166,30.748555,7396.454078,70.066797,58192.854937,1042.354789,474.123838
min,0.0,0.0,1.0,54.0,1.0,54.0,0.0,0.0
25%,4.0,271.0,8.0,660.0,10.0,780.0,214.724186,0.4882
50%,12.0,4505.0,15.0,2007.0,26.0,7513.0,1143.740392,44.8184
75%,22.0,6923.0,25.0,5046.0,47.0,11966.0,1705.959666,220.8553
max,615.0,910519.0,395.0,92034.0,1010.0,938157.0,3032.452213,2984.2177


# Visualize the data

In [8]:
stats.tcp_labels

['src_ip',
 'src_port',
 'dst_ip',
 'dst_port',
 'src_frames',
 'src_bytes',
 'dst_frames',
 'dst_bytes',
 'total_frames',
 'total_bytes',
 'relative_start',
 'duration']

In [16]:
from prettytable import PrettyTable
from plotly import __version__
import plotly.offline

In [38]:
def plot_bar(x_data, y_data, x_label, y_label, title=None):
    plotly.offline.iplot({"data":[plotly.graph_objs.Bar(x=x_data,
                                                            y=y_data)], 
                          "layout":plotly.graph_objs.Layout(title=title, 
                                                            xaxis=dict(title=x_label), 
                                                            yaxis=dict(title=y_label))})

In [35]:
plot_bar(clean_stats["dst_port"], clean_stats["src_bytes"], "dst port", "source bytes", "DestinationPort/SourceBytes")

In [36]:
plot_bar(clean_stats["src_port"], clean_stats["src_bytes"], "src port", "source bytes", "SourcePort/SourceBytes")

In [40]:
plot_bar(clean_stats["relative_start"], clean_stats["total_bytes"], "Time", "Total Bytes", "Bytes by Time")

In [25]:
#Create the graph
plotly.offline.iplot({"data":[plotly.graph_objs.Scatter(x=clean_stats["src_ip"], y=clean_stats["total_frames"])], "layout":plotly.graph_objs.Layout(title="Frames per IP ", xaxis=dict(title="IP"), yaxis=dict(title="Frames"))})