# Import modules

In [104]:
import pandas as pd
import json
import re
import warnings
warnings.filterwarnings('ignore')

pcap_data = pd.read_csv('pcap_export.csv', index_col='No.')


In [105]:
pcap_data

Unnamed: 0_level_0,Time,Source,Destination,Protocol,Length,Info
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,0.000000e+00,10.25.22.253,10.25.22.250,TCP,62,2546 > 80 [SYN] Seq=0 Win=64240 Len=0 MSS=14...
2,3.500000e-05,10.25.22.250,10.25.22.253,TCP,62,"80 > 2546 [SYN, ACK] Seq=0 Ack=1 Win=14600 L..."
3,2.250000e-04,10.25.22.253,10.25.22.250,TCP,60,2546 > 80 [ACK] Seq=1 Ack=1 Win=64240 Len=0
4,4.550000e-04,10.25.22.253,10.25.22.250,HTTP,360,GET / HTTP/1.1
5,4.820000e-04,10.25.22.250,10.25.22.253,TCP,54,80 > 2546 [ACK] Seq=1 Ack=307 Win=15544 Len=0
6,9.570000e-04,10.25.22.250,10.25.22.253,HTTP,1315,HTTP/1.1 200 OK (text/html)
7,3.018000e-03,10.25.22.253,10.25.22.250,HTTP,340,GET /default.css HTTP/1.1
8,3.181000e-03,10.25.22.250,10.25.22.253,TCP,1514,[TCP segment of a reassembled PDU]
9,3.298000e-03,10.25.22.250,10.25.22.253,HTTP,1194,HTTP/1.1 200 OK (text/css)
10,3.531000e-03,10.25.22.253,10.25.22.250,TCP,60,2546 > 80 [ACK] Seq=593 Ack=3862 Win=64240 L...


# Rename dataframe for tutorial

In [106]:
dataframe = pcap_data

In [107]:
src_dst = dataframe[["Source","Destination"]]

In [108]:
src_dst.sample(10)

Unnamed: 0_level_0,Source,Destination
No.,Unnamed: 1_level_1,Unnamed: 2_level_1
16018,10.25.22.253,192.190.173.45
37930,10.16.92.79,10.16.92.103
60240,10.16.92.103,10.16.92.79
16772,192.190.173.45,10.25.22.253
98884,10.16.92.103,10.16.92.79
18082,10.25.22.253,10.21.22.253
127799,10.16.92.103,10.16.92.79
109308,10.16.92.103,10.16.92.79
113511,10.16.92.79,10.16.92.103
68852,10.16.92.103,10.16.92.79


# IP Matcher to filter out hostnames

In [109]:
def ip_matcher(address):
    # Used to validate if string is an ipaddress
    ip = re.match(
        '^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$', address)
    if ip:
        return True
    else:
        return False

In [110]:
src_dst.rename(columns={"Source":"source","Destination":"target"}, inplace=True)
src_dst['valid_src'] = src_dst.source.apply(ip_matcher)
src_dst['valid_target'] = src_dst.target.apply(ip_matcher)


In [111]:
valid_src_dest = src_dst[(src_dst.valid_src==True) & (src_dst.valid_target==True)]

In [112]:
grouped_src_dst = valid_src_dest.groupby(["source","target"]).size().reset_index()

In [113]:
unique_ips = pd.Index(grouped_src_dst['source']
                      .append(grouped_src_dst['target'])
                      .reset_index(drop=True).unique())

In [133]:
unique_ips[:5]

Index([u'0.0.0.0', u'10.16.11.5', u'10.16.92.103', u'10.16.92.79',
       u'10.2.2.2'],
      dtype='object')

In [115]:
group_dict = {}
counter = 0
for ip in unique_ips:
    breakout_ip = re.match("^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$", ip)
    if breakout_ip:
        net_id = '.'.join(breakout_ip.group(1,2,3))
        if net_id not in group_dict:
            counter += 1
            group_dict[net_id] = counter
        else:
            pass

In [116]:
grouped_src_dst.rename(columns={0:'count'}, inplace=True)
temp_links_list = list(grouped_src_dst.apply(lambda row: {"source": row['source'], "target": row['target'], "value": row['count']}, axis=1))

In [134]:
grouped_src_dst.head()

Unnamed: 0,source,target,count
0,0.0.0.0,255.255.255.255,157
1,10.16.11.5,10.25.22.253,24
2,10.16.92.103,10.16.92.79,105742
3,10.16.92.79,10.16.92.103,36543
4,10.2.2.2,10.22.11.9,3410


In [118]:
temp_links_list

[{'source': '0.0.0.0', 'target': '255.255.255.255', 'value': 157},
 {'source': '10.16.11.5', 'target': '10.25.22.253', 'value': 24},
 {'source': '10.16.92.103', 'target': '10.16.92.79', 'value': 105742},
 {'source': '10.16.92.79', 'target': '10.16.92.103', 'value': 36543},
 {'source': '10.2.2.2', 'target': '10.22.11.9', 'value': 3410},
 {'source': '10.2.2.2', 'target': '10.25.22.253', 'value': 57},
 {'source': '10.21.22.1', 'target': '10.21.22.22', 'value': 1},
 {'source': '10.21.22.1', 'target': '10.21.22.23', 'value': 1},
 {'source': '10.21.22.1', 'target': '10.21.22.24', 'value': 1},
 {'source': '10.21.22.1', 'target': '10.21.22.253', 'value': 19},
 {'source': '10.21.22.10', 'target': '10.21.22.22', 'value': 54},
 {'source': '10.21.22.10', 'target': '10.21.22.23', 'value': 96},
 {'source': '10.21.22.10', 'target': '10.21.22.24', 'value': 156},
 {'source': '10.21.22.10', 'target': '10.21.22.253', 'value': 14},
 {'source': '10.21.22.22', 'target': '10.21.22.1', 'value': 3},
 {'source'

In [119]:
len(unique_ips)

60

In [120]:
links_list = []
for link in temp_links_list:
    record = {"value":link['value'], "source":unique_ips.get_loc(link['source']),
     "target": unique_ips.get_loc(link['target'])}
    links_list.append(record)

In [121]:
nodes_list = []

for ip in unique_ips:
    breakout_ip = re.match("^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$", ip)
    if breakout_ip:
        net_id = '.'.join(breakout_ip.group(1,2,3))
        nodes_list.append({"name":ip, "group": group_dict.get(net_id)})

In [136]:
links_list[:5]

[{'source': 0, 'target': 58, 'value': 157},
 {'source': 1, 'target': 23, 'value': 24},
 {'source': 2, 'target': 3, 'value': 105742},
 {'source': 3, 'target': 2, 'value': 36543},
 {'source': 4, 'target': 11, 'value': 3410}]

In [135]:
nodes_list[:5]

[{'group': 1, 'name': '0.0.0.0'},
 {'group': 2, 'name': '10.16.11.5'},
 {'group': 3, 'name': '10.16.92.103'},
 {'group': 3, 'name': '10.16.92.79'},
 {'group': 4, 'name': '10.2.2.2'}]

In [124]:
json_prep = {"links":links_list, "nodes":nodes_list}

json_prep.keys()

['nodes', 'links']

In [125]:
json_dump = json.dumps(json_prep, indent=1, sort_keys=True)

In [126]:
print json_dump

{
 "links": [
  {
   "source": 0, 
   "target": 58, 
   "value": 157
  }, 
  {
   "source": 1, 
   "target": 23, 
   "value": 24
  }, 
  {
   "source": 2, 
   "target": 3, 
   "value": 105742
  }, 
  {
   "source": 3, 
   "target": 2, 
   "value": 36543
  }, 
  {
   "source": 4, 
   "target": 11, 
   "value": 3410
  }, 
  {
   "source": 4, 
   "target": 23, 
   "value": 57
  }, 
  {
   "source": 5, 
   "target": 7, 
   "value": 1
  }, 
  {
   "source": 5, 
   "target": 8, 
   "value": 1
  }, 
  {
   "source": 5, 
   "target": 9, 
   "value": 1
  }, 
  {
   "source": 5, 
   "target": 10, 
   "value": 19
  }, 
  {
   "source": 6, 
   "target": 7, 
   "value": 54
  }, 
  {
   "source": 6, 
   "target": 8, 
   "value": 96
  }, 
  {
   "source": 6, 
   "target": 9, 
   "value": 156
  }, 
  {
   "source": 6, 
   "target": 10, 
   "value": 14
  }, 
  {
   "source": 7, 
   "target": 5, 
   "value": 3
  }, 
  {
   "source": 7, 
   "target": 6, 
   "value": 40
  }, 
  {
   "source": 7, 
   "targ

In [127]:
pd.DataFrame(json_prep['nodes']).head()

Unnamed: 0,group,name
0,1,0.0.0.0
1,2,10.16.11.5
2,3,10.16.92.103
3,3,10.16.92.79
4,4,10.2.2.2


In [128]:
pd.DataFrame(json_prep['links']).head()

Unnamed: 0,source,target,value
0,0,58,157
1,1,23,24
2,2,3,105742
3,3,2,36543
4,4,11,3410


In [129]:
filename_out = 'pcap_export.json'
json_out = open(filename_out,'w')
json_out.write(json_dump)
json_out.close()