# Wireshark Honeypot attack analysis

In [None]:
# imports
import os
import sys
import pyasn
import csv
from copy import deepcopy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pycountry
import geopandas as gpd
import geoplot as gplt
import geoplot.crs as gcrs
from shapely.geometry import Point
from scapy.all import *
# install with: pip3 install maxminddb-geolite2 --user
from geolite2 import geolite2

Configuration:

In [None]:
# --- capture files ---------------------------------------------------------
# location of complete wireshark capture
file_name = "tcpdump/honeypot_traffic_filtered.pcapng"
# capture pre-filtered for the CVE-2018-14847 payload (payload 1)
file_name_cve_2018_14847_tcp_streams = "tcpdump/honeypot_traffic_cve_2018_14847.pcapng"
# capture pre-filtered for the DNS redirection traffic
file_name_dns_redirection = "tcpdump/honeypot_traffic_dns_redirection.pcapng"
# capture used by the Mirai section (classic .pcap, not .pcapng)
file_name_mirai = "tcpdump/honeypot_traffic_rewrite_mirai.pcap"

# --- server addresses ------------------------------------------------------
# ip address of the server; packets with this source address are treated as
# outgoing (server -> client) and skipped by the analysis loops
server_ip = "0.0.0.0" # change
# address compared against in the Mirai section and geolocated at the end of this cell
real_server_ip = "0.0.0.0" # update to real ip

# width of one time bucket in seconds, used for all traffic-over-time tables
timestamp_period = 43200 # 12 hours

# ASN database location
# see pyasn documentation: https://github.com/Yelp/pyasn
asndb_file = "utils/ipasndb-2019-0529.dat"
asndb = pyasn.pyasn(asndb_file)

# ASN List file with names: column 0 is used as the lookup key, column 1 as the
# owner name. The analysis loops look up keys of the form "AS<number>".
aslist_csv = "utils/aslist.csv"
aslist_dict = {}
# newline='' is what the csv module expects, so quoted fields that contain
# line breaks are parsed correctly
with open(aslist_csv, 'rt', newline='') as csvfile:
    csvreader = csv.reader(csvfile, delimiter=',')
    for row in csvreader:
        # blank lines yield [] and malformed lines may have a single field;
        # neither can provide a (key, owner) pair
        if len(row) < 2:
            continue
        aslist_dict[row[0]] = row[1]

# ip info reader
ip_reader = geolite2.reader()

# countries shapefile, reduced to the three columns the maps need
countries_shapefile = 'utils/110m_cultural/ne_110m_admin_0_countries.shp'
countries_gdf = gpd.read_file(countries_shapefile)[['ADMIN', 'ADM0_A3', 'geometry']]
countries_gdf.columns = ['country', 'country_code', 'geometry']

# last expression of the cell: show the GeoIP record of the server itself
ip_reader.get(real_server_ip)

# General statistics

Let's count the number of packets:

In [None]:
# Count the packets in the full capture and the total number of captured bytes.
general_count = general_data_count = 0
if not os.path.isfile(file_name):
    print('"{}" does not exist'.format(file_name), file=sys.stderr)
    sys.exit(-1)

# one entry per packet: the length of its raw bytes
packet_sizes = [len(raw_bytes) for raw_bytes, _ in RawPcapReader(file_name)]
general_count = len(packet_sizes)
general_data_count = sum(packet_sizes)

print('{} contains {} packets'.format(file_name, general_count))
print('{} has a size of {} bytes'.format(file_name, general_data_count))


Number of unique IP addresses

In [None]:
# Walk the full capture once and collect, for incoming TCP packets only:
#   general_ip_data                    per source IP:
#       [packets, bytes, asn, bgp_range, asn_owner,
#        country_code, country, continent, city, lat_long]
#   general_src_port_count             packets per source port (keys are strings)
#   general_dest_port_count            packets per destination port (keys are strings)
#   general_data_time_continent_count  continent -> {str(bucket midpoint): bytes}
general_ip_data = {}
general_src_port_count = {}
general_dest_port_count = {}
general_data_time_continent_count = {}
count = 0
for (pkt_data, pkt_metadata,) in RawPcapReader(file_name):
    ether_pkt = Ether(pkt_data)

    # skip non tcp packets; the IP layer is checked too because indexing a
    # layer that is not present raises IndexError
    if not (ether_pkt.haslayer(IP) and ether_pkt.haslayer(TCP)):
        print("skipped a packet")
        continue

    ip_pkt = ether_pkt[IP]
    tcp_pkt = ip_pkt[TCP]

    # skip packets in the direction server -> client
    src = str(ip_pkt.src)
    if src == server_ip:
        continue

    count += 1

    # packet size
    pkt_len = len(pkt_data)

    if src not in general_ip_data:
        # first packet from this address: resolve ASN and geolocation once
        asn, bgp_range = asndb.lookup(src)
        asn = "AS" + str(asn)
        asn_owner = aslist_dict[asn] if asn in aslist_dict else ""
        ip_dict = ip_reader.get(src)
        if ip_dict:
            # every level is optional in the GeoIP record, so fall back to
            # "N/A" instead of raising KeyError on a partial record
            country_rec = ip_dict.get('country', {})
            country_code = country_rec.get('iso_code', "N/A")
            country = country_rec.get('names', {}).get('en', "N/A")
            continent = ip_dict.get('continent', {}).get('code', "N/A")
            city = ip_dict.get('city', {}).get('names', {}).get('en', "N/A")
            location_rec = ip_dict.get('location', {})
            lat = location_rec.get('latitude', "N/A")
            long = location_rec.get('longitude', "N/A")
            lat_long = (lat, long)
        else:
            country_code = "N/A"
            country = "N/A"
            continent = "N/A"
            city = "N/A"
            # empty string marks "no location at all"; the map cell skips falsy locations
            lat_long = ""
        general_ip_data[src] = [1, pkt_len, asn, bgp_range, asn_owner, country_code, country, continent, city, lat_long]
    else:
        general_ip_data[src][0] += 1
        general_ip_data[src][1] += pkt_len

    # generate timestamp: pcapng stores a 64-bit tick counter split in two halves
    pkt_timestamp = (pkt_metadata.tshigh << 32) | pkt_metadata.tslow
    pkt_timestamp_resol = pkt_metadata.tsresol # microseconds (1.000.000) or nanoseconds (1.000.000.000)
    pkt_sec = pkt_timestamp // pkt_timestamp_resol

    # set timestamp seconds of first packet and last packet
    if count == 1:
        first_pkt_sec = pkt_sec
    last_pkt_sec = pkt_sec

    # assign the packet to a window of `timestamp_period` seconds, anchored at
    # the first packet, and identify the window by the time at its middle
    time_period_middle_sec = pkt_sec - (pkt_sec - first_pkt_sec) % timestamp_period + (timestamp_period // 2)
    time_period_key = str(time_period_middle_sec)

    # accumulate bytes per continent per time window
    continent = general_ip_data[src][7]
    continent_traffic = general_data_time_continent_count.setdefault(continent, {})
    continent_traffic[time_period_key] = continent_traffic.get(time_period_key, 0) + pkt_len

    # source port count
    src_port = str(tcp_pkt.sport)
    general_src_port_count[src_port] = general_src_port_count.get(src_port, 0) + 1

    # destination port count
    dest_port = str(tcp_pkt.dport)
    general_dest_port_count[dest_port] = general_dest_port_count.get(dest_port, 0) + 1

print("A total of {} incoming packets were analyzed".format(count))

In [None]:
# Turn the nested continent -> {bucket: bytes} dictionary into a rectangular
# table (rows: time buckets, columns: continents) that pandas can plot.

# number of complete time buckets between the first and the last packet;
# a trailing partial bucket is not included
number_time_periods = (last_pkt_sec - first_pkt_sec) // timestamp_period

# midpoint (epoch seconds) of every bucket, matching the keys written by the
# packet loop
bucket_midpoints = [
    first_pkt_sec + period_no * timestamp_period + timestamp_period // 2
    for period_no in range(number_time_periods)
]

# column labels: the midpoints as pandas timestamps (UTC)
general_data_time_continent_table_columns = [
    pd.Timestamp(midpoint, unit='s') for midpoint in bucket_midpoints
]

# one list of byte counts per continent, 0 where the continent sent nothing
general_data_time_continent_table = {
    continent_code: [bytes_per_bucket.get(str(midpoint), 0) for midpoint in bucket_midpoints]
    for continent_code, bytes_per_bucket in general_data_time_continent_count.items()
}

# create dataframe and transpose so that time runs along the index
general_data_time_continent_df = pd.DataFrame.from_dict(
    general_data_time_continent_table,
    orient="index",
    columns=general_data_time_continent_table_columns,
).T
general_data_time_continent_df


In [None]:
# Stacked area chart of received bytes over time, one band per continent.
general_data_time_continent_plot = general_data_time_continent_df.plot(
    kind='area',
    figsize=(5, 3.3),
    cmap='YlGnBu',
)
general_data_time_continent_plot.set(ylabel='bytes', xlabel='time')
plt.savefig("images/data_time_continent.pdf", bbox_inches='tight', pad_inches=0.1)

IP addresses the traffic belongs to:

In [None]:
# Flatten the per-IP dictionary into rows of [ip_address, *statistics].
general_ip_res = [[ip_address, *ip_stats] for ip_address, ip_stats in general_ip_data.items()]
print("There are {} unique ip addresses.".format(len(general_ip_data)))

# Build the table, busiest source first.
general_ip_df = pd.DataFrame(
    general_ip_res,
    columns=['ip_address', 'no_of_packets', "no_of_bytes", "asn", "bgp_range", "asn_owner", "country_code", "country", "continent", "city", "location"],
).sort_values(by='no_of_packets', ascending=False)
# reset index and start at 1, so the index is the rank of the IP address
general_ip_df.index = np.arange(1, len(general_ip_df) + 1)

# Walk down the ranking until the running total reaches half of all incoming
# packets (`count` comes from the packet loop above).
half_packet_ip_count = 0
half_packet_count = 0
for packets_from_ip in general_ip_df['no_of_packets']:
    if half_packet_count >= count // 2:
        break
    half_packet_ip_count += 1
    half_packet_count += packets_from_ip

print("Half of the packets are send by the top {} IP addresses".format(half_packet_ip_count))
print("This is {}% of the total IP".format(100/len(general_ip_data)*half_packet_ip_count))

In [None]:
# show the 20 busiest source addresses
general_ip_df.head(20)

Number of packets per ip plot

In [None]:
# Packets per IP address against its rank, on a logarithmic y axis.
ip_packet_plot = general_ip_df.plot.line(
    y='no_of_packets',
    ylim=(0.5, 10000),
    logy=True,
    logx=False,
    legend=False,
)
ip_packet_plot.set(xlabel='Top # IP address', ylabel='Number of packets')
plt.savefig("images/ip_packet_plot.pdf", bbox_inches='tight', pad_inches=0)

In [None]:
# Cumulative share of packets covered by the top-N source addresses.
general_ip_packet_list = general_ip_df['no_of_packets'].tolist()
general_ip_packet_series = pd.Series(general_ip_packet_list)
general_ip_packet_np_arr = np.array(general_ip_packet_list)
# largest contributors first, then the running total
general_ip_packet_np_arr_sorted = np.sort(general_ip_packet_np_arr)[::-1]
general_ip_packet_np_arr_sorted_cumsum = general_ip_packet_np_arr_sorted.cumsum()
# the last running total is the total number of packets
max_val = general_ip_packet_np_arr_sorted_cumsum[-1]
# leading 0 so the curve starts at "0 addresses cover 0 of the packets";
# values are fractions in [0, 1]
arr = [0, *(general_ip_packet_np_arr_sorted_cumsum / max_val)]
general_ip_packet_np_ar_perc = np.array(arr)

# plot the sorted data:
fig, ax2 = plt.subplots(figsize=(4, 2.2))
ax2.plot(general_ip_packet_np_ar_perc)
ax2.set_xlabel('# IP addresses')
ax2.set_ylabel('percentage of packets')
ax2.set_xscale('symlog')
ax2.set_ylim(bottom=0)
ax2.set_xlim(left=0)
plt.savefig("images/ip_packet_cdf.pdf", bbox_inches='tight', pad_inches=0.1)

Map of data

In [None]:
# Aggregate packet counts per (latitude, longitude) pair.
# general_loc_dict maps str((lat, long)) -> (packets, lat, long)
general_loc_dict = {}
for ip, data in general_ip_data.items():
    no_packets, loc = data[0], data[9]
    # "" means the address had no GeoIP record at all
    if not loc:
        continue
    lat, long = loc
    # a record without coordinates cannot be placed on the map
    if "N/A" in (lat, long):
        continue
    loc_key = str(loc)
    packets_so_far = general_loc_dict[loc_key][0] if loc_key in general_loc_dict else 0
    general_loc_dict[loc_key] = (packets_so_far + no_packets, lat, long)

# filter locations with less than 10 packets
keys_to_pop = [loc_key for loc_key, (no_packets, _, _) in general_loc_dict.items() if no_packets < 10]
for key in keys_to_pop:
    general_loc_dict.pop(key, None)

# one point geometry per remaining location, largest senders first
general_loc_list = general_loc_dict.values()
general_loc_df = pd.DataFrame(general_loc_list, columns=['packets', 'lat', 'long'])
location_points = [Point(x, y) for x, y in zip(general_loc_df.long, general_loc_df.lat)]
general_gpd_loc_df = gpd.GeoDataFrame(general_loc_df, geometry=location_points)
general_gpd_loc_df = general_gpd_loc_df.sort_values(by='packets', ascending=False)
general_gpd_loc_df

# Base map: reuse the Natural Earth country polygons that the configuration
# cell already loaded into `countries_gdf`. The bundled dataset behind
# gpd.datasets.get_path('naturalearth_lowres') was deprecated in GeoPandas 0.14
# and removed in 1.0, so relying on it breaks on current installations.
world_map_df = countries_gdf
world_map = gplt.polyplot(world_map_df,
                          figsize=(6, 3.4),
                          zorder=-1, # place the borders below the data
                          linewidth=0.5 # make borders thinner
                         )

# One circle per location; both size and colour encode the number of packets.
gplt.pointplot(general_gpd_loc_df,
               scale='packets',
               limits=(1,100), # smallest circle is 100 times smaller than biggest
               hue='packets',
               cmap='Blues', # more blue more data
               k=6, # limit ourselves to six classes
               linewidth=0.5,
               edgecolor='black',
               legend=True,
               legend_var='scale', # based on circle size
               # hand-picked legend entries; values and labels must stay in step
               legend_values=[100000, 50000, 10000, 5000, 1000, 100],
               legend_labels=['100.000', '50.000', '10.000', '5.000', '1.000', '100'],
               legend_kwargs={'frameon': False, 'loc': 'lower right'},  # ...on the lower right!
               ax=world_map
              )
# a map needs no axis ticks
world_map.axes.get_xaxis().set_visible(False)
world_map.axes.get_yaxis().set_visible(False)
plt.savefig("images/map_attack_locations.pdf", bbox_inches='tight', pad_inches=0.1)



Traffic per country:

In [None]:
# Aggregate the per-IP table per country: total packets and bytes, number of
# distinct source addresses and number of distinct ASNs.
# The aggregations are given by name ('sum') rather than as NumPy callables:
# passing np.sum to agg() emits a FutureWarning on current pandas, and the
# later ASN cells of this notebook already use the string form.
general_country_df = (
    general_ip_df
    .groupby(['country_code', 'country'])
    .agg({'no_of_packets': 'sum', 'no_of_bytes': 'sum', 'ip_address': 'nunique', 'asn': 'nunique'})
    .sort_values(by=['no_of_packets'], ascending=[False])
    # move country_code back to a regular column; the country name stays as index
    .reset_index(level=0)
)

# print top 20
print("There are {} unique countries".format(len(general_country_df)))
general_country_df.head(20)


Traffic per country map:

In [None]:
# convert two letter country code to three letters, because the shapefile
# column that the configuration cell renamed to `country_code` holds
# three-letter codes
countries = {country.alpha_2: country.alpha_3 for country in pycountry.countries}

# Translate the whole column at once; codes pycountry does not know become "??".
# (Assigning row by row with .at would overwrite every row sharing an index
# label, and the index here is the country name.)
general_country_df_copy = general_country_df.copy()
general_country_df_copy['country_code'] = general_country_df_copy['country_code'].map(
    lambda c_code: countries.get(c_code, "??")
)

# Left merge keeps every country polygon; countries without traffic get NaN,
# which is replaced by 0 below.
countries_packets_gdf = pd.merge(countries_gdf, general_country_df_copy, how='left', on=['country_code'])
# `columns=` instead of a positional axis argument: pandas 2.0 no longer
# accepts drop('asn', 1)
countries_packets_gdf = countries_packets_gdf.drop(columns=['asn', 'ip_address'])
countries_packets_gdf.fillna(0, inplace=True)

world_plot = gplt.choropleth(countries_packets_gdf,
                hue='no_of_packets',  # Display data, passed as a Series
                cmap='Blues',
                scheme='fisher_jenks',
                linewidth=0.5,
                k=10,  # number of classes requested from the fisher_jenks scheme
                legend=True,
                figsize=(8, 4.5),
                # NOTE(review): these labels are hard-coded class boundaries,
                # presumably taken from one specific capture - re-check after changing the data
                legend_labels=['0 - 300', '300 - 950', '950 - 2.100', '2.100 - 3.200', '3.200 - 5.800', '5.800 - 7.600', '7.600 - 9.500', '9.500 - 15.200', '15.200 - 77.000', '77.000 - 200.000']
               )
world_plot.axes.get_xaxis().set_visible(False)
world_plot.axes.get_yaxis().set_visible(False)
plt.savefig("images/map_attack_countries.pdf", bbox_inches='tight', pad_inches=0)


Traffic per continent:

In [None]:
# Aggregate the per-IP table per continent: total packets and bytes, number of
# source addresses and number of distinct ASNs. Aggregations are named by
# string ('sum'): np.sum inside agg() emits a FutureWarning on current pandas
# and the later ASN cells already use the string form.
general_continent_df = (
    general_ip_df
    .groupby('continent')
    .agg({'no_of_packets': 'sum', 'no_of_bytes': 'sum', 'ip_address': 'count', 'asn': 'nunique'})
    .sort_values(by=['no_of_packets'], ascending=[False])
)
general_continent_df

Port/packet distribution:

In [None]:
# Packets per destination port, most targeted port first.
general_dest_port_df = (
    pd.DataFrame(list(general_dest_port_count.items()), columns=['dest_port', 'no_of_packets'])
    .set_index('dest_port')
    .sort_values(by='no_of_packets', ascending=False)
)
general_dest_port_df

In [None]:
# Bar chart of the number of packets received on each destination port.
dest_port_bar_graph = general_dest_port_df.plot.bar(
    y='no_of_packets',
    legend=False,
    figsize=(4, 2.4),
)
dest_port_bar_graph.set(xlabel='Destination port', ylabel='Number of packets')
plt.savefig("images/dest_port_packet_graph.pdf", bbox_inches='tight', pad_inches=0.1)


Source port distribution

In [None]:
# Packets per source port, most used port first.
general_src_port_df = (
    pd.DataFrame(list(general_src_port_count.items()), columns=['src_port', 'no_of_packets'])
    .set_index('src_port')
    .sort_values(by='no_of_packets', ascending=False)
)
# print the top 20
print("There are {} unique source ports".format(len(general_src_port_df)))
general_src_port_df.head(20)

The Autonomous Systems these IPs belong to:

In [None]:
# Aggregate the per-IP table per autonomous system: total packets and bytes,
# number of source addresses and the distinct BGP prefixes seen.
# Aggregations are named by string ('sum'): np.sum inside agg() emits a
# FutureWarning on current pandas and the other ASN cells of this notebook
# already use the string form.
general_asn_df = (
    general_ip_df
    .groupby(['asn', 'asn_owner'])
    .agg({'no_of_packets': 'sum', 'no_of_bytes': 'sum', 'ip_address': 'count', 'bgp_range': 'unique'})
    .sort_values(by=['no_of_packets'], ascending=[False])
)
# print the top 20
print("There are {} unique ASN's".format(len(general_asn_df)))
general_asn_df.head(20)

# CVE-2018-14847
Let's filter for packets containing the CVE-2018-14847 payloads

First take a look at the packets containing the first payload:
```
{bff0005:1, uff0006:5, uff0007:7, s1: 
'/////./..//////./..//////./../flash/rw/store/user.dat', 
Uff0002:[0,8], Uff0001:[2,2]}
```

this has already been prepared by first filtering for the tcp streams containing the payload: `tshark -r merged_filtered.pcapng -Y "tcp contains \"/////./..//////./..//////./../flash/rw/store/user.dat\"" -T fields -e tcp.stream | sort -un | sed ':a;N;$!ba;s/\n/ or tcp.stream==/g'`

and then that can be applied as a wireshark filter:




In [None]:
# Count every packet in the capture that was pre-filtered to the TCP streams
# carrying payload 1.
count = sum(1 for _ in RawPcapReader(file_name_cve_2018_14847_tcp_streams))
print("There are {} packets containing payload 1".format(count))

Now we will discover all unique ip addresses with this payload

In [None]:
# Walk the CVE-2018-14847 capture and collect, for incoming packets only:
#   cve_14847_ip_data  per source IP:
#       [packets, bytes, asn, bgp_range, asn_owner,
#        country_code, country, continent, city, lat_long]
#   cve_14847_traffic  str(time bucket midpoint in epoch seconds) -> bytes
cve_14847_ip_data = {}
cve_14847_traffic = {}
count = 0
for (pkt_data, pkt_metadata,) in RawPcapReader(file_name_cve_2018_14847_tcp_streams):
    ether_pkt = Ether(pkt_data)
    # indexing a layer that is not present raises IndexError, so skip anything
    # that is not TCP over IPv4
    if not (ether_pkt.haslayer(IP) and ether_pkt.haslayer(TCP)):
        continue
    ip_pkt = ether_pkt[IP]
    tcp_pkt = ip_pkt[TCP]

    # skip packets in the direction server -> client
    src = str(ip_pkt.src)
    if src == server_ip:
        continue

    count += 1

    # packet size
    pkt_len = len(pkt_data)

    if src not in cve_14847_ip_data:
        # first packet from this address: resolve ASN and geolocation once
        asn, bgp_range = asndb.lookup(src)
        asn = "AS" + str(asn)
        asn_owner = aslist_dict[asn] if asn in aslist_dict else ""
        ip_dict = ip_reader.get(src)
        if ip_dict:
            # every level of the GeoIP record is optional; fall back to "N/A"
            # (as the general statistics cell does) instead of raising KeyError
            country_rec = ip_dict.get('country', {})
            country_code = country_rec.get('iso_code', "N/A")
            country = country_rec.get('names', {}).get('en', "N/A")
            continent = ip_dict.get('continent', {}).get('code', "N/A")
            city = ip_dict.get('city', {}).get('names', {}).get('en', "N/A")
            location_rec = ip_dict.get('location', {})
            lat = location_rec.get('latitude', "N/A")
            long = location_rec.get('longitude', "N/A")
            lat_long = (lat, long)
        else:
            country_code = ""
            country = ""
            continent = ""
            city = ""
            lat_long = ""
        cve_14847_ip_data[src] = [1, pkt_len, asn, bgp_range, asn_owner, country_code, country, continent, city, lat_long]
    else:
        # the counters are plain ints, so the stored list can be updated in
        # place; no copy is needed
        cve_14847_ip_data[src][0] += 1
        cve_14847_ip_data[src][1] += pkt_len

    # generate timestamp: pcapng stores a 64-bit tick counter split in two halves
    pkt_timestamp = (pkt_metadata.tshigh << 32) | pkt_metadata.tslow
    pkt_timestamp_resol = pkt_metadata.tsresol # microseconds (1.000.000) or nanoseconds (1.000.000.000)
    pkt_sec = pkt_timestamp // pkt_timestamp_resol

    # set timestamp seconds of first packet and last packet
    if count == 1:
        first_pkt_sec = pkt_sec
    last_pkt_sec = pkt_sec

    # assign the packet to a window of `timestamp_period` seconds, anchored at
    # the first packet, and identify the window by the time at its middle
    time_period_middle_sec = pkt_sec - (pkt_sec - first_pkt_sec) % timestamp_period + timestamp_period // 2

    # accumulate bytes per time window
    time_period_key = str(time_period_middle_sec)
    cve_14847_traffic[time_period_key] = cve_14847_traffic.get(time_period_key, 0) + pkt_len

print("There are {} unique ip addresses!".format(len(cve_14847_ip_data)))

# one row per source IP: [ip_address, *statistics]
cve_14847_ip_res = [[ip, *info] for ip, info in cve_14847_ip_data.items()]
cve_14847_ip_df = pd.DataFrame(cve_14847_ip_res, columns=['ip_address', 'no_of_packets', "no_of_bytes", "asn", "bgp_range", "asn_owner", "country_code", "country", "continent", "city", "location"])
cve_14847_ip_df.set_index('ip_address', inplace=True)
cve_14847_ip_df.sort_values(by=['no_of_packets'], ascending=[False], inplace=True)

# string = ""
# for ip, _ in cve_14847_ip_data.items():
#     if len(string) != 0:
#         string += " or "
#     string += "ip.addr == "
#     string += ip
# print("Wireshark filter:")
# print(string)
print(cve_14847_traffic)
# The buckets hold byte counts (pkt_len), so their sum is a number of bytes,
# not packets. It gets its own name so that `count` keeps meaning
# "incoming packets".
cve_14847_total_bytes = sum(cve_14847_traffic.values())
print("Total number of: {}  bytes".format(cve_14847_total_bytes))

# show the top 20; this must be the last expression of the cell to be displayed
cve_14847_ip_df[:20]

In [None]:
# create df: one row per time bucket, indexed by the bucket midpoint
# the dictionary keys are epoch seconds stored as strings
cve_14847_cols = [pd.Timestamp(int(bucket_key), unit='s') for bucket_key in cve_14847_traffic]
cve_14847_pckts = list(cve_14847_traffic.values())
# drop the most recently added bucket
# NOTE(review): presumably because the final window is only partially
# covered by the capture - confirm
cve_14847_cols.pop()
cve_14847_pckts.pop()
cve_14847_traffic_df = pd.DataFrame(
    cve_14847_pckts,
    index=cve_14847_cols,
    columns=["CVE-2018-14847 traffic"],
)
cve_14847_traffic_df



In [None]:
# Line chart of the bytes received per time bucket for the CVE-2018-14847 traffic.
cve_14847_traffic_plot = cve_14847_traffic_df.plot(
    kind='line',
    figsize=(5, 3),
    legend=False,
)
cve_14847_traffic_plot.set(ylabel='bytes', xlabel='time')
plt.savefig("images/cve_14847_traffic.pdf", bbox_inches='tight', pad_inches=0.1)

What are the AS numbers corresponding to these IPs?

In [None]:
# Per autonomous system: total packets and bytes, number of distinct source
# addresses and the distinct BGP prefixes, busiest AS first.
cve_14847_asn_df = (
    cve_14847_ip_df
    .reset_index()  # ip_address is the index; it is needed as a column for nunique
    .groupby(['asn', 'asn_owner'])
    .agg({'no_of_packets': 'sum', 'no_of_bytes': 'sum', 'ip_address': 'nunique', 'bgp_range': 'unique'})
    .sort_values(by='no_of_packets', ascending=False)
)
cve_14847_asn_df


# DNS redirection

In [None]:
# Walk the DNS redirection capture and collect, for incoming packets only:
#   dns_red_ip_data  per source IP:
#       [packets, bytes, asn, bgp_range, asn_owner,
#        country_code, country, continent, city, lat_long]
#   dns_red_traffic  str(time bucket midpoint in epoch seconds) -> bytes
dns_red_ip_data = {}
dns_red_traffic = {}
count = 0
for (pkt_data, pkt_metadata,) in RawPcapReader(file_name_dns_redirection):
    ether_pkt = Ether(pkt_data)
    # indexing a layer that is not present raises IndexError, so skip anything
    # that is not TCP over IPv4
    if not (ether_pkt.haslayer(IP) and ether_pkt.haslayer(TCP)):
        continue
    ip_pkt = ether_pkt[IP]
    tcp_pkt = ip_pkt[TCP]

    # skip packets in the direction server -> client
    src = str(ip_pkt.src)
    if src == server_ip:
        continue

    count += 1

    # packet size
    pkt_len = len(pkt_data)

    if src not in dns_red_ip_data:
        # first packet from this address: resolve ASN and geolocation once
        asn, bgp_range = asndb.lookup(src)
        asn = "AS" + str(asn)
        asn_owner = aslist_dict[asn] if asn in aslist_dict else ""
        ip_dict = ip_reader.get(src)
        if ip_dict:
            # every level of the GeoIP record is optional; fall back to "N/A"
            # (as the general statistics cell does) instead of raising KeyError
            country_rec = ip_dict.get('country', {})
            country_code = country_rec.get('iso_code', "N/A")
            country = country_rec.get('names', {}).get('en', "N/A")
            continent = ip_dict.get('continent', {}).get('code', "N/A")
            city = ip_dict.get('city', {}).get('names', {}).get('en', "N/A")
            location_rec = ip_dict.get('location', {})
            lat = location_rec.get('latitude', "N/A")
            long = location_rec.get('longitude', "N/A")
            lat_long = (lat, long)
        else:
            country_code = ""
            country = ""
            continent = ""
            city = ""
            lat_long = ""
        dns_red_ip_data[src] = [1, pkt_len, asn, bgp_range, asn_owner, country_code, country, continent, city, lat_long]
    else:
        # the counters are plain ints, so the stored list can be updated in
        # place; no copy is needed
        dns_red_ip_data[src][0] += 1
        dns_red_ip_data[src][1] += pkt_len

    # generate timestamp: pcapng stores a 64-bit tick counter split in two halves
    pkt_timestamp = (pkt_metadata.tshigh << 32) | pkt_metadata.tslow
    pkt_timestamp_resol = pkt_metadata.tsresol # microseconds (1.000.000) or nanoseconds (1.000.000.000)
    pkt_sec = pkt_timestamp // pkt_timestamp_resol

    # set timestamp seconds of first packet and last packet
    if count == 1:
        first_pkt_sec = pkt_sec
    last_pkt_sec = pkt_sec

    # assign the packet to a window of `timestamp_period` seconds, anchored at
    # the first packet, and identify the window by the time at its middle
    time_period_middle_sec = pkt_sec - (pkt_sec - first_pkt_sec) % timestamp_period + timestamp_period // 2

    # accumulate bytes per time window
    time_period_key = str(time_period_middle_sec)
    dns_red_traffic[time_period_key] = dns_red_traffic.get(time_period_key, 0) + pkt_len

print("There are {} unique ip addresses!".format(len(dns_red_ip_data)))

# one row per source IP: [ip_address, *statistics]
dns_red_ip_res = [[ip, *info] for ip, info in dns_red_ip_data.items()]
dns_red_ip_df = pd.DataFrame(dns_red_ip_res, columns=['ip_address', 'no_of_packets', "no_of_bytes", "asn", "bgp_range", "asn_owner", "country_code", "country", "continent", "city", "location"])
dns_red_ip_df.set_index('ip_address', inplace=True)
dns_red_ip_df.sort_values(by=['no_of_packets'], ascending=[False], inplace=True)
# print top 20
dns_red_ip_df[:20]

# string = ""
# for ip, _ in cve_14847_ip_data.items():
#     if len(string) != 0:
#         string += " or "
#     string += "ip.addr == "
#     string += ip
# print("Wireshark filter:")
# # print(string)
# print(cve_14847_traffic)
# count = 0
# for key, value in cve_14847_traffic.items():
#     count += value
# print("Total number of: {}  pakcets".format(count))

In [None]:
# Per autonomous system: total packets and bytes, number of distinct source
# addresses and the distinct BGP prefixes, busiest AS first.
dns_red_asn_df = (
    dns_red_ip_df
    .reset_index()  # ip_address is the index; it is needed as a column for nunique
    .groupby(['asn', 'asn_owner'])
    .agg({'no_of_packets': 'sum', 'no_of_bytes': 'sum', 'ip_address': 'nunique', 'bgp_range': 'unique'})
    .sort_values(by='no_of_packets', ascending=False)
)
dns_red_asn_df

# Mirai

In [None]:
# Walk the Mirai capture and collect per-source-IP statistics and bytes per
# time bucket, for incoming packets only.
# NOTE(review): this cell reuses the dns_red_* names of the DNS redirection
# section and therefore overwrites its results. The names are kept because the
# following ASN cell reads dns_red_ip_df; consider renaming both to mirai_*.
dns_red_ip_data = {}
dns_red_traffic = {}
count = 0
for (pkt_data, pkt_metadata,) in RawPcapReader(file_name_mirai):
    ether_pkt = Ether(pkt_data)
    # indexing a layer that is not present raises IndexError, so skip anything
    # that is not TCP over IPv4
    if not (ether_pkt.haslayer(IP) and ether_pkt.haslayer(TCP)):
        continue
    ip_pkt = ether_pkt[IP]
    tcp_pkt = ip_pkt[TCP]

    # skip packets in the direction server -> client
    # (this capture is compared against real_server_ip, not server_ip)
    src = str(ip_pkt.src)
    if src == real_server_ip:
        continue

    count += 1

    # packet size
    pkt_len = len(pkt_data)

    if src not in dns_red_ip_data:
        # first packet from this address: resolve ASN and geolocation once
        asn, bgp_range = asndb.lookup(src)
        asn = "AS" + str(asn)
        asn_owner = aslist_dict[asn] if asn in aslist_dict else ""
        ip_dict = ip_reader.get(src)
        if ip_dict:
            # every level of the GeoIP record is optional; fall back to "N/A"
            country_rec = ip_dict.get('country', {})
            country_code = country_rec.get('iso_code', "N/A")
            country = country_rec.get('names', {}).get('en', "N/A")
            continent = ip_dict.get('continent', {}).get('code', "N/A")
            city = ip_dict.get('city', {}).get('names', {}).get('en', "N/A")
            location_rec = ip_dict.get('location', {})
            lat = location_rec.get('latitude', "N/A")
            long = location_rec.get('longitude', "N/A")
            lat_long = (lat, long)
        else:
            country_code = ""
            country = ""
            continent = ""
            city = ""
            lat_long = ""
        dns_red_ip_data[src] = [1, pkt_len, asn, bgp_range, asn_owner, country_code, country, continent, city, lat_long]
    else:
        # the counters are plain ints, so the stored list can be updated in
        # place; no copy is needed
        dns_red_ip_data[src][0] += 1
        dns_red_ip_data[src][1] += pkt_len

    # generate timestamp from this packet's own metadata. Classic pcap
    # metadata exposes whole seconds as `sec`; pcapng metadata exposes a
    # (tshigh, tslow) tick counter plus its resolution. Previously pkt_sec was
    # a constant and the first/last values were derived from variables left
    # over from the DNS redirection cell.
    if hasattr(pkt_metadata, 'sec'):
        pkt_sec = pkt_metadata.sec
    else:
        pkt_timestamp = (pkt_metadata.tshigh << 32) | pkt_metadata.tslow
        pkt_timestamp_resol = pkt_metadata.tsresol
        pkt_sec = pkt_timestamp // pkt_timestamp_resol

    # set timestamp seconds of first packet and last packet
    if count == 1:
        first_pkt_sec = pkt_sec
    last_pkt_sec = pkt_sec

    # assign the packet to a window of `timestamp_period` seconds, anchored at
    # the first packet, and identify the window by the time at its middle
    time_period_middle_sec = pkt_sec - (pkt_sec - first_pkt_sec) % timestamp_period + timestamp_period // 2

    # accumulate bytes per time window
    time_period_key = str(time_period_middle_sec)
    dns_red_traffic[time_period_key] = dns_red_traffic.get(time_period_key, 0) + pkt_len

print("There are {} unique ip addresses!".format(len(dns_red_ip_data)))

# one row per source IP: [ip_address, *statistics]
dns_red_ip_res = [[ip, *info] for ip, info in dns_red_ip_data.items()]
dns_red_ip_df = pd.DataFrame(dns_red_ip_res, columns=['ip_address', 'no_of_packets', "no_of_bytes", "asn", "bgp_range", "asn_owner", "country_code", "country", "continent", "city", "location"])
dns_red_ip_df.set_index('ip_address', inplace=True)
dns_red_ip_df.sort_values(by=['no_of_packets'], ascending=[False], inplace=True)
# print top 20
dns_red_ip_df[:20]

In [None]:
# Per autonomous system: total packets and bytes, number of distinct source
# addresses and the distinct BGP prefixes. Unlike the earlier ASN tables this
# one is ordered by the number of distinct source addresses.
dns_red_asn_df = (
    dns_red_ip_df
    .reset_index()  # ip_address is the index; it is needed as a column for nunique
    .groupby(['asn', 'asn_owner'])
    .agg({'no_of_packets': 'sum', 'no_of_bytes': 'sum', 'ip_address': 'nunique', 'bgp_range': 'unique'})
    .sort_values(by='ip_address', ascending=False)
)
dns_red_asn_df