In [127]:
import sqlite3
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [53]:
db_path = os.path.realpath("../DC4-data/firewall_data.db")
conn = sqlite3.connect(db_path)
print(conn.total_changes)

cur = conn.cursor()

0


In [19]:
# check colnames and types
cur.execute("PRAGMA table_info(data);")
print(cur.fetchall())

[(0, 'Date/time', 'date', 0, None, 0), (1, 'Syslog priority', 'text', 0, None, 0), (2, 'Operation', 'text', 0, None, 0), (3, 'Message code', 'text', 0, None, 0), (4, 'Protocol', 'text', 0, None, 0), (5, 'Source IP', 'text', 0, None, 0), (6, 'Destination IP', 'text', 0, None, 0), (7, 'Source hostname', 'text', 0, None, 0), (8, 'Destination hostname', 'text', 0, None, 0), (9, 'Source port', 'text', 0, None, 0), (10, 'Destination port', 'text', 0, None, 0), (11, 'Destination service', 'text', 0, None, 0), (12, 'Direction', 'text', 0, None, 0), (13, 'Connections built', 'integer', 0, None, 0), (14, 'Connections torn down', 'integer', 0, None, 0)]


In [50]:
# Get top 10 ports
cur.execute("SELECT `Destination port`, COUNT(`Destination port`) ct \
         FROM data \
         GROUP BY `Destination port` \
         ORDER BY ct DESC \
         LIMIT 10;")
print(cur.fetchall())

[('80', 21331506), ('6667', 2329914), ('(empty)', 3204), ('21', 1825), ('53', 752), ('22', 538), ('1026', 199), ('137', 130), ('113', 62), ('1025', 52)]


In [185]:
# prep port data
q = "SELECT strftime('%Y-%m-%d %H:00:00', `Date/time`) date_hour, \
         CASE \
             WHEN (`Destination port` = '80'\
                 OR `Destination port` = '6667' \
                 OR `Destination port` = '(empty)' \
                 OR `Destination port` = '21' \
                 OR `Destination port` = '53' \
                 OR `Destination port` = '22' \
                 OR `Destination port` = '137' \
                 OR `Destination port` = '113') \
                 THEN `Destination port` \
             ELSE 'OTHER' \
         END AS port_classification, \
         COUNT(`Date/time`) ct \
         FROM data \
         GROUP BY date_hour, port_classification;"

ports_per_hour = pd.read_sql_query(q, conn)

In [188]:
ports = ['80', '6667', '(empty)', '21', '53', '22', '137', '113', 'OTHER']

In [206]:
# Get unique operations
q = "SELECT DISTINCT `Operation` \
         FROM data;"
cur.execute(q)
decisions = cur.fetchall()
decisions

[('Teardown',),
 ('Built',),
 ('(empty)',),
 ('Command executed',),
 ('Deny',),
 ('Deny by ACL',)]

In [None]:
# Get unique statuses
q = "SELECT DISTINCT `Syslog Priority` \
         FROM data;"
cur.execute(q)
priorities = cur.fetchall()
priorities_df = pd.read_sql_query(q, conn)

In [196]:
cur.execute("SELECT strftime('%Y-%m-%d %H:00:00', `Date/time`) date_hour, COUNT(`Destination port`) n \
    FROM data \
    WHERE `Destination port` == '22' \
    GROUP BY date_hour")
print(cur.fetchall())

[('2012-04-05 20:00:00', 190), ('2012-04-05 21:00:00', 157), ('2012-04-05 22:00:00', 60), ('2012-04-05 23:00:00', 95), ('2012-04-06 00:00:00', 24), ('2012-04-06 01:00:00', 8), ('2012-04-06 03:00:00', 4)]


In [141]:
q = "SELECT strftime('%Y-%m-%d %H:00:00', `Date/time`) date_hour, `Syslog Priority` priority, COUNT(`Date/time`) n \
         FROM data \
         GROUP BY date_hour, `Syslog Priority`;"
cxns_per_hour = pd.read_sql_query(q, conn, parse_dates=[0])

date_hour    object
priority     object
n             int64
dtype: object

In [163]:
cxns_per_hour['date_hour'] = pd.to_datetime(cxns_per_hour['date_hour'])

In [164]:
priority_series = pd.Series(np.tile(priorities_df['Syslog priority'].to_numpy(), cxns_per_hour['date_hour'].nunique()))

In [178]:
idx_series = cxns_per_hour[cxns_per_hour['priority'] == 'Info']['date_hour'] \
    .repeat(len(priorities)).reset_index(drop=True)
new_df = pd.DataFrame({ 'date_hour': idx_series, 'priority': priority_series })
cxns_with_nas = pd.merge(new_df, cxns_per_hour, on=['date_hour', 'priority'], how='left')

In [200]:
fig = go.Figure()
for i in range(1, len(priorities)):
    expr = cxns_with_nas['priority'] == priorities[i][0]
    fig.add_trace(go.Scatter(
        x=cxns_with_nas[expr]['date_hour'],
        y=cxns_with_nas[expr]['n'],
        name=priorities[i][0],
        connectgaps=False)
    )
fig.show()

In [209]:
fig = go.Figure()
for i in range(2, len(ports)):
    expr = ports_per_hour['port_classification'] == ports[i]
    fig.add_trace(go.Scatter(
        x=ports_per_hour[expr]['date_hour'],
        y=ports_per_hour[expr]['ct'],
        name=ports[i],
        connectgaps=False)
    )
fig.show()

In [205]:
cur.execute("SELECT * \
         FROM data \
         WHERE `Syslog priority` = 'Critical';")
print(cur.fetchone())

('2012-04-05 20:25:35', 'Critical', 'Deny', 'ASA-2-106001', 'TCP', '10.32.5.56', '10.32.0.1', '(empty)', '(empty)', '6667', '1688', '1688_tcp', 'inbound', 0, 0)


In [33]:
cur.close()
conn.close()