In [None]:
import pandas as pd
import polars as pl
import numpy as np
import networkx as nx
from math import pi, sin, cos
import sys
sys.path.insert(1, '../framework')
from racetrack import *
# Shared RACETrack instance used by every cell below.
rt = RACETrack()

# Directory holding the netflow CSV chunks, and the chunk files to stack.
_base_ = '../../data/2013_vast_challenge/mc3_netflow/nf/'
_chunk_files_ = ['nf-chunk1.csv', 'nf-chunk2.csv', 'nf-chunk3.csv']

# Original CSV column name -> short column name used in the rest of the notebook.
_renames_ = {
    'TimeSeconds':               'secs',
    'parsedDate':                'timestamp',
    'dateTimeStr':               'timestamp_str',
    'ipLayerProtocol':           'pro_str',
    'ipLayerProtocolCode':       'pro',
    'firstSeenSrcIp':            'sip',
    'firstSeenDestIp':           'dip',
    'firstSeenSrcPort':          'spt',
    'firstSeenDestPort':         'dpt',
    'moreFragments':             'mfrag',
    'contFragments':             'cfrag',
    'durationSeconds':           'dur',
    'firstSeenSrcPayloadBytes':  'soct_pay',
    'firstSeenDestPayloadBytes': 'doct_pay',
    'firstSeenSrcTotalBytes':    'soct',
    'firstSeenDestTotalBytes':   'doct',
    'firstSeenSrcPacketCount':   'spkt',
    'firstSeenDestPacketCount':  'dpkt',
    'recordForceOut':            'out',
}

# Read every chunk, stack them, shorten the column names, then hand the
# 'timestamp' column to RACETrack (presumably parsed into a datetime dtype --
# confirm against racetrack.columnsAreTimestamps).
df_orig = rt.columnsAreTimestamps(
    pl.concat([pl.read_csv(_base_ + _file_) for _file_ in _chunk_files_]).rename(_renames_),
    'timestamp',
)

# Keep only the flows whose source address is one of the hosts of interest.
_ips_ = ['172.10.0.4','172.20.0.4']
df = df_orig.filter(pl.col('sip').is_in(_ips_))

# Chord diagram of source -> destination addresses; displayed as the cell output.
cd = rt.chordDiagram(df, [('sip','dip')], equal_size_nodes=False)
cd

In [None]:
import hdbscan

# Build one sample per directed (fm, to) edge of the chord diagram.
#   fmto_list  -- 'fm|||to' key of the edge
#   angle_list -- (fm_span, to_span): the edge's arc span at its source node and
#                 at its destination node (used as degrees by the ring cell below)
#   span_list  -- (fm_coord, to_coord): span midpoints rescaled by /360 - 0.5,
#                 the 2-D points that get clustered
#   xs_list / ys_list -- the same two coordinates, split out for the scatter plot
handled    = set()
fmto_list  = []
angle_list = []
span_list  = []
xs_list    = []
ys_list    = []
for node in cd.node_dir_arc:
    for fm in cd.node_dir_arc[node]:
        for to in cd.node_dir_arc[node][fm]:
            key = str(fm) + '|||' + str(to)
            if key in handled:
                continue  # each edge appears under both of its endpoints; sample it once
            handled.add(key)
            node_span = cd.node_dir_arc[node][fm][to]
            # Keep the (source, destination) orientation no matter which endpoint
            # reached the edge first: the span found under `node` belongs to the
            # source when node == fm, otherwise to the destination.
            # NOTE(review): assumes node_dir_arc[n][fm][to] is the arc of edge
            # fm->to at node n -- confirm against racetrack's chord diagram.
            if fm == node:
                fm_span, to_span = node_span, cd.node_dir_arc[to][fm][to]
            else:
                fm_span, to_span = cd.node_dir_arc[fm][fm][to], node_span
            # (a0 + a1) / 720 - 0.5 == midpoint / 360 - 0.5
            fm_coord = (fm_span[0]+fm_span[1])/720.0 - 0.5
            to_coord = (to_span[0]+to_span[1])/720.0 - 0.5
            fmto_list.append(key)
            angle_list.append((fm_span,to_span))
            span_list.append((fm_coord,to_coord))
            xs_list.append(fm_coord)
            ys_list.append(to_coord)

# Cluster the edges by their (source, destination) midpoint coordinates.
clusterer = hdbscan.HDBSCAN()
clusterer.fit(span_list)
# HDBSCAN labels noise points -1; that label is not a cluster, so leave it out of the count.
print("n_clusters =", len(set(clusterer.labels_) - {-1}))
rt.xy(pd.DataFrame({'x':xs_list,'y':ys_list,'c':clusterer.labels_}),x_field='x',y_field='y',color_by='c')

In [None]:
def _close_ring(graph, ring, positions):
    """Connect angularly adjacent points of one ring in both directions.

    graph     -- nx.DiGraph that receives the edges
    ring      -- dict mapping angle -> (x, y) point on the ring (may be None)
    positions -- dict of node -> (x, y) used for drawing; ring points are added to it

    A ring that is None or has fewer than two points has nothing to connect and
    is skipped, which also avoids dividing by a zero-length segment.
    """
    if ring is None or len(ring) < 2:
        return
    _angles_ = sorted(ring)
    for i in range(len(_angles_)):
        n0, n1 = ring[_angles_[i]], ring[_angles_[(i+1)%len(_angles_)]]
        if n0 == n1:
            continue  # distinct angles landed on the same point; no edge to add
        graph.add_edge(n0,n1,weight=1.0/rt.segmentLength((n0,n1)))
        graph.add_edge(n1,n0,weight=1.0/rt.segmentLength((n1,n0)))
        # make sure every ring node can be drawn with pos=positions
        positions[n0] = n0
        positions[n1] = n1

# Build a routing skeleton as concentric rings. Each ring cuts the HDBSCAN
# single-linkage tree at a larger distance `d` and is drawn at a smaller radius
# `r`, so clusters merge toward the center. The same geometry is rendered as SVG.
svg = '<svg x="0" y="0" width="440" height="440">'
cx0, cy0 = 210,220
last_fm_xy, last_to_xy, skeleton, skeleton_pos, last_ring, second_to_last_ring, node_to_pos = None, None, nx.DiGraph(), {}, None, None, {}
d, d_inc, r, r_dec = 0.00, 0.02, 200, 50
while d <= 0.07:
    # guide circle for this ring
    svg += f'<circle cx="{cx0}" cy="{cy0}" r="{r}" fill="none" stroke="#a0a0a0" stroke-width="0.5" />'
    _labels_ = clusterer.single_linkage_tree_.get_clusters(d, min_cluster_size=1)
    second_to_last_ring = last_ring
    fm_xy, to_xy, last_ring = {}, {}, {}
    for _label_ in set(_labels_):
        if _label_ == -1:
            continue  # -1 marks points that belong to no cluster at this cut
        members = [i for i in range(len(angle_list)) if _labels_[i] == _label_]
        if not members:
            continue
        # Mean of the span endpoints over the cluster (two endpoints per member)
        # gives the cluster's source-side and destination-side angle in degrees.
        fm_sum, to_sum = 0, 0
        for i in members:
            fm_sum += angle_list[i][0][0]+angle_list[i][0][1]
            to_sum += angle_list[i][1][0]+angle_list[i][1][1]
        samples = 2*len(members)

        fm_angle = fm_sum/samples
        fmx,fmy = cx0 + r*cos(2*pi*fm_angle/360.0), cy0 + r*sin(2*pi*fm_angle/360.0)
        last_ring[fm_angle] = (fmx,fmy)
        svg += f'<circle cx="{fmx}" cy="{fmy}" r="2" fill="#a0a0ff"/>'

        to_angle = to_sum/samples
        tox,toy = cx0 + r*cos(2*pi*to_angle/360.0), cy0 + r*sin(2*pi*to_angle/360.0)
        last_ring[to_angle] = (tox,toy)
        svg += f'<circle cx="{tox}" cy="{toy}" r="2" fill="#ff0000"/>'

        # NOTE(review): edge weights are 1/length, so a weighted shortest path
        # favors long segments -- confirm this is the intended cost.
        for i in members:
            # source side: directed from the previous (outer) ring to this ring
            fm_xy[i] = (fmx,fmy)
            if last_fm_xy is not None and i in last_fm_xy:
                svg += f'<line x1="{last_fm_xy[i][0]}" y1="{last_fm_xy[i][1]}" x2="{fmx}" y2="{fmy}" stroke="#a0a0ff" stroke-width="0.5" />'
                skeleton.add_edge(last_fm_xy[i], fm_xy[i], weight=1.0/rt.segmentLength((last_fm_xy[i],fm_xy[i])))
                skeleton_pos[last_fm_xy[i]] = last_fm_xy[i]
                skeleton_pos[fm_xy[i]]      = fm_xy[i]
            # destination side: directed from this ring back to the previous (outer) ring
            to_xy[i] = (tox,toy)
            if last_to_xy is not None and i in last_to_xy:
                svg += f'<line x1="{last_to_xy[i][0]}" y1="{last_to_xy[i][1]}" x2="{tox}" y2="{toy}" stroke="#ff0000" stroke-width="0.5" />'
                skeleton.add_edge(to_xy[i], last_to_xy[i], weight=1.0/rt.segmentLength((to_xy[i],last_to_xy[i])))
                skeleton_pos[to_xy[i]]      = to_xy[i]
                skeleton_pos[last_to_xy[i]] = last_to_xy[i]
            # overwritten on every ring, so the innermost ring's positions win
            node_to_pos[fmto_list[i].split('|||')[0]] = (fmx,fmy)
            node_to_pos[fmto_list[i].split('|||')[1]] = (tox,toy)

    last_fm_xy = fm_xy
    last_to_xy = to_xy
    d += d_inc
    r -= r_dec
svg += '</svg>'

# Close the two innermost rings so routes can travel around them in either direction.
_close_ring(skeleton, last_ring, skeleton_pos)
_close_ring(skeleton, second_to_last_ring, skeleton_pos)

rt.tile([svg])

In [None]:
# Draw the skeleton graph; every node is placed at its own (x, y) coordinate.
_draw_opts_ = {'pos': skeleton_pos, 'node_size': 3}
nx.draw(skeleton, **_draw_opts_)

In [None]:
# Route between two hosts through the skeleton, using the positions recorded
# for them in node_to_pos as the path endpoints.
_src_, _dst_ = '172.10.0.4', '10.170.148.135'
nx.shortest_path(skeleton,
                 source=node_to_pos[_src_],
                 target=node_to_pos[_dst_],
                 weight='weight')