In [None]:
import pandas as pd
import polars as pl
import numpy as np
import random
import rtsvg
# Shared RACETrack instance: later cells call its helper methods and pass it to spreadLines()
rt = rtsvg.RACETrack()

In [None]:
#
# spreadLines() - attempt to implement this visualization
#
# Based on:
#
# @misc{kuo2024spreadlinevisualizingegocentricdynamic,
#       title={SpreadLine: Visualizing Egocentric Dynamic Influence}, 
#       author={Yun-Hsin Kuo and Dongyu Liu and Kwan-Liu Ma},
#       year={2024},
#       eprint={2408.08992},
#       archivePrefix={arXiv},
#       primaryClass={cs.HC},
#       url={https://arxiv.org/abs/2408.08992}, 
# }
# 
def spreadLines(rt_self,
                df,
                relationships,
                node_focus,
                ts_field        = None,              # Will attempt to guess based on datatypes
                ts_bins         = 'year_month_day',  # From rt.transforms
                color_by        = None,
                count_by        = None,
                count_by_set    = False,
                widget_id       = None,
                w               = 1024,
                h               = 512,
                txt_h           = 12):
    """
    Factory for a SpreadLines instance -- every argument is forwarded unchanged, by keyword.

    Parameters
    ----------
    rt_self       : object forwarded as SpreadLines' ``rt_self`` (the demo cell passes a RACETrack instance)
    df            : dataframe holding the records
    relationships : list of (from, to) pairs; each endpoint is a field name or a tuple of field names
    node_focus    : the focus node (the demo cell passes an IP address string)
    ts_field      : timestamp field; when None, SpreadLines asks rt_self to guess it
    ts_bins       : time binning name (default 'year_month_day')
    color_by      : optional field name
    count_by      : optional field name
    count_by_set  : boolean flag stored on the instance
    widget_id     : identifier; when None, SpreadLines generates a random one
    w, h          : presumably the rendered width / height -- TODO confirm units
    txt_h         : presumably the text height -- TODO confirm units

    Returns
    -------
    SpreadLines
    """
    return SpreadLines(rt_self       = rt_self,
                       df            = df,
                       relationships = relationships,
                       node_focus    = node_focus,
                       ts_field      = ts_field,
                       ts_bins       = ts_bins,
                       color_by      = color_by,
                       count_by      = count_by,
                       count_by_set  = count_by_set,
                       widget_id     = widget_id,
                       w             = w,
                       h             = h,
                       txt_h         = txt_h)

#
#
#
class SpreadLines(object):
    """
    Holds the parameters for a SpreadLine visualization.

    Construction copies the dataframe, stores the configuration, and runs every
    referenced field name through ``rt_self.transformFieldListAndDataFrame`` so
    that the stored names refer to the columns of the transformed dataframe.
    """
    #
    # __transformFields__() - transform all referenced fields & remap the stored field names
    #
    def __transformFields__(self):
        """
        Apply field transforms to ``self.df`` and rewrite every stored field name.

        Fields are gathered in a fixed order (timestamp, color_by, count_by, then
        each relationship's "from" fields followed by its "to" fields) and handed
        to ``rt_self.transformFieldListAndDataFrame``.  The returned list is read
        back positionally in that same order, so it is assumed to be parallel to
        the submitted list (NOTE(review): confirm against the rtsvg implementation).

        Relationship endpoints may be a single field name (str) or a tuple of field
        names.  Tuples are rebuilt rather than modified in place because they are
        immutable.  Endpoints of any other type are passed through untouched and
        contribute no fields.
        """
        def _endpointFields_(_endpoint_):
            # Flatten one relationship endpoint into a list of field names
            if type(_endpoint_) is str:   return [_endpoint_]
            if type(_endpoint_) is tuple: return list(_endpoint_)
            return []

        # Gather up all of the fields that are going to be used
        _all_columns_ = [self.ts_field]
        if self.color_by is not None: _all_columns_.append(self.color_by)
        if self.count_by is not None: _all_columns_.append(self.count_by)
        for _relationship_ in self.relationships:
            _all_columns_.extend(_endpointFields_(_relationship_[0]))
            _all_columns_.extend(_endpointFields_(_relationship_[1]))

        # Transform the fields
        self.df, _new_columns_ = self.rt_self.transformFieldListAndDataFrame(self.df, _all_columns_)

        # Remap them -- col_i walks _new_columns_ in the same order used above
        col_i = 0
        self.ts_field = _new_columns_[col_i]
        col_i += 1
        if self.color_by is not None:
            self.color_by = _new_columns_[col_i]
            col_i += 1
        if self.count_by is not None:
            self.count_by = _new_columns_[col_i]
            col_i += 1

        _new_relationships_ = []
        for _relationship_ in self.relationships:
            _remapped_ = []
            for _endpoint_ in (_relationship_[0], _relationship_[1]):
                _n_ = len(_endpointFields_(_endpoint_))
                if   type(_endpoint_) is str:
                    _endpoint_ = _new_columns_[col_i]
                elif type(_endpoint_) is tuple:
                    # Build a fresh tuple; item assignment on a tuple raises TypeError
                    _endpoint_ = tuple(_new_columns_[col_i + i] for i in range(_n_))
                col_i += _n_
                _remapped_.append(_endpoint_)
            _new_relationships_.append((_remapped_[0], _remapped_[1]))
        self.relationships = _new_relationships_

    #
    # __init__() - store the parameters & transform the fields
    #
    def __init__(self, rt_self, **kwargs):
        """
        Parameters
        ----------
        rt_self : object providing copyDataFrame(), guessTimestampField() and
                  transformFieldListAndDataFrame()
        kwargs  : must contain the keys 'df', 'relationships', 'node_focus',
                  'ts_field', 'ts_bins', 'color_by', 'count_by', 'count_by_set',
                  'widget_id', 'w', 'h' and 'txt_h' (a missing key raises KeyError)
        """
        self.rt_self       = rt_self
        self.df            = rt_self.copyDataFrame(kwargs['df'])   # work on a copy of the caller's dataframe
        self.relationships = kwargs['relationships']
        self.node_focus    = kwargs['node_focus']
        # Fall back to guessing the timestamp field when none was supplied
        self.ts_field      = self.rt_self.guessTimestampField(self.df) if kwargs['ts_field'] is None else kwargs['ts_field']
        self.ts_bins       = kwargs['ts_bins']
        self.color_by      = kwargs['color_by']
        self.count_by      = kwargs['count_by']
        self.count_by_set  = kwargs['count_by_set']
        # Generate a random widget id when none was supplied
        self.widget_id     = f'spreadlines_{random.randint(0,65535)}' if kwargs['widget_id'] is None else kwargs['widget_id']
        self.w             = kwargs['w']
        self.h             = kwargs['h']
        self.txt_h         = kwargs['txt_h']

        # Unwrap any fields
        self.__transformFields__()


In [None]:
# Load one netflow chunk (VAST Challenge 2013, MC3), mark 'parsedDate' as a timestamp column,
# shorten the column names, and discard the columns tagged '_delN_' by the rename.
df = (
    rt.columnsAreTimestamps(pl.read_csv('../../data/2013_vast_challenge/mc3_netflow/nf/nf-chunk1.csv'),
                            'parsedDate')
      .rename({'TimeSeconds':                '_del1_',
               'parsedDate':                 'timestamp',
               'dateTimeStr':                '_del2_',
               'ipLayerProtocol':            'pro',
               'ipLayerProtocolCode':        '_del3_',
               'firstSeenSrcIp':             'sip',
               'firstSeenDestIp':            'dip',
               'firstSeenSrcPort':           'spt',
               'firstSeenDestPort':          'dpt',
               'moreFragments':              '_del4_',
               'contFragments':              '_del5_',
               'durationSeconds':            'dur',
               'firstSeenSrcPayloadBytes':   '_del6_',
               'firstSeenDestPayloadBytes':  '_del7_',
               'firstSeenSrcTotalBytes':     'soct',
               'firstSeenDestTotalBytes':    'doct',
               'firstSeenSrcPacketCount':    'spkt',
               'firstSeenDestPacketCount':   'dpkt',
               'recordForceOut':             '_del8_'})
      .drop(['_del1_', '_del2_', '_del3_', '_del4_', '_del5_', '_del6_', '_del7_', '_del8_'])
)
# Build the SpreadLines object for source-ip -> destination-ip edges around one focus address
spreadLines(rt, df, [('sip', 'dip')], '172.30.0.4')