In [6]:
import logging
import os
import socket
from datetime import datetime
import argparse

import json
from pathlib import Path
import csv

import numpy as np
from tqdm import tqdm

import networkx as nx
import matplotlib.pyplot as plt

import pickle

from bokeh.io import show, output_notebook, push_notebook
from bokeh.models import Circle, MultiLine
from bokeh.plotting import figure, from_networkx, output_file, save
output_notebook()

import ipywidgets as widgets
from IPython.display import clear_output

import warnings
warnings.filterwarnings('ignore')


In [2]:
# Root directory that holds the datasets.
# pathlib does not expand '~' on its own, so expanduser() is required here;
# without it every derived path would point at a literal '~' directory
# relative to the current working directory.
DATASET_BASE = Path('~/workspace/SyssecLab/data').expanduser()

# Dataset directories offered by the dataset picker dropdown.
DATASET_PATHS = [
    DATASET_BASE / 'input',
    DATASET_BASE / 'output'
]

In [3]:
# Fill colour used when drawing a node, keyed by the node's TYPE value.
color_dict = dict(
    SocketChannelNode='yellow',
    FileNode='pink',
    ProcessNode='green',
)

# Vertex attribute whose value is shown as the node's hover label,
# keyed by the node's TYPE value.
attr_dict = dict(
    SocketChannelNode='REMOTE_INET_ADDR',
    FileNode='FILENAME_SET',
    ProcessNode='CMD',
)

# Per-node metrics read from 'graph_properties_ext.json' and attached to the
# plot's node data source (one column per entry, in this order).
graph_physical_features = [
    'clustering_coefficient',
    'clustering_triangles',
    'degree_centrality',
    'betweenness_centrality',
    'closeness_centrality',
    'eigenvector_centrality',
]

In [4]:
def load_labels(dataset_path: Path) -> tuple[Path, dict[str, str]]:
    """
    Read the pickled label mapping stored in a dataset directory.

    :param dataset_path: directory that contains 'graph_labels.pickle'
    :return: the same ``dataset_path`` together with the unpickled object
             (annotated as a str -> str mapping)
    """
    labels_file = dataset_path / 'graph_labels.pickle'
    # NOTE: pickle.load can execute arbitrary code; only point this at
    # label files from a trusted source.
    with open(labels_file, 'rb') as handle:
        labels = pickle.load(handle)
    return dataset_path, labels

# Interactive picker over DATASET_PATHS: load_labels is called with the
# selected path, and its return value is read back later through
# dataset_picker.result.
dataset_picker = widgets.interactive(load_labels,
                                     dataset_path=DATASET_PATHS)
display(dataset_picker)

interactive(children=(Dropdown(description='dataset_path', options=(PosixPath('/syssec-nas1/prov-research/data…

In [21]:
# Unpack what load_labels returned for the dataset currently selected in the
# picker: the dataset directory and its label mapping.
dataset_path, graph_labels = dataset_picker.result

# Distinct label values, sorted so the label dropdown has a stable order.
unique_labels = sorted(set(graph_labels.values()))

def filter_by_label(label: str) -> list[Path]:
    """
    Collect the 'graph.json' paths of every example carrying ``label``.

    Looks in the 'train', 'test' and 'validation' sub-directories of the
    module-level ``dataset_path``. An entry matches when ``graph_labels``
    maps the entry's directory name to ``label``.

    :param label: label value to match
    :return: matching '<split>/<example>/graph.json' paths, grouped by split
             (train, test, validation) and sorted within each split
    """
    all_examples = []
    for split in ('train', 'test', 'validation'):
        split_dir = dataset_path / split
        if not split_dir.is_dir():
            # iterdir() raises FileNotFoundError on a missing directory;
            # skip the split instead of failing the whole lookup.
            continue
        # iterdir() yields entries in arbitrary order, so sort to keep the
        # browsing order (and therefore the progress index) reproducible.
        all_examples.extend(
            sorted(
                candidate / 'graph.json'
                for candidate in split_dir.iterdir()
                if graph_labels.get(candidate.name) == label
            )
        )
    return all_examples

# Interactive picker over the distinct labels: filter_by_label is called with
# the selected label, and its return value is read back later through
# label_picker.result.
label_picker = widgets.interactive(filter_by_label, label=unique_labels)
display(label_picker)

interactive(children=(Dropdown(description='label', options=('A', 'B', 'C', 'D', 'E', 'F'), value='A'), Output…

In [39]:
def networkXconverterNoFeatures(graph_json_path):
    '''
    Takes the JSON file of an asi-sv graph and converts it to a NetworkX
    directed graph. Node and edge features are not imported; only the node
    ids, the edges, and per-node display metadata are extracted.

    :param graph_json_path: path of the graph JSON file; it must provide a
        'vertices' list and an 'edges' list
    :return: tuple ``(directed_graph, node_color_dict, node_label_dict,
        node_type_dict)`` where the three dicts are keyed by integer node id.
        If the file cannot be decoded as text, four empty lists are returned
        instead.
    :raises KeyError: if a vertex TYPE has no entry in ``color_dict``
    '''

    try:
        # The context manager closes the file handle even when parsing fails.
        with open(graph_json_path, 'r') as graph_file:
            loaded_data = json.load(graph_file)
    except UnicodeDecodeError:
        # Sentinel kept for backward compatibility with existing callers.
        return [], [], [], []

    directed_graph = nx.DiGraph()

    node_color_dict = {}
    node_label_dict = {}
    node_type_dict = {}

    # get the vertices index and add it to the graph
    for v in loaded_data['vertices']:

        node_id = int(v['_id'])
        node_type = v['TYPE']['value']

        try:
            # Preferred form: the attribute value is a list of wrapped items;
            # use the first item's inner value as the label.
            node_data = v[attr_dict[node_type]]['value'][0]['value']
        except (TypeError, IndexError):
            # The value is not a list of wrapped items (or is empty):
            # fall back to the raw attribute value.
            node_data = v[attr_dict[node_type]]['value']
        except KeyError:
            # Unknown node type or missing attribute: no label.
            node_data = ''

        node_color_dict[node_id] = color_dict[node_type]
        node_label_dict[node_id] = node_data
        node_type_dict[node_id] = node_type
        directed_graph.add_node(node_id)

    # get the edge index (src, dst) and add it to the graph
    for e in loaded_data['edges']:
        directed_graph.add_edge(int(e['_outV']), int(e['_inV']))

    # return the filled graph
    return directed_graph, node_color_dict, node_label_dict, node_type_dict
    
def bokehPlot(G, node_color_dict, node_label_dict, path, save_graph: bool = False):
    """
    Draw ``G`` as a Bokeh network plot and either show it inline or save it
    as an HTML file.

    :param G: NetworkX graph to lay out and draw
    :param node_color_dict: node id -> fill colour; its keys are written to
        the node data source's 'index' column and its values to 'colors'
    :param node_label_dict: node id -> label; its values are written to the
        'label' column shown in the hover tooltip
    :param path: example directory; its name is the plot title and it must
        contain 'graph_properties_ext.json'
    :param save_graph: when True, write the plot to
        '<dataset name>/<selected label>/<title>.html' (built from the
        module-level ``dataset_path`` and ``label_picker``) instead of
        showing it
    :return: None
    """
    title = path.name

    # Tooltip rows: display name paired with the data-source column it reads.
    hover_fields = [
        ('data', '@label'),
        ('clustering_coefficient', '@clustering_coefficient'),
        ('triangles', '@clustering_triangles'),
        ('degree_centrality', '@degree_centrality'),
        ('betweenness_centrality', '@betweenness_centrality'),
        ('closeness_centrality', '@closeness_centrality'),
        ('eigenvector_centrality', '@eigenvector_centrality'),
    ]

    plot = figure(
        tooltips=hover_fields,
        tools='pan,wheel_zoom,save,reset',
        active_scroll='wheel_zoom',
        title=title,
    )
    # Hide both grids.
    plot.xgrid.grid_line_color = None
    plot.ygrid.grid_line_color = None

    positions = nx.nx_pydot.graphviz_layout(G)
    network_graph = from_networkx(G, positions)
    node_source = network_graph.node_renderer.data_source

    # Precomputed per-node metrics stored next to the graph.
    with open(path / 'graph_properties_ext.json') as ext_graph_data:
        graph_data = json.load(ext_graph_data)

    for feat in graph_physical_features:
        per_node = graph_data[feat]
        if feat == 'eigenvector_centrality' and 'error' in per_node:
            # The stored entry is an error record rather than values: report
            # it and substitute 0 for every node listed under
            # closeness_centrality.
            print(per_node)
            per_node = dict.fromkeys(graph_data['closeness_centrality'], 0)
        node_source.data[feat] = list(per_node.values())

    node_source.data['index'] = list(node_color_dict.keys())
    node_source.data['label'] = list(node_label_dict.values())
    node_source.data['colors'] = list(node_color_dict.values())

    # Node size and colour, then edge opacity and width.
    network_graph.node_renderer.glyph = Circle(size=15, fill_color='colors')
    network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=1)

    plot.renderers.append(network_graph)

    if not save_graph:
        show(plot, notebook_handle=True)
        return

    out_file = Path(f'{dataset_path.name}/{label_picker.kwargs["label"]}/{title}.html')
    out_file.parent.mkdir(parents=True, exist_ok=True)
    save(plot, str(out_file.resolve()))


In [40]:
# graph.json paths returned by filter_by_label for the selected label.
label_examples = label_picker.result

# Position of the example currently shown; updated by the button handlers.
current_index = 0

# Progress bar mirroring current_index.
# NOTE(review): max is len(label_examples) while the handlers stop at
# len(label_examples) - 1, so the bar never reaches its maximum - confirm
# whether that is intended.
progress = widgets.IntProgress(value=current_index, min=0, max=len(label_examples), description=f'{current_index}/{len(label_examples)}')

def process_graph(graph_path, save: bool = False):
    """
    Load one example graph and plot it (or save the plot).

    :param graph_path: path of the example's 'graph.json'; its parent
        directory is handed to bokehPlot as the example directory
    :param save: forwarded to bokehPlot as ``save_graph``
    :return: whatever bokehPlot returns, or None when the graph file could
        not be decoded
    """
    g, node_color, node_label, _node_type = networkXconverterNoFeatures(graph_path)
    if isinstance(g, list):
        # The converter returns four empty lists when the file cannot be
        # decoded. Plotting those would fail with an unrelated
        # AttributeError, so report the file and skip it.
        print(f'Skipping {graph_path}: graph JSON could not be decoded')
        return None
    return bokehPlot(g, node_color, node_label, graph_path.parent, save)

def handle_next(arg):
    """
    'next' button callback: move to the following example (staying put on
    the last one), refresh the progress bar and redraw the plot.

    :param arg: object passed to the callback by the button; unused
    """
    global current_index
    last_index = len(label_examples) - 1
    if current_index < last_index:
        current_index = current_index + 1
    # wait=True: old output is only cleared once new output arrives.
    clear_output(wait=True)
    progress.value = current_index
    progress.description = f'{progress.value}/{progress.max}'
    process_graph(label_examples[current_index])

def handle_prev(arg):
    """
    'prev' button callback: move to the preceding example (staying put on
    the first one), refresh the progress bar and redraw the plot.

    :param arg: object passed to the callback by the button; unused
    """
    global current_index
    if current_index > 0:
        current_index = current_index - 1
    # wait=True: old output is only cleared once new output arrives.
    clear_output(wait=True)
    progress.value = current_index
    progress.description = f'{progress.value}/{progress.max}'
    process_graph(label_examples[current_index])

def handle_save(arg):
    """
    'save' button callback: re-process the example currently shown with
    saving enabled.

    :param arg: object passed to the callback by the button; unused
    """
    selected_example = label_examples[current_index]
    process_graph(selected_example, save=True)

# Navigation buttons, each wired to its click handler.
next_button = widgets.Button(description='next')
next_button.on_click(handle_next)

prev_button = widgets.Button(description='prev')
prev_button.on_click(handle_prev)

save_button = widgets.Button(description='save')
save_button.on_click(handle_save)

# Kickstart: draw the first example right away.
# Raises IndexError when label_examples is empty.
process_graph(label_examples[current_index])

In [5]:
# Debug inspection of the navigation index; the name only exists once the
# cell that assigns current_index has been run in this kernel.
current_index

NameError: name 'current_index' is not defined

In [41]:
# Show the control panel: a caption naming the dataset and selected label,
# the prev/next/save buttons in one row, and the progress bar.
display(widgets.Label(f'Viewing {dataset_path.parent.name}/{dataset_path.name}, label {label_picker.kwargs["label"]}'), widgets.HBox([prev_button, next_button, save_button]), progress)


Label(value='Viewing multi-class/POWERSHELL.EXE, label F')

HBox(children=(Button(description='prev', style=ButtonStyle()), Button(description='next', style=ButtonStyle()…

IntProgress(value=0, description='0/644', max=644)