## Loading iGraph

In [18]:
# Basic imports
import igraph as ig # type: ignore
import matplotlib.pyplot as plt # type: ignore
import lzma
import pickle

# Location of the pickled attack graph; the .xz suffix makes pickle_read open it through lzma.
path="h2_attack.pickle.xz"
# Function to load the pickled graph (from graph_helpers.py)
def pickle_read(path):
    """Load and return the object pickled in the file at ``path``.

    Args:
        path (str | os.PathLike): File to read. A name ending in ``.xz`` is
            decompressed with ``lzma``; anything else is read as a plain file.

    Returns:
        object: Whatever object was pickled into the file.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the content is not a valid pickle.
    """
    import os  # local import: only needed to normalise path-like arguments

    # Accept pathlib.Path and other path-like objects, not just str.
    filename = os.fspath(path)

    # Match the real ".xz" extension rather than any name that happens to end in "xz".
    opener = lzma.open if filename.endswith(".xz") else open

    # SECURITY: unpickling can execute arbitrary code; only load files from a trusted source.
    with opener(filename, "rb") as f:
        return pickle.load(f)

# Load the attack graph from pickle.
# NOTE: pickle.load can execute arbitrary code, so only load graph files from a trusted source.
graph = pickle_read(path)
# Report the size of the loaded graph as a quick sanity check.
print(f"Graph loaded: {len(graph.vs)} vertices, {len(graph.es)} edges")

Graph loaded: 32893 vertices, 201979 edges


In [2]:
pip install -r requirements.txt

DEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621
Collecting igraph (from -r requirements.txt (line 1))
  Using cached igraph-0.11.6-cp39-abi3-macosx_10_9_x86_64.whl.metadata (3.9 kB)
Collecting texttable>=1.6.2 (from igraph->-r requirements.txt (line 1))
  Using cached texttable-1.7.0-py2.py3-none-any.whl.metadata (9.8 kB)
Using cached igraph-0.11.6-cp39-abi3-macosx_10_9_x86_64.whl (1.9 MB)
Using cached texttable-1.7.0-py2.py3-none-any.whl (10 kB)
Installing collected packages: texttable, igraph
  DEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621

## Filtering graph by time 

In [33]:
from dateutil.parser import parse
import datetime

def convert_to_unix_time(utc_timestamp, for_carbanak=True):
    """
    Converts a UTC timestamp string to Unix time.

    The conversion uses integer arithmetic only, so sub-second digits are
    preserved exactly instead of being rounded through a float.

    Args:
        utc_timestamp (str): The UTC timestamp string. A string without an
                             explicit UTC offset is interpreted as UTC, not as
                             the local time of the machine running the notebook.
        for_carbanak (bool): If True, returns Unix time in microseconds (for Carbanak).
                             If False, returns Unix time in nanoseconds (for others).

    Returns:
        int: Unix time in microseconds or nanoseconds.
    """
    dt_object = parse(utc_timestamp)

    # A naive datetime would otherwise be read as local time; the contract of
    # this function is UTC, so attach UTC explicitly.
    if dt_object.tzinfo is None or dt_object.utcoffset() is None:
        dt_object = dt_object.replace(tzinfo=datetime.timezone.utc)

    # Exact elapsed time since the Unix epoch, expressed in whole microseconds.
    epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    elapsed = dt_object - epoch
    unix_microseconds = (
        (elapsed.days * 86400 + elapsed.seconds) * 1000000 + elapsed.microseconds
    )

    if for_carbanak:
        # Return timestamp in microseconds
        return unix_microseconds

    # Return timestamp in nanoseconds (datetime resolution is 1 microsecond)
    return unix_microseconds * 1000

def filter_graph_by_time(graph, start_time, end_time, for_carbanak=True):
    """Returns the subgraph made of the edges whose time lies in [start_time, end_time].

    Only edges are tested against the window; the returned subgraph is whatever
    ``graph.subgraph_edges`` builds from the selected edge indices.

    Args:
        graph: Graph whose edges carry a numeric 'time' attribute, in the same
               unit as the one selected by ``for_carbanak``.
        start_time (str): Inclusive lower bound, as a UTC timestamp string.
        end_time (str): Inclusive upper bound, as a UTC timestamp string.
        for_carbanak (bool): Forwarded to convert_to_unix_time. True compares
                             in microseconds (the previous, hard-coded behaviour),
                             False compares in nanoseconds.

    Returns:
        The subgraph returned by ``graph.subgraph_edges`` for the matching edges.
    """
    start_unix = convert_to_unix_time(start_time, for_carbanak=for_carbanak)
    end_unix = convert_to_unix_time(end_time, for_carbanak=for_carbanak)

    # Indices of the edges that fall inside the (inclusive) time window.
    edges_in_window = [
        e.index for e in graph.es if start_unix <= e['time'] <= end_unix
    ]
    return graph.subgraph_edges(edges_in_window)

# UTC times (easier to normalize)
# Alternative, wider window (uncomment to use instead of the single day below):
# start_time = "2024-04-8 00:00:00 +0000"
# end_time = "2024-05-30 23:59:59 +0000"
start_time = "2024-05-10 00:00:00 +0000"
end_time = "2024-05-10 23:59:59 +0000"

# Keep only the edges inside the window.
slicedgraph = filter_graph_by_time(graph, start_time, end_time)
print(f"Graph filtered by time from {start_time} to {end_time}, updated graph: {len(slicedgraph.vs)} vertices, {len(slicedgraph.es)} edges")

# NOTE: `graph` is rebound to the slice, so every later cell works on the filtered graph.
# Re-running this cell filters the already-filtered graph; reload the pickle before
# widening the window.
graph=slicedgraph

Graph filtered by time from 2024-05-10 00:00:00 +0000 to 2024-05-10 23:59:59 +0000, updated graph: 12788 vertices, 45288 edges



## Relabeling Nodes and Edges

In [34]:
# Apply relabeling to the graph.
# relabels() is a helper from graph_helpers whose body is not shown here; its return
# value is ignored, so it presumably rewrites the graph's labels in place — TODO confirm.
from graph_helpers import relabels

relabels(graph)
print("Node and edge relabeling complete.")

Node and edge relabeling complete.


## Find Nodes by Keywords

Useful for looking up the UUIDs of nodes to use as seeds.

In [37]:
# Function to find nodes by a keyword in the node name
def find_nodes_by_keyword(attack_graph, keyword):
    """Returns the vertices whose name contains ``keyword`` (case-insensitive).

    Args:
        attack_graph: Graph whose vertices expose 'uuid', 'name' and 'type'.
        keyword (str): Substring to look for in each vertex name.

    Returns:
        list[dict]: One ``{'uuid', 'name', 'type'}`` dict per matching vertex,
        in vertex order. Empty if nothing matches.
    """
    # Lower-case the keyword once instead of on every vertex.
    needle = keyword.lower()

    matching_nodes = []
    for vertex in attack_graph.vs:
        name = vertex['name']
        # A vertex without a string name (e.g. None) cannot match; skip it
        # rather than crash on .lower().
        if not isinstance(name, str):
            continue
        if needle in name.lower():
            matching_nodes.append({
                'uuid': vertex['uuid'],
                'name': name,
                'type': vertex['type']
            })
    return matching_nodes

# Example usage: look up every vertex whose name contains the keyword.
keyword = "Java-Update" # root cause IP (author's note; the keyword is matched against vertex names — TODO confirm intent)
matching_nodes = find_nodes_by_keyword(graph, keyword)

# Print the matching nodes.
# The UUID is printed wrapped in a one-element list (e.g. ['...']).
for node in matching_nodes:
    print(f" Type: {node['type']}, UUID: {[node['uuid']]}, Name: {node['name']}")

# Adding Min/Max Times, Contaminating the Graph, and Coloring the Graph (Labelling Methodology)

In [36]:
# Add min/max time attributes to the vertices.
# vertex_times() comes from graph_helpers (body not shown here); its return value is ignored.
from graph_helpers import vertex_times

vertex_times(graph)

# List the vertex attributes to confirm that 'min_time' and 'max_time' are now present.
print(graph.vs.attributes())

['type', 'name', 'time', 'uuid', 'min_time', 'max_time']


In [None]:
from graph_helpers import contaminate_graph

# CSV of seeds that drives the contamination and coloring steps; reused by the coloring cell.
seed_file = "h2_attack.csv" # add seed file here

# contaminate_graph() is a helper whose body is not shown here; its return value is ignored.
# Presumably it sets the 'contaminate_label' vertex attribute read by the labelling cell — TODO confirm.
contaminate_graph(graph, seed_file)

In [None]:
from graph_helpers import color_graph

# Label/color the graph based on the seed file.
# Relies on `seed_file` from the contamination cell. color_graph()'s body is not shown here;
# presumably it sets the 'attack_label' vertex attribute read by the labelling cell — TODO confirm.
color_graph(graph, seed_file)

In [None]:
# Collapse the two boolean markers into one categorical "label" per vertex.
# "attack" takes precedence over "contaminated"; everything else is "benign".
for vertex in graph.vs:
    if vertex["attack_label"]:
        vertex["label"] = "attack"
        continue
    if vertex["contaminate_label"]:
        vertex["label"] = "contaminated"
        continue
    vertex["label"] = "benign"

# List every process vertex labelled "attack" and count them.
c = 0
for vertex in graph.vs:
    if vertex["type"] != "process" or vertex["label"] != "attack":
        continue
    print("%s, %s, %s" % (vertex["uuid"], vertex["name"], vertex["label"]))
    c += 1

print(c)

In [None]:
# List every process vertex labelled "contaminated" and count them.
c = 0
for vertex in graph.vs:
    if vertex["type"] != "process" or vertex["label"] != "contaminated":
        continue
    print("%s, %s, %s" % (vertex["uuid"], vertex["name"], vertex["label"]))
    c += 1
print(c)

# Visualization (Pruning)

In [None]:
from graph_helpers import attack_only, declone_processes, prune_edges, merge_vertices, prune_vertices

# Pruning pipeline: each stage is announced, applied to `graph`, and followed by a size report.
# The helpers' return values are ignored, so they are expected to modify `graph` in place.
# Edge pruning runs twice on purpose: once before and once after merging vertices.
pruning_stages = (
    ("Filtering attack only igraph", attack_only),
    ("Decloning processes...", declone_processes),
    ("Pruning Edges...", prune_edges),
    ("Merging Vertices...", merge_vertices),
    ("Re-pruning Edges...", prune_edges),
    ("Pruning Vertices...", prune_vertices),
)

for banner, stage in pruning_stages:
    print(banner)
    stage(graph)
    print(f"Updated graph: {len(graph.vs)} vertices, {len(graph.es)} edges")

In [None]:
import os  # local import: only needed to create the output directory

pdf_file = "plots/theia_3.3_success_gatech_profile.pdf"

# Style a copy: the plotting attributes below (notably "label") would otherwise
# overwrite the attack/contaminated/benign labels stored on `graph`, which the
# listing cells above read.
g = graph.copy()

# Make sure the output directory exists before igraph tries to write the PDF.
output_dir = os.path.dirname(pdf_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

print("Plotting %s of size V=%d, E=%d..." %(pdf_file, len(g.vs), len(g.es)))
layout = g.layout_davidson_harel()

# Vertices: rectangles captioned with the vertex name, widened by 10 units per
# additional character so the caption fits.
vertex_names = list(g.vs["name"])
g.vs["label"] = vertex_names
g.vs["shape"] = ["rectangle"] * len(vertex_names)
g.vs["height"] = [25] * len(vertex_names)
g.vs["width"] = [20 + 10*(len(n)-1) for n in vertex_names]

# Edges: captioned with their type, drawn with slightly enlarged arrow heads.
edge_types = list(g.es["type"])
g.es["label"] = edge_types
#lamport_timestamps(g)
g.es["arrow_size"] = [1.25] * len(edge_types)

ig.plot(g, pdf_file, layout=layout, bbox=(2560, 1080), margin=200)
