In [23]:
import pandas as pd
import numpy as np
import random
from tqdm import tqdm
from pprint import pprint
from collections import Counter
from pyattck import Attck
import networkx as nx
from pyvis.network import Network
from itertools import product
import itertools

# Initialise the pyattck client; the cells below read actors, tactics and
# their techniques through its `enterprise` attribute.
attck = Attck()

# Using n-gram edge pairs

In [3]:
# Build directed edges from each actor's technique list by pairing every
# technique id with the id that immediately follows it (n-gram style).
edgelist = []

for actor in attck.enterprise.actors:
    technique_ids = [technique.id for technique in actor.techniques]
    edgelist.extend(zip(technique_ids, technique_ids[1:]))

In [4]:
# Render a slice of the edge list (entries 100..249) as a directed pyvis
# network with physics disabled, written to nx.html.
edge_sample = edgelist[100:250]
newgraph = nx.DiGraph(edge_sample)
nt = Network(height='1000px', width='1000px', directed=True)
nt.toggle_physics(False)
nt.from_nx(newgraph)
nt.show('nx.html')

# Using co-relational pairs

In [5]:
# Gather edge pairs by co-occurrence: every ordered pair of distinct
# technique ids attributed to the same actor becomes an edge. Each pair is
# emitted in both directions, so the resulting edge set is symmetric.
# NOTE(review): this cell was originally flagged "(does not work)" --
# presumably because the symmetric graph has no leaf nodes, so the leaf-based
# path search further down returns an empty list; confirm with the author.
edgelist = []

for actor in attck.enterprise.actors:
    technique_ids = [technique.id for technique in actor.techniques]
    edgelist.extend(
        (src, dst)
        for src in technique_ids
        for dst in technique_ids
        if src != dst
    )

In [6]:
# Render a slice of the edge list (entries 100..249) as a directed pyvis
# network with physics disabled, written to nx.html.
edge_sample = edgelist[100:250]
newgraph = nx.DiGraph(edge_sample)
nt = Network(height='1000px', width='1000px', directed=True)
nt.toggle_physics(False)
nt.from_nx(newgraph)
nt.show('nx.html')

In [7]:
# Use a Counter to tally how many times each edge appears; the tally becomes
# the edge weight. Feeding the pairs in sorted order keeps the Counter's
# entries in lexical order.
counter = Counter()
counter.update(sorted(edgelist))
counter

Counter({('T1001', 'T1001.002'): 1,
         ('T1001', 'T1003'): 1,
         ('T1001', 'T1003.001'): 1,
         ('T1001', 'T1003.006'): 1,
         ('T1001', 'T1005'): 1,
         ('T1001', 'T1007'): 1,
         ('T1001', 'T1012'): 1,
         ('T1001', 'T1016'): 1,
         ('T1001', 'T1018'): 1,
         ('T1001', 'T1021.001'): 1,
         ('T1001', 'T1021.002'): 1,
         ('T1001', 'T1027'): 1,
         ('T1001', 'T1027.005'): 1,
         ('T1001', 'T1033'): 1,
         ('T1001', 'T1041'): 1,
         ('T1001', 'T1046'): 1,
         ('T1001', 'T1047'): 1,
         ('T1001', 'T1049'): 1,
         ('T1001', 'T1053.005'): 1,
         ('T1001', 'T1055'): 1,
         ('T1001', 'T1056.001'): 1,
         ('T1001', 'T1057'): 1,
         ('T1001', 'T1059.001'): 1,
         ('T1001', 'T1059.003'): 1,
         ('T1001', 'T1059.005'): 1,
         ('T1001', 'T1059.006'): 1,
         ('T1001', 'T1069.001'): 1,
         ('T1001', 'T1070.001'): 1,
         ('T1001', 'T1070.004'): 1,
         ('T

In [8]:
# Split the counter into three parallel lists: edge source, edge target and
# the edge's occurrence count.
sourcelist = [pair[0] for pair in counter]
targetlist = [pair[1] for pair in counter]
weightlist = list(counter.values())

# Sanity check: the three lists must all have the same length.
for column in (sourcelist, targetlist, weightlist):
    print(len(column))

38402
38402
38402


In [9]:
# Assemble the weighted edge table: one row per distinct (source, target)
# pair together with its weight.
pd.set_option('display.max_rows', 20)
edge_columns = {
    "source": sourcelist,
    "target": targetlist,
    "weight": weightlist,
}
edges = pd.DataFrame(edge_columns)
edges

Unnamed: 0,source,target,weight
0,T1001,T1001.002,1
1,T1001,T1003,1
2,T1001,T1003.001,1
3,T1001,T1003.006,1
4,T1001,T1005,1
...,...,...,...
38397,T1608.005,T1588.004,1
38398,T1608.005,T1589.002,1
38399,T1608.005,T1589.003,1
38400,T1608.005,T1594,1


In [10]:
# Build a directed graph from the edge table; the remaining column
# ("weight") is attached to every edge as an attribute.
G = nx.from_pandas_edgelist(
    edges,
    source="source",
    target="target",
    edge_attr=True,
    create_using=nx.DiGraph(),
)

# All shortest paths

In [11]:
# Shortest path from T1003.001 to every leaf, i.e. every node of G that has
# no outgoing edges.
leaf_nodes = (node for node in G.nodes() if G.out_degree(node) == 0)
paths = [nx.shortest_path(G, 'T1003.001', leaf) for leaf in leaf_nodes]
paths

[]

# All connected components

In [12]:
def getpath(rootnode, depth_limit=100):
    """Return the heaviest root-to-leaf path of the BFS tree grown from ``rootnode``.

    A breadth-first tree of the module-level graph ``G`` is built from
    ``rootnode`` following edge direction. Every root-to-leaf path of that
    tree is scored by the sum of the ``weight`` attributes of its edges in
    ``G``, and the highest-scoring path is returned. When several paths share
    the top score, the first one in the tree's node order is returned.

    Parameters
    ----------
    rootnode
        Node of ``G`` at which the search starts.
    depth_limit : int, optional
        Maximum BFS depth (default 100). Large limits may waste a lot of
        computing time on big graphs.

    Returns
    -------
    list
        The nodes along the chosen path, starting with ``rootnode``. A root
        without outgoing edges yields ``[rootnode]``.

    Raises
    ------
    networkx.NetworkXError
        If ``rootnode`` is not a node of ``G``.
    """
    # The limit has to be passed by keyword: the third positional parameter of
    # nx.bfs_tree is `reverse`, so a bare 100 there silently grew the tree
    # backwards (over predecessors) and applied no depth limit at all.
    B = nx.bfs_tree(G, rootnode, depth_limit=depth_limit)
    # Leaves of the BFS tree are the nodes with no outgoing tree edge.
    leaves = [v for v, d in B.out_degree() if d == 0]
    # rootnode is the tree's root, and tree paths are unique.
    all_paths = [nx.shortest_path(B, rootnode, leaf) for leaf in leaves]

    def path_weight(path):
        # Every forward tree edge (u, v) is also an edge of G, so the weight
        # is looked up in the direction of travel.
        return sum(G.edges[u, v]['weight'] for u, v in zip(path, path[1:]))

    # max() keeps the first of several equally heavy paths, matching a stable
    # descending sort followed by taking element 0.
    return max(all_paths, key=path_weight)

In [13]:
# Take the best path from T1053.001, then compute the best path again from
# every node on it.
seed_path = getpath('T1053.001')
[getpath(node) for node in seed_path]

NetworkXError: The node T1053.001 is not in the digraph.

# MITRE Matrix Visualisation

In [15]:
# Map every tactic id to the sorted ids of the techniques listed under it.
tacticcolumns = {
    tactic.id: sorted(technique.id for technique in tactic.techniques)
    for tactic in attck.enterprise.tactics
}

import collections
# Order the tactics by id, then lay the matrix out with one column per tactic;
# "-" pads the columns that have fewer techniques.
tacticcolumns = collections.OrderedDict(sorted(tacticcolumns.items()))
pd.set_option('display.max_rows', None)
df = (
    pd.DataFrame.from_dict(tacticcolumns, orient='index')
    .fillna("-")
    .transpose()
)
df

Unnamed: 0,TA0001,TA0002,TA0003,TA0004,TA0005,TA0006,TA0007,TA0008,TA0009,TA0010,TA0011,TA0040,TA0042,TA0043
0,T1078,T1047,T1034,T1034,T1006,T1003,T1007,T1021,T1005,T1011,T1001,T1485,T1583,T1589
1,T1078.001,T1053,T1037,T1037,T1014,T1003.001,T1010,T1021.001,T1025,T1011.001,T1001.001,T1486,T1583.001,T1589.001
2,T1078.002,T1053.001,T1037.001,T1037.001,T1027,T1003.002,T1012,T1021.002,T1039,T1020,T1001.002,T1489,T1583.002,T1589.002
3,T1078.003,T1053.002,T1037.002,T1037.002,T1027.001,T1003.003,T1016,T1021.003,T1056,T1020.001,T1001.003,T1490,T1583.003,T1589.003
4,T1078.004,T1053.003,T1037.003,T1037.003,T1027.002,T1003.004,T1016.001,T1021.004,T1056.001,T1029,T1008,T1491,T1583.004,T1590
5,T1091,T1053.004,T1037.004,T1037.004,T1027.003,T1003.005,T1018,T1021.005,T1056.002,T1030,T1026,T1491.001,T1583.005,T1590.001
6,T1133,T1053.005,T1037.005,T1037.005,T1027.004,T1003.006,T1033,T1021.006,T1056.003,T1041,T1043,T1491.002,T1583.006,T1590.002
7,T1189,T1053.006,T1053,T1053,T1027.005,T1003.007,T1040,T1051,T1056.004,T1048,T1071,T1495,T1584,T1590.003
8,T1190,T1053.007,T1053.001,T1053.001,T1036,T1003.008,T1046,T1072,T1074,T1048.001,T1071.001,T1496,T1584.001,T1590.004
9,T1195,T1059,T1053.002,T1053.002,T1036.001,T1040,T1049,T1080,T1074.001,T1048.002,T1071.002,T1498,T1584.002,T1590.005


In [None]:
selectedpath = 'T1018'
def create_colors(x):
    """Build the CSS frame that highlights the path starting at ``selectedpath``.

    Written for ``Styler.apply(..., axis=None)``, which hands the whole frame
    to the function and expects a frame of CSS strings with the same index
    and columns back.

    Parameters
    ----------
    x : pandas.DataFrame
        The matrix being styled.

    Returns
    -------
    pandas.DataFrame
        Same labels as ``x``. Every cell holds the empty default style,
        except that for each technique on ``getpath(selectedpath)`` the first
        matching cell (scanning column by column) is set to green.
    """
    # Start with the default (no colour) style in every cell.
    styles = pd.DataFrame('background-color: ', index=x.index, columns=x.columns)
    for technique_id in getpath(selectedpath):
        # Every (row, column) position that holds this technique id.
        cells = [
            (row, col)
            for col in x.columns
            for row in x.index[x[col].eq(technique_id)]
        ]
        if not cells:
            # A technique on the path that the matrix does not contain has no
            # cell to colour; skip it rather than fail with IndexError.
            continue
        row, col = cells[0]
        styles.loc[row, col] = 'background-color: green'

    return styles

df.style.apply(create_colors, axis=None)

In [None]:
# The root used to be hard-coded inside the function as 'T1484.001' while
# `selectedpath` held a different id that was never read. The id that was
# actually used is kept, and it now flows through `selectedpath`.
selectedpath = 'T1484.001'
def create_colors(x):
    """Build the CSS frame that highlights the path starting at ``selectedpath``.

    Written for ``Styler.apply(..., axis=None)``, which hands the whole frame
    to the function and expects a frame of CSS strings with the same index
    and columns back.

    Parameters
    ----------
    x : pandas.DataFrame
        The matrix being styled.

    Returns
    -------
    pandas.DataFrame
        Same labels as ``x``. Every cell holds the empty default style,
        except that for each technique on ``getpath(selectedpath)`` the first
        matching cell (scanning column by column) is set to green.
    """
    # Start with the default (no colour) style in every cell.
    styles = pd.DataFrame('background-color: ', index=x.index, columns=x.columns)
    for technique_id in getpath(selectedpath):
        # Every (row, column) position that holds this technique id.
        cells = [
            (row, col)
            for col in x.columns
            for row in x.index[x[col].eq(technique_id)]
        ]
        if not cells:
            # A technique on the path that the matrix does not contain has no
            # cell to colour; skip it rather than fail with IndexError.
            continue
        row, col = cells[0]
        styles.loc[row, col] = 'background-color: green'

    return styles

df.style.apply(create_colors, axis=None)

# Using n-gram for edges, co-relational pairs for weights

In [None]:
# Build both pair lists in a single pass over the actors:
#   edgelist   - consecutive technique pairs (n-gram); these become the edges
#   edgeweight - every ordered pair of distinct techniques seen for the same
#                actor (co-relation); these are counted later for the weights
edgelist = []
edgeweight = []

for actor in attck.enterprise.actors:
    technique_ids = [technique.id for technique in actor.techniques]
    edgelist.extend(zip(technique_ids, technique_ids[1:]))
    edgeweight.extend(
        (src, dst)
        for src in technique_ids
        for dst in technique_ids
        if src != dst
    )

In [None]:
# Use a Counter to tally how many times each co-relational pair appears; the
# tally is the weight given to the matching n-gram edge.
counter = Counter(sorted(edgeweight))

# Parallel lists: edge source, edge target and edge weight.
sourcelist = []
targetlist = []
weightlist = []

# Look each n-gram edge up directly instead of scanning the whole counter for
# every edge. Edges with no co-relational count (e.g. a technique paired with
# itself) are left out, as before; the membership test avoids inserting
# zero-count keys into the counter.
for edge in edgelist:
    if edge in counter:
        sourcelist.append(edge[0])
        targetlist.append(edge[1])
        weightlist.append(counter[edge])

# Sanity check: the three lists must all have the same length.
print(len(sourcelist))
print(len(targetlist))
print(len(weightlist))

In [None]:
# Assemble the weighted edge table from the three parallel lists.
pd.set_option('display.max_rows', 20)
edge_columns = {
    "source": sourcelist,
    "target": targetlist,
    "weight": weightlist,
}
edges = pd.DataFrame(edge_columns)
edges

In [None]:
# Build a directed graph from the edge table; the remaining column
# ("weight") is attached to every edge as an attribute.
G = nx.from_pandas_edgelist(
    edges,
    source="source",
    target="target",
    edge_attr=True,
    create_using=nx.DiGraph(),
)

In [None]:
def getpath(rootnode, depth_limit=100):
    """Return the heaviest root-to-leaf path of the BFS tree grown from ``rootnode``.

    A breadth-first tree of the module-level graph ``G`` is built from
    ``rootnode`` following edge direction. Every root-to-leaf path of that
    tree is scored by the sum of the ``weight`` attributes of its edges in
    ``G``, and the highest-scoring path is returned. When several paths share
    the top score, the first one in the tree's node order is returned.

    Parameters
    ----------
    rootnode
        Node of ``G`` at which the search starts.
    depth_limit : int, optional
        Maximum BFS depth (default 100). Large limits may waste a lot of
        computing time on big graphs.

    Returns
    -------
    list
        The nodes along the chosen path, starting with ``rootnode``. A root
        without outgoing edges yields ``[rootnode]``.

    Raises
    ------
    networkx.NetworkXError
        If ``rootnode`` is not a node of ``G``.
    """
    # The limit has to be passed by keyword: the third positional parameter of
    # nx.bfs_tree is `reverse`, so a bare 100 there silently grew the tree
    # backwards (over predecessors) and applied no depth limit at all.
    B = nx.bfs_tree(G, rootnode, depth_limit=depth_limit)
    # Leaves of the BFS tree are the nodes with no outgoing tree edge.
    leaves = [v for v, d in B.out_degree() if d == 0]
    # rootnode is the tree's root, and tree paths are unique.
    all_paths = [nx.shortest_path(B, rootnode, leaf) for leaf in leaves]

    def path_weight(path):
        # Every forward tree edge (u, v) is also an edge of G, so the weight
        # is looked up in the direction of travel.
        return sum(G.edges[u, v]['weight'] for u, v in zip(path, path[1:]))

    # max() keeps the first of several equally heavy paths, matching a stable
    # descending sort followed by taking element 0.
    return max(all_paths, key=path_weight)

# Take the best path from T1059.001, then compute the best path again from
# every node on it.
seed_path = getpath('T1059.001')
[getpath(node) for node in seed_path]

In [None]:
selectedpath = 'T1566.001'
def create_colors(x):
    """Build the CSS frame that highlights the path starting at ``selectedpath``.

    Written for ``Styler.apply(..., axis=None)``, which hands the whole frame
    to the function and expects a frame of CSS strings with the same index
    and columns back.

    Parameters
    ----------
    x : pandas.DataFrame
        The matrix being styled.

    Returns
    -------
    pandas.DataFrame
        Same labels as ``x``. Every cell holds the empty default style,
        except that for each technique on ``getpath(selectedpath)`` the first
        matching cell (scanning column by column) is set to green.
    """
    # Start with the default (no colour) style in every cell.
    styles = pd.DataFrame('background-color: ', index=x.index, columns=x.columns)
    for technique_id in getpath(selectedpath):
        # Every (row, column) position that holds this technique id.
        cells = [
            (row, col)
            for col in x.columns
            for row in x.index[x[col].eq(technique_id)]
        ]
        if not cells:
            # A technique on the path that the matrix does not contain has no
            # cell to colour; skip it rather than fail with IndexError.
            continue
        row, col = cells[0]
        styles.loc[row, col] = 'background-color: green'

    return styles

df.style.apply(create_colors, axis=None)