# Sequential method

In [1]:
import pandas as pd
import numpy as np
import random
from tqdm import tqdm
from pprint import pprint
from collections import Counter
from pyattck import Attck
import networkx as nx
from pyvis.network import Network
from itertools import product
import itertools

# Initialise the pyattck client; every later cell reads ATT&CK enterprise
# tactics, techniques and actors through this object.
# NOTE(review): presumably this loads/downloads the ATT&CK dataset on first use — confirm.
attck = Attck()

In [2]:
# One (tactic id, sorted technique ids) pair per enterprise tactic, keeping
# only the techniques whose platform list mentions 'Windows'.
tacticcolumns = [
    (
        tactic.id,
        sorted(
            technique.id
            for technique in tactic.techniques
            if 'Windows' in technique.platforms
        ),
    )
    for tactic in attck.enterprise.tactics
]

In [97]:
# Inspect the first (tactic id, technique ids) pair as a sanity check
tacticcolumns[0]

('TA0009',
 ['T1005',
  'T1025',
  'T1039',
  'T1056',
  'T1056.001',
  'T1056.002',
  'T1056.003',
  'T1056.004',
  'T1074',
  'T1074.001',
  'T1074.002',
  'T1113',
  'T1114',
  'T1114.001',
  'T1114.002',
  'T1114.003',
  'T1115',
  'T1119',
  'T1123',
  'T1125',
  'T1185',
  'T1213',
  'T1213.002',
  'T1557',
  'T1557.001',
  'T1557.002',
  'T1560',
  'T1560.001',
  'T1560.002',
  'T1560.003'])

In [3]:
# For every actor, group the technique ids it uses by tactic.
# Groups follow the order of `tacticcolumns`; inside a group the ids keep the
# order in which the actor lists them. Tactics with no match are skipped, but
# an actor with no match at all still contributes an empty list.
edgelist = []

for actor in attck.enterprise.actors:
    actor_technique_ids = [technique.id for technique in actor.techniques]
    grouped_by_tactic = (
        [tech_id for tech_id in actor_technique_ids if tech_id in tactic_technique_ids]
        for _, tactic_technique_ids in tacticcolumns
    )
    edgelist.append([group for group in grouped_by_tactic if group])

In [4]:
# Flatten each actor's per-tactic groups into one sequence and emit every
# 2-combination of it (earlier element first); all actors' pairs end up in a
# single flat list.
newedgelist = [
    pair
    for actor_groups in edgelist
    for pair in itertools.combinations(
        itertools.chain.from_iterable(actor_groups), 2
    )
]

In [5]:
# Preview only a slice of the edge pairs (indices 100-249) as a directed graph;
# NOTE(review): the slice bounds look arbitrary — presumably chosen to keep the plot readable.
newgraph = nx.DiGraph(newedgelist[100:250])
# pyvis canvas of 1000px by 1000px with directed edges
nt = Network('1000px', '1000px', directed=True)
# Turn the physics option off before loading the graph
nt.toggle_physics(False)
# Copy the networkx nodes and edges into the pyvis network
nt.from_nx(newgraph)
# Render the network to the file 'nx.html'
nt.show('nx.html')

In [6]:
# Use a Counter to count how many times each (source, target) pair appears;
# the counts are used as edge weights. Sorting first makes the Counter's
# insertion order lexicographic.
ordered_pairs = sorted(newedgelist)
counter = Counter(ordered_pairs)
counter

Counter({('T1001', 'T1001.002'): 1,
         ('T1001', 'T1003'): 1,
         ('T1001', 'T1003.001'): 1,
         ('T1001', 'T1003.006'): 1,
         ('T1001', 'T1007'): 1,
         ('T1001', 'T1012'): 1,
         ('T1001', 'T1016'): 1,
         ('T1001', 'T1018'): 1,
         ('T1001', 'T1021.001'): 1,
         ('T1001', 'T1021.002'): 1,
         ('T1001', 'T1027'): 1,
         ('T1001', 'T1027.005'): 1,
         ('T1001', 'T1033'): 1,
         ('T1001', 'T1041'): 1,
         ('T1001', 'T1046'): 1,
         ('T1001', 'T1047'): 1,
         ('T1001', 'T1049'): 1,
         ('T1001', 'T1053.005'): 3,
         ('T1001', 'T1055'): 2,
         ('T1001', 'T1056.001'): 1,
         ('T1001', 'T1057'): 1,
         ('T1001', 'T1059.001'): 1,
         ('T1001', 'T1059.003'): 1,
         ('T1001', 'T1059.005'): 1,
         ('T1001', 'T1059.006'): 1,
         ('T1001', 'T1069.001'): 1,
         ('T1001', 'T1070.001'): 1,
         ('T1001', 'T1070.004'): 1,
         ('T1001', 'T1078'): 4,
         ('T

In [7]:
# Split the counter into three parallel lists: edge source, edge target, and
# the number of times that edge was seen.
sourcelist = [source for source, _ in counter]
targetlist = [target for _, target in counter]
weightlist = list(counter.values())

# The three lists must have the same length
print(len(sourcelist))
print(len(targetlist))
print(len(weightlist))

17543
17543
17543


In [8]:
# Collect the edges and their weights in a DataFrame (one row per edge);
# cap the notebook display at 20 rows.
pd.set_option('display.max_rows', 20)
edges = pd.DataFrame(
    {"source": sourcelist, "target": targetlist, "weight": weightlist}
)
edges

Unnamed: 0,source,target,weight
0,T1001,T1001.002,1
1,T1001,T1003,1
2,T1001,T1003.001,1
3,T1001,T1003.006,1
4,T1001,T1007,1
...,...,...,...
17538,T1606.002,T1562.002,2
17539,T1606.002,T1562.004,2
17540,T1606.002,T1566.001,1
17541,T1606.002,T1566.002,1


In [9]:
# Build a directed graph from the edge table. `edge_attr=True` attaches every
# remaining column (here: "weight") to the edge as an attribute.
G = nx.from_pandas_edgelist(
    edges,
    source="source",
    target="target",
    edge_attr=True,
    create_using=nx.DiGraph(),
)

In [107]:
def getpath(rootnode, depth_limit=10, graph=None):
    """Return the heaviest root-to-leaf path of a depth-limited DFS tree.

    A DFS tree rooted at ``rootnode`` is built from the graph. Every
    root-to-leaf path of that tree is scored by summing the ``'weight'``
    attribute of its edges in the original graph, and the path with the
    largest total is returned.

    Parameters
    ----------
    rootnode : node
        Node at which the depth-first search starts.
    depth_limit : int, optional
        Maximum DFS depth, so a returned path has at most ``depth_limit + 1``
        nodes. Defaults to 10. Larger values may cost considerably more
        computing time.
    graph : networkx graph, optional
        Graph to search; its edges must carry a ``'weight'`` attribute.
        Defaults to the notebook-level graph ``G``.

    Returns
    -------
    list
        The nodes of the best path, starting with ``rootnode``. When several
        paths share the maximum weight, the one whose leaf comes first in the
        tree's node order wins.
    """
    if graph is None:
        graph = G

    # Depth-limited DFS tree; `rootnode` is by construction its only node
    # without an incoming edge, so it is the root of every path below.
    tree = nx.dfs_tree(graph, rootnode, depth_limit)

    # Leaves of the DFS tree (not of the full graph): nodes with no tree child.
    leaves = (node for node, degree in tree.out_degree() if degree == 0)

    def path_weight(path):
        # Weights live on the original graph; the DFS tree carries no attributes.
        return sum(
            graph.edges[u, v]['weight'] for u, v in zip(path, path[1:])
        )

    # In a tree the root-to-leaf path is unique, so shortest_path just reads it off.
    root_to_leaf_paths = (
        nx.shortest_path(tree, rootnode, leaf) for leaf in leaves
    )

    # max() returns the first maximal item, matching a stable descending sort.
    return max(root_to_leaf_paths, key=path_weight)


# DFS: exploring path to completion

In [108]:
# Technique id used as the starting node for the path search below
rootnode = 'T1030'

In [109]:
# Find the best path from the root node, then compute the best path starting
# from each node on it (the first entry is the root's own path again).
treeresult = list(map(getpath, getpath(rootnode)))
treeresult

[['T1030',
  'T1021.006',
  'T1021.002',
  'T1021.001',
  'T1021.005',
  'T1053.005',
  'T1020',
  'T1052.001',
  'T1055.001',
  'T1027',
  'T1070.004'],
 ['T1021.006',
  'T1021.002',
  'T1021.001',
  'T1021.005',
  'T1053.005',
  'T1020',
  'T1052.001',
  'T1055.001',
  'T1007',
  'T1012',
  'T1078'],
 ['T1021.002',
  'T1021.001',
  'T1021.005',
  'T1053.005',
  'T1020',
  'T1052.001',
  'T1055.001',
  'T1007',
  'T1012',
  'T1016',
  'T1547.001'],
 ['T1021.001',
  'T1021.002',
  'T1021.005',
  'T1053.005',
  'T1020',
  'T1052.001',
  'T1055.001',
  'T1007',
  'T1012',
  'T1016',
  'T1547.001'],
 ['T1021.005',
  'T1053.005',
  'T1020',
  'T1052.001',
  'T1055.001',
  'T1007',
  'T1012',
  'T1016',
  'T1016.001',
  'T1018',
  'T1078'],
 ['T1053.005',
  'T1020',
  'T1052.001',
  'T1055.001',
  'T1007',
  'T1012',
  'T1016',
  'T1016.001',
  'T1018',
  'T1021.001',
  'T1078'],
 ['T1020',
  'T1052.001',
  'T1053.005',
  'T1021.001',
  'T1021.002',
  'T1021.005',
  'T1078',
  'T1007',
  'T