In [34]:
import numpy as np
import pandas as pd
from pgmpy.factors.discrete import DiscreteFactor
from pgmpy.models import FactorGraph
from pgmpy.inference import BeliefPropagation

# install pgmpy using
# pip3 install pgmpy 

## Task 2 

### Task 2.1


In [35]:
G = FactorGraph() ## Create FactorGraph object
###############################
#   TODO: Define factor functions
###############################
f_1 = DiscreteFactor(['S1'],[2],[0.85,0.15])
f_2 = DiscreteFactor(['S1','E1'],[2,2],[0.1,0.2,0,0.5])

###############################
#   TODO: Add random variables
#         and factor functions 
###############################
G.add_nodes_from(['S1','E1'])  ## Add random variables 
G.add_factors(f_1,f_2)     ## Add factor functions

###############################
#   TODO: Add the edges for random 
#   variables and factor functions
###############################
G.add_edges_from([('S1',f_1),('S1',f_2),('E1',f_2)])

### Task 2.2

In [36]:
bp = BeliefPropagation(G)
#https://pgmpy.org/_modules/pgmpy/inference/ExactInference.html
###############################
#   TODO: Compute the marginal probability
###############################
margin = bp.query(variables = ['S1'],show_progress= False)
margin.normalize()
print(margin)

+-------+-----------+
| S1    |   phi(S1) |
| S1(0) |    0.7727 |
+-------+-----------+
| S1(1) |    0.2273 |
+-------+-----------+


### Task 2.3


In [37]:
print('State S1 = 0 (No Attack) maximises marginal probability of S1')

State S1 = 0 (No Attack) maximises marginal probability of S1


### Task 2.4


In [38]:
margin = bp.query(variables = ['S1'],evidence = {'E1':1},show_progress= False)
print('Evidence E1=1')
print(margin)

Evidence E1=1
+-------+-----------+
| S1    |   phi(S1) |
| S1(0) |    0.1700 |
+-------+-----------+
| S1(1) |    0.0750 |
+-------+-----------+


### Task 2.5


In [39]:
print('We see that when E1 = 1 (Scan observed), S1 = 0 (No Attack) is the most probable state.')

We see that when E1 = 1 (Scan observed), S1 = 0 (No Attack) is the most probable state.


### Task 2.6


In [40]:
print('For Task 2.2')

For Task 2.2


Joint Probability is given by $P(E1,S1)=\frac{1}{Z}f(S1)g(E1,S1)$ <br>
where, <br>

$Z = \sum_{S1} \sum_{E1} f(S1)g(E1,S1)$<br>
$Z = 0.85*(0.1+0.2) + 0.15*(0+0.5)$<br>
$z = 0.33$<br>

**To get P(S1) we marginalise E1**<br>
for S1 = 0: <br>
$P(S1=0) = 1/0.33 * 0.85 * (0.1+0.2) $ <br>
**$P(S1=0) = 0.7727$**


for S1 = 1: <br>
$P(S1=0) = 1/0.33 * 0.15 * (0+0.5) $ <br>
**$P(S1=0) = 0.2272$**

In [41]:
print('For Task 2.4')

For Task 2.4


**Conditional Probability when E1 = 1 is observed** <br>

$P(S1 = 0 | E1 = 1) = \frac{P(S1 = 0, E1 = 1)}{P(E1=1)} $ <br>
$P(S1 = 1 | E1 = 1) = \frac{P(S1 = 1, E1 = 1)}{P(E1=1)} $ <br>

P(S1 = 0, E1 = 1) = 0.2 x 0.85 = 0.170 <br>
P(S1 = 1, E1 = 1) = 0.5 x 0.15 = 0.075 <br>

## Task 3

In [42]:
ATTACK_EVENTS_MAP = {
    'Scan':1,
    'Login':2,
    'Sensitive_URI':3,
    'New_Kernel_Module':4,
    'DNS_Tunneling':5
}
ATTACK_STATES_MAP = {
    'benign': 1,
    'discovery': 2,
    'access': 3,
    'lateral_movement': 4,
    'privilege_escalation': 5,
    'persistence': 6,
    'defense_evasion': 7,
    'collection': 8,
    'exfiltration': 9,
    'command_control': 10,
    'execution': 11
}




### Task 3.0 

In [43]:
from collections import Counter,defaultdict

##  Set up DS
attack_events = list(ATTACK_EVENTS_MAP.keys())
attack_states = list(ATTACK_STATES_MAP.keys())

severity_factors = defaultdict(Counter)

## Loop through event reviews file
event_review = open('Datasets/event_review.txt', 'r') 
Lines = event_review.readlines() 
for line in Lines: 
    x = list([i[0] for i in map(str.split, line[7:].strip().split('//[Latent Attack State]'))])
    severity_factors[x[0]][x[1]] +=1
    
# print(severity_factors)  

## normalize the prob distribution
for k in severity_factors.keys():
    count = sum(severity_factors[k].values())
    for i in severity_factors[k].keys():
        severity_factors[k][i] /= count
print('Severity Factor Functions:')
print(severity_factors) 



Severity Factor Functions:
defaultdict(<class 'collections.Counter'>, {'Sensitive_URI': Counter({'benign': 0.5533333333333333, 'privilege_escalation': 0.44666666666666666}), 'Scan': Counter({'benign': 0.936, 'discovery': 0.064}), 'New_Kernel_Module': Counter({'benign': 0.875, 'persistence': 0.125}), 'DNS_Tunneling': Counter({'exfiltration': 0.98, 'benign': 0.02}), 'Login': Counter({'benign': 1.0})})


### Task 3.1

In [49]:
from itertools import islice

def sliding_window(seq, n=3):
    "credits: https://stackoverflow.com/questions/6822725/rolling-or-sliding-window-iterator"
    "Returns a sliding window (of width n) over data from the iterable"
    "   s -> (s0,s1,...s[n-1]), (s1,s2,...,sn), ...                   "
    it = iter(seq)
    result = tuple(islice(it, n))
    if len(result) == n:
        yield result
    for elem in it:
        result = result[1:] + (elem,)
        yield result


##  Set up DS
commonality_factors = Counter()
repetitive_factors = Counter()

## Loop through attack sequences file

attack_sequences = open('Datasets/attack_sequences.txt', 'r') 
Lines = attack_sequences.readlines() 
for line in Lines:
    line = line.strip().split(' ')
#     print(line)
    for window in sliding_window(line,n = 3):
        if len(window) == 3:
            if len(set(window)) > 1:
                commonality_factors[tuple(window)] += 1
            else:
                repetitive_factors[tuple(window)] +=1
    
print('Most common continuous frequent sequence is ')
print(commonality_factors.most_common(1))
print('\nMost common continuous repetitive frequent sequence is ')
print(repetitive_factors.most_common(1))


Most common continuous frequent sequence is 
[(('Scan', 'Sensitive_URI', 'New_Kernel_Module'), 200)]

Most common continuous repetitive frequent sequence is 
[(('Sensitive_URI', 'Sensitive_URI', 'Sensitive_URI'), 186)]


### Task 3.2  You will have to submit the graph you draw through Compass

Checkpoint 1.5 submitted

### Task 3.3

In [47]:
G = FactorGraph() ## Create FactorGraph object
###############################
#   TODO: Define factor functions
###############################
f_1 = DiscreteFactor(['e1','s1'],[1,11])
f_2 = DiscreteFactor()
...
f_9 = DiscreteFactor()

r = DiscreteFactor()
c = DiscreteFactor() 
###############################
#   TODO: Add random variables
#         and factor functions 
###############################
G.add_nodes_from()  ## Add random variables 
G.add_factors()     ## Add factor functions

###############################
#   TODO: Add the edges for random 
#   variables and factor functions
###############################
G.add_edges_from()

###############################
#   TODO: Do the inference
###############################
bp = BeliefPropagation(G)

TypeError: __init__() missing 1 required positional argument: 'values'

### Task 3.4

#### a. At every time point, provide the marginal probability of each state (Since we have 9 time points and 11 possible states, you should provide 99 probability values here)

#### b. At every time point, provide the most probable state

### Task 3.5

In [None]:
ACTIONS = {
    # each value in an actions' vector corresponds to an attack stage
    'NO-OP':   [1.,   0.61, 0.69, 0.09, 0.2 , 0. ,  0.,   0.,   0. ,  0. ,  0.  ],
    'MONITOR': [0.  , 0.39, 0.31 ,0.84, 0.63, 0.7,  0.07 ,0.1 , 0. ,  0. ,  0.  ],
    'STOP':    [0.  , 0.,   0.  , 0.07, 0.17, 0.3,  0.93 ,0.9 , 1. ,  1. ,  1.  ]
}

### Task 3.6

#### Indicate the earliest stage in which your model should recommend stopping the attack

### Task 3.7

#### a. Judge whether the most probable states for $s_1-s_6,s_8,s_9$ remain the same as Task3.2
#### b. State the reason for your judgement

### Task 3.8

#### a. Draw an HMM model for the attack scenario given the provided states and events.
#### b. What parameters are needed for this HMM model to work?
#### c. Give an example of an advantage of the FG over the HMM model.