In [1]:
import numpy as np
import time
import os
import numpy as np
import scipy as sp
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors
from random import sample
import csv
from collections import Counter
# Imports from the func.py file
from func import *
import random

In [2]:
# Reads the graph from the graphml file.
G = nx.read_graphml('../data/graph0.graphml', node_type=int)

In [3]:
# Creates an adjacency_list for the graph.
adjacency_list = {}

for node in G.nodes:
    adjacency_list[node] = []

for source, dest in G.edges:
    adjacency_list[source].append(dest)

# Important nodes are computers which are connected to others.
important_nodes = []
for node in adjacency_list:
    if len(adjacency_list[node]) > 0:
        important_nodes.append(node)

In [4]:
# Converts list of tuples into just a list of nodes.
def convertTupleListToList(tuple_list):
    ret = []
    
    for t in tuple_list:
        ret.append(t[1])
        
    return ret

In [5]:
# Generates a ranking of nodes from highest in degree to lowest.
def inDegreeRanking(adjacency_list):
    
    # Counts in degree.
    counter = Counter()
    for node in adjacency_list:
        counter[node] = 0
        for neighbour in adjacency_list[node]:
            counter[neighbour]+=1
    
    # Converts count into list of tuples in format (in degree, node)
    ranking = []
    
    for node in counter:
        ranking.append((counter[node], node))
    
    ranking.sort()
    ranking.reverse()
    
    return convertTupleListToList(ranking)

In [6]:
# Generates a ranking of nodes from highest out degree to lowest.
def outDegreeRanking(adjacency_list):
    
    ranking = []
    for node in adjacency_list:
        ranking.append((len(adjacency_list[node]), node))
    
    ranking.sort()
    ranking.reverse()
        
    return convertTupleListToList(ranking)

In [7]:
# Generates a random ranking of nodes.
def randomRanking(adjacency_list):
    
    ranking = []
    
    for node in adjacency_list:
        ranking.append(node)
    
    random.seed(1)
    random.shuffle(ranking)
    
    return ranking

In [8]:
def shortestPathBetweennessRanking(G):
    
    perc = .25
    reduced_weight = .1

    shortest_path_betweenness = nx.edge_betweenness_centrality(G)
    A_sp = create_weighted_adjacency_from_edge_betweenness(G, shortest_path_betweenness, perc, weight=reduced_weight)

    v = A_sp.A
    counter = Counter()
    
    ranking = []

    for i in range(len(v)):

        ranking.append((sum(v[i]), i))

    ranking.sort()
    ranking.reverse()

    return convertTupleListToList(ranking)

In [9]:
def localFlowBetweennessRanking(G):

    # Sets up Julia so that we can run Julia files.
    from julia import Julia
    import julia
    # Make sure to change this to your own julia file path.
    julia.install(julia=r"C:\Users\Daniel\AppData\Local\Programs\Julia-1.7.2\bin\julia.exe")
    julia.Julia(runtime=r"C:\Users\Daniel\AppData\Local\Programs\Julia-1.7.2\bin\julia.exe")
    from julia import Main

    # Includes the Julia files.
    Main.include("../julia/local_flow_betweenness.jl");
    
    perc = .25
    reduced_weight = .1

    local50_betweenness = Main.local_flow_betweenness(list(G.nodes()), list(G.edges()), locality_index=.5)
    A_lf50 = create_weighted_adjacency_from_edge_betweenness(G, local50_betweenness, perc, weight=reduced_weight)

    v = A_lf50.A
    counter = Counter()

    ranking = []

    for i in range(len(v)):

        ranking.append((sum(v[i]), i))

    ranking.sort()
    ranking.reverse()

    return convertTupleListToList(ranking)

In [10]:
def infectNode(adjacency_list, vaccinated, infected):
    
    # List of infectable nodes to select our next infection from.
    infectable = []

    # Go through each infected node and find their neighbours to find nodes to spread to.
    for node in infected:
        for neighbour in adjacency_list[node]:
            if neighbour not in vaccinated and neighbour not in infected:
                infectable.append(neighbour)
        
    # If there are no nodes to infect, return False, indicating that no further nodes can be infected.
    if len(infectable) == 0:
        return False
    
    # Pick a new node randomly from the list of infectable nodes and return True indicating a successful infection.
    infected.add(infectable[random.randrange(0, len(infectable))])
    return True

In [11]:
def vaccinateNode(ranking, vaccinated, infected):
        
    for node in ranking:
        if node not in vaccinated:
            vaccinated.add(node)
            if node in infected:
                infected.remove(node)
            return

In [12]:
def step(ranking, adjacency_list, vaccinated, infected):
    
    done = False
    reward = 0
    
    # If there are no more infections return the reward and say that we're done.
    if not infectNode(adjacency_list, vaccinated, infected):
        done = True
        reward = len(adjacency_list)/num_infected
        
    vaccinateNode(ranking, vaccinated, infected)
    
    return done

In [13]:
# Generates a random ranking of nodes for all the simulations to share.
def generateRandomPolicy(adjacency_list):
    
    ranking = []
    
    for node in adjacency_list:
        ranking.append(node)
    
    random.seed(1)
    random.shuffle(ranking)
    
    return ranking

In [14]:
def finishSimulation(random_policy, node_ranking, adjacency_list, vaccinated, infected, num_infected):
    
    SIMULATION_NUM = 5
    
    # Copy the vaccination so that changes aren't reflected globally.
    vaccinated_copy = vaccinated.copy()
    infected_copy = infected.copy()
    
    # Do a step to test the action we're interested in. If it returns True no new nodes were infected.
    if step(node_ranking, adjacency_list, vaccinated_copy, infected_copy):
        return 0, vaccinated_copy, infected_copy
    
    # Create a copy of the num_infected.
    new_num_infected = num_infected
    done = False
    
    # Repeat the simulation multiple times to account for randomness in spreading.
    for i in range(SIMULATION_NUM):
        
        # Create a copy of the copy of the vaccinated and infected sets for each simulation.
        vaccinated_copy_copy = vaccinated_copy.copy()
        infected_copy_copy = infected_copy.copy()
        
        # While we're still infecting nodes.
        while not done:
            done = step(random_policy, adjacency_list, vaccinated_copy_copy, infected_copy_copy)
            new_num_infected+=1
    
    average_infected = new_num_infected/SIMULATION_NUM
        
    return average_infected, vaccinated_copy, infected_copy

In [15]:
# List of lists where each element is an array paired with a label.
training_data = []
training_label = []

# Number of simulations to build training data from.
simulations = 1

In [16]:
node_ranking = []

node_ranking.append(inDegreeRanking(adjacency_list))
node_ranking.append(outDegreeRanking(adjacency_list))
node_ranking.append(randomRanking(adjacency_list))
# node_ranking.append(localFlowBetweennessRanking(G))
# node_ranking.append(shortestPathBetweennessRanking(G))

In [17]:
# Training loop
# Maybe try training with every important_node?
for i in range(len(important_nodes)):
    # We have to reset the test so we can move onto next epoch.
#     state, reward, done = env.reset()

    print(f"Got up to node {i}.")

    state = 0
    done = False
    
    infected = set()
    vaccinated = set()
    
    infection_source = important_nodes[i]

#     infection_source = important_nodes[random.randrange(0, len(important_nodes))]
    infected.add(infection_source)
    num_infected = 1
    
    # Array storing the vaccination status of each node.
    vaccination_state = [0]*len(adjacency_list)
    assert len(vaccination_state) == 501
        
    while not done:
        
        # Stores the results of the simulations.
        results = []

        # Create a policy to finish off the simulation for each action.
        random_policy = generateRandomPolicy(adjacency_list)

        # Go through each index which is the "action".
        for action in range(len(node_ranking)):

            # Try randomly finishing the simulation using that action.
            result, vaccinated_copy, infected_copy = finishSimulation(random_policy, node_ranking[action], adjacency_list, vaccinated, infected, num_infected)

            # Add the results to an array to pick from later.
            results.append([result, vaccinated_copy, infected_copy])

        best_result = float('inf')
        best_action = 0
        best_vaccinated = None
        best_infected = None

        # Find the action with best result and save it.
        for i, result in enumerate(results):
            res, vaccinated_copy, infected_copy = result
            if res < best_result:
                best_result = res
                best_action = i
                best_vaccinated = vaccinated_copy
                best_infected = infected_copy

        # Update the vaccinated and infected sets from the result we're sticking with.
        vaccinated = best_vaccinated
        infected = best_infected

        # Apply the action to the vaccination_state vector.
        for node in vaccinated:
            vaccination_state[node] = 1
            
        # Append that state and the action to the data.
        training_data.append(vaccination_state)
        training_label.append(best_action)
            
        if num_infected == len(infected):
            done = True
        else:
            num_infected+=1

Got up to node 0.
Got up to node 1.
Got up to node 2.
Got up to node 3.
Got up to node 4.
Got up to node 5.
Got up to node 6.
Got up to node 7.
Got up to node 8.
Got up to node 9.
Got up to node 10.
Got up to node 11.
Got up to node 12.
Got up to node 13.
Got up to node 14.
Got up to node 15.
Got up to node 16.
Got up to node 17.
Got up to node 18.
Got up to node 19.
Got up to node 20.
Got up to node 21.
Got up to node 22.
Got up to node 23.
Got up to node 24.
Got up to node 25.
Got up to node 26.
Got up to node 27.
Got up to node 28.
Got up to node 29.
Got up to node 30.
Got up to node 31.
Got up to node 32.
Got up to node 33.
Got up to node 34.
Got up to node 35.
Got up to node 36.
Got up to node 37.
Got up to node 38.
Got up to node 39.
Got up to node 40.
Got up to node 41.


In [18]:
import tensorflow as tf
from tensorflow import keras

In [19]:
print(len(training_data))

658


In [20]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(training_data, training_label, test_size = 0.2, random_state = 1)

In [21]:
print(y_train)
print(y_test)

[1, 0, 1, 2, 1, 2, 0, 1, 2, 2, 2, 2, 1, 0, 1, 1, 2, 2, 0, 2, 2, 2, 0, 0, 0, 1, 0, 0, 1, 1, 2, 0, 2, 1, 2, 0, 1, 1, 0, 0, 0, 0, 0, 2, 0, 2, 1, 0, 0, 2, 0, 2, 0, 0, 0, 1, 2, 2, 0, 1, 0, 0, 2, 1, 0, 2, 2, 2, 2, 0, 2, 1, 1, 1, 2, 0, 1, 0, 1, 1, 1, 1, 2, 0, 1, 1, 0, 0, 2, 1, 0, 2, 0, 2, 0, 2, 2, 2, 2, 0, 2, 1, 2, 2, 2, 1, 1, 0, 0, 1, 1, 1, 1, 2, 0, 2, 1, 1, 2, 0, 2, 0, 1, 2, 0, 1, 2, 1, 1, 0, 0, 0, 2, 2, 0, 1, 2, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 2, 2, 2, 2, 2, 2, 0, 2, 1, 0, 1, 1, 2, 1, 0, 1, 0, 0, 2, 2, 0, 0, 1, 0, 2, 0, 0, 0, 2, 1, 0, 0, 0, 2, 2, 0, 2, 1, 0, 1, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 0, 1, 0, 2, 1, 0, 0, 1, 0, 1, 1, 0, 2, 1, 1, 2, 2, 0, 0, 1, 2, 0, 0, 2, 0, 1, 1, 0, 0, 2, 1, 1, 0, 0, 2, 2, 2, 1, 0, 1, 1, 0, 0, 0, 0, 2, 1, 1, 1, 1, 0, 2, 1, 0, 0, 0, 1, 2, 0, 1, 1, 0, 2, 1, 2, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 2, 1, 2, 0, 1, 2, 0, 2, 1, 2, 1, 2, 1, 2, 1, 1, 1, 2, 0, 0, 1, 2, 0, 1, 2, 2, 0, 1, 2, 1, 1, 0, 1, 0, 1, 2, 1, 0, 0, 2, 0, 0, 1, 2, 1, 2, 0, 0, 1, 1, 0, 1, 0, 1, 1, 

In [22]:
for i in range(len(node_ranking)):
    print(y_train.count(i))

180
193
153


In [23]:
model = keras.Sequential([
    keras.layers.Dense(units=256, activation='relu'),
    keras.layers.Dense(units=128, activation='relu'),
    keras.layers.Dense(units=64, activation='relu'),
    keras.layers.Dense(units=len(node_ranking), activation='softmax')
])

In [24]:
model.compile(optimizer='adam', 
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(
    X_train, y_train, 
    epochs=50, 
    validation_data=(X_test, y_test)
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Important nodes version.

# Takes the sets of infected and vaccinated nodes and returns the id of a new node to infect.
# Returns None if there is no new node to infect.
def findNewInfection(infected: set, vaccinated: set) -> int:
    
    # List of infectable nodes to select our next infection from.
    infectable = []

    # Go through each infected node and find their neighbours to find nodes to spread to.
    for node in infected:
        for neighbour in adjacency_list[node]:
            if neighbour not in vaccinated and neighbour not in infected:
                infectable.append(neighbour)
        
    # If there are no nodes to infect, return None.
    if len(infectable) == 0:
        return None
    
    # Otherwise, pick a new node randomly from the list of infectable nodes.
    return infectable[random.randrange(0, len(infectable))]

# Takes the list of node rankings, and the number of vaccinated nodes, returns the id of a new node to vaccinate.
def findNewVaccination(node_ranking, vaccinated, vaccination_state) -> int:
    
    prediction = model.predict(np.array([vaccination_state,]))
    vaccination_method = np.argmax(prediction)
    
    index = 0
    
    while node_ranking[vaccination_method][index] in vaccinated:
        index+=1
        assert index < len(node_ranking[vaccination_method])
    return node_ranking[vaccination_method][index]
    
simulation_num = 1

simulations_per_method = simulation_num*len(important_nodes)

# Keep track of the number of infected nodes with each type of vaccination method. 
num_infected_nodes = 0

# Simulate the infection with all nodes as starting nodes.
for count, infection_source in enumerate(important_nodes):
    
    print(f"Started node {count}!")
    
    vaccination_state = np.array([0]*len(adjacency_list))

    # Run simulation_num simulations.
    for i in range(simulation_num):

        # Set random seed to create reproducable results.
        random.seed(i)

        # List containing the current nodes which are infected.
        infected = set()

        # List containing the vaccinated nodes, so we can pick which ones to intervene on.
        vaccinated = set()

        # The source computer starts off as infected.
        infected.add(infection_source)
        num_infected_nodes+=1

        # While the ransomware has spread in the last round, keep going.
        while True:

            new_infection = findNewInfection(infected, vaccinated)

            # If there are no new nodes to infect, end the simulation.
            if new_infection is None:
                break

            # Otherwise, add it to infected.
            infected.add(new_infection)
            num_infected_nodes+=1

            new_vaccination = findNewVaccination(node_ranking, vaccinated, vaccination_state)

            # Vaccinates the selected node.
            vaccinated.add(new_vaccination)
            vaccination_state[new_vaccination] = 1
            if new_vaccination in infected:
                infected.remove(new_vaccination)

print(f"Neural network had an average of {num_infected_nodes/simulations_per_method} infections per simulation.")

Started node 0!
Started node 1!




Started node 2!
Started node 3!




Started node 4!
Started node 5!




Started node 6!


Started node 7!




Started node 8!




Started node 9!




Started node 10!




Started node 11!




Started node 12!
Started node 13!




Started node 14!




Started node 15!
Started node 16!




Started node 17!




Started node 18!




Started node 19!






In [None]:
print(node_ranking)

In [None]:
print(f"Neural network had an average of {num_infected_nodes/simulations_per_method} infections per simulation.")