In [1]:
import os

import networkx as nx
from py2neo import Graph, Node, Relationship, NodeMatcher, cypher
from pyvis.network import Network

In [2]:
# Connect to Neo4j. The password is taken from the NEO4J_PASSWORD environment
# variable so real credentials do not have to live in the notebook; "admin" is
# kept as the fallback so an existing local setup keeps working unchanged.
graph = Graph(password=os.environ.get("NEO4J_PASSWORD", "admin"))
# WARNING: destructive. Empties the connected database so that every run of
# the notebook starts from a clean graph.
graph.delete_all()

In [4]:
def matchnode(label, **props):
    """Look up a single node in the module-level ``graph``.

    Args:
        label: node label to match on.
        **props: property name/value pairs the node must carry.

    Returns:
        Whatever ``NodeMatcher.match(...).first()`` yields for the first
        matching node (presumably ``None`` when nothing matches; callers in
        this notebook only test the result for truthiness).
    """
    matcher = NodeMatcher(graph)
    candidates = matcher.match(label, **props)
    return candidates.first()


def createNode(label, **props):
    """Merge a node with the given label and properties into ``graph``.

    Args:
        label: label for the node; also used as the merge label.
        **props: node properties. The merge is keyed on the ``"name"``
            property, so callers are expected to pass ``name=...``.

    Returns:
        The local ``Node`` object that was handed to ``graph.merge``.
    """
    node = Node(label, **props)
    # Merge (not create), keyed on (label, "name").
    graph.merge(node, label, "name")
    return node


def createRelationship(node1, relationshiptype, node2):
    """Create a ``node1 -[relationshiptype]-> node2`` relationship in ``graph``.

    Args:
        node1: start node of the relationship.
        relationshiptype: relationship type name.
        node2: end node of the relationship.
    """
    graph.create(Relationship(node1, relationshiptype, node2))
    
def showgraph(G, filename="nx.html"):
    """Render a networkx graph with pyvis.

    Args:
        G: networkx graph to draw; it is loaded with ``Network.from_nx``.
        filename: HTML file handed to ``Network.show``. Defaults to
            ``"nx.html"`` (the previous hard-coded value); pass a different
            name per call to stop one visualisation overwriting another.
    """
    # view the result in pyviz
    nt = Network(width="1000", height="500", directed=True)
    #nt.show_buttons()
    # vis.js options: node physics off, edges inherit their colour and are
    # drawn as straight lines, physics minVelocity set to 0.75.
    nt.set_options("""var options = {
      "nodes": {
        "physics": false
      },
      "edges": {
        "color": {
          "inherit": true
        },
        "smooth": false
      },
      "physics": {
        "minVelocity": 0.75
      }
    }""")
    nt.from_nx(G)
    nt.show(filename)
    
def vizuliser(tuplist):
    """Draw a list of 5-tuples as a directed graph.

    Each tuple is read positionally: index 1 is the source node name, index 4
    the target node name and index 2 the relationship, which is stored on the
    edge as the ``name`` attribute. Indices 0 and 3 are not used here.
    """
    digraph = nx.DiGraph()
    digraph.add_edges_from(
        (row[1], row[4], {"name": row[2]}) for row in tuplist
    )
    showgraph(digraph)
    
def schemabuild(elm):
    """Write one edge tuple to the graph database.

    Args:
        elm: 5-tuple ``(label1, name1, relationship, label2, name2)``.

    The source node is looked up first and only created when the lookup
    comes back empty; the target node always goes through ``createNode``.
    The relationship between the two is then created.
    """
    src_label, src_name, rel_type, dst_label, dst_name = (
        elm[0], elm[1], elm[2], elm[3], elm[4]
    )
    node1 = matchnode(src_label, name=src_name)
    if not node1:
        node1 = createNode(src_label, name=src_name)
    node2 = createNode(dst_label, name=dst_name)
    createRelationship(node1, rel_type, node2)
        
    

In [5]:
# Synthetic edge data.
#
# Every entry is a 5-tuple:
#   (source node class, source node name, relationship,
#    target node class, target node name)
data = [
    # Edges that make up the attack vector
    ("program", "cmd", "modify", "file", "shadow"),
    ("ip", "123.10.10.1", "remote_execute", "program", "cmd"),
    ("program", "cmd", "callback", "ip", "123.10.10.1"),
    ("file", "funny.pdf", "execute", "program", "cmd"),
    ("website", "http://192.2.2.1/ad", "download", "file", "funny.pdf"),
    ("user", "Smith", "visit", "website", "http://192.2.2.1/ad"),
    # Unrelated background activity (noise)
    ("program", "cmd", "run", "program", "test.py"),
    ("ip", "123.10.10.1", "dns_connect", "server", "singtel"),
    ("program", "cmd", "close", "program", "test.py"),
    ("file", "funny.pdf", "start", "program", "painter.exe"),
    ("website", "http://192.2.2.1/ad", "navigate", "website", "google.com"),
    ("user", "Smith", "opens", "program", "chrome"),
]

# A second attack vector, uploaded later in the notebook
newdata = [
    ("user", "James", "remote_login", "ip", "196.123.10.1"),
    ("ip", "196.123.10.1", "ssh_in", "program", "cmd"),
    ("program", "cmd", "modify", "file", "shadow"),
]

In [6]:
# Visualise the synthetic data: every tuple in `data` is drawn as one
# directed edge between its source and target node names.
vizuliser(data)

In [7]:
# Upload the edges to the graph database, one tuple at a time.
# schemabuild creates the nodes as needed and then the relationship.
for x in data:
    schemabuild(x)

In [8]:
# For every user node, stream the shortest path to the file named "shadow".
# The projection is UNDIRECTED, so relationship direction is ignored.
# Each returned row is one node on a path plus its cost from the start node.
cypherQ = """
MATCH (u:user),(f:file{name: "shadow"})
CALL gds.alpha.shortestPath.stream({
  startNode: u,
  endNode: f,
  nodeProjection: "*",
  relationshipProjection: {
    all: {
      type: "*",
      orientation:  "UNDIRECTED"
    }
  }
})
YIELD nodeId, cost
RETURN gds.util.asNode(nodeId).name AS name, cost as dist
"""
result = graph.run(cypherQ).data()
result

[{'name': 'Smith', 'dist': 0.0},
 {'name': 'http://192.2.2.1/ad', 'dist': 1.0},
 {'name': 'funny.pdf', 'dist': 2.0},
 {'name': 'cmd', 'dist': 3.0},
 {'name': 'shadow', 'dist': 4.0}]

In [9]:
# Turn the ordered path rows into (source, target) edges by pairing each row
# with its successor. A row whose dist is 0 is the start node of a new path
# (the query returns one path per matching user, back to back), so it must
# not be linked to the last node of the previous path.
edgelist = [
    (prev['name'], nxt['name'])
    for prev, nxt in zip(result, result[1:])
    if nxt['dist'] != 0
]
G1 = nx.DiGraph()
G1.add_edges_from(edgelist)
showgraph(G1)

In [10]:
# Add the new synthetic attack vector to the graph database,
# one tuple at a time (same upload path as the initial data).
for x in newdata:
    schemabuild(x)

In [11]:
# Same query as before: shortest path from every user node to the file
# named "shadow", now evaluated against the graph with the new edges added.
cypherQ = """
MATCH (u:user),(f:file{name: "shadow"})
CALL gds.alpha.shortestPath.stream({
  startNode: u,
  endNode: f,
  nodeProjection: "*",
  relationshipProjection: {
    all: {
      type: "*",
      orientation:  "UNDIRECTED"
    }
  }
})
YIELD nodeId, cost
RETURN gds.util.asNode(nodeId).name AS name, cost as dist
"""
result = graph.run(cypherQ).data()
result

[{'name': 'Smith', 'dist': 0.0},
 {'name': 'http://192.2.2.1/ad', 'dist': 1.0},
 {'name': 'funny.pdf', 'dist': 2.0},
 {'name': 'cmd', 'dist': 3.0},
 {'name': 'shadow', 'dist': 4.0},
 {'name': 'James', 'dist': 0.0},
 {'name': '196.123.10.1', 'dist': 1.0},
 {'name': 'cmd', 'dist': 2.0},
 {'name': 'shadow', 'dist': 3.0}]

In [12]:
# Shortest path from the user named "James" to the file named "shadow"
# (UNDIRECTED projection, so relationship direction is ignored).
cypherQ = """
MATCH (u:user{name:"James"}),(f:file{name: "shadow"})
CALL gds.alpha.shortestPath.stream({
  startNode: u,
  endNode: f,
  nodeProjection: "*",
  relationshipProjection: {
    all: {
      type: "*",
      orientation:  "UNDIRECTED"
    }
  }
})
YIELD nodeId, cost
RETURN gds.util.asNode(nodeId).name AS name, cost as dist
"""
result = graph.run(cypherQ).data()
# Pair each path row with its successor to get (source, target) edges.
# A successor with dist == 0 would be the start of another path, so such a
# pair is skipped rather than drawn as a false edge between two paths.
edgelist = [
    (prev['name'], nxt['name'])
    for prev, nxt in zip(result, result[1:])
    if nxt['dist'] != 0
]
G1 = nx.DiGraph()
G1.add_edges_from(edgelist)
showgraph(G1)