In [134]:
import xgt
import os
import pandas

from platform import python_version
print (python_version())

3.7.4


In [135]:
if os.environ.get('https_proxy'):
 del os.environ['https_proxy']
if os.environ.get('http_proxy'):
 del os.environ['http_proxy']

In [136]:
conn=xgt.Connection()
conn.server_version

'1.3.0'

In [137]:
try:
  devices = conn.get_vertex_frame('Devices')
except xgt.XgtNameError:
  devices = conn.create_vertex_frame(
      name='Devices',
      schema=[['device', xgt.TEXT]],
      key='device')
devices

<xgt.graph.VertexFrame at 0x7f6ae44fd390>

In [140]:
try:
  netflow = conn.get_edge_frame('Netflow')
except xgt.XgtNameError:
  netflow = conn.create_edge_frame(
      name='Netflow',
      schema=[['epoch_time', xgt.INT],
              ['duration', xgt.INT],
              ['src_device', xgt.TEXT],
              ['dst_device', xgt.TEXT],
              ['protocol', xgt.INT],
              ['src_port', xgt.INT],
              ['dst_port', xgt.INT],
              ['src_packets', xgt.INT],
              ['dst_packets', xgt.INT],
              ['src_bytes', xgt.INT],
              ['dst_bytes', xgt.INT]],
      source=devices,
      target=devices,
      source_key='src_device',
      target_key='dst_device')
netflow

<xgt.graph.EdgeFrame at 0x7f6ae44bec50>

In [139]:
try:
  host_events = conn.get_edge_frame('HostEvents')
except xgt.XgtNameError:
  host_events = conn.create_edge_frame(
      name='HostEvents',
      schema=[['epoch_time', xgt.INT],
              ['event_id', xgt.INT],
              ['log_host', xgt.TEXT],
              ['user_name', xgt.TEXT],
              ['domain_name', xgt.TEXT],
              ['logon_id', xgt.INT],
              ['process_name', xgt.TEXT],
              ['process_id', xgt.INT],
              ['parent_process_name', xgt.TEXT],
              ['parent_process_id', xgt.INT]],
           source=devices,
           target=devices,
           source_key='log_host',
           target_key='log_host')
host_events

<xgt.graph.EdgeFrame at 0x7f6ae44bc590>

In [138]:
try:
  auth_events = conn.get_edge_frame('AuthEvents')
except xgt.XgtNameError:
  auth_events = conn.create_edge_frame(
           name='AuthEvents',
           schema = [['epoch_time',xgt.INT],
                     ['event_id',xgt.INT],
                     ['log_host',xgt.TEXT],
                     ['logon_type',xgt.INT],
                     ['logon_type_description',xgt.TEXT],
                     ['user_name',xgt.TEXT],
                     ['domain_name',xgt.TEXT],
                     ['logon_id',xgt.INT],
                     ['subject_user_name',xgt.TEXT],
                     ['subject_domain_name',xgt.TEXT],
                     ['subject_logon_id',xgt.TEXT],
                     ['status',xgt.TEXT],
                     ['src',xgt.TEXT],
                     ['service_name',xgt.TEXT],
                     ['destination',xgt.TEXT],
                     ['authentication_package',xgt.TEXT],
                     ['failure_reason',xgt.TEXT],
                     ['process_name',xgt.TEXT],
                     ['process_id',xgt.INT],
                     ['parent_process_name',xgt.TEXT],
                     ['parent_process_id',xgt.INT]],
            source = devices,
            target = devices,
            source_key = 'src',
            target_key = 'destination')
auth_events

<xgt.graph.EdgeFrame at 0x7f6ae44be150>

In [141]:
# Utility to print the sizes of data currently in xGT
def print_data_summary():
  print('Devices (vertices): {:,}'.format(devices.num_vertices))
  print('Netflow (edges): {:,}'.format(netflow.num_edges))
  print('Host events (edges): {:,}'.format(host_events.num_edges))
  print('Authentication events (edges): {:,}'.format(auth_events.num_edges))
  print('Total (edges): {:,}'.format(
      netflow.num_edges + host_events.num_edges + auth_events.num_edges))
    
print_data_summary()

Devices (vertices): 0
Netflow (edges): 0
Host events (edges): 0
Authentication events (edges): 0
Total (edges): 0


In [110]:
%%time

# Load the HostEvents event data:
if host_events.num_edges == 0:
    # urls = ["https://datasets.trovares.com/LANL/xgt/wls_day-85_1v.csv"]
    # urls = ["xgtd://nvme_data1/data_1v/wls_day-{:02d}_1v.csv".format(_) for _ in range(2,91)]
    urls = ["xgtd:///nvme_data1/data_1v/wls_day-85_1v.csv"]
    # urls = ["xgtd://data_1v/wls_day-11_1v.csv"]
    host_events.load(urls)
    print_data_summary()

Devices (vertices): 10,324
Netflow (edges): 0
Host events (edges): 18,637,483
Authentication events (edges): 0
Total (edges): 18,637,483
CPU times: user 127 ms, sys: 50.3 ms, total: 178 ms
Wall time: 15.1 s


In [142]:
%%time

# Load the AuthEvents event data:
if auth_events.num_edges == 0:
    # urls = ["https://datasets.trovares.com/LANL/xgt/wls_day-85_2v.csv"]
    # urls = ["xgtd://nvme_data9/data_2v/wls_day-{:02d}_2v.csv".format(_) for _ in range(2,91)]
    urls = ["xgtd:///nvme_data9/data_2v/wls_day-85_2v.csv"]
    #urls = ["xgtd://data_2v/wls_day-11_2v.csv"]
    auth_events.load(urls)
    print_data_summary()

Devices (vertices): 12,288
Netflow (edges): 0
Host events (edges): 0
Authentication events (edges): 47,790,045
Total (edges): 47,790,045
CPU times: user 269 ms, sys: 94.5 ms, total: 363 ms
Wall time: 33.4 s


In [91]:
%%time

# Load the netflow data:
if netflow.num_edges == 0:
    #urls = ["https://datasets.trovares.com/LANL/xgt/nf_day-85.csv"]
    # urls = ["xgtd://nvme_data1/data_nf/nf_day-{:02d}.csv".format(_) for _ in range(2,91)]
    urls = ["xgtd:///nvme_data1/data_nf/nf_day-85.csv"]
    # urls = ["xgtd://nvme_data1/data_nf/nf_day-11.csv"]
    netflow.load(urls)
    print_data_summary()

CPU times: user 0 ns, sys: 779 µs, total: 779 µs
Wall time: 650 µs


In [143]:
# Utility function to launch queries and show job number:
#   The job number may be useful if a long-running job needs
#   to be canceled.

def run_query(query, table_name = "answers8", drop_answer_table=True, show_query=True):
    if drop_answer_table:
        conn.drop_frame(table_name)
    if query[-1] != '\n':
        query += '\n'
    query += 'INTO {}'.format(table_name)
    if show_query:
        print("Query:\n" + query)
    job = conn.schedule_job(query)
    print("Launched job {}".format(job.id))
    conn.wait_for_job(job)
    table = conn.get_table_frame(table_name)
    return table

In [144]:
# Generate a new edge frame for holding only the tgt_req_events edges
import time
query_start_time = time.time()

conn.drop_frame('tgt_req_events')
TGT_Req = conn.create_edge_frame(
            name='tgt_req_events',
            schema=auth_events.schema,
            source=devices,
            target=devices,
            source_key='src',
            target_key='destination')
TGT_Req

<xgt.graph.EdgeFrame at 0x7f6ae44b6bd0>

In [145]:
# Generate a new edge frame for holding only the service_ticket_req edges
import time
query_start_time = time.time()

conn.drop_frame('service_ticket_req')
Service_Req = conn.create_edge_frame(
            name='service_ticket_req',
            schema=auth_events.schema,
            source=devices,
            target=devices,
            source_key='src',
            target_key='destination')
Service_Req

<xgt.graph.EdgeFrame at 0x7f6ae44c5a10>

In [146]:
%%time

#Filter out all the Nodes in which 4768(TGT request has been logged)

q = """
MATCH (n1:Devices)-[r:AuthEvents]->(n2:Devices)
WHERE r.event_id = 4768
CREATE (n1)-[r1:tgt_req_events {epoch_time:r.epoch_time, 
event_id:r.event_id,
log_host:r.log_host, 
logon_type:r.logon_type,
logon_type_description:r.logon_type_description, 
user_name:r.user_name,
domain_name:r.domain_name, 
logon_id:r.logon_id,
subject_user_name:r.subject_user_name, 
subject_domain_name:r.subject_domain_name,
subject_logon_id:r.subject_logon_id, 
status:r.status,
service_name:r.service_name,
authentication_package:r.authentication_package,
failure_reason: r.failure_reason, 
process_name:r.process_name,
process_id:r.process_id, 
parent_process_name:r.parent_process_name,
parent_process_id:r.parent_process_id}]->(n2)
RETURN count(*)
"""
data = run_query(q)
print('Number of answers: {:,}'.format(data.get_data()[0][0]))

Query:

MATCH (n1:Devices)-[r:AuthEvents]->(n2:Devices)
WHERE r.event_id = 4768
CREATE (n1)-[r1:tgt_req_events {epoch_time:r.epoch_time, 
event_id:r.event_id,
log_host:r.log_host, 
logon_type:r.logon_type,
logon_type_description:r.logon_type_description, 
user_name:r.user_name,
domain_name:r.domain_name, 
logon_id:r.logon_id,
subject_user_name:r.subject_user_name, 
subject_domain_name:r.subject_domain_name,
subject_logon_id:r.subject_logon_id, 
status:r.status,
service_name:r.service_name,
authentication_package:r.authentication_package,
failure_reason: r.failure_reason, 
process_name:r.process_name,
process_id:r.process_id, 
parent_process_name:r.parent_process_name,
parent_process_id:r.parent_process_id}]->(n2)
RETURN count(*)
INTO answers8
Launched job 305
Number of answers: 875,992
CPU times: user 57.5 ms, sys: 27.6 ms, total: 85.1 ms
Wall time: 5.59 s


In [147]:
%%time

#Filter out all the Nodes in which 4769(Service ticket request has been logged)

q = """
MATCH (n1:Devices)-[r:AuthEvents]->(n2:Devices)
WHERE r.event_id = 4769
CREATE (n1)-[r1:service_ticket_req {epoch_time:r.epoch_time, 
event_id:r.event_id,
log_host:r.log_host, 
logon_type:r.logon_type,
logon_type_description:r.logon_type_description, 
user_name:r.user_name,
domain_name:r.domain_name, 
logon_id:r.logon_id,
subject_user_name:r.subject_user_name, 
subject_domain_name:r.subject_domain_name,
subject_logon_id:r.subject_logon_id, 
status:r.status,
service_name:r.service_name,
authentication_package:r.authentication_package,
failure_reason: r.failure_reason, 
process_name:r.process_name,
process_id:r.process_id, 
parent_process_name:r.parent_process_name,
parent_process_id:r.parent_process_id}]->(n2)
RETURN count(*)
"""
data = run_query(q)
print('Number of answers: {:,}'.format(data.get_data()[0][0]))

Query:

MATCH (n1:Devices)-[r:AuthEvents]->(n2:Devices)
WHERE r.event_id = 4769
CREATE (n1)-[r1:service_ticket_req {epoch_time:r.epoch_time, 
event_id:r.event_id,
log_host:r.log_host, 
logon_type:r.logon_type,
logon_type_description:r.logon_type_description, 
user_name:r.user_name,
domain_name:r.domain_name, 
logon_id:r.logon_id,
subject_user_name:r.subject_user_name, 
subject_domain_name:r.subject_domain_name,
subject_logon_id:r.subject_logon_id, 
status:r.status,
service_name:r.service_name,
authentication_package:r.authentication_package,
failure_reason: r.failure_reason, 
process_name:r.process_name,
process_id:r.process_id, 
parent_process_name:r.parent_process_name,
parent_process_id:r.parent_process_id}]->(n2)
RETURN count(*)
INTO answers8
Launched job 340
Number of answers: 2,271,788
CPU times: user 52.9 ms, sys: 27.8 ms, total: 80.6 ms
Wall time: 9.24 s


In [148]:
data=None
if Service_Req.num_edges == 0:
    print("service_ticket_req is empty")
elif Service_Req.num_edges <= 1000:
    data = Service_Req.get_data_pandas()
else:
    data = 'service_ticket_req (edges): {:,}'.format(Service_Req.num_edges)
data

'service_ticket_req (edges): 2,271,788'

In [149]:
# Utility to print the data sizes currently in xGT
def print_netflow_data_summary():
  print_data_summary()
  print('service_ticket_req (edges): {:,}'.format(Service_Req.num_edges))

print_netflow_data_summary()

Devices (vertices): 12,288
Netflow (edges): 0
Host events (edges): 0
Authentication events (edges): 47,790,045
Total (edges): 47,790,045
service_ticket_req (edges): 2,271,788


In [151]:
# Delete all the service_ticket_req relationships which does not have tgt_req_events with in the interval

q = """
MATCH (n1:Devices)-[r1:tgt_req_events]->(n2:Devices)
WHERE r1.epoch_time > 0
DELETE r1
RETURN count(*)
"""


answer_table_1 = run_query(q)
print('Number of answers: {:,}'.format(answer_table_1.num_rows))

Query:

MATCH (n1:Devices)-[r1:tgt_req_events]->(n2:Devices)
WHERE r1.epoch_time > 0
DELETE r1
RETURN count(*)
INTO answers8
Launched job 472
Number of answers: 1


In [152]:
q = """
MATCH (n1:Devices)-[r1:service_ticket_req]->(n2:Devices)<-[r2:tgt_req_events]-(n1)
WHERE r1.src = r2.src
AND r1.epoch_time > r2.epoch_time
AND r1.epoch_time - r2.epoch_time < 60      
DELETE r1, r2
RETURN count(*)
"""


answer_table_1 = run_query(q)
print('Number of answers: {:,}'.format(answer_table_1.num_rows))

Query:

MATCH (n1:Devices)-[r1:service_ticket_req]->(n2:Devices)<-[r2:tgt_req_events]-(n1)
WHERE r1.src = r2.src
AND r1.epoch_time > r2.epoch_time
AND r1.epoch_time - r2.epoch_time < 60      
DELETE r1, r2
RETURN count(*)
INTO answers8


XgtInternalError: Low level error occured. More information in the 'detail' field.

In [131]:
%%time

#Lateral Movement Query
#time_threshold_between_step = 3600   # one hour
#time_threshold_hijack = 180          # three minutes
#time_threshold_one_step = 480        # eight minutes
q = """
MATCH (n1:Devices)-[r1:service_ticket_req]->(n2:Devices),
(n2)-[event:HostEvents]->(n2),
(n1)-[r3:AuthEvents]->(n3:Devices),
(n1)-[r4:Netflow]->(n3)
WHERE event.event_id = 4688 AND event.process_name = "lsass.exe"
AND r3.event_id = 4624
AND r3.src = r1.src
AND r3.destination = r1.service_name
AND r3.authentication_package = "Kerberos"
AND r1.epoch_time < r3.epoch_time
AND r3.epoch_time - r1.epoch_time < 3600
AND r4.src_device = r1.src
AND r4.dst_device = r1.service_name
AND r3.epoch_time < r4.epoch_time
AND r4.epoch_time - r3.epoch_time < 200
AND r4.duration < 300
RETURN DISTINCT r1.epoch_time, r1.log_host, r1.src, r1.service_name, r1.authentication_package, r3.epoch_time, r4.epoch_time, r4.duration
"""
answer_table_2 = run_query(q)
print('Number of answers: {:,}'.format(answer_table_2.num_rows))

Query:

MATCH (n1:Devices)-[r1:service_ticket_req]->(n2:Devices),
(n2)-[event:HostEvents]->(n2),
(n1)-[r3:AuthEvents]->(n3:Devices),
(n1)-[r4:Netflow]->(n3)
WHERE event.event_id = 4688 AND event.process_name = "lsass.exe"
AND r3.event_id = 4624
AND r3.src = r1.src
AND r3.destination = r1.service_name
AND r3.authentication_package = "Kerberos"
AND r1.epoch_time < r3.epoch_time
AND r3.epoch_time - r1.epoch_time < 3600
AND r4.src_device = r1.src
AND r4.dst_device = r1.service_name
AND r3.epoch_time < r4.epoch_time
AND r4.epoch_time - r3.epoch_time < 200
AND r4.duration < 300
RETURN DISTINCT r1.epoch_time, r1.log_host, r1.src, r1.service_name, r1.authentication_package, r3.epoch_time, r4.epoch_time, r4.duration
INTO answers7
Launched job 778
Number of answers: 0
CPU times: user 7.22 ms, sys: 106 µs, total: 7.33 ms
Wall time: 496 ms


In [132]:
# retrieve the answer rows to the client in a pandas frame
data = answer_table_2.get_data_pandas()
data[0:10]

Unnamed: 0,r1_epoch_time,r1_log_host,r1_src,r1_service_name,r1_authentication_package,r3_epoch_time,r4_epoch_time,r4_duration


In [None]:
# retrieve the answer rows to the client in a pandas frame
data = answer_table_2.get_data_pandas()
data[0:10]

In [25]:
answer_table_1.detail

NameError: name 'answer_table_1' is not defined

In [24]:
run_query(q)

XgtInternalError: Low level error occured. More information in the 'detail' field.