In [35]:
import xgt
import os
import pandas

from platform import python_version
print (python_version())

3.7.4


In [36]:
if os.environ.get('https_proxy'):
 del os.environ['https_proxy']
if os.environ.get('http_proxy'):
 del os.environ['http_proxy']

In [37]:
conn=xgt.Connection()
conn.server_version

'1.3.0'

In [38]:
try:
  devices = conn.get_vertex_frame('Devices')
except xgt.XgtNameError:
  devices = conn.create_vertex_frame(
      name='Devices',
      schema=[['device', xgt.TEXT]],
      key='device')
devices

<xgt.graph.VertexFrame at 0x7f7ec8213350>

In [39]:
try:
  netflow = conn.get_edge_frame('Netflow')
except xgt.XgtNameError:
  netflow = conn.create_edge_frame(
      name='Netflow',
      schema=[['epoch_time', xgt.INT],
              ['duration', xgt.INT],
              ['src_device', xgt.TEXT],
              ['dst_device', xgt.TEXT],
              ['protocol', xgt.INT],
              ['src_port', xgt.INT],
              ['dst_port', xgt.INT],
              ['src_packets', xgt.INT],
              ['dst_packets', xgt.INT],
              ['src_bytes', xgt.INT],
              ['dst_bytes', xgt.INT]],
      source=devices,
      target=devices,
      source_key='src_device',
      target_key='dst_device')
netflow

<xgt.graph.EdgeFrame at 0x7f7ec821ec50>

In [40]:
try:
  host_events = conn.get_edge_frame('HostEvents')
except xgt.XgtNameError:
  host_events = conn.create_edge_frame(
      name='HostEvents',
      schema=[['epoch_time', xgt.INT],
              ['event_id', xgt.INT],
              ['log_host', xgt.TEXT],
              ['user_name', xgt.TEXT],
              ['domain_name', xgt.TEXT],
              ['logon_id', xgt.INT],
              ['process_name', xgt.TEXT],
              ['process_id', xgt.INT],
              ['parent_process_name', xgt.TEXT],
              ['parent_process_id', xgt.INT]],
           source=devices,
           target=devices,
           source_key='log_host',
           target_key='log_host')
host_events

<xgt.graph.EdgeFrame at 0x7f7ec8216510>

In [41]:
try:
  auth_events = conn.get_edge_frame('AuthEvents')
except xgt.XgtNameError:
  auth_events = conn.create_edge_frame(
           name='AuthEvents',
           schema = [['epoch_time',xgt.INT],
                     ['event_id',xgt.INT],
                     ['log_host',xgt.TEXT],
                     ['logon_type',xgt.INT],
                     ['logon_type_description',xgt.TEXT],
                     ['user_name',xgt.TEXT],
                     ['domain_name',xgt.TEXT],
                     ['logon_id',xgt.INT],
                     ['subject_user_name',xgt.TEXT],
                     ['subject_domain_name',xgt.TEXT],
                     ['subject_logon_id',xgt.TEXT],
                     ['status',xgt.TEXT],
                     ['src',xgt.TEXT],
                     ['service_name',xgt.TEXT],
                     ['destination',xgt.TEXT],
                     ['authentication_package',xgt.TEXT],
                     ['failure_reason',xgt.TEXT],
                     ['process_name',xgt.TEXT],
                     ['process_id',xgt.INT],
                     ['parent_process_name',xgt.TEXT],
                     ['parent_process_id',xgt.INT]],
            source = devices,
            target = devices,
            source_key = 'src',
            target_key = 'destination')
auth_events

<xgt.graph.EdgeFrame at 0x7f7ec8231090>

In [42]:
# Utility to print the sizes of data currently in xGT
def print_data_summary():
  print('Devices (vertices): {:,}'.format(devices.num_vertices))
  print('Netflow (edges): {:,}'.format(netflow.num_edges))
  print('Host events (edges): {:,}'.format(host_events.num_edges))
  print('Authentication events (edges): {:,}'.format(auth_events.num_edges))
  print('Total (edges): {:,}'.format(
      netflow.num_edges + host_events.num_edges + auth_events.num_edges))
    
print_data_summary()

Devices (vertices): 0
Netflow (edges): 0
Host events (edges): 0
Authentication events (edges): 0
Total (edges): 0


In [43]:
%%time

# Load the HostEvents event data:
if host_events.num_edges == 0:
    # urls = ["https://datasets.trovares.com/LANL/xgt/wls_day-85_1v.csv"]
    # urls = ["xgtd://nvme_data1/data_1v/wls_day-{:02d}_1v.csv".format(_) for _ in range(2,91)]
    urls = ["xgtd:///nvme_data1/data_1v/wls_day-85_1v.csv"]
    # urls = ["xgtd://data_1v/wls_day-11_1v.csv"]
    host_events.load(urls)
    print_data_summary()

Devices (vertices): 10,324
Netflow (edges): 0
Host events (edges): 18,637,483
Authentication events (edges): 0
Total (edges): 18,637,483
CPU times: user 86.6 ms, sys: 45 ms, total: 132 ms
Wall time: 14.8 s


In [44]:
%%time

# Load the AuthEvents event data:
if auth_events.num_edges == 0:
    # urls = ["https://datasets.trovares.com/LANL/xgt/wls_day-85_2v.csv"]
    # urls = ["xgtd://nvme_data9/data_2v/wls_day-{:02d}_2v.csv".format(_) for _ in range(2,91)]
    urls = ["xgtd:///nvme_data9/data_2v/wls_day-85_2v.csv"]
    #urls = ["xgtd://data_2v/wls_day-11_2v.csv"]
    auth_events.load(urls)
    print_data_summary()

Devices (vertices): 12,288
Netflow (edges): 0
Host events (edges): 18,637,483
Authentication events (edges): 47,790,045
Total (edges): 66,427,528
CPU times: user 235 ms, sys: 91.3 ms, total: 326 ms
Wall time: 32.5 s


In [20]:
%%time

# Load the netflow data:
if netflow.num_edges == 0:
    #urls = ["https://datasets.trovares.com/LANL/xgt/nf_day-85.csv"]
    # urls = ["xgtd://nvme_data1/data_nf/nf_day-{:02d}.csv".format(_) for _ in range(2,91)]
    urls = ["xgtd:///nvme_data1/data_nf/nf_day-85.csv"]
    # urls = ["xgtd://nvme_data1/data_nf/nf_day-11.csv"]
    netflow.load(urls)
    print_data_summary()

Devices (vertices): 137,812
Netflow (edges): 235,661,328
Host events (edges): 18,637,483
Authentication events (edges): 47,790,045
Total (edges): 302,088,856
CPU times: user 358 ms, sys: 201 ms, total: 559 ms
Wall time: 1min 27s


In [61]:
# Utility function to launch queries and show job number:
#   The job number may be useful if a long-running job needs
#   to be canceled.

def run_query(query, table_name = "answers", drop_answer_table=True, show_query=False):
    if drop_answer_table:
        conn.drop_frame(table_name)
    if query[-1] != '\n':
        query += '\n'
    query += 'INTO {}'.format(table_name)
    if show_query:
        print("Query:\n" + query)
    job = conn.schedule_job(query)
    print("Launched job {}".format(job.id))
    conn.wait_for_job(job)
    table = conn.get_table_frame(table_name)
    return table

In [49]:
# Generate a new edge frame for holding only the tgt_req_events edges
import time
query_start_time = time.time()

conn.drop_frame('tgt_req_events')
TGT_Req = conn.create_edge_frame(
            name='tgt_req_events',
            schema=auth_events.schema,
            source=devices,
            target=devices,
            source_key='src',
            target_key='destination')
TGT_Req

<xgt.graph.EdgeFrame at 0x7f7ec8249e10>

In [55]:
# Generate a new edge frame for holding only the service_ticket_req edges
import time
query_start_time = time.time()

conn.drop_frame('service_ticket_req')
Service_Req = conn.create_edge_frame(
            name='service_ticket_req',
            schema=auth_events.schema,
            source=devices,
            target=devices,
            source_key='src',
            target_key='destination')
Service_Req

<xgt.graph.EdgeFrame at 0x7f7ec81db110>

In [52]:
%%time

#Filter out all the Nodes in which 4768(TGT request has been logged)

q = """
MATCH (n1:Devices)-[r:AuthEvents]->(n2:Devices)
WHERE r.event_id = 4768
CREATE (n1)-[r1:tgt_req_events {epoch_time:r.epoch_time, 
event_id:r.event_id,
log_host:r.log_host, 
logon_type:r.logon_type,
logon_type_description:r.logon_type_description, 
user_name:r.user_name,
domain_name:r.domain_name, 
logon_id:r.logon_id,
subject_user_name:r.subject_user_name, 
subject_domain_name:r.subject_domain_name,
subject_logon_id:r.subject_logon_id, 
status:r.status,
service_name:r.service_name,
authentication_package:r.authentication_package,
failure_reason: r.failure_reason, 
process_name:r.process_name,
process_id:r.process_id, 
parent_process_name:r.parent_process_name,
parent_process_id:r.parent_process_id}]->(n2)
RETURN count(*)
"""
data = run_query(q)
print('Number of answers: {:,}'.format(data.get_data()[0][0]))

Launched job 660
Number of answers: 875,992
CPU times: user 58.9 ms, sys: 24.9 ms, total: 83.8 ms
Wall time: 1min 30s


In [58]:
%%time

#Filter out all the Nodes in which 4769(Service ticket request has been logged)

q = """
MATCH (n1:Devices)-[r:AuthEvents]->(n2:Devices)
WHERE r.event_id = 4769
CREATE (n1)-[r1:service_ticket_req {epoch_time:r.epoch_time, event_id:r.event_id,
 log_host:r.log_host, logon_type:r.logon_type,
 logon_type_description:r.logon_type_description, user_name:r.user_name,
 domain_name:r.domain_name, logon_id:r.logon_id,
 subject_user_name:r.subject_user_name, subject_domain_name:r.subject_domain_name,
 subject_logon_id:r.subject_logon_id, status:r.status,
 service_name:r.service_name,
 authentication_package:r.authentication_package,
 failure_reason: r.failure_reason, process_name:r.process_name,
 process_id:r.process_id, parent_process_name:r.parent_process_name,
 parent_process_id:r.parent_process_id}]->(n2)
RETURN count(*)
"""
data = run_query(q)
print('Number of answers: {:,}'.format(data.get_data()[0][0]))

Launched job 896
Number of answers: 2,271,788
CPU times: user 55.6 ms, sys: 19.4 ms, total: 75 ms
Wall time: 10.7 s


In [59]:
data=None
if service_ticket_req.num_edges == 0:
    print("service_ticket_req is empty")
elif service_ticket_req.num_edges <= 1000:
    data = service_ticket_req.get_data_pandas()
else:
    data = 'service_ticket_req (edges): {:,}'.format(service_ticket_req.num_edges)
data

NameError: name 'service_ticket_req' is not defined

In [62]:
# Delete all the service_ticket_req relationships which does not have tgt_req_events with in the interval

q = """
MATCH (n1:Devices)-[r1:service_ticket_req]->(n2:Devices),
      (n1)-[r2:tgt_req_events]->(n2)
WHERE r1.src = r2.src
AND r1.epoch_time > r2.epoch_time
AND r1.epoch_time - r2.epoch_time < 60
DELETE r1, r2
RETURN count(*)
"""
answer_table = run_query(q)
print('Number of answers: {:,}'.format(answer_table.num_rows))

XgtInternalError: Low level error occured. More information in the 'detail' field.