Hypothesis:

APTs adjust TTPs in response to defensive measures and signs of detection.

# Linux Host Logs

## `audit`

In [1]:
import pandas as pd
import numpy as np
import os
import re

In [2]:
# Audit logs live in a sibling directory of the notebook's working directory:
#   <parent of cwd>/data/unraveled-apt/host-logs/audit/
# The final '' component keeps the trailing separator that `path` always had.
path = os.path.join(os.path.dirname(os.getcwd()),
                    'data', 'unraveled-apt', 'host-logs', 'audit', '')
# os.listdir() returns entries in arbitrary order, so sort them: otherwise the
# row order of the concatenated frame (and every positional .iloc lookup on it)
# can change from one machine or run to the next.
# Only regular files are kept, because a sub-directory cannot be read as CSV.
files = [
    os.path.join(path, x)
    for x in sorted(os.listdir(path))
    if os.path.isfile(os.path.join(path, x))
]
# Filled by the loading loop: file name -> DataFrame.
d = dict()

In [3]:
# Read every semicolon-delimited audit file into `d`, keyed by its bare file name.
for f in files:
    key = os.path.basename(f)
    d[key] = pd.read_csv(f, sep=';')

# Stack the per-file frames into a single frame with a fresh 0..n-1 index.
audit_df = pd.concat(list(d.values()), ignore_index=True)

In [4]:
# The per-file frames were concatenated into audit_df; drop the dict that held them.
del d

In [5]:
audit_df.shape  # (rows, columns) of the concatenated audit frame

(264320, 5)

On the last row, there appears to be some leading whitespace in the LogEvent column. Let's handle that:

In [6]:
# Trim leading/trailing whitespace from every text column of audit_df.
# Columns that do not hold strings make the `.str` accessor raise
# AttributeError; those are skipped on purpose. Catching only that exception
# (rather than a bare `except:`) lets genuine problems such as
# KeyboardInterrupt or MemoryError surface instead of being silently swallowed.
for col in audit_df.columns:
    try:
        audit_df[col] = audit_df[col].str.strip()
    except AttributeError:
        continue

Because this DataFrame was built by concatenating several files, its index had to be rebuilt; passing `ignore_index=True` to `pd.concat` above already took care of that.

In [7]:
# Captures the single-quoted payload that follows `msg=` (quotes included).
# `.*` is greedy, so the capture runs up to the last `'` on the line.
pat = re.compile(r"msg=('.*')")

In [8]:
# `pat` is the compiled msg regex from the previous cell; its pattern string is
# reused here so the expression is written only once.
# extract() keeps just the captured payload (a one-column frame, column label 0);
# replace() returns the log text with the whole `msg='...'` chunk removed.
msg_df = audit_df['LogEvent'].str.extract(pat.pattern)
no_msg = audit_df['LogEvent'].str.replace(pat.pattern, repl='', regex=True)

We are having to do some funky business to retain the `msg` field. If we were to split the output below on spaces or `=`, it would mangle the message and split it up in a less than ideal way for maintaining data integrity.

In [9]:
# Sixth row of the only column: one extracted msg payload, quotes included.
msg_df.iat[5, 0]

'\'op=PAM:session_close acct="root" exe="/usr/bin/sudo" hostname=? addr=? terminal=/dev/pts/5 res=success\''

Here is what the `no_msg` series looks like now. We can proceed with converting this into a DataFrame.

In [10]:
# Same row after and before the msg payload was removed, one per line.
print(no_msg.iloc[111], audit_df.LogEvent.iloc[111], sep='\n')

type=USER_START ts=1621862701.432 tsid=600 pid=15765 uid=0 auid=0 ses=3575 
type=USER_START ts=1621862701.432 tsid=600 pid=15765 uid=0 auid=0 ses=3575 msg='op=PAM:session_open acct="root" exe="/usr/sbin/cron" hostname=? addr=? terminal=cron res=success'


In [12]:
# Split each msg-free log line on whitespace into a list of `key=value` tokens.
logs = no_msg.str.split()

In [13]:
# Peek at the token list of the final row.
logs.iloc[-1]

['type=USER_START',
 'ts=1625992741.358',
 'tsid=78499',
 'pid=789481',
 'uid=0',
 'auid=1000',
 'ses=5754',
 'subj==unconfined',
 'UID="root"',
 'AUID="ubuntu"']

In [14]:
def _parse_kv_tokens(tokens):
    """Turn a list of ``key=value`` tokens into a ``{key: value}`` dict.

    Each token is split on its FIRST ``=`` only, so a value that itself
    contains ``=`` is kept whole (``'subj==unconfined'`` becomes
    ``{'subj': '=unconfined'}``; splitting on every ``=`` stored ``''``).
    Tokens with no ``=`` at all are skipped instead of raising IndexError.
    When a key repeats, the last occurrence wins.
    """
    parsed = {}
    for token in tokens:
        key, sep, value = token.partition('=')
        if sep:  # sep is '' when the token contained no '='
            parsed[key] = value
    return parsed


# Row index -> {field: value}; Series.to_dict() keys the result by row index.
expand_logs = logs.apply(_parse_kv_tokens).to_dict()
dict(list(expand_logs.items())[:2])  # logs are expanded to a dictionary of dictionaries

{0: {'type': 'DAEMON_START',
  'ts': '1621837767.969',
  'tsid': '9329',
  'op': 'start',
  'ver': '2.8.2',
  'format': 'raw',
  'kernel': '5.3.0-40-generic',
  'auid': '4294967295',
  'pid': '13687',
  'uid': '0',
  'ses': '4294967295',
  'subj': 'unconfined',
  'res': 'success'},
 1: {'type': 'CONFIG_CHANGE',
  'ts': '1621837767.983',
  'tsid': '489',
  'op': 'set',
  'audit_backlog_limit': '8192',
  'old': '64',
  'auid': '4294967295',
  'ses': '4294967295',
  'res': '1'}}

In [15]:
# A dict of dicts is read column-wise (outer keys become columns), so transpose
# to get one row per log line and one column per field name. Fields a line does
# not have are left as missing values.
log_df = pd.DataFrame(expand_logs).T

# The intermediate dict is no longer needed; release it.
del expand_logs

# Preview goes last: a notebook cell only displays its final expression, so a
# `head()` call placed before the `del` produced no output.
log_df.head(6)

In [16]:
log_df.columns  # no msg column: the msg payload was removed from the text before it was parsed

Index(['type', 'ts', 'tsid', 'op', 'ver', 'format', 'kernel', 'auid', 'pid',
       'uid', 'ses', 'subj', 'res', 'audit_backlog_limit', 'old',
       'audit_failure', 'audit_backlog_wait_time', 'old-auid', 'tty',
       'old-ses', 'apparmor', 'operation', 'profile', 'name', 'comm',
       'requested_mask', 'denied_mask', 'fsuid', 'ouid', 'gid', 'exe', 'sig',
       'dev', 'prom', 'old_prom', 'AUID', 'UID', 'OLD-AUID', 'ID', 'GID',
       'info'],
      dtype='object')

In [17]:
log_df.shape  # one parsed row per log line, so the row count should equal audit_df's

(264320, 41)

In [28]:
# Create DataFrame of labeled audit log data by placing three frames side by side:
#   msg_df   - the retained msg field (its only column is labelled 0)
#   log_df   - the rest of the log, parsed
#   audit_df - every column after the first, since the first was just expanded
# The integer column label 0 coming from msg_df is renamed to 'msg'.
labeled_audit_df = pd.concat(
    [msg_df, log_df, audit_df.iloc[:, 1:]],
    axis=1,
).rename(columns={0: 'msg'})

In [30]:
# Reorder the columns to put the msg field in position 11, leaving every other
# column where it is.
# 'msg' is removed by label rather than by "drop whatever column is first", so
# re-running this cell gives the same layout instead of discarding the new
# first column and selecting msg twice.
msg_position = 11
reordered_columns = labeled_audit_df.columns.drop('msg').insert(msg_position, 'msg')
labeled_audit_df = labeled_audit_df[reordered_columns]

In [32]:
# Preview the first rows of the combined frame (parsed fields plus the label columns).
labeled_audit_df.head()

Unnamed: 0,type,ts,tsid,op,ver,format,kernel,auid,pid,uid,...,AUID,UID,OLD-AUID,ID,GID,info,Activity,Stage,DefenderResponse,Signature
0,DAEMON_START,1621837767.969,9329,start,2.8.2,raw,5.3.0-40-generic,4294967295,13687.0,0.0,...,,,,,,,Normal,Benign,Benign,
1,CONFIG_CHANGE,1621837767.983,489,set,,,,4294967295,,,...,,,,,,,Normal,Benign,Benign,
2,CONFIG_CHANGE,1621837767.983,490,set,,,,4294967295,,,...,,,,,,,Normal,Benign,Benign,
3,CONFIG_CHANGE,1621837767.983,491,set,,,,4294967295,,,...,,,,,,,Normal,Benign,Benign,
4,SERVICE_START,1621837767.987,492,,,,,4294967295,1.0,0.0,...,,,,,,,Normal,Benign,Benign,


----

## `auth`

-----

# Security.evtx