# Imports and Definitions

In [None]:
# imports
import pandas as pd
import numpy as np
import os.path
import json
import re
import collections

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# read file lines into a list
def get_lines_from_file(filename):
    """Return the lines of ``filename`` as a list, each with surrounding whitespace stripped."""
    stripped_lines = []
    with open(filename) as handle:
        for raw_line in handle:
            stripped_lines.append(raw_line.strip())
    return stripped_lines

# display all rows in a dataframe
def displaydf(df):
    """Show ``df`` via IPython's ``display`` with pandas' row and column limits lifted.

    The display options are changed only for the duration of the call.
    """
    unlimited = ("display.max_rows", None, "display.max_columns", None)
    with pd.option_context(*unlimited):
        display(df)

# Load Data

In [None]:
# Edit the data filepath accordingly to read in the input files
data_filepath = './drive/My Drive/How does Endpoint Detection use the MITRE ATT&CK Framework?/Data'

In [None]:
techniques = pd.read_csv(os.path.join(data_filepath, "techniques.csv"))

In [None]:
def parse_threat_column(threat):
    """Extract the technique ids referenced by one ``rule.threat`` cell.

    Parameters
    ----------
    threat : str or missing value
        Text form of a list of dicts using single quotes; each dict may carry
        a ``'technique'`` list whose items have an ``'id'`` key.

    Returns
    -------
    list of str or float
        The distinct technique ids in sorted order, or ``np.nan`` when the
        cell is not a string or yields no ids.
    """
    # Missing cells arrive as NaN (or None); only strings can be parsed.
    if isinstance(threat, str):
        # The cell uses single quotes; swap them so the text parses as JSON.
        threat_entries = json.loads(threat.replace("'", '"'))
        technique_ids = set()
        for entry in threat_entries:
            for technique in entry.get('technique', []):
                technique_ids.add(technique['id'])
        if technique_ids:
            # Sorted so repeated runs produce the same list.
            return sorted(technique_ids)

    # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
    return np.nan

def load_ids(x):
    """Parse a single-quoted list literal stored as text; pass any non-string value through unchanged."""
    if type(x) is not str:
        return x
    # Single quotes are swapped for double quotes so the text is valid JSON.
    json_text = x.replace('\'', '"')
    return json.loads(json_text)

def drop_subtechniques(x):
    """Reduce each id in ``x`` to its first five characters and return the distinct results as a set.

    A float input (how a missing cell shows up) is returned unchanged.
    """
    if type(x) != float:
        return {technique_id[:5] for technique_id in x}
    return x

def map_tactic(technique):
    """Look up the ``tactics`` value recorded for ``technique`` in the global ``techniques`` frame.

    Returns the first distinct ``tactics`` value among rows whose
    ``technique`` column equals the argument. If the lookup fails (for
    example, no row matches), a message is printed and ``"unknown"`` is
    returned.
    """
    try:
        matches = techniques.loc[techniques['technique'] == technique, 'tactics'].drop_duplicates()
        return matches.values[0]
    except Exception:
        # Still best-effort, but unlike a bare ``except:`` this lets
        # KeyboardInterrupt and SystemExit propagate.
        print("error on technique", technique)
        return "unknown"

def get_sigma_techniques(tags):
    """Return the known technique ids named by a rule's ``attack.t…`` tags.

    Parameters
    ----------
    tags : iterable of str, or float
        Tag strings such as ``'attack.t1059.001'``. A float (how a missing
        cell shows up) yields an empty list.

    Returns
    -------
    list of str
        Sorted, de-duplicated upper-case ids (characters 7-11 of each
        matching tag, so any sub-technique suffix is dropped) that also
        appear in the ``technique`` column of the global ``techniques`` frame.
    """
    if type(tags) == float:
        return []
    candidates = {tag[7:12].upper() for tag in tags if tag.startswith('attack.t')}
    if not candidates:
        return []
    # Build the lookup set once per call instead of once per tag.
    known_techniques = set(techniques['technique'])
    return sorted(candidates & known_techniques)


In [None]:
# Splunk: prefix the rule ids, reduce technique ids to their parent technique,
# and keep only rules that map to at least one technique.
splunk = pd.read_csv(os.path.join(data_filepath, "splunk_rules.csv"))
splunk['rule_index'] = splunk['rule_index'].apply(lambda x: 'splunk' + str(x))
splunk['mitre_attack_id'] = splunk['tags.mitre_attack_id'].apply(load_ids).apply(drop_subtechniques).apply(lambda x: list(x) if type(x) == set else [])
splunk = splunk[splunk['mitre_attack_id'].apply(lambda x: len(x) > 0)]

# Elastic: production rules only. ``.copy()`` makes the filtered frame independent,
# so the column assignments below do not raise SettingWithCopyWarning.
elastic = pd.read_csv(os.path.join(data_filepath, "elastic_rules.csv"))
elastic = elastic[elastic['metadata.maturity'] == 'production'].copy()
elastic['rule_index'] = elastic['rule_index'].apply(lambda x: 'elastic' + str(x))
elastic['mitre_attack_id'] = elastic['rule.threat'].apply(parse_threat_column)
elastic = elastic[pd.notna(elastic['mitre_attack_id'])]

# Sigma: the unnamed first CSV column becomes the rule id; keep the listed statuses only.
sigma = pd.read_csv(os.path.join(data_filepath, "sigma_rules.csv"))
sigma = sigma.rename({'Unnamed: 0': 'rule_index'}, axis=1)
sigma['rule_index'] = sigma['rule_index'].apply(lambda x: 'sigma' + str(x))
sigma = sigma[sigma['status'].isin(['experimental', 'test', 'stable'])].copy()
sigma['mitre_attack_id'] = sigma['tags'].apply(lambda tags: get_sigma_techniques(load_ids(tags)))
sigma = sigma[sigma['mitre_attack_id'].apply(lambda x: len(x) > 0)]

In [None]:
tactic_order = ['reconnaissance', 'resource-development', 'initial-access',
                'execution', 'persistence', 'privilege-escalation',
                'defense-evasion', 'credential-access', 'discovery',
                'lateral-movement', 'collection', 'command-and-control',
                'exfiltration', 'impact']

In [None]:
malware_families = get_lines_from_file(os.path.join(data_filepath, "malpedia.txt"))

In [None]:
software_df = pd.read_csv(os.path.join(data_filepath, "rulesets_software.csv"))
groups_df = pd.read_csv(os.path.join(data_filepath, "rulesets_groups.csv"))
campaigns_df = pd.read_csv(os.path.join(data_filepath, "rulesets_campaigns.csv"))

# Grouping rules by malicious entity

## Preprocessing

In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.corpus import words
from nltk.corpus import wordnet as wn
# Download the NLTK data packages: stop-word list, punkt tokenizer, word list, WordNet.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('words')
nltk.download('wordnet')

stopwords.words("english")[:10] # <-- preview the first ten English stopwords

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're"]

In [None]:
def preprocess_text(text: str, remove_stopwords: bool) -> str:
    """Normalise free text to lower-case, letters-only, single-spaced form.

    Parameters
    ----------
    text : str
        Raw text.
    remove_stopwords : bool
        When true, tokens found in NLTK's English stop-word list are dropped.

    Returns
    -------
    str
        The cleaned text: URLs removed, every run of non-letters collapsed to
        one space, lower-cased and stripped.
    """
    # Drop anything that starts with "http" up to the next whitespace.
    text = re.sub(r"http\S+", "", text)
    # Collapse every run of non-letter characters into a single space.
    text = re.sub("[^A-Za-z]+", " ", text)
    if remove_stopwords:
        # Load the stop-word list once into a set; the original re-read the
        # list and scanned it linearly for every token.
        english_stopwords = set(stopwords.words("english"))
        tokens = [w for w in nltk.word_tokenize(text) if w.lower() not in english_stopwords]
        text = " ".join(tokens)
    return text.lower().strip()

## DPMeans


The `kmeans` and `dpmeans` classes below are adapted from https://github.com/DrSkippy/Python-DP-Means-Clustering

In [None]:
from pprint import pprint
import sys
import random
import math

class kmeans(object):
	"""Plain k-means over lists of numeric feature vectors, with an optional hold-out set.

	The last ``_xVal`` records of the input are treated as a cross-validation
	set: they are assigned to clusters and scored, but never used as initial
	centres or when recomputing centroids.
	"""

	def __init__(self, _X, _k, _xVal = 0, _stop=False):
		"""Store the data and pick ``_k`` random training records as initial centres.

		_X     -- sequence of records; each record is a sequence of feature values
		          (the feature count is taken from the first record).
		_k     -- number of clusters.
		_xVal  -- number of trailing records to hold out for cross-validation.
		          To use this you must randomize the input data.
		_stop  -- when True, run() stops as soon as the hold-out error stops
		          decreasing.
		"""
		self.nFeatures = len(_X[0])
		self.xValSize = _xVal
		self.allSize = len(_X)
		self.size = self.allSize - self.xValSize
		self.X = _X
		self.k = _k
		self.stop = _stop
		# Initialize group membership
		self.dataClusterId = [-1 for i in range(0, self.allSize)] # cluster id per record; -1 = not yet assigned
		self.clusters = {}
		idx = 0
		# initialize to k random data points
		# don't use a held-out (cross-validation) record as a start centre
		for i in random.sample(range(0, self.size), self.k):
			self.clusters[idx] = self.X[i]
			idx += 1
		# output records
		self.record = []
		self.errorRecord = []

	def dSquared(self, x, y):
		"""Return the squared Euclidean distance between ``x`` and ``y`` (paired with zip)."""
		dist2 = 0.0
		for j,k in zip(x,y):
			dist2 += (j - k)**2
		return dist2

	def error(self):
		"""Return ``(training_error, holdout_error)``.

		Each value is the mean squared distance of the records to their assigned
		centroid; the hold-out value stays 0.0 when its summed distance is 0.
		"""
		res = 0.0
		for i in range(0, self.size):
			res += self.dSquared(self.X[i], self.clusters[self.dataClusterId[i]])
		# error on non training data
		res1 = 0.0
		err1 = 0.0
		for i in range(self.size, self.allSize):
			res1 += self.dSquared(self.X[i], self.clusters[self.dataClusterId[i]])
		if res1 > 0.0:
			err1 = res1/self.xValSize
		return res/self.size, err1

	def nearestCluster(self, x):
		"""Return ``(cluster_id, distance)`` for the centroid closest to ``x``.

		The distance is the (non-squared) Euclidean distance. If no centroid is
		closer than ``sys.maxsize`` the id returned is ``-sys.maxsize``.
		"""
		cmin = sys.maxsize
		cidx = -sys.maxsize
		for j in self.clusters:
			dist = math.sqrt(self.dSquared(x, self.clusters[j]))
			if dist < cmin:  # record closest centroid
				cmin = dist
				cidx = j
		return cidx, cmin

	def assign(self):
		"""Assign every record (training and hold-out) to its nearest centroid."""
		for i in range(0, self.allSize):
			self.dataClusterId[i], dmin = self.nearestCluster(self.X[i])

	def updateClusters(self):
		"""Replace each centroid with the mean of its training members.

		A cluster with no members keeps its previous centroid.
		"""
		ctemp = {} # per cluster: one running sum per feature, then a member count in the last slot
		for j in range(0, self.k):
			ctemp[j] = []
			for k in range(0, self.nFeatures):
				ctemp[j].append(0.0) # init sums
			ctemp[j].append(0) # init counter
		# only calculate clusters on training, not cross-validation set
		for i in range(0,self.size):
			for j in range(0, self.nFeatures):
				ctemp[self.dataClusterId[i]][j] += self.X[i][j]
			ctemp[self.dataClusterId[i]][self.nFeatures] += 1 # count
		for c in self.clusters:
			if ctemp[c][self.nFeatures] > 0:
				self.clusters[c] = [ ctemp[c][k]/ctemp[c][self.nFeatures] for k in range(0,self.nFeatures)]
			else:
				# no members in this cluster
				pass
		return

	def run(self, nmax = 100, eps = 1e-7):
		"""Iterate assign/update for at most ``nmax`` rounds and return the last ``(error, holdout_error)``.

		Stops early when the training error changes by less than ``eps``, or,
		if the instance was created with ``_stop=True``, when the hold-out
		error does not decrease. Progress messages are written to stderr.
		"""
		# NOTE(review): with nmax=0 the loop body never runs, so ``iter`` and ``err`` are unbound below.
		prev = 0.0
		prevXVal = float(sys.maxsize)
		for iter in range(0,nmax):
			# update assignments
			self.assign()
			# calculate error
			err, errXVal = self.error()
			# optional early stop: hold-out error is no longer decreasing
			if self.stop and errXVal - prevXVal >= 0.0:
				sys.stderr.write("Cross-validation error increasing at step %d\n"%iter)
				break
			prevXVal = errXVal
			# convergence check on the training error
			if abs(err-prev) < eps:
				sys.stderr.write("Tolerance reached at step %d\n"%iter)
				break
			prev = err
			# going on...
			self.errorRecord.append((iter, err, errXVal))
			self.output(str(iter))
			self.updateClusters()
		sys.stderr.write("Iterations completed: %d\n"%iter)
		sys.stderr.write("Final error: %f\n"%prev)
		sys.stderr.write("Final cross-validation error: %f\n"%prevXVal)
		# This is a step past stop if using cross-validation...
		self.output("Final")
		return err, errXVal

	def output(self, iter):
		"""Append a snapshot to ``self.record``: one row per training record, hold-out record and centroid.

		Each row is the stringified feature values, then the cluster id, then a
		tag built from ``iter``.
		"""
		for i in range(0,self.size):
			self.record.append([str(y) for y in self.X[i]] + [str(self.dataClusterId[i])] + ["Iter-%s"%iter])
		for i in range(self.size, self.allSize):
			self.record.append([str(y) for y in self.X[i]] + [str(self.dataClusterId[i])] + ["Xval-Iter-%s"%iter])
		for k in self.clusters:
			self.record.append([str(y) for y in self.clusters[k]] + [str(k)] + ["Cent-Iter-%s"%iter])

	def getOutput(self):
		"""Yield the rows accumulated by output()."""
		for x in self.record:
			yield x

	def getErrors(self):
		"""Yield the ``(iteration, error, holdout_error)`` tuples recorded by run()."""
		for x in self.errorRecord:
			yield x


class dpmeans(kmeans):
	"""k-means variant that opens a new cluster whenever a training record is farther than ``_lam`` from every centroid."""

	def __init__(self, _X, _lam = 1, _xVal = 0, _stop=False):
		# Begin with a single cluster; assign() adds more as needed.
		kmeans.__init__(self, _X, 1, _xVal, _stop)
		self.lam = _lam

	def assign(self):
		"""Assign records to clusters, creating a cluster for each training record beyond ``lam``."""
		training_rows = range(self.size)
		held_out_rows = range(self.size, self.allSize)
		for row in training_rows:
			nearest_id, nearest_dist = self.nearestCluster(self.X[row])
			if nearest_dist > self.lam:
				# Too far from every centre: this record seeds a new cluster.
				new_id = self.k
				self.k = new_id + 1
				self.clusters[new_id] = self.X[row]
				self.dataClusterId[row] = new_id
			else:
				self.dataClusterId[row] = nearest_id
		# Held-out records only join existing clusters; they never create one.
		for row in held_out_rows:
			self.dataClusterId[row] = self.nearestCluster(self.X[row])[0]

	def error(self):
		"""Return the k-means errors, each increased by the penalty ``lam * k``."""
		base_err, base_xval_err = kmeans.error(self)
		penalty = self.lam * self.k
		return base_err + penalty, base_xval_err + penalty

## KeyBERT Extractor

In [None]:
! pip install keybert

Collecting keybert
  Downloading keybert-0.8.4.tar.gz (29 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting sentence-transformers>=0.3.8 (from keybert)
  Downloading sentence_transformers-3.0.0-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.7/224.7 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers>=0.3.8->keybert)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers>=0.3.8->keybert)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11.0->sentence-transformers>=0.3.8->keybert)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.11.0->

In [None]:
from keybert import KeyBERT

# initiate BERT outside of functions
bert = KeyBERT()

def keybert_extractor(text):
    """Return the result of ``bert.extract_keywords`` for ``text``: up to five single-word keywords, English stop words excluded."""
    # NOTE(review): KeyBERT appears to apply `diversity` only when `use_mmr=True`;
    # confirm whether MMR was intended here.
    extraction_options = {
        "keyphrase_ngram_range": (1, 1),
        "stop_words": "english",
        "top_n": 5,
        "diversity": 0.7,
    }
    return bert.extract_keywords(text, **extraction_options)


  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

### Elastic

In [None]:
# Per-rule keyword extraction for the Elastic descriptions.
# elastic_embeddings: position in the description column -> {keyword: score}
# elastic_keywords:   one keyword list per description, in column order
elastic_embeddings = {}
elastic_keywords = []
elastic_descriptions = elastic['rule.description'].replace(np.nan, "")
for position, description in enumerate(elastic_descriptions):
    if type(description) != str:
        # Non-text cell: keep the lists aligned with an empty entry.
        elastic_keywords.append([])
        continue
    scored_keywords = keybert_extractor(description)
    elastic_embeddings[position] = {keyword: score for keyword, score in scored_keywords}
    elastic_keywords.append([keyword for keyword, _ in scored_keywords])


### Splunk

In [None]:
# Per-rule keyword extraction for the Splunk descriptions.
# splunk_embeddings: position in the description column -> {keyword: score}
# splunk_keywords:   one keyword list per description, in column order
splunk_embeddings = {}
splunk_keywords = []
splunk_descriptions = splunk['description'].replace(np.nan, "")
for position, description in enumerate(splunk_descriptions):
    if type(description) != str:
        # Non-text cell: keep the lists aligned with an empty entry.
        splunk_keywords.append([])
        continue
    scored_keywords = keybert_extractor(description)
    splunk_embeddings[position] = {keyword: score for keyword, score in scored_keywords}
    splunk_keywords.append([keyword for keyword, _ in scored_keywords])

## Combined grouping

In [None]:
def find_cve(row_str):
    """Return the first CVE-style identifier in ``row_str``, or ``None`` if there is none.

    The pattern is ``CVE``, any single character, four digits, any single
    character, then four to seven digits; matching is case-sensitive.
    """
    # Raw string: ``\d`` in a normal string literal is an invalid escape sequence.
    cve_match = re.search(r'CVE.\d{4}.\d{4,7}', row_str)
    return cve_match.group(0) if cve_match else None

def annotate_rules(rules_df, annotations, terms, labels=None, fields=None, prefix=''):
    """Tag each rule whose JSON text contains any of ``terms``; results are added to ``annotations`` in place.

    Parameters
    ----------
    rules_df : pandas.DataFrame
        Rules to scan; must have a ``rule_index`` column.
    annotations : dict
        Mutated in place: maps a rule id to the set of labels found for it.
    terms : iterable of str
        Regular-expression fragments, each matched case-insensitively between
        word boundaries. They are not escaped, so regex metacharacters in a
        term keep their special meaning.
    labels : dict, optional
        Maps a term to the label to record when it matches. Without an entry,
        the matched text is recorded in lower case.
    fields : list of str, optional
        Columns to search. When empty or omitted, the whole row is searched.
    prefix : str, optional
        When non-empty, prepended to ``str(rule_index)`` to form the key.
    """
    labels = labels if labels is not None else {}
    # Compile every term once, up front, instead of rebuilding it for each row.
    compiled_terms = [(term, re.compile(rf'\b{term}\b', re.I)) for term in terms]
    for _, row in rules_df.iterrows():
        row_index = prefix + str(row.rule_index) if prefix else row.rule_index
        # Search the JSON serialisation of the row (or of the chosen columns only).
        row_str = row[fields].to_json() if fields else row.to_json()
        for term, pattern in compiled_terms:
            term_match = pattern.search(row_str)
            if term_match is None:
                continue
            label = labels[term] if term in labels else term_match.group(0).lower()
            annotations.setdefault(row_index, set()).add(label)

In [None]:
splunk_annotations = {}
elastic_annotations = {}

In [None]:
# Build the term list once: a CVE pattern plus every software, group and malware-family name.
# The CVE pattern is a raw string because ``\d`` is an invalid escape in a normal literal.
entity_terms = [r'CVE.\d{4}.\d{4,7}'] + list(software_df.software.values) + list(groups_df.group.values) + malware_families
annotate_rules(splunk, splunk_annotations, entity_terms)
annotate_rules(elastic, elastic_annotations, entity_terms)

In [None]:
# Columns searched per ruleset for each scope; an empty list means "search the whole row".
splunk_scope_fields = {'row': [], 'name': ['name'], 'text': ['name', 'description']}
elastic_scope_fields = {'row': [], 'name': ['rule.name'], 'text': ['rule.name', 'rule.description']}

# (terms, labels, scope), listed in the order the annotations are applied.
keyword_annotation_specs = [
    (['Signed Binary'], {}, 'row'),
    (['CloudTrail'], {}, 'row'),
    (['Kubernetes'], {}, 'row'),
    (['sudo'], {}, 'row'),
    (['msbuild'], {}, 'row'),
    (['powershell'], {}, 'text'),
    (['O365'], {}, 'row'),
    (['gsuite'], {}, 'name'),
    (['github'], {}, 'name'),
    (['Elastic Endgame'], {}, 'text'),
    (['wmi', 'wmic'], {'wmic': 'wmi'}, 'text'),
    (['lsass'], {}, 'text'),
    (['solarwinds', 'solarwind', 'solorigate'], {'solarwind': 'solarwinds'}, 'text'),
    (['GTFOBin'], {}, 'row'),
    (['rundll32'], {}, 'row'),
    (['net.exe'], {}, 'row'),
    (['lolbin'], {}, 'text'),
    (['regasm'], {}, 'text'),
]

for keyword_terms, keyword_labels, keyword_scope in keyword_annotation_specs:
    annotate_rules(splunk, splunk_annotations, keyword_terms, labels=keyword_labels, fields=splunk_scope_fields[keyword_scope])
    annotate_rules(elastic, elastic_annotations, keyword_terms, labels=keyword_labels, fields=elastic_scope_fields[keyword_scope])

In [None]:
# Merge both annotation maps under string keys; Elastic first, then Splunk
# (a later entry replaces an earlier one with the same key).
all_annotations = {}
for source_annotations in (elastic_annotations, splunk_annotations):
    all_annotations.update((str(rule_id), rule_labels) for rule_id, rule_labels in source_annotations.items())

In [None]:
# The annotation dicts are keyed by rule_index values, while the frames keep the
# row index from read_csv. Assigning pd.Series(dict) aligns on that row index and
# leaves the column all NaN, so look each rule up through its rule_index instead.
# Rules with no annotation get NaN.
splunk['annotations'] = splunk['rule_index'].map(splunk_annotations)
elastic['annotations'] = elastic['rule_index'].map(elastic_annotations)

# Case Studies

In [None]:
combined_rules_annotated_entities = pd.read_csv(os.path.join(data_filepath, "combined_rules_annotated_entities.csv")).set_index('rule_index')
# Presumably each cell is the text form of a set such as "{'a', 'b'}" (verify against the CSV):
# drop the outer braces, split on commas, then strip the quote around each item.
combined_rules_annotated_entities['annotations'] = combined_rules_annotated_entities['annotations'].apply(lambda x: set([s.strip()[1:-1] for s in x[1:-1].split(',')]) if type(x) == str else set())
# Collect every distinct label across all rules.
rule_label_df_index = set()
for annotations in combined_rules_annotated_entities['annotations']:
    rule_label_df_index.update(annotations)
# Rule x label indicator matrix (1 = rule carries the label, 0 otherwise).
# Labels are sorted so the column order is the same on every run; np.nan replaces
# the np.NaN alias that NumPy 2.0 removed.
rule_label_df = pd.DataFrame(combined_rules_annotated_entities['annotations'].apply(lambda x: {item: 1 for item in x} if type(x) == set else np.nan).to_dict(), index=sorted(rule_label_df_index)).T.fillna(0)

In [None]:
def print_cluster(label):
    """Print every rule tagged with ``label``, followed by its technique names and tactics.

    Rules are the rows of the global ``rule_label_df`` whose ``label`` column
    equals 1. For each, the ``rule`` and ``description`` values from the global
    ``combined_rules_annotated_entities`` frame are printed. If the rule's
    ``mitre_attack_id`` cell parses and every id is found in the global
    ``techniques`` frame, the matching ``name`` and ``tactics`` values are
    printed too, followed by a blank line; otherwise that part is skipped.
    """
    member_rules = rule_label_df[rule_label_df[label] == 1].index
    for rule in member_rules:
        rule_row = combined_rules_annotated_entities.loc[rule]
        print(rule, rule_row['rule'])
        print(rule_row['description'])
        try:
            # The id list is stored as single-quoted text; swap the quotes so it parses as JSON.
            mitre_ids = json.loads(rule_row['mitre_attack_id'].replace('\'', '"'))
            mitre_labels = [techniques.loc[techniques['id'] == technique_id, 'name'].values[0] for technique_id in mitre_ids]
            mitre_tactic_labels = [techniques.loc[techniques['id'] == technique_id, 'tactics'].values[0] for technique_id in mitre_ids]
        except (AttributeError, TypeError, ValueError, LookupError):
            # Missing or malformed id cell, or an id absent from `techniques`:
            # skip the technique details for this rule.
            continue
        print(mitre_labels)
        print(mitre_tactic_labels)
        print()

In [None]:
print_cluster('cve-2021-4034')

e69 file where file.path : "/*GCONV_PATH*"

Identifies an attempt to exploit a local privilege escalation in polkit pkexec (CVE-2021-4034) via unsecure environment
variable injection. Successful exploitation allows an unprivileged user to escalate to the root user.

['Hijack Execution Flow (T1574)', 'Exploitation for Privilege Escalation (T1068)']
['persistence|privilege-escalation|defense-evasion', 'privilege-escalation']

s489 | tstats `security_content_summariesonly` count FROM datamodel=Endpoint.Processes where Processes.process_name=pkexec by _time Processes.dest Processes.process_id Processes.parent_process_name Processes.process_name Processes.process Processes.process_path | `drop_dm_object_name(Processes)` | `security_content_ctime(firstTime)` | `security_content_ctime(lastTime)` | regex process="(^.{1}$)" | `linux_pkexec_privilege_escalation_filter`
The following analytic identifies `pkexec` spawning with no command-line arguments. A vulnerability in Polkit's pkexec component

In [None]:
print_cluster('meterpreter')

e479 process where event.type == "start" and
 process.pe.original_file_name in ("Cmd.Exe", "PowerShell.EXE") and
 process.args : "echo" and process.args : ">" and process.args : "\\\\.\\pipe\\*"

Identifies a privilege escalation attempt via named pipe impersonation. An adversary may abuse this technique by
utilizing a framework such Metasploit's meterpreter getsystem command.

['Access Token Manipulation (T1134)']
['defense-evasion|privilege-escalation']

s229 | tstats `security_content_summariesonly` count min(_time) as firstTime max(_time) as lastTime from datamodel=Endpoint.Processes where `process_cmd` OR Processes.process=*%comspec%* (Processes.process=*echo* AND Processes.process=*pipe*) by Processes.dest Processes.user Processes.parent_process Processes.process_name Processes.original_file_name Processes.process Processes.process_id Processes.parent_process_id | `drop_dm_object_name(Processes)` | `security_content_ctime(firstTime)` | `security_content_ctime(lastTime)` | `cmd_ec

In [None]:
print_cluster('fin7')

e175 event.category:(network OR network_traffic) AND type:(tls OR http) AND network.transport:tcp AND destination.domain:/[a-z]{3}.stage.[0-9]{8}\..*/

Cobalt Strike is a threat emulation platform commonly modified and used by adversaries to conduct network attack and
exploitation campaigns. This rule detects a network activity algorithm leveraged by Cobalt Strike implant beacons for
command and control.

['Dynamic Resolution (T1568)', 'Application Layer Protocol (T1071)']
['command-and-control', 'command-and-control']

e177 event.category:(network or network_traffic) and network.protocol:http and
  (url.extension:(ps1 or rar) or url.path:(*.ps1 or *.rar)) and
    not destination.ip:(
      10.0.0.0/8 or
      127.0.0.0/8 or
      169.254.0.0/16 or
      172.16.0.0/12 or
      192.0.0.0/24 or
      192.0.0.0/29 or
      192.0.0.8/32 or
      192.0.0.9/32 or
      192.0.0.10/32 or
      192.0.0.170/32 or
      192.0.0.171/32 or
      192.0.2.0/24 or
      192.31.196.0/24 or
      192.52

In [None]:
print_cluster('cve-2021-34527')

e485 file where event.type : "deletion" and
 not process.name : ("spoolsv.exe", "dllhost.exe", "explorer.exe") and
 file.path : "?:\\Windows\\System32\\spool\\drivers\\x64\\3\\*.dll"

Detects deletion of print driver files by an unusual process. This may indicate a clean up attempt post successful
privilege escalation via Print Spooler service related vulnerabilities.

['Exploitation for Privilege Escalation (T1068)']
['privilege-escalation']

e500 process where event.type == "start" and
 process.parent.name : "spoolsv.exe" and
 (?process.Ext.token.integrity_level_name : "System" or
 ?winlog.event_data.IntegrityLevel : "System") and

 /* exclusions for FP control below */
 not process.name : ("splwow64.exe", "PDFCreator.exe", "acrodist.exe", "spoolsv.exe", "msiexec.exe", "route.exe", "WerFault.exe") and
 not process.command_line : "*\\WINDOWS\\system32\\spool\\DRIVERS*" and
 not (process.name : "net.exe" and process.command_line : ("*stop*", "*start*")) and
 not (process.name : ("cmd.e