-
Notifications
You must be signed in to change notification settings - Fork 14
/
dhparser.py
367 lines (330 loc) · 21.5 KB
/
dhparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
from plistlib import InvalidFileException
import sys
import os
import argparse
import io
import json
import binascii
import re
import time
from datetime import datetime, timedelta
from typing import final
sys.path.append("./")
# DEFINE COMMAND LINE ARGS
arg_parser = argparse.ArgumentParser(description='Parse the contents of the given Windows Defender DetectionHistory file(s) into a readable format.')
arg_parser.version = "DetectionHistory Parser v1.0.1. Thank you for using my parser!"
arg_parser.add_argument('-f',
'--file',
action='store',
help='Path of the file/directory you are parsing.',
required=True)
arg_parser.add_argument('-g',
'--greedy',
action='store_true',
help='(CAUTION) Intended for use with recursive option. Will grab all files without an extension in a directory, even if they do not match Windows\' naming convention of DetectionHistory files.',
required=False)
arg_parser.add_argument('-o',
'--output',
action='store',
help='Desired output folder name.',
required=True)
arg_parser.add_argument('-r',
'--recursive',
action='store_true',
help='Recursive input. MUST SPECIFY if you need to parse a (sub)directory of DetectionHistory files. Only picks up files which match Windows\' naming convention of DetectionHistory files.',
required=False)
arg_parser.add_argument('-v',
'--verbose',
action='store_true',
help='Includes additional output in command line for each file processed, including non-critical/normal warnings. Without this enabled, confirmation of finished processing and critical errors will still be shown.',
required=False)
arg_parser.add_argument('-z',
'--version',
action='version',
help='Displays the version of the application and exits.')
def byte_swap_to_int(hexval, str_flag=False):
# Reads in bytes, swaps endianness, and converts to an integer if needed.
# str_flag gives us the value to return an endian-swapped, hex-string representation of a value. For some fields, this is more appropriate.
hex_int = 0
hexval = [hexval[i:i+2] for i in range(0, len(hexval), 2)] # need to swap endianness of filetime
hexval.reverse()
hexval = b''.join(hexval)
if str_flag: # stopping point if only hex-string needed
return hexval.decode('utf-8')
hexval = [int("0x"+(hexval[i:i+1].decode('utf-8')),16) for i in range(0, len(hexval), 1)]
exponent = len(hexval)-1
while exponent>=0:
hex_int += hexval[abs((len(hexval)-1)-exponent)]*(pow(16,exponent))
exponent += -1
return hex_int
def parse_header_and_guid(file):
# Validates that file is a proper DetectionHistory file, and returns parsed GUID value located directly after header.
header = file.read(6) # 6 = known DetectionHistory header size
if header != b'\x08\x00\x00\x00\x08\x00': # check file header against known valid DetectionHistory file header
print("Invalid DetectionHistory file!")
raise(InvalidFileException)
file.read(18) # skipping over some zeroes, and an unknown 3-byte sequence between offset 08-0A
guid_oct = list()
guid_oct.append(binascii.hexlify(file.read(4)))
guid_oct.append(binascii.hexlify(file.read(2)))
guid_oct.append(binascii.hexlify(file.read(2)))
guid_oct.append(binascii.hexlify(file.read(2))) # this hex is not flipped in file data
guid_oct.append(binascii.hexlify(file.read(6))) # this hex is not flipped in file data
oct_count = 0
while oct_count<=2:
oct = guid_oct[oct_count]
newlist = [oct[i:i+2] for i in range(0, len(oct), 2)]
newlist.reverse()
guid_oct[oct_count] = b''.join(newlist)
oct_count += 1
guid_final = (guid_oct[0].decode('utf-8')+"-"
+guid_oct[1].decode('utf-8')+"-"
+guid_oct[2].decode('utf-8')+"-"
+guid_oct[3].decode('utf-8')+"-"
+guid_oct[4].decode('utf-8'))
return guid_final
def parse_filetime(file):
# Parse known filetime hex string to a readable timestamp.
filetime = ""
filetime_nanoseconds = 0
file.read(4) # skip ahead known distance between "Time" field and FILETIME timestamp
filetime = binascii.hexlify(file.read(8))
filetime_nanoseconds = byte_swap_to_int(filetime)
filetime_epoch = timedelta(microseconds=float(filetime_nanoseconds/10)) # time represented in hundreds of nanoseconds
filetime_date = datetime(1601, 1, 1)+filetime_epoch # FILETIME begins on Jan 1, 1601
return filetime_date.strftime("%m-%d-%Y %H:%M:%S")
def parse_unmapped_value(file):
# Decoding bytes of high hex vals can result in unmapped chars. Returns bytes of some unknown hex string.
# Based on Windows specs, ThreatID should be an Int64, though this does not always seem to be the case.
# https://docs.microsoft.com/en-us/powershell/module/defender/get-mpthreatdetection?view=windowsserver2022-ps
# https://www.windows-security.org/c328023496d244ced5d0c4445e4f1806/threat-id-exclusions
unmapped_val = ""
chunk = file.read(1) # initial byte to perform checks off of in loop
while True: # loop that checks for beginning of unmapped value
if chunk==b'\x00':
chunk = chunk+file.read(1)
if chunk==b'\x00\x00':
chunk = chunk+file.read(1)
if chunk==b'\x00\x00\x00':
break
else:
chunk = file.read(1) # go to next byte
caution_sequences = [b'\x00', b'\x32', b'\x24', b'\x04', b'\x06'] # hex bytes observed in file which are known to delimit data from empty bytes
unmapped_val = file.read(3) # Windows should always allocate at least this much for an unknown hex string in this file.
chunk = file.read(1)
while True: # loop to read in rest of unmapped value, as well as check for ending point
while chunk in caution_sequences:
next_chunk = file.read(1)
if next_chunk==b'\x00':
return binascii.hexlify(unmapped_val)
chunk = next_chunk
# progress to this code if chunk not in 'caution_sequences'
unmapped_val += chunk
chunk = file.read(1)
return 0 # should never get to this point
def parse_detection_history(user_in, user_out):
# Main function to parse given DetectionHistory file and write output fields to a readable file.
filepath = user_in[0] # file directory itself
outfile_name = user_in[1] # filename used for outfile writing
outfolder = user_out
parsed_value_dict = dict()
# DEFINE MODES
KEY_READ_MODE = b'\x00'
VALUE_READ_MODE = b'\x01'
NULL_DATA_MODE = b'\x02'
CURRENT_MODE = KEY_READ_MODE
LAST_READ_MODE = b'\xFF'
# DEFINE SECTIONS
MAGIC_VERSION_SECTION = b'\x04'
GENERAL_SECTION = b'\x05'
NEAREST_EOF_SECTION = b'\x06'
# EXTRA NEEDED VARIABLES
EOF_SECTION_KEYS = ["User","SpawningProcessName","SecurityGroup"] # Fields in this section not explicity defined in file, so fields named off based off current knowledge
CURRENT_EOF_SECTION_KEY = -1
with open(filepath, 'rb') as f:
parsed_value_dict["GUID"] = parse_header_and_guid(f)
f.read(8) # skipping over some empty space
temp_key = ""
while True:
while MAGIC_VERSION_SECTION:
chunk = f.read(2)
if not chunk:
print("End of section or file detected. Moving on...")
MAGIC_VERSION_SECTION = 0 # break out of this section
break
if CURRENT_MODE==KEY_READ_MODE:
if chunk==b'\x3A\x00': # first few sections are delimited by a Windows-1252 colon rather than multiple \x00 bytes
temp_key = re.sub("\x00", "", temp_key)
parsed_value_dict[temp_key] = "" # we will reset temp_key after setting the value
CURRENT_MODE = VALUE_READ_MODE
else:
temp_key = temp_key+chunk.decode('windows-1252')
if "f\x00i\x00l\x00e" in temp_key: # file key/value pair signifies end of values delimited by colons (or \x3A)
print("End of Magic Version section!")
temp_key = re.sub("\x00", "", temp_key)
parsed_value_dict[temp_key] = "" # make sure "file" key gets assigned
CURRENT_MODE = VALUE_READ_MODE
MAGIC_VERSION_SECTION = 0 # break out of this section
f.read(16) # skip some zeroes to next section
elif CURRENT_MODE==VALUE_READ_MODE:
if chunk==b'\x00\x00': # if chunk to be read is empty
if f.read(2)==b'\x00\x00': # if next chunk empty as well
parsed_value_dict[temp_key] = re.sub("\x00", "", parsed_value_dict[temp_key]) # finalize value
temp_key = "" # reset temp key for next run
CURRENT_MODE=NULL_DATA_MODE
else:
parsed_value_dict[temp_key] += chunk.decode('windows-1252')
elif CURRENT_MODE==NULL_DATA_MODE:
if f.tell()==242:
parsed_value_dict["ThreatStatusID"] = byte_swap_to_int(binascii.hexlify(chunk))
if len(re.sub(r'\W+', '', chunk.decode('windows-1252')))>=1: # regex function removes all non-alphanum characters
chunk = chunk+f.read(2) # double check if there are 2 alphanum chars in sequence. sometimes there are isolated, irrelevant hex values in file which are encodable chars
if len(re.sub(r'\W+', '', chunk.decode('windows-1252')))>=2:
temp_key += chunk.decode('windows-1252')
CURRENT_MODE = KEY_READ_MODE
while GENERAL_SECTION:
chunk = f.read(2)
if not chunk:
print("End of section or file detected. Moving on...")
GENERAL_SECTION = 0 # break out of this section
break
elif CURRENT_MODE==NULL_DATA_MODE:
if len(re.sub(r'\W+', '', chunk.decode('windows-1252')))>=1: # regex function removes all non-alphanum characters
chunk = chunk+f.read(2) # double check if there are 2 alphanum chars in sequence. sometimes there are isolated, irrelevant hex values in file which are encodable chars
if len(re.sub(r'\W+', '', chunk.decode('windows-1252')))>=2:
if LAST_READ_MODE==KEY_READ_MODE: # we need to switch back and forth between key and value reading
parsed_value_dict[temp_key] += chunk.decode('windows-1252')
CURRENT_MODE=VALUE_READ_MODE
else:
temp_key += chunk.decode('windows-1252')
CURRENT_MODE=KEY_READ_MODE
elif chunk==b'\x0A\x00' or chunk==b'\x00\x0A':
f.read(2)
chunk = f.read(4)
if chunk==b'\x15\x00\x00\x00' or chunk==chunk==b'\x00\x15\x00\x00': # false positives
continue
print("End of General Section!")
f.read(4) # skip over unneeded section of hex
GENERAL_SECTION = 0 # break out of this section
else: # Applies to KEY_READ or VALUE_READ mode
if chunk==b'\x00\x00': # Check to switch to NULL_MODE must happen in either KEY_READ or VALUE_READ mode
if CURRENT_MODE==KEY_READ_MODE:
temp_key = re.sub("\x00", "", temp_key)
if "Magic." in temp_key[0:6]:
print("WARNING: Extraneous \"Magic Version\" key detected! Continuing...")
temp_key = "" # reset for next KEY_READ_MODE run
LAST_READ_MODE = VALUE_READ_MODE # skip over this key, read in next key
elif "Time" in temp_key:
parsed_value_dict[temp_key] = parse_filetime(f)
temp_key = "" # reset for next KEY_READ_MODE run
LAST_READ_MODE = VALUE_READ_MODE # value just set, read in next key
elif "ThreatTrackingThreatId" in temp_key or "ThreatTrackingSize" in temp_key:
parsed_value_dict[temp_key] = byte_swap_to_int(parse_unmapped_value(f))
temp_key = "" # reset for next KEY_READ_MODE run
LAST_READ_MODE = VALUE_READ_MODE # value just set, read in next key
elif "ThreatTrackingSigSeq" in temp_key:
parsed_value_dict[temp_key] = "0x0000"+byte_swap_to_int(parse_unmapped_value(f), str_flag=True)
temp_key = "" # reset for next KEY_READ_MODE run
LAST_READ_MODE = VALUE_READ_MODE # value just set, read in next key
else:
parsed_value_dict[temp_key] = "" # we will reset temp_key after setting the value in VALUE_READ_MODE
LAST_READ_MODE = KEY_READ_MODE
CURRENT_MODE = NULL_DATA_MODE
elif CURRENT_MODE==VALUE_READ_MODE:
final_value = re.sub("\x00", "", parsed_value_dict[temp_key])
if "Threat" in final_value[0:6] or "regkey" in final_value[0:6]: # check for values that should be keys
print(f"WARNING: Irregularity in file/empty value caused skip in parsing for keys \"{temp_key}\" and \"{final_value}\". Configuring...")
parsed_value_dict[temp_key] = "" # reset extraneous value for temp_key
parsed_value_dict[final_value] = "" # this value containing "Threat" or "regkey" should have been a key
temp_key = final_value # set the final_value to the new key
LAST_READ_MODE = KEY_READ_MODE # for that key, collect the next value
if "ThreatTrackingThreatId" in temp_key or "ThreatTrackingSize" in temp_key:
parsed_value_dict[temp_key] = byte_swap_to_int(parse_unmapped_value(f))
temp_key = "" # reset for next KEY_READ_MODE run
LAST_READ_MODE = VALUE_READ_MODE
else: # when working as intended
parsed_value_dict[temp_key] = final_value # finalize value
LAST_READ_MODE = VALUE_READ_MODE
temp_key = "" # reset temp key for next KEY_READ_MODE
CURRENT_MODE = NULL_DATA_MODE
elif CURRENT_MODE==KEY_READ_MODE:
temp_key += chunk.decode('windows-1252')
elif CURRENT_MODE==VALUE_READ_MODE:
parsed_value_dict[temp_key] += chunk.decode('windows-1252')
while NEAREST_EOF_SECTION:
chunk = f.read(2)
if not chunk: # indicates EOF
NEAREST_EOF_SECTION = 0 # break out of this section
break
elif CURRENT_MODE==NULL_DATA_MODE:
if chunk==b'\x0A\x00' or chunk==b'\x00\x0A': # track lines with "\x0A" bytes. This byte delimits an unknown hex sequence that (for now) we can skip
f.read(10)
continue
try:
if len(re.sub(r'\W+', '', chunk.decode('windows-1252')))>=1 and b'\x00' in chunk: # regex function removes all non-alphanum characters
chunk_extra = f.read(2) # we'll want to make sure these are windows-1252 valid bytes as well
chunk += chunk_extra # double check if there are 2 alphanum chars in sequence. sometimes there are isolated, irrelevant hex values in file which are encodable chars
if len(re.sub(r':(?=..(?<!\d:\d\d))|[^a-zA-Z0-9 ](?<!:)', '', chunk.decode('windows-1252')))>=2 and b'\x00' in chunk_extra: # ensure colons are treated as alphanum chars with regex
CURRENT_EOF_SECTION_KEY=CURRENT_EOF_SECTION_KEY+1
parsed_value_dict[EOF_SECTION_KEYS[CURRENT_EOF_SECTION_KEY]] = chunk.decode('windows-1252')
CURRENT_MODE=VALUE_READ_MODE
except UnicodeDecodeError as e:
print(f"WARNING: ||{e}|| caught for bytes {chunk} : Unreadable hex pattern identified. Continuing...")
elif CURRENT_MODE==VALUE_READ_MODE:
if chunk==b'\x00\x00': # Check to switch to NULL_MODE
parsed_value_dict[EOF_SECTION_KEYS[CURRENT_EOF_SECTION_KEY]] = re.sub("\x00", "", parsed_value_dict[EOF_SECTION_KEYS[CURRENT_EOF_SECTION_KEY]])
CURRENT_MODE=NULL_DATA_MODE
else:
parsed_value_dict[EOF_SECTION_KEYS[CURRENT_EOF_SECTION_KEY]] += chunk.decode('windows-1252')
break
print(f"EOF in \"{filepath}\" detected!")
if not os.path.exists(outfolder):
os.makedirs(outfolder)
with open(f"{outfolder}\\{outfile_name}.json", 'w') as out:
json.dump(parsed_value_dict, out, indent=4)
print("\n---------------------------------------")
def main():
print("\n---------------------------------------\nDetectionHistory Parser v1.0.1 by Jordan Klepser\nhttps://github.com/jklepsercyber/\n---------------------------------------\n")
args = arg_parser.parse_args()
list_files = []
error_count = 0
unverbose = io.StringIO()
if args.recursive and os.path.isdir(args.file): # search directory for DetectionHistory files
for directory, dirnames, files in os.walk(args.file): # dirnames unused, but needs to be included for python's sake
for name in files:
if not "." in name and args.greedy: # grabs all files without extension
list_files.append([os.path.join(directory, name), name]) # save name, so we don't have to regex out filename during outfile writing later on
elif not "." in name and re.match(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}', name): # regex matches default DetectionHistory file naming convention
list_files.append([os.path.join(directory, name), name]) # save name, so we don't have to regex out filename during outfile writing later on
elif os.path.isfile(args.file):
list_files.append(args.file)
else:
print(f"Path \"{args.file}\" is not a valid file or directory. Please validate the desired input and try again. \n")
return
if any("\\ProgramData\\Microsoft\\Windows Defender\\" in pth[0] for pth in list_files):
print("""WARNING: '[root]\\ProgramData\\Microsoft\\Windows Defender\\' is a protected directory. Pointing DHParser there may result in an error or files not found. It is
recommended to first navigate to '[root]\ProgramData\Microsoft\Windows Defender\Scans\History\Service\DetectionHistory', using File Explorer if on Windows, to ensure
your user has access. Alternatively, copy out the '\\DetectionHistory\\' folder to a location of your choice, and point DHParser at that folder with '-r' enabled.\n""")
sys.stdout = unverbose if not args.verbose else sys.__stdout__
for f in list_files:
try:
if len(list_files)==1:
sys.stdout = sys.__stdout__
print(f"Found DetectionHistory file \"{os.path.basename(args.file)}\" at {os.path.normpath(args.file)}.")
sys.stdout = unverbose if not args.verbose else sys.__stdout__
parse_detection_history([os.path.normpath(args.file), os.path.basename(args.file)], args.output)
break
else:
sys.stdout = sys.__stdout__
print(f"Found DetectionHistory file \"{f[1]}\" at {f[0]}.")
sys.stdout = unverbose if not args.verbose else sys.__stdout__
parse_detection_history(f, args.output)
except Exception as e:
sys.stdout = sys.__stdout__
print(f"ERROR: ||{e}|| caught in {f[1]}. Moving on to next file...\n---------------------------------------")
sys.stdout = unverbose if not args.verbose else sys.__stdout__
error_count += 1
sys.stdout = sys.__stdout__
print(f"\n{len(list_files)-error_count} of {len(list_files)} DetectionHistory files found were successfully parsed, with output written to \"{args.output}\" in {time.process_time()} seconds.\n---------------------------------------")
if __name__ == "__main__":
main()