forked from Soinull/assimilate
-
Notifications
You must be signed in to change notification settings - Fork 0
/
assimilate-assess.py
105 lines (91 loc) · 4.45 KB
/
assimilate-assess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#
# Assimilate-Assess.py
# Copyright 2017 Tim Crothers
# Credit for the excellent BroLogReader code is to Mike Sconzo - https://github.com/ClickSecurity/data_hacking/blob/master/browser_fingerprinting/bro_log_reader.py
#
import os, io, csv, datetime, itertools
import numpy
from sklearn.externals import joblib
from pandas import DataFrame
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from optparse import OptionParser
from assimilate_utils import BroLogReader
def report_line(header, rowindex, filename=None):
    """Build the "looks suspicious" report line for one header row.

    Args:
        header: The HTTP header text to include in the message.
        rowindex: 1-based row number of the header (within its file when
            ``filename`` is given).
        filename: When not None, the message is prefixed with the file name
            (directory mode); otherwise only the line number is shown.

    Returns:
        The message string, without a trailing newline.
    """
    if filename is None:
        location = "Line " + str(rowindex)
    else:
        location = "File " + filename + " Line " + str(rowindex)
    return location + " looks suspicious: " + header


def assess_rows(header_rows, vectorizer, classifier, out=None, verbose=False,
                per_file=False, console_limit=60):
    """Classify each header row and report the ones predicted as 'bad'.

    Args:
        header_rows: Iterable of mappings with a 'header' entry (and a
            'filename' entry when ``per_file`` is true). May be empty.
        vectorizer: Object whose ``transform([text])`` result is passed to
            ``classifier.predict``.
        classifier: Object whose ``predict(...)`` returns a sequence; the row
            is reported when the first element equals 'bad'.
        out: Optional writable file-like object; the full, untruncated report
            line is written to it for every suspicious row.
        verbose: When true, print a progress line for every row.
        per_file: When true, rows are labelled with their 'filename' and the
            row counter restarts at 1 whenever the filename changes.
        console_limit: Maximum number of header characters echoed to stdout.

    Returns:
        The number of rows that were reported as suspicious.
    """
    flagged = 0
    rowindex = 1
    current_file = None
    for row in header_rows:
        header = row['header']
        filename = row['filename'] if per_file else None
        # Restart numbering at each new file. Doing this inside the loop
        # (rather than peeking at header_rows[0]) keeps empty input safe.
        if per_file and filename != current_file:
            rowindex = 1
            current_file = filename
        if verbose:
            if per_file:
                print("Checking file "+filename+" line "+str(rowindex)+" of file "+filename)
            else:
                print("Checking line "+str(rowindex))
        predictions = classifier.predict(vectorizer.transform([header]))
        if predictions[0] == 'bad':
            flagged += 1
            # Slicing already copes with short headers, so no length check.
            print(report_line(header[:console_limit], rowindex, filename))
            if out is not None:
                out.write(report_line(header, rowindex, filename)+"\n")
        rowindex += 1
    return flagged


if __name__ == "__main__":
    __version__ = '1.0'
    usage = """assimilate-assess [options] bro_http_header_file"""
    parser = OptionParser(usage=usage, version=__version__)
    parser.add_option("-f", "--headerfile", action="store", type="string",
                      default=None, help="the Bro HTTP Header file to analyze")
    parser.add_option("-d", "--dirheaderfiles", action="store", type="string",
                      default=None, help="directory of Bro HTTP Header files to analyze")
    parser.add_option("-b", "--bayesianfile", action="store", type="string",
                      default='./nb.pkl', help="the location to load the bayesian classifier")
    parser.add_option("-x", "--vectorizerfile", action="store", type="string",
                      default='./vectorizers.pkl', help="the location to load the vectorizer")
    parser.add_option("-o", "--outputfile", action="store", type="string",
                      default=None, help="the file to store results in")
    parser.add_option("-v", "--verbose", action="store_true", default=False,
                      help="enable verbose output")
    (opts, args) = parser.parse_args()

    if opts.headerfile is None and opts.dirheaderfiles is None:
        parser.error('Need either a bro_http_header_file or a directory of bro_header_files to assess')

    blr = BroLogReader()

    print('Loading models...')
    # SECURITY NOTE: joblib.load unpickles its input and can execute arbitrary
    # code; only point -b / -x at model files from a trusted source.
    classifier = joblib.load(opts.bayesianfile)
    vectorizer = joblib.load(opts.vectorizerfile)

    # A single header file takes precedence when both -f and -d are supplied.
    single_file = opts.headerfile is not None
    if single_file:
        print('Assessing HTTP Header file...')
        header_rows = blr.dataFrameFromFile(opts.headerfile)
    else:
        print('Assessing directory '+opts.dirheaderfiles+'...')
        header_rows = blr.AssessdataFrameFromDirectory(opts.dirheaderfiles)

    # The output file is opened only after the input was read successfully and
    # is always closed, even if classification raises part-way through.
    of = open(opts.outputfile, "w") if opts.outputfile is not None else None
    try:
        assess_rows(header_rows, vectorizer, classifier, out=of,
                    verbose=opts.verbose, per_file=not single_file,
                    console_limit=60 if single_file else 40)
    finally:
        if of is not None:
            of.close()
    print('Done!')