Interactive 3D scatter plot in Jupyter. Banjax log in bar plots
mazhurin committed Mar 9, 2016
1 parent 565922d commit 6910eb7
Showing 6 changed files with 942 additions and 528 deletions.
requirements.txt: 3 changes (2 additions, 1 deletion)
@@ -7,4 +7,5 @@ certifi
 ua_parser
 bokeh
 pandas
-pycountry
+pycountry
+plotly
src/.ipynb_checkpoints/Clustering-checkpoint.ipynb: 677 changes (422 additions, 255 deletions)

Large diffs are not rendered by default.

src/Clustering.ipynb: 605 changes (421 additions, 184 deletions)

Large diffs are not rendered by default.
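
The notebook diffs are not rendered, but together with the new plotly requirement the commit title points at plotly's offline mode for the interactive 3D scatter. A minimal sketch of that pattern in a Jupyter cell (column and cluster names are illustrative, not taken from Clustering.ipynb):

import numpy as np
import plotly.offline as py
import plotly.graph_objs as go

py.init_notebook_mode()  # render plots inside the notebook, no plotly account needed

features = np.random.rand(100, 3)        # stand-in for three session features
clusters = np.random.randint(0, 4, 100)  # stand-in for cluster labels

trace = go.Scatter3d(
    x=features[:, 0], y=features[:, 1], z=features[:, 2],
    mode='markers',
    marker=dict(size=3, color=clusters, colorscale='Viridis'))
py.iplot(go.Figure(data=[trace]))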

src/bothound_tools.py: 5 changes (3 additions, 2 deletions)
@@ -22,7 +22,8 @@ def connect_to_db(self):
         learn2bantools instance and will be used to save data back to the db
         """
         self.db = MySQLdb.connect(host = self.db_host, user = self.db_user,
-            passwd = self.db_password,port = self.db_port)
+            passwd = self.db_password,port = self.db_port, charset='utf8',
+            use_unicode=True)
 
         #Create cursor object to allow query execution
         self.cur = self.db.cursor(MySQLdb.cursors.DictCursor)
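
Passing charset='utf8' with use_unicode=True makes MySQLdb send and return unicode instead of falling back to latin-1, which matters once country names like Åland Islands enter the database. A minimal sketch, with placeholder credentials:

import MySQLdb

# Placeholder host/user/password; charset='utf8' + use_unicode=True let the
# connection round-trip unicode values instead of latin-1 byte strings.
db = MySQLdb.connect(host='localhost', user='bothound', passwd='secret',
                     charset='utf8', use_unicode=True)
cur = db.cursor()
cur.execute("SELECT %s", (u"\xc5land Islands",))
print cur.fetchone()[0]  # comes back as a unicode object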
@@ -209,7 +210,7 @@ def factorize_countries(self, ip_feature_db):
 
             country_name = ""
             if (c is not None):
-                country_name = c.name
+                country_name = c.name.encode('ascii','ignore')
             self.cur.execute("insert into countries(code, name) values ('{}', '{}')".format(country_code, country_name))
             ids[country_code] = self.cur.lastrowid
             features[feature_index] = self.cur.lastrowid
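
The encode('ascii','ignore') is a lossy guard: pycountry names are unicode, and dropping non-ASCII characters keeps the string-formatted INSERT from raising UnicodeEncodeError, at the cost of mangling a few names. A sketch of the lookup, assuming the pycountry 1.x API of the time:

import pycountry

c = pycountry.countries.get(alpha2='AX')  # Åland Islands
print c.name                              # u'\xc5land Islands'
print c.name.encode('ascii', 'ignore')    # 'land Islands' (accented char dropped)

Since the connection is now utf8, a parameterized query such as cur.execute("insert into countries(code, name) values (%s, %s)", (country_code, country_name)) could store the exact unicode name and would also survive names containing quotes.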
src/session_computer.py: 2 changes (1 addition, 1 deletion)
@@ -124,7 +124,7 @@ def process_incident(self, incident):
        #print ip_feature_db
        self.bothound_tools.add_sessions(incident['id'], ip_feature_db, banned_ips)
        self.bothound_tools.set_incident_process(incident['id'], False)

        print "Incident {} processed.".format(incident['id'])
        return ip_feature_db

    def extract(self):
src/util/es_handler.py: 178 changes (93 additions, 85 deletions)
@@ -99,53 +99,57 @@ def get(self, start, stop, target):
         print >>f1, indexes
         print >>f1, ts_start
         print >>f1, ts_stop
-        """
-        #pdb.set_trace()
-        page = self.es.search(index=indexes,
-                            scroll = '5m',
-                            #search_type = 'scan',
-                            size = 10000,
-                            body = es_body
-                            #add index between the quotation marks
-                            )
+        """
         result = []
 
-        sid = page['_scroll_id']
-        page_index = 0
-        scroll_size = page['hits']['total']
-        print "total # of hits : ", scroll_size
-        # Start scrolling
-
-        num_processed = 0
-        while (scroll_size > 0):
-            print "Scrolling...", page_index
-            # Do something with the obtained page
-            json_result = page['hits']['hits']
-
-            for log in json_result:
-                #print log['_source']['@timestamp']
-                cur_rec_dict = util.es_log_muncher.parse_es_json_object(log)
-                if cur_rec_dict:
-                    cur_ats_rec = ATSRecord(cur_rec_dict);
-                    #print cur_ats_rec.payload['time']
-                    result.append(cur_ats_rec);
-                    num_processed = num_processed + 1
-
-            print "num_processed: " + str(num_processed)
-            if(num_processed > 5000000):
-                break
-
-            page_index = page_index + 1
-            tStart = datetime.datetime.now()
-            page = self.es.scroll(scroll_id = sid, scroll = '5m')
-            print "scroll time ,sec:", (datetime.datetime.now() - tStart).total_seconds()
-
-            # Update the scroll ID
-            sid = page['_scroll_id']
-            # Get the number of results that we returned in the last scroll
-            scroll_size = len(page['hits']['hits'])
-            #print "scroll size: " + str(scroll_size)
+        try:
+            #pdb.set_trace()
+            page = self.es.search(index=indexes,
+                                scroll = '5m',
+                                #search_type = 'scan',
+                                size = 10000,
+                                body = es_body
+                                #add index between the quotation marks
+                                )
+
+            sid = page['_scroll_id']
+            page_index = 0
+            scroll_size = page['hits']['total']
+            print "total # of hits : ", scroll_size
+            # Start scrolling
+
+            num_processed = 0
+            while (scroll_size > 0):
+                print "Scrolling...", page_index
+                # Do something with the obtained page
+                json_result = page['hits']['hits']
+                #pdb.set_trace()
+
+                for log in json_result:
+                    #print log['_source']['@timestamp']
+                    cur_rec_dict = util.es_log_muncher.parse_es_json_object(log)
+                    if cur_rec_dict:
+                        cur_ats_rec = ATSRecord(cur_rec_dict);
+                        #print cur_ats_rec.payload['time']
+                        result.append(cur_ats_rec);
+                        num_processed = num_processed + 1
+
+                print "num_processed: " + str(num_processed)
+                if(num_processed > 5000000):
+                    break
+
+                page_index = page_index + 1
+                tStart = datetime.datetime.now()
+                page = self.es.scroll(scroll_id = sid, scroll = '5m')
+                print "scroll time ,sec:", (datetime.datetime.now() - tStart).total_seconds()
+
+                # Update the scroll ID
+                sid = page['_scroll_id']
+                # Get the number of results that we returned in the last scroll
+                scroll_size = len(page['hits']['hits'])
+                #print "scroll size: " + str(scroll_size)
+        except Exception as ex:
+            print ex
 
         return result
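
Functionally, the rewrite keeps the standard elasticsearch-py scroll loop but wraps it in a try so one failed request no longer loses everything fetched so far, and it refreshes the scroll id and page size right after each scroll() call. The pattern, distilled as a sketch (es, indexes and es_body assumed to be set up as in get() above):

def scroll_all(es, indexes, es_body, max_hits=5000000):
    # Sketch of the scroll pattern used by get(), not the method itself.
    result = []
    try:
        page = es.search(index=indexes, scroll='5m', size=10000, body=es_body)
        while len(page['hits']['hits']) > 0:
            sid = page['_scroll_id']        # the scroll id can change per page
            result.extend(page['hits']['hits'])
            if len(result) > max_hits:      # same hard cap as get()
                break
            page = es.scroll(scroll_id=sid, scroll='5m')
    except Exception as ex:
        print ex
    return result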

@@ -164,7 +168,7 @@ def get_banjax(self, start, stop, target):
         #indexes = [start.strftime('deflect.log-%Y.%m.*')]
         #if(start.month != stop.month):
             #indexes.append(stop.strftime('deflect.log-%Y.%m.*'))
-        print "es.search() start..."
+        print "es.search() start banjax..."
         if (target is not None) :
             es_body = {
                 "from" : 0, "size" : 10000,
@@ -224,53 +228,57 @@
         print >>f1, ts_start
         print >>f1, ts_stop
         """
-        #pdb.set_trace()
-        page = self.es.search(index=indexes,
-                            scroll = '5m',
-                            #search_type = 'scan',
-                            size = 10000,
-                            body = es_body
-                            #add index between the quotation marks
-                            )
-        result = []
-        sid = page['_scroll_id']
-        page_index = 0
-        total_size = page['hits']['total']
-        print "total # of Banjax hits : ", total_size
-        # Start scrolling
-        #pdb.set_trace()
-        num_processed = 0
-        scroll_size = total_size
-        result = {}
-        while (scroll_size > 0):
-            print "Scrolling...", page_index
-
-            json_result = page['hits']['hits']
-            for log in json_result:
-                src = log["_source"]
-                if "client_ip" not in src:
-                    continue
-                v = {}
-                if "rule_type" in src:
-                    v['rule'] = src['rule_type']
-
-                result[src['client_ip']] = v
-
-            print "num_processed: " + str(num_processed)
-            if(num_processed > 5000000):
-                break
-
-            page_index = page_index + 1
-            tStart = datetime.datetime.now()
-            page = self.es.scroll(scroll_id = sid, scroll = '2m')
-            print "scroll time ,sec:", (datetime.datetime.now() - tStart).total_seconds()
-
-            # Update the scroll ID
-            sid = page['_scroll_id']
-            # Get the number of results that we returned in the last scroll
-            scroll_size = len(page['hits']['hits'])
-            print "scroll size: " + str(scroll_size)
+        try:
+            #pdb.set_trace()
+            page = self.es.search(index=indexes,
+                                scroll = '5m',
+                                #search_type = 'scan',
+                                size = 10000,
+                                body = es_body
+                                #add index between the quotation marks
+                                )
+            result = []
+            sid = page['_scroll_id']
+            page_index = 0
+            total_size = page['hits']['total']
+            print "total # of Banjax hits : ", total_size
+            # Start scrolling
+            #pdb.set_trace()
+            num_processed = 0
+            scroll_size = total_size
+            result = {}
+
+            while (scroll_size > 0):
+                print "Scrolling banjax...", page_index
+
+                json_result = page['hits']['hits']
+                for log in json_result:
+                    src = log["_source"]
+                    if "client_ip" not in src:
+                        continue
+                    v = {}
+                    if "rule_type" in src:
+                        v['rule'] = src['rule_type']
+                    result[src['client_ip']] = v
+
+                print "num_processed: " + str(num_processed)
+                if(num_processed > 5000000):
+                    break
+
+                page_index = page_index + 1
+                tStart = datetime.datetime.now()
+                page = self.es.scroll(scroll_id = sid, scroll = '2m')
+                print "scroll time ,sec:", (datetime.datetime.now() - tStart).total_seconds()
+
+                # Update the scroll ID
+                sid = page['_scroll_id']
+                # Get the number of results that we returned in the last scroll
+                scroll_size = len(page['hits']['hits'])
+                print "scroll size: " + str(scroll_size)
+        except Exception as ex:
+            print ex
 
         return result
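
get_banjax() thus returns a dict keyed by client IP, carrying the triggering rule_type when the log line had one; presumably this is what session_computer.py now passes to add_sessions() as banned_ips so the notebook can mark banned sessions in the bar plots. Illustrative shape and use (variable names are assumptions, not from this commit):

banned_ips = es_handler.get_banjax(start, stop, target)
# e.g. {'203.0.113.7': {'rule': 'ban_ip'}, '198.51.100.2': {}}

for session in sessions:  # hypothetical per-IP session dicts
    info = banned_ips.get(session['ip'])
    session['banned'] = info is not None
    session['ban_rule'] = info.get('rule') if info is not None else None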


