Interactive 3D scatter plot in Jupyter. Banjax log in bar plots
mazhurin committed Mar 9, 2016
1 parent 565922d commit 6910eb7
Showing 6 changed files with 942 additions and 528 deletions.
requirements.txt: 3 changes (2 additions, 1 deletion)
@@ -7,4 +7,5 @@ certifi
 ua_parser
 bokeh
 pandas
-pycountry
+pycountry
+plotly
src/.ipynb_checkpoints/Clustering-checkpoint.ipynb: 677 changes (422 additions, 255 deletions)

Large diffs are not rendered by default.

src/Clustering.ipynb: 605 changes (421 additions, 184 deletions)

Large diffs are not rendered by default.
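
The notebook diffs are not rendered, but together with the new plotly requirement the commit title points at plotly's offline mode for the interactive 3D scatter. A minimal sketch of that pattern in a Jupyter cell (column and cluster names are illustrative, not taken from Clustering.ipynb):

import numpy as np
import plotly.offline as py
import plotly.graph_objs as go

py.init_notebook_mode()  # render plots inside the notebook, no plotly account needed

features = np.random.rand(100, 3)        # stand-in for three session features
clusters = np.random.randint(0, 4, 100)  # stand-in for cluster labels

trace = go.Scatter3d(
    x=features[:, 0], y=features[:, 1], z=features[:, 2],
    mode='markers',
    marker=dict(size=3, color=clusters, colorscale='Viridis'))
py.iplot(go.Figure(data=[trace]))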

src/bothound_tools.py: 5 changes (3 additions, 2 deletions)
@@ -22,7 +22,8 @@ def connect_to_db(self):
         learn2bantools instance and will be used to save data back to the db
         """
         self.db = MySQLdb.connect(host = self.db_host, user = self.db_user,
-            passwd = self.db_password,port = self.db_port)
+            passwd = self.db_password,port = self.db_port, charset='utf8',
+            use_unicode=True)
 
         #Create cursor object to allow query execution
         self.cur = self.db.cursor(MySQLdb.cursors.DictCursor)
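
Passing charset='utf8' with use_unicode=True makes MySQLdb send and return unicode instead of falling back to latin-1, which matters once country names like Åland Islands enter the database. A minimal sketch, with placeholder credentials:

import MySQLdb

# Placeholder host/user/password; charset='utf8' + use_unicode=True let the
# connection round-trip unicode values instead of latin-1 byte strings.
db = MySQLdb.connect(host='localhost', user='bothound', passwd='secret',
                     charset='utf8', use_unicode=True)
cur = db.cursor()
cur.execute("SELECT %s", (u"\xc5land Islands",))
print cur.fetchone()[0]  # comes back as a unicode object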
@@ -209,7 +210,7 @@ def factorize_countries(self, ip_feature_db):
 
             country_name = ""
             if (c is not None):
-                country_name = c.name
+                country_name = c.name.encode('ascii','ignore')
             self.cur.execute("insert into countries(code, name) values ('{}', '{}')".format(country_code, country_name))
             ids[country_code] = self.cur.lastrowid
             features[feature_index] = self.cur.lastrowid
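
The encode('ascii','ignore') is a lossy guard: pycountry names are unicode, and dropping non-ASCII characters keeps the string-formatted INSERT from raising UnicodeEncodeError, at the cost of mangling a few names. A sketch of the lookup, assuming the pycountry 1.x API of the time:

import pycountry

c = pycountry.countries.get(alpha2='AX')  # Åland Islands
print c.name                              # u'\xc5land Islands'
print c.name.encode('ascii', 'ignore')    # 'land Islands' (accented char dropped)

Since the connection is now utf8, a parameterized query such as cur.execute("insert into countries(code, name) values (%s, %s)", (country_code, country_name)) could store the exact unicode name and would also survive names containing quotes.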
src/session_computer.py: 2 changes (1 addition, 1 deletion)
@@ -124,7 +124,7 @@ def process_incident(self, incident):
        #print ip_feature_db
        self.bothound_tools.add_sessions(incident['id'], ip_feature_db, banned_ips)
        self.bothound_tools.set_incident_process(incident['id'], False)

        print "Incident {} processed.".format(incident['id'])
        return ip_feature_db

    def extract(self):
src/util/es_handler.py: 178 changes (93 additions, 85 deletions)
@@ -99,53 +99,57 @@ def get(self, start, stop, target):
         print >>f1, indexes
         print >>f1, ts_start
         print >>f1, ts_stop
-        """
-        #pdb.set_trace()
-        page = self.es.search(index=indexes,
-                            scroll = '5m',
-                            #search_type = 'scan',
-                            size = 10000,
-                            body = es_body
-                            #add index between the quotation marks
-                            )
+        """
         result = []
 
-        sid = page['_scroll_id']
-        page_index = 0
-        scroll_size = page['hits']['total']
-        print "total # of hits : ", scroll_size
-        # Start scrolling
-
-        num_processed = 0
-        while (scroll_size > 0):
-            print "Scrolling...", page_index
-            # Do something with the obtained page
-            json_result = page['hits']['hits']
-
-            for log in json_result:
-                #print log['_source']['@timestamp']
-                cur_rec_dict = util.es_log_muncher.parse_es_json_object(log)
-                if cur_rec_dict:
-                    cur_ats_rec = ATSRecord(cur_rec_dict);
-                    #print cur_ats_rec.payload['time']
-                    result.append(cur_ats_rec);
-                    num_processed = num_processed + 1
-
-            print "num_processed: " + str(num_processed)
-            if(num_processed > 5000000):
-                break
-
-            page_index = page_index + 1
-            tStart = datetime.datetime.now()
-            page = self.es.scroll(scroll_id = sid, scroll = '5m')
-            print "scroll time ,sec:", (datetime.datetime.now() - tStart).total_seconds()
-
-            # Update the scroll ID
-            sid = page['_scroll_id']
-            # Get the number of results that we returned in the last scroll
-            scroll_size = len(page['hits']['hits'])
-            #print "scroll size: " + str(scroll_size)
+        try:
+            #pdb.set_trace()
+            page = self.es.search(index=indexes,
+                                scroll = '5m',
+                                #search_type = 'scan',
+                                size = 10000,
+                                body = es_body
+                                #add index between the quotation marks
+                                )
+
+            sid = page['_scroll_id']
+            page_index = 0
+            scroll_size = page['hits']['total']
+            print "total # of hits : ", scroll_size
+            # Start scrolling
+
+            num_processed = 0
+            while (scroll_size > 0):
+                print "Scrolling...", page_index
+                # Do something with the obtained page
+                json_result = page['hits']['hits']
+                #pdb.set_trace()
+
+                for log in json_result:
+                    #print log['_source']['@timestamp']
+                    cur_rec_dict = util.es_log_muncher.parse_es_json_object(log)
+                    if cur_rec_dict:
+                        cur_ats_rec = ATSRecord(cur_rec_dict);
+                        #print cur_ats_rec.payload['time']
+                        result.append(cur_ats_rec);
+                        num_processed = num_processed + 1
+
+                print "num_processed: " + str(num_processed)
+                if(num_processed > 5000000):
+                    break
+
+                page_index = page_index + 1
+                tStart = datetime.datetime.now()
+                page = self.es.scroll(scroll_id = sid, scroll = '5m')
+                print "scroll time ,sec:", (datetime.datetime.now() - tStart).total_seconds()
+
+                # Update the scroll ID
+                sid = page['_scroll_id']
+                # Get the number of results that we returned in the last scroll
+                scroll_size = len(page['hits']['hits'])
+                #print "scroll size: " + str(scroll_size)
+        except Exception as ex:
+            print ex
 
         return result
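
Functionally, the rewrite keeps the standard elasticsearch-py scroll loop but wraps it in a try so one failed request no longer loses everything fetched so far, and it refreshes the scroll id and page size right after each scroll() call. The pattern, distilled as a sketch (es, indexes and es_body assumed to be set up as in get() above):

def scroll_all(es, indexes, es_body, max_hits=5000000):
    # Sketch of the scroll pattern used by get(), not the method itself.
    result = []
    try:
        page = es.search(index=indexes, scroll='5m', size=10000, body=es_body)
        while len(page['hits']['hits']) > 0:
            sid = page['_scroll_id']        # the scroll id can change per page
            result.extend(page['hits']['hits'])
            if len(result) > max_hits:      # same hard cap as get()
                break
            page = es.scroll(scroll_id=sid, scroll='5m')
    except Exception as ex:
        print ex
    return result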

@@ -164,7 +168,7 @@ def get_banjax(self, start, stop, target):
         #indexes = [start.strftime('deflect.log-%Y.%m.*')]
         #if(start.month != stop.month):
             #indexes.append(stop.strftime('deflect.log-%Y.%m.*'))
-        print "es.search() start..."
+        print "es.search() start banjax..."
         if (target is not None) :
             es_body = {
                 "from" : 0, "size" : 10000,
@@ -224,53 +228,57 @@
         print >>f1, ts_start
         print >>f1, ts_stop
         """
-        #pdb.set_trace()
-        page = self.es.search(index=indexes,
-                            scroll = '5m',
-                            #search_type = 'scan',
-                            size = 10000,
-                            body = es_body
-                            #add index between the quotation marks
-                            )
-        result = []
-        sid = page['_scroll_id']
-        page_index = 0
-        total_size = page['hits']['total']
-        print "total # of Banjax hits : ", total_size
-        # Start scrolling
-        #pdb.set_trace()
-        num_processed = 0
-        scroll_size = total_size
-        result = {}
-        while (scroll_size > 0):
-            print "Scrolling...", page_index
-
-            json_result = page['hits']['hits']
-            for log in json_result:
-                src = log["_source"]
-                if "client_ip" not in src:
-                    continue
-                v = {}
-                if "rule_type" in src:
-                    v['rule'] = src['rule_type']
-
-                result[src['client_ip']] = v
-
-            print "num_processed: " + str(num_processed)
-            if(num_processed > 5000000):
-                break
-
-            page_index = page_index + 1
-            tStart = datetime.datetime.now()
-            page = self.es.scroll(scroll_id = sid, scroll = '2m')
-            print "scroll time ,sec:", (datetime.datetime.now() - tStart).total_seconds()
-
-            # Update the scroll ID
-            sid = page['_scroll_id']
-            # Get the number of results that we returned in the last scroll
-            scroll_size = len(page['hits']['hits'])
-            print "scroll size: " + str(scroll_size)
+        try:
+            #pdb.set_trace()
+            page = self.es.search(index=indexes,
+                                scroll = '5m',
+                                #search_type = 'scan',
+                                size = 10000,
+                                body = es_body
+                                #add index between the quotation marks
+                                )
+            result = []
+            sid = page['_scroll_id']
+            page_index = 0
+            total_size = page['hits']['total']
+            print "total # of Banjax hits : ", total_size
+            # Start scrolling
+            #pdb.set_trace()
+            num_processed = 0
+            scroll_size = total_size
+            result = {}
+
+            while (scroll_size > 0):
+                print "Scrolling banjax...", page_index
+
+                json_result = page['hits']['hits']
+                for log in json_result:
+                    src = log["_source"]
+                    if "client_ip" not in src:
+                        continue
+                    v = {}
+                    if "rule_type" in src:
+                        v['rule'] = src['rule_type']
+                    result[src['client_ip']] = v
+
+                print "num_processed: " + str(num_processed)
+                if(num_processed > 5000000):
+                    break
+
+                page_index = page_index + 1
+                tStart = datetime.datetime.now()
+                page = self.es.scroll(scroll_id = sid, scroll = '2m')
+                print "scroll time ,sec:", (datetime.datetime.now() - tStart).total_seconds()
+
+                # Update the scroll ID
+                sid = page['_scroll_id']
+                # Get the number of results that we returned in the last scroll
+                scroll_size = len(page['hits']['hits'])
+                print "scroll size: " + str(scroll_size)
+        except Exception as ex:
+            print ex
 
         return result
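
get_banjax() thus returns a dict keyed by client IP, carrying the triggering rule_type when the log line had one; presumably this is what session_computer.py now passes to add_sessions() as banned_ips so the notebook can mark banned sessions in the bar plots. Illustrative shape and use (variable names are assumptions, not from this commit):

banned_ips = es_handler.get_banjax(start, stop, target)
# e.g. {'203.0.113.7': {'rule': 'ban_ip'}, '198.51.100.2': {}}

for session in sessions:  # hypothetical per-IP session dicts
    info = banned_ips.get(session['ip'])
    session['banned'] = info is not None
    session['ban_rule'] = info.get('rule') if info is not None else None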


