Skip to content

Commit

Permalink
Merge pull request johnnykv#21 from threatstream/json-hpfeeds-in-mongo
Browse files Browse the repository at this point in the history
Json hpfeeds in mongo
  • Loading branch information
jatrost committed Jun 19, 2015
2 parents 10da969 + d85d067 commit e15337d
Show file tree
Hide file tree
Showing 16 changed files with 27 additions and 15 deletions.
2 changes: 1 addition & 1 deletion normalizer/modules/amun_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class AmunEvents(BaseNormalizer):
channels = ('amun.events',)

def normalize(self, data, channel, submission_timestamp, ignore_rfc1918=True):
o_data = json.loads(data)
o_data = self.parse_record_data(data)

if ignore_rfc1918 and self.is_RFC1918_addr(o_data['attackerIP']):
return []
Expand Down
8 changes: 8 additions & 0 deletions normalizer/modules/basenormalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import socket
import struct
from urlparse import urlparse
import json


class BaseNormalizer(object):
Expand Down Expand Up @@ -75,3 +76,10 @@ def is_RFC1918_addr(self, ip):

return False

def parse_record_data(self, data):
if type(data) is dict:
return data
elif type(data) is list:
return data
else:
return json.loads(data)
2 changes: 1 addition & 1 deletion normalizer/modules/beeswarm_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class BeeswarmHive(BaseNormalizer):
channels = ('beeswarm.hive',)

def normalize(self, data, channel, submission_timestamp, ignore_rfc1918=True):
o_data = json.loads(data)
o_data = self.parse_record_data(data)

if ignore_rfc1918 and self.is_RFC1918_addr(o_data['attacker_ip']):
return []
Expand Down
2 changes: 1 addition & 1 deletion normalizer/modules/conpot_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class Conpot(BaseNormalizer):
channels = ('conpot.events',)

def normalize(self, data, channel, submission_timestamp, ignore_rfc1918=True):
o_data = json.loads(data)
o_data = self.parse_record_data(data)

if ignore_rfc1918 and self.is_RFC1918_addr(o_data['remote'][0]):
return []
Expand Down
3 changes: 1 addition & 2 deletions normalizer/modules/dionaea_capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ class DionaeaCaptures(BaseNormalizer):
channels = ('dionaea.capture', 'dionaea.capture.anon', 'dionaea.captures')

def normalize(self, data, channel, submission_timestamp, ignore_rfc1918=True):
o_data = json.loads(data)

o_data = self.parse_record_data(data)
if ignore_rfc1918 and self.is_RFC1918_addr(o_data['saddr']):
return []

Expand Down
2 changes: 1 addition & 1 deletion normalizer/modules/dionaea_connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def normalize_ip(self, ip):
return ip

def normalize(self, data, channel, submission_timestamp, ignore_rfc1918=True):
o_data = json.loads(data)
o_data = self.parse_record_data(data)

o_data['remote_host'] = self.normalize_ip(o_data['remote_host'])
if ignore_rfc1918 and self.is_RFC1918_addr(o_data['remote_host']):
Expand Down
2 changes: 1 addition & 1 deletion normalizer/modules/elastichoney_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class ElastichoneyEvents(BaseNormalizer):
channels = ('elastichoney.events',)

def normalize(self, data, channel, submission_timestamp, ignore_rfc1918=True):
o_data = json.loads(data)
o_data = self.parse_record_data(data)

if ignore_rfc1918 and self.is_RFC1918_addr(o_data['source']):
return []
Expand Down
2 changes: 1 addition & 1 deletion normalizer/modules/glastopf_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(self):
self.dork_filter = '/headers|favicon.ico|w00tw00t|/robots.txt'

def normalize(self, data, channel, submission_timestamp, ignore_rfc1918=True):
o_data = json.loads(data)
o_data = self.parse_record_data(data)

if ignore_rfc1918 and self.is_RFC1918_addr(o_data['source'][0]):
return []
Expand Down
2 changes: 1 addition & 1 deletion normalizer/modules/kippo_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class KippoEvents(BaseNormalizer):
channels = ('kippo.sessions',)

def normalize(self, data, channel, submission_timestamp, ignore_rfc1918=True):
o_data = json.loads(data)
o_data = self.parse_record_data(data)

if ignore_rfc1918 and self.is_RFC1918_addr(o_data['peerIP']):
return []
Expand Down
2 changes: 1 addition & 1 deletion normalizer/modules/p0f_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def get_metadata(self, o_data, submission_timestamp):
return metadata

def normalize(self, data, channel, submission_timestamp, ignore_rfc1918=True):
o_data = json.loads(data)
o_data = self.parse_record_data(data)

if ignore_rfc1918 and self.is_RFC1918_addr(o_data['client_ip']):
return []
Expand Down
2 changes: 1 addition & 1 deletion normalizer/modules/shockpot_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class ShockpotEvents(BaseNormalizer):
channels = ('shockpot.events',)

def normalize(self, data, channel, submission_timestamp, ignore_rfc1918=True):
o_data = json.loads(data)
o_data = self.parse_record_data(data)

if ignore_rfc1918 and self.is_RFC1918_addr(o_data['source_ip']):
return []
Expand Down
2 changes: 1 addition & 1 deletion normalizer/modules/snort_alerts.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class Snort(BaseNormalizer):
channels = ('snort.alerts',)

def normalize(self, data, channel, submission_timestamp, ignore_rfc1918=True):
o_data = json.loads(data)
o_data = self.parse_record_data(data)

if ignore_rfc1918 and self.is_RFC1918_addr(o_data['source_ip']):
return []
Expand Down
2 changes: 1 addition & 1 deletion normalizer/modules/suricata_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class Suricata(BaseNormalizer):
channels = ('suricata.events',)

def normalize(self, data, channel, submission_timestamp, ignore_rfc1918=True):
o_data = json.loads(data)
o_data = self.parse_record_data(data)

if ignore_rfc1918 and self.is_RFC1918_addr(o_data['source_ip']):
return []
Expand Down
2 changes: 1 addition & 1 deletion normalizer/modules/thug_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class ThugFiles(BaseNormalizer):
channels = ('thug.files',)

def normalize(self, data, channel, submission_timestamp, ignore_rfc1918=True):
data = json.loads(data)
data = self.parse_record_data(data)
decoded = base64.b64decode(data['data'])
hashes = super(ThugFiles, self).generate_checksum_list(decoded)

Expand Down
2 changes: 1 addition & 1 deletion normalizer/modules/wordpot_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class WordpotEvents(BaseNormalizer):
channels = ('wordpot.events',)

def normalize(self, data, channel, submission_timestamp, ignore_rfc1918=True):
o_data = json.loads(data)
o_data = self.parse_record_data(data)

if ignore_rfc1918 and self.is_RFC1918_addr(o_data['source_ip']):
return []
Expand Down
5 changes: 5 additions & 0 deletions persistance/mnemodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import logging
import string
import time
import json
from datetime import datetime

from pymongo import MongoClient
Expand Down Expand Up @@ -112,6 +113,10 @@ def insert_hpfeed(self, ident, channel, payload):
payload = str(payload).encode('hex')
else:
payload = str(payload)
try:
payload = json.loads(payload)
except ValueError, e:
logger.warning('insert_hpfeed: payload was not JSON, storing as a string (ident=%s, channel=%s)', ident, channel)

timestamp = datetime.utcnow()
entry = {'channel': channel,
Expand Down

0 comments on commit e15337d

Please sign in to comment.