Skip to content

Commit

Permalink
Merge pull request #427 from robcza/universal_csv-parser
Browse files Browse the repository at this point in the history
Universal csv parser
  • Loading branch information
sebix committed Feb 3, 2016
2 parents ec0d997 + 36e5549 commit ee1a3f6
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 0 deletions.
12 changes: 12 additions & 0 deletions intelmq/bots/BOTS
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,18 @@
"module": "intelmq.bots.parsers.fraunhofer.parser_dga",
"parameters": {}
},
"Generic CSV": {
"description": "Generic CSV Parser is a generic bot that can be configured to parse different collected CSV files. Lines starting with the character # are ignored.",
"module": "intelmq.bots.parsers.generic.parser_csv",
"parameters": {
"columns": [
"",
"source.fqdn"
],
"delimiter": ",",
"type": "c&c"
}
},
"HpHosts": {
"description": "HpHosts Parser is the bot responsible to parse the report and sanitize the information.",
"module": "intelmq.bots.parsers.hphosts.parser",
Expand Down
Empty file.
90 changes: 90 additions & 0 deletions intelmq/bots/parsers/generic/parser_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import sys
from dateutil.parser import parse
from io import StringIO
import re

if sys.version_info[0] == 2:
import unicodecsv as csv
else:
import csv

from intelmq.lib import utils
from intelmq.lib.bot import Bot
from intelmq.lib.message import Event


class GenericCsvParserBot(Bot):
    """Parser bot that turns every row of a delimited text report into an Event.

    Settings read from ``self.parameters``:

    * ``columns`` -- event field names, paired positionally with the columns
      of each row; ``""`` and ``"__IGNORE__"`` mark columns that are skipped.
    * ``delimiter`` -- column separator handed to ``csv.reader``.
    * ``type`` -- value stored as ``classification.type`` on every event.
    """

    # Column names that mean "do not copy this column into the event".
    _SKIPPED_COLUMNS = ("__IGNORE__", "")
    # Fields whose value is run through dateutil before being stored.
    _TIME_FIELDS = ("time.source", "time.destination")
    # Fields from which only the leading address is kept.
    _IP_FIELDS = ("source.ip", "destination.ip")

    # regex from http://stackoverflow.com/a/23483979
    # matching ipv4/ipv6 IP within string
    #
    # Compiled once here instead of once per cell, and written with raw
    # strings so that ``\.``, ``\d``, ``\s`` and ``\-`` are not treated as
    # (invalid) string escape sequences.
    #
    # The alternatives of the *last* IPv4 octet are ordered longest-first.
    # Python tries alternatives left to right and nothing follows that group,
    # so the previous shortest-first order stopped after a single digit and
    # truncated e.g. "10.0.0.25" to "10.0.0.2".
    # NOTE(review): the host-name alternative below ends with the same
    # shortest-first ordering ("example.com" matches as "example.c"); it is
    # left as it was because only the IPv4 case is being fixed here.
    _IP_PATTERN = re.compile(
        r'(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])'
        r'\.){3}(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'
        r'|(([a-zA-Z]|[a-zA-Z][a-zA-Z0-9\-]*[a-zA-Z0-9])'
        r'\.)*([A-Za-z]|[A-Za-z][A-Za-z0-9\-]*[A-Za-z0-9])|'
        r'\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|('
        r'([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]'
        r'|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d'
        r'\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:['
        r'0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|['
        r'1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|'
        r':))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1'
        r',3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d'
        r'\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){'
        r'3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,'
        r'4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-'
        r'4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-'
        r'9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-'
        r'Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0'
        r'-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1'
        r'\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(('
        r'(:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4'
        r'}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2['
        r'0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]'
        r'{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2'
        r'[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|'
        r'[1-9]?\d)){3}))|:)))(%.+)?')

    def process(self):
        """Receive one report and send one event per CSV row it contains.

        The report's ``raw`` field is base64-decoded, lines starting with
        ``#`` and null bytes are removed, and the remainder is read with
        ``csv.reader``. For each row the configured columns are copied into
        a new event; time fields are normalised with dateutil and IP fields
        are reduced to the part matched by ``_IP_PATTERN``. A field whose
        value cannot be converted is left out of the event; the rest of the
        row is still processed. The report is acknowledged once all rows
        have been sent, or immediately when it is empty or has no ``raw``.
        """
        report = self.receive_message()

        if not report or not report.contains("raw"):
            self.acknowledge_message()
            return

        columns = self.parameters.columns
        delimiter = str(self.parameters.delimiter)

        raw_report = utils.base64_decode(report.value("raw"))
        # ignore lines starting with #
        raw_report = re.sub(r'(?m)^#.*\n?', '', raw_report)
        # ignore null bytes
        raw_report = raw_report.replace('\0', '')

        for row in csv.reader(StringIO(raw_report), delimiter=delimiter):
            event = Event(report)

            # zip() stops at the shorter sequence, so surplus columns or
            # surplus configured names are silently ignored.
            for key, value in zip(columns, row):
                if key in self._SKIPPED_COLUMNS:
                    continue

                if key in self._TIME_FIELDS:
                    try:
                        value = parse(value, fuzzy=True).isoformat()
                    except Exception:
                        # Deliberate best effort: an unparsable timestamp
                        # drops only this field. Unlike a bare ``except``
                        # this lets KeyboardInterrupt/SystemExit through.
                        continue
                    value += " UTC"
                elif key in self._IP_FIELDS:
                    found = self._IP_PATTERN.match(value)
                    if found is None:
                        # Nothing address-like at the start of the cell.
                        continue
                    value = found.group()

                event.add(key, value, sanitize=True)

            event.add('classification.type', self.parameters.type)
            # The stored raw line is always comma-joined, whatever the
            # input delimiter was.
            event.add("raw", ",".join(row), sanitize=True)

            self.send_message(event)

        self.acknowledge_message()


if __name__ == "__main__":
    # Script entry point: the first command-line argument is handed to the
    # bot's constructor, then the bot is started.
    bot_argument = sys.argv[1]
    bot = GenericCsvParserBot(bot_argument)
    bot.start()
Empty file.
65 changes: 65 additions & 0 deletions intelmq/tests/bots/parsers/generic/test_parser_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import unittest

import intelmq.lib.test as test
from intelmq.bots.parsers.generic.parser_csv import \
GenericCsvParserBot

# Input fixture: a report whose "raw" payload is base64 text. Decoded, it is
# a '#' comment line, one tab-separated data row, and a trailing '#' comment
# line, so it exercises both comment stripping and a non-comma delimiter.
EXAMPLE_REPORT = {"feed.name": "Sample CSV Feed",
                  "feed.url": "http://www.samplecsvthreatfeed.com/list",
                  "raw": "IyBuZXNteXNsIGphayBub2hhCjIwMTUtMTItMTQgMDQ6MTk6MDAJV"
                         "GVzdGluZwlSZWFsbHkgYmFkIGFjdG9yIHNpdGUgY29tbWVudAlOb3"
                         "RoaW5nCVVuaW1wb3J0YW50CXd3dy5jZW5ub3dvcmxkLmNvbS9QYXl"
                         "tZW50X0NvbmZpcm1hdGlvbi9QYXltZW50X0NvbmZpcm1hdGlvbi56"
                         "aXAJMTk4LjEwNS4yMjEuNTo4MAltYWlsNS5idWxscy51bmlzb25wb"
                         "GF0Zm9ybS5jb20JanVzdCBhbm90aGVyIGNvbW1lbnQKI2RhbHNpIG"
                         "5lc215c2w=",
                  "__type": "Report",
                  "time.observation": "2015-01-01T00:00:00+00:00",
                  }
# Expected output for the single data row of EXAMPLE_REPORT. Its "raw" is the
# base64 of that row re-joined with commas. The IP column in the input carries
# a ":80" suffix that is absent from the expected "source.ip".
EXAMPLE_EVENT = {"feed.name": "Sample CSV Feed",
                 "feed.url": "http://www.samplecsvthreatfeed.com/list",
                 "__type": "Event",
                 "time.source": "2015-12-14T04:19:00+00:00",
                 "source.url": "http://www.cennoworld.com/Payment_Confirmation/"
                               "Payment_Confirmation.zip",
                 "source.ip": "198.105.221.5",
                 "source.fqdn": "mail5.bulls.unisonplatform.com",
                 "event_description.text": "Really bad actor site comment",
                 "classification.type": "malware",
                 "raw": "MjAxNS0xMi0xNCAwNDoxOTowMCxUZXN0aW5nLFJlYWxseSBiYWQgYW"
                        "N0b3Igc2l0ZSBjb21tZW50LE5vdGhpbmcsVW5pbXBvcnRhbnQsd3d3"
                        "LmNlbm5vd29ybGQuY29tL1BheW1lbnRfQ29uZmlybWF0aW9uL1BheW"
                        "1lbnRfQ29uZmlybWF0aW9uLnppcCwxOTguMTA1LjIyMS41OjgwLG1h"
                        "aWw1LmJ1bGxzLnVuaXNvbnBsYXRmb3JtLmNvbSxqdXN0IGFub3RoZX"
                        "IgY29tbWVudA==",
                 "time.observation": "2015-01-01T00:00:00+00:00",
                 }


class TestGenericCsvParserBot(test.BotTestCase, unittest.TestCase):
    """Checks GenericCsvParserBot against one tab-separated sample report."""

    @classmethod
    def set_bot(cls):
        """Register the bot class, its input report and its configuration."""
        # One entry per input column, in order; "__IGNORE__" columns are
        # not copied into the event.
        column_names = [
            "time.source",
            "__IGNORE__",
            "event_description.text",
            "__IGNORE__",
            "__IGNORE__",
            "source.url",
            "source.ip",
            "source.fqdn",
            "__IGNORE__",
        ]
        bot_settings = {
            "columns": column_names,
            "delimiter": "\t",
            "type": "malware",
        }

        cls.bot_reference = GenericCsvParserBot
        cls.default_input_message = EXAMPLE_REPORT
        cls.sysconfig = bot_settings

    def test_event(self):
        """The first message produced must equal EXAMPLE_EVENT."""
        self.run_bot()
        self.assertMessageEqual(0, EXAMPLE_EVENT)


# Allow this test module to be run directly as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit ee1a3f6

Please sign in to comment.