Skip to content

Commit

Permalink
Merge pull request #456 from robcza/csv-types
Browse files Browse the repository at this point in the history
ENH: generic csv parser: type_translation
  • Loading branch information
sebix committed Mar 15, 2016
2 parents 2df58ca + c5b928f commit 04ccc93
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 3 deletions.
30 changes: 27 additions & 3 deletions intelmq/bots/parsers/generic/parser_csv.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,20 @@
# -*- coding: utf-8 -*-
"""
Generic CSV parser
Parameters:
columns: string
delimiter: string
default_url_protocol: string
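type: string, classification.type to set when no column provides one
type_translation: string, JSON object mapping values found in the
classification.type column to the values stored in the event,
e.g. {"Testing": "malware"}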
"""
from __future__ import unicode_literals
import sys
from dateutil.parser import parse
import re
import json

from intelmq.lib import utils
from intelmq.lib.bot import Bot
Expand All @@ -19,6 +31,9 @@ def process(self):
return

columns = self.parameters.columns
type_translation = None
if hasattr(self.parameters, 'type_translation'):
type_translation = json.loads(self.parameters.type_translation)

raw_report = utils.base64_decode(report.get("raw"))
# ignore lines starting with #
Expand Down Expand Up @@ -69,13 +84,22 @@ def process(self):
'[1-9]?\d)){3}))|:)))(%.+)?').match(value).group()
elif key.endswith('.url') and '://' not in value:
value = self.parameters.default_url_protocol + value
elif key in ["classification.type"] and type_translation:
if value in type_translation:
value = type_translation[value]
elif not hasattr(self.parameters, 'type'):
continue

except:
self.logger.exception('Encountered error while parsing'
'line in csv file, ignoring.')
self.logger.warning('Encountered error while parsing line'
' in csv file, ignoring this row: ' +
repr(row))
continue
event.add(key, value)

event.add('classification.type', self.parameters.type)
if hasattr(self.parameters, 'type')\
and not event.contains("classification.type"):
event.add('classification.type', self.parameters.type)
event.add("raw", ",".join(row))

self.send_message(event)
Expand Down
66 changes: 66 additions & 0 deletions intelmq/tests/bots/parsers/generic/test_parser_csv2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import unittest

import intelmq.lib.test as test
from intelmq.bots.parsers.generic.parser_csv import \
GenericCsvParserBot

# Input fixture fed to the bot as its default input message.
# NOTE(review): "raw" looks like base64 of a tab-separated feed (the test
# configures delimiter "\t") holding one data row surrounded by '#' comment
# lines -- confirm by decoding.
EXAMPLE_REPORT = {"feed.name": "Sample CSV Feed",
"feed.url": "http://www.samplecsvthreatfeed.com/list",
"raw": "IyBuZXNteXNsIGphayBub2hhCjIwMTUtMTItMTQgMDQ6MTk6MDAJV"
"GVzdGluZwlSZWFsbHkgYmFkIGFjdG9yIHNpdGUgY29tbWVudAlOb3"
"RoaW5nCVVuaW1wb3J0YW50CXd3dy5jZW5ub3dvcmxkLmNvbS9QYXl"
"tZW50X0NvbmZpcm1hdGlvbi9QYXltZW50X0NvbmZpcm1hdGlvbi56"
"aXAJMTk4LjEwNS4yMjEuNTo4MAltYWlsNS5idWxscy51bmlzb25wb"
"GF0Zm9ybS5jb20JanVzdCBhbm90aGVyIGNvbW1lbnQKI2RhbHNpIG"
"5lc215c2w=",
"__type": "Report",
"time.observation": "2015-01-01T00:00:00+00:00",
}
# Expected Event that test_event compares against the bot's output.
# "classification.type" is "malware", the target value of the
# type_translation mapping configured in set_bot.
# NOTE(review): "raw" is presumably base64 of the comma-joined data row --
# confirm by decoding.
EXAMPLE_EVENT = {"feed.name": "Sample CSV Feed",
"feed.url": "http://www.samplecsvthreatfeed.com/list",
"__type": "Event",
"time.source": "2015-12-14T04:19:00+00:00",
"source.url": "http://www.cennoworld.com/Payment_Confirmation/"
"Payment_Confirmation.zip",
"source.ip": "198.105.221.5",
"source.fqdn": "mail5.bulls.unisonplatform.com",
"event_description.text": "Really bad actor site comment",
"classification.type": "malware",
"raw": "MjAxNS0xMi0xNCAwNDoxOTowMCxUZXN0aW5nLFJlYWxseSBiYWQgYW"
"N0b3Igc2l0ZSBjb21tZW50LE5vdGhpbmcsVW5pbXBvcnRhbnQsd3d3"
"LmNlbm5vd29ybGQuY29tL1BheW1lbnRfQ29uZmlybWF0aW9uL1BheW"
"1lbnRfQ29uZmlybWF0aW9uLnppcCwxOTguMTA1LjIyMS41OjgwLG1h"
"aWw1LmJ1bGxzLnVuaXNvbnBsYXRmb3JtLmNvbSxqdXN0IGFub3RoZX"
"IgY29tbWVudA==",
"time.observation": "2015-01-01T00:00:00+00:00",
}


class TestGenericCsvParserBot(test.BotTestCase, unittest.TestCase):
    """
    Test case running GenericCsvParserBot on a tab-delimited report with a
    type_translation mapping configured.
    """

    @classmethod
    def set_bot(cls):
        """ Set the bot class, its parameters and its input message. """
        # One entry per CSV column, in feed order; "__IGNORE__" marks the
        # columns that have no counterpart in the expected event.
        column_names = [
            "time.source",
            "classification.type",
            "event_description.text",
            "__IGNORE__",
            "__IGNORE__",
            "source.url",
            "source.ip",
            "source.fqdn",
            "__IGNORE__",
        ]
        cls.sysconfig = {
            "columns": column_names,
            "delimiter": "\t",
            # The parameter is passed as a JSON string, not as a dict.
            "type_translation": '{"Testing": "malware"}',
            "default_url_protocol": "http://",
        }
        cls.default_input_message = EXAMPLE_REPORT
        cls.bot_reference = GenericCsvParserBot

    def test_event(self):
        """ Run the bot and compare output message 0 with EXAMPLE_EVENT. """
        self.run_bot()
        self.assertMessageEqual(0, EXAMPLE_EVENT)


# Run the test case when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit 04ccc93

Please sign in to comment.