From b0447aea601cc066cbbc3dc3a11e2bea609f3bdb Mon Sep 17 00:00:00 2001 From: National CyberSecurity Center Date: Thu, 18 Jun 2015 01:08:03 +0100 Subject: [PATCH] v1.0 is now available Former-commit-id: 9970d67d8fa862b323ce0e35d45bc71175d9aee6 --- CHANGELOG | 9 - CHANGELOG.md | 62 ++++ COPYRIGHT | 11 +- README.md | 114 ++++-- REQUIREMENTS | 13 + TODO.md | 128 ------- docs/README.md | 79 ---- docs/User-Guide.md | 5 +- intelmq/bin/intelmqctl | 64 +++- intelmq/bots/BOTS | 238 +++++------- intelmq/bots/collectors/hpfeeds/collector.py | 48 --- .../{ => bots/collectors/http}/__init__.py | 0 .../bots/collectors/http/collector_http.py | 27 ++ .../collectors/http/collector_http_stream.py | 25 ++ intelmq/bots/collectors/{url => http}/lib.py | 143 ++++--- ...ail-attach.py => collector_mail_attach.py} | 9 +- .../{mail-url.py => collector_mail_url.py} | 8 +- .../bots/collectors/microsoft_dcu/README.md | 25 -- .../collectors/microsoft_dcu/collector.py | 72 ---- intelmq/bots/collectors/url/collector.py | 15 - intelmq/bots/collectors/xmpp/collector.py | 1 - .../experts/abusix/{abusix.py => expert.py} | 57 +-- intelmq/bots/experts/abusix/lib.py | 20 +- .../{asnlookup => asn_lookup}/README.md | 0 .../asn_lookup}/__init__.py | 0 .../asnlookup.py => asn_lookup/expert.py} | 81 ++-- intelmq/bots/experts/contactdb/contactdb.py | 52 --- .../bots/experts/countrycodefilter/README.md | 5 - .../countrycodefilter/countrycodefilter.py | 33 -- intelmq/bots/experts/cymru/cymru.py | 92 ----- .../cymru_whois}/__init__.py | 0 intelmq/bots/experts/cymru_whois/expert.py | 83 +++++ .../experts/{cymru => cymru_whois}/lib.py | 26 +- intelmq/bots/experts/deduplicator/README.md | 6 +- intelmq/bots/experts/deduplicator/expert.py | 31 ++ intelmq/bots/experts/filter/README.md | 8 + .../url => experts/filter}/__init__.py | 0 intelmq/bots/experts/filter/expert.py | 42 +++ intelmq/bots/experts/geoip/__init__.py | 0 intelmq/bots/experts/geoip/geoip.py | 53 --- .../{geoip => maxmind_geoip}/README.md | 6 +- 
.../maxmind_geoip}/__init__.py | 0 intelmq/bots/experts/maxmind_geoip/expert.py | 50 +++ .../experts/ripencc/{ripencc.py => expert.py} | 63 ++-- intelmq/bots/experts/ripencc/lib.py | 7 +- intelmq/bots/experts/sanitizer/__init__.py | 0 intelmq/bots/experts/sanitizer/sanitizer.py | 91 ----- .../taxonomy/{taxonomy.py => expert.py} | 89 ++--- intelmq/bots/outputs/debug/__init__.py | 0 intelmq/bots/outputs/debug/debug.py | 17 - .../bots/outputs/file/{file.py => output.py} | 5 +- intelmq/bots/outputs/intelmailer/__init__.py | 0 .../bots/outputs/intelmailer/intelmailer.py | 24 -- intelmq/bots/outputs/logcollector/__init__.py | 0 .../outputs/mongodb/{mongodb.py => output.py} | 5 +- .../postgresql/{postgresql.py => output.py} | 16 +- .../asnlookup => outputs/tcp}/__init__.py | 0 .../logcollector.py => tcp/output.py} | 18 +- .../parsers/abusehelper/DO_NOT_USE_THIS_CODE | 0 intelmq/bots/parsers/abusehelper/__init__.py | 1 - .../bots/parsers/abusehelper/abusehelper.py | 73 ---- .../alienvault}/__init__.py | 0 intelmq/bots/parsers/alienvault/parser.py | 79 ++++ intelmq/bots/parsers/arbor/parser.py | 80 ++-- .../bots/parsers/bruteforceblocker/parser.py | 57 +-- intelmq/bots/parsers/certeu/__init__.py | 0 .../parsers/certeu/malicious-urls-parser.py | 45 --- .../parsers/dragonresearchgroup/parser-ssh.py | 45 --- .../parsers/dragonresearchgroup/parser-vnc.py | 45 --- .../parsers/dragonresearchgroup/parser_ssh.py | 51 +++ .../parsers/dragonresearchgroup/parser_vnc.py | 49 +++ intelmq/bots/parsers/dshield/parser.py | 49 --- intelmq/bots/parsers/dshield/parser_asn.py | 61 +++ intelmq/bots/parsers/generic/parser.py | 45 --- intelmq/bots/parsers/hpfeeds/parser.py | 33 -- .../hphosts}/__init__.py | 0 intelmq/bots/parsers/hphosts/parser.py | 53 +++ .../bots/parsers/malwaredomainlist/parser.py | 61 +-- .../malwarepatrol/parser-dansguardian.py | 36 -- .../malwarepatrol/parser_dansguardian.py | 42 +++ .../bots/parsers/microsoft_dcu/__init__.py | 0 intelmq/bots/parsers/microsoft_dcu/lib.py | 
349 ------------------ intelmq/bots/parsers/microsoft_dcu/parser.py | 41 -- intelmq/bots/parsers/openbl/parser.py | 53 +-- intelmq/bots/parsers/phishtank/parser.py | 66 ++-- intelmq/bots/parsers/shadowserver/__init__.py | 0 .../parsers/shadowserver/chargen-parser.py | 66 ---- .../bots/parsers/shadowserver/drone-parser.py | 81 ---- .../shadowserver/microsoft-sinkhole.py | 76 ---- .../bots/parsers/shadowserver/qotd-parser.py | 66 ---- .../bots/parsers/shadowserver/snmp-parser.py | 67 ---- .../__init__.py | 0 .../parsers/taichung_city_netflow/parser.py | 70 ++++ .../parsers/taichungcitynetflow/__init__.py | 1 - .../parsers/taichungcitynetflow/parser.py | 57 --- intelmq/bots/parsers/torexitnode | 42 --- intelmq/bots/parsers/vxvault/parser.py | 98 ++--- intelmq/bots/utils.py | 176 --------- intelmq/conf/defaults.conf | 17 + intelmq/conf/harmonization.conf | 130 +++++++ intelmq/conf/pipeline.conf | 80 +--- intelmq/conf/runtime.conf | 68 +--- intelmq/conf/startup.conf | 78 +--- intelmq/conf/system.conf | 12 +- intelmq/lib/__init__.py | 0 intelmq/lib/cache.py | 33 -- intelmq/lib/message.py | 99 ----- intelmq/lib/pipeline.py | 66 ---- intelmq/lib/utils.py | 62 ---- intelmq/tests/__init__.py | 0 intelmq/tests/bots/__init__.py | 0 intelmq/tests/bots/parsers/__init__.py | 0 intelmq/tests/bots/parsers/dcu/__init__.py | 0 intelmq/tests/bots/parsers/dcu/lib.py | 83 ----- scripts/prettyprint.sh | 4 + scripts/prettyprint.txt | 1 + scripts/vagrant/README.md | 47 +++ scripts/vagrant/Vagrantfile | 72 ++++ scripts/vagrant/bootstrap.sh | 49 +++ tests/README.md | 3 - .../error_generator}/deduplicator.py | 35 +- {intelmq/lib => tests/log-procedure}/bot.py | 123 +++--- tests/message-factory/code.py | 34 ++ tests/{ => old}/pipeline-rabbitmq.py | 0 tests/{ => old}/pipeline-redis.py | 0 tests/{ => old}/pipeline.py | 0 tests/{ => old}/redis.conf | 0 tests/scripts/cleanup.sh | 4 + tests/split-pipeline/bot.py | 208 +++++++++++ tests/threads-test-poc/base.py | 76 ++++ 
tests/threads-test-poc/base2.py | 154 ++++++++ tests/threads-test-poc/notes.txt | 13 + .../{threading-tests => threads-test}/bot.py | 0 .../conf/BOTS | 0 .../conf/pipeline.conf | 0 .../conf/runtime.conf | 0 .../conf/startup.conf | 0 .../conf/system.conf | 0 .../pipeline.py | 0 tests/translation_problems/harmonization.conf | 122 ++++++ tests/translation_problems/harmonization.py | 330 +++++++++++++++++ .../malwaredomainlist}/__init__.py | 0 .../malwaredomainlist/parser.py | 54 +++ .../phishtank}/__init__.py | 0 .../translation_problems/phishtank/parser.py | 51 +++ 145 files changed, 2991 insertions(+), 3407 deletions(-) delete mode 100644 CHANGELOG create mode 100644 CHANGELOG.md create mode 100644 REQUIREMENTS delete mode 100644 TODO.md delete mode 100644 docs/README.md delete mode 100644 intelmq/bots/collectors/hpfeeds/collector.py rename intelmq/{ => bots/collectors/http}/__init__.py (100%) create mode 100644 intelmq/bots/collectors/http/collector_http.py create mode 100644 intelmq/bots/collectors/http/collector_http_stream.py rename intelmq/bots/collectors/{url => http}/lib.py (92%) rename intelmq/bots/collectors/mail/{mail-attach.py => collector_mail_attach.py} (79%) rename intelmq/bots/collectors/mail/{mail-url.py => collector_mail_url.py} (80%) delete mode 100644 intelmq/bots/collectors/microsoft_dcu/README.md delete mode 100644 intelmq/bots/collectors/microsoft_dcu/collector.py delete mode 100644 intelmq/bots/collectors/url/collector.py delete mode 100644 intelmq/bots/collectors/xmpp/collector.py rename intelmq/bots/experts/abusix/{abusix.py => expert.py} (64%) rename intelmq/bots/experts/{asnlookup => asn_lookup}/README.md (100%) rename intelmq/bots/{collectors/hpfeeds => experts/asn_lookup}/__init__.py (100%) rename intelmq/bots/experts/{asnlookup/asnlookup.py => asn_lookup/expert.py} (66%) delete mode 100644 intelmq/bots/experts/contactdb/contactdb.py delete mode 100644 intelmq/bots/experts/countrycodefilter/README.md delete mode 100644 
intelmq/bots/experts/countrycodefilter/countrycodefilter.py delete mode 100644 intelmq/bots/experts/cymru/cymru.py rename intelmq/bots/{collectors/microsoft_dcu => experts/cymru_whois}/__init__.py (100%) create mode 100644 intelmq/bots/experts/cymru_whois/expert.py rename intelmq/bots/experts/{cymru => cymru_whois}/lib.py (85%) create mode 100644 intelmq/bots/experts/deduplicator/expert.py create mode 100644 intelmq/bots/experts/filter/README.md rename intelmq/bots/{collectors/url => experts/filter}/__init__.py (100%) create mode 100644 intelmq/bots/experts/filter/expert.py delete mode 100644 intelmq/bots/experts/geoip/__init__.py delete mode 100644 intelmq/bots/experts/geoip/geoip.py rename intelmq/bots/experts/{geoip => maxmind_geoip}/README.md (56%) rename intelmq/bots/{collectors/xmpp => experts/maxmind_geoip}/__init__.py (100%) create mode 100644 intelmq/bots/experts/maxmind_geoip/expert.py rename intelmq/bots/experts/ripencc/{ripencc.py => expert.py} (72%) delete mode 100644 intelmq/bots/experts/sanitizer/__init__.py delete mode 100644 intelmq/bots/experts/sanitizer/sanitizer.py rename intelmq/bots/experts/taxonomy/{taxonomy.py => expert.py} (82%) delete mode 100644 intelmq/bots/outputs/debug/__init__.py delete mode 100644 intelmq/bots/outputs/debug/debug.py rename intelmq/bots/outputs/file/{file.py => output.py} (82%) delete mode 100644 intelmq/bots/outputs/intelmailer/__init__.py delete mode 100644 intelmq/bots/outputs/intelmailer/intelmailer.py delete mode 100644 intelmq/bots/outputs/logcollector/__init__.py rename intelmq/bots/outputs/mongodb/{mongodb.py => output.py} (85%) rename intelmq/bots/outputs/postgresql/{postgresql.py => output.py} (70%) rename intelmq/bots/{experts/asnlookup => outputs/tcp}/__init__.py (100%) rename intelmq/bots/outputs/{logcollector/logcollector.py => tcp/output.py} (77%) delete mode 100644 intelmq/bots/parsers/abusehelper/DO_NOT_USE_THIS_CODE delete mode 100644 intelmq/bots/parsers/abusehelper/__init__.py delete mode 100644 
intelmq/bots/parsers/abusehelper/abusehelper.py rename intelmq/bots/{experts/contactdb => parsers/alienvault}/__init__.py (100%) create mode 100644 intelmq/bots/parsers/alienvault/parser.py delete mode 100644 intelmq/bots/parsers/certeu/__init__.py delete mode 100644 intelmq/bots/parsers/certeu/malicious-urls-parser.py delete mode 100644 intelmq/bots/parsers/dragonresearchgroup/parser-ssh.py delete mode 100644 intelmq/bots/parsers/dragonresearchgroup/parser-vnc.py create mode 100644 intelmq/bots/parsers/dragonresearchgroup/parser_ssh.py create mode 100644 intelmq/bots/parsers/dragonresearchgroup/parser_vnc.py delete mode 100644 intelmq/bots/parsers/dshield/parser.py create mode 100644 intelmq/bots/parsers/dshield/parser_asn.py delete mode 100644 intelmq/bots/parsers/generic/parser.py delete mode 100644 intelmq/bots/parsers/hpfeeds/parser.py rename intelmq/bots/{experts/countrycodefilter => parsers/hphosts}/__init__.py (100%) create mode 100644 intelmq/bots/parsers/hphosts/parser.py delete mode 100644 intelmq/bots/parsers/malwarepatrol/parser-dansguardian.py create mode 100644 intelmq/bots/parsers/malwarepatrol/parser_dansguardian.py delete mode 100644 intelmq/bots/parsers/microsoft_dcu/__init__.py delete mode 100644 intelmq/bots/parsers/microsoft_dcu/lib.py delete mode 100644 intelmq/bots/parsers/microsoft_dcu/parser.py delete mode 100755 intelmq/bots/parsers/shadowserver/__init__.py delete mode 100755 intelmq/bots/parsers/shadowserver/chargen-parser.py delete mode 100755 intelmq/bots/parsers/shadowserver/drone-parser.py delete mode 100755 intelmq/bots/parsers/shadowserver/microsoft-sinkhole.py delete mode 100755 intelmq/bots/parsers/shadowserver/qotd-parser.py delete mode 100644 intelmq/bots/parsers/shadowserver/snmp-parser.py rename intelmq/bots/parsers/{generic => taichung_city_netflow}/__init__.py (100%) create mode 100644 intelmq/bots/parsers/taichung_city_netflow/parser.py delete mode 100644 intelmq/bots/parsers/taichungcitynetflow/__init__.py delete mode 
100644 intelmq/bots/parsers/taichungcitynetflow/parser.py delete mode 100644 intelmq/bots/parsers/torexitnode delete mode 100644 intelmq/bots/utils.py create mode 100644 intelmq/conf/defaults.conf create mode 100644 intelmq/conf/harmonization.conf delete mode 100644 intelmq/lib/__init__.py delete mode 100644 intelmq/lib/cache.py delete mode 100644 intelmq/lib/message.py delete mode 100644 intelmq/lib/pipeline.py delete mode 100644 intelmq/lib/utils.py delete mode 100644 intelmq/tests/__init__.py delete mode 100644 intelmq/tests/bots/__init__.py delete mode 100644 intelmq/tests/bots/parsers/__init__.py delete mode 100644 intelmq/tests/bots/parsers/dcu/__init__.py delete mode 100644 intelmq/tests/bots/parsers/dcu/lib.py create mode 100644 scripts/prettyprint.sh create mode 100644 scripts/prettyprint.txt create mode 100644 scripts/vagrant/README.md create mode 100644 scripts/vagrant/Vagrantfile create mode 100755 scripts/vagrant/bootstrap.sh delete mode 100644 tests/README.md rename {intelmq/bots/experts/deduplicator => tests/error_generator}/deduplicator.py (54%) rename {intelmq/lib => tests/log-procedure}/bot.py (70%) create mode 100644 tests/message-factory/code.py rename tests/{ => old}/pipeline-rabbitmq.py (100%) rename tests/{ => old}/pipeline-redis.py (100%) rename tests/{ => old}/pipeline.py (100%) rename tests/{ => old}/redis.conf (100%) create mode 100644 tests/scripts/cleanup.sh create mode 100644 tests/split-pipeline/bot.py create mode 100644 tests/threads-test-poc/base.py create mode 100644 tests/threads-test-poc/base2.py create mode 100644 tests/threads-test-poc/notes.txt rename tests/{threading-tests => threads-test}/bot.py (100%) rename tests/{threading-tests => threads-test}/conf/BOTS (100%) rename tests/{threading-tests => threads-test}/conf/pipeline.conf (100%) rename tests/{threading-tests => threads-test}/conf/runtime.conf (100%) rename tests/{threading-tests => threads-test}/conf/startup.conf (100%) rename tests/{threading-tests => 
threads-test}/conf/system.conf (100%) rename tests/{threading-tests => threads-test}/pipeline.py (100%) create mode 100644 tests/translation_problems/harmonization.conf create mode 100644 tests/translation_problems/harmonization.py rename {intelmq/bots/parsers/hpfeeds => tests/translation_problems/malwaredomainlist}/__init__.py (100%) create mode 100644 tests/translation_problems/malwaredomainlist/parser.py rename {intelmq/bots/experts/cymru => tests/translation_problems/phishtank}/__init__.py (100%) create mode 100644 tests/translation_problems/phishtank/parser.py diff --git a/CHANGELOG b/CHANGELOG deleted file mode 100644 index 3e471b3dc..000000000 --- a/CHANGELOG +++ /dev/null @@ -1,9 +0,0 @@ - - -# 2015/06/02 (aaron) -Please simply add to the top of this file and do not forget to add a date - - -# 2015/06/02 (aaron) -We need a changelog file. This makes things really easier for others to read -what happened. diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..66cd3c05d --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,62 @@ +CHANGELOG +========== + +## 2015/06/03 (aaron) + + * fixed the license to AGPL in setup.py + * moved back the docs/* files from the wiki repo to docs/. See #205. + * added python-zmq as a setup requirment in UserGuide . See #206 + + + + +## When did this happen? (XXX FIXME) + +* improvements in pipeline + FILE: lib/pipeline.py + + - PipelineFactory to give possibility to easily add a new broker (Redis, ZMQ, etc..) + - Splitter feature: if this option is enable, will split the events in source queue to multiple destination queues + + + +* add different messages support + FILE: lib/message.py + + - the system is flexible to define a new type of message like 'tweet' without change anything in bot.py, pipeline.py. 
Just need to add a new class in message.py and harmonization.conf + + + +* add harmonization support + FILE: lib/harmonization.py + FILE: conf/harmonization.conf + + - in harmonization.conf is possible to define the fields of a specific message in json format. + - the harmonization.py has datatypes witch contains sanitize and validation methods that will make sure that the values are correct to be part of an event. + + + +* Error Handling + - multiple parameters in configuration which gives possibility to define how bot will handle some errors. Example of parameters: + "error_procedure" - retry or pass in case of error + "error_retry_delay" - time in seconds to retry + "error_max_retries" - number of retries + "error_log_message" - log or not the message in error log + "error_log_exception" - log or not the exception in error log + "error_dump_message" - log or not the message in dump log to be fixed and re-insert in pipeline + + + +* Exceptions + FILE: lib/exceptions.py + + - custom exceptions for IntelMQ + + + +* Defaults configrations + - new configuration file to specify the default parameters which will be apllied to all bots. Bots can overwrite the configurations. + + + +* New bots/feeds diff --git a/COPYRIGHT b/COPYRIGHT index 44671ba5d..4a56f0b23 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -9,6 +9,15 @@ Copyright by: Dalila Lima - Fyodor Y - Hélder Fernandes RCTS CERT - + Krystian Kochanowski - + Tiago Pedrosa < - > - + Josef Bernhart < - > - + ufoczek < - > - + robcza < - > - + Th4nat0s < - > - + Andre Pinheiro Dognaedis + Bruno Teixeira Dognaedis + Leandro Bragues Dognaedis + Ricardo Ferreira Dognaedis This code is licensed under the GNU AFFERO GENERAL PUBLIC LICENSE version 3. 
diff --git a/README.md b/README.md index 7cb3353a0..89859f231 100644 --- a/README.md +++ b/README.md @@ -1,35 +1,79 @@ -![IntelMQ](http://s28.postimg.org/r2av18a3x/Logo_Intel_MQ.png) - -**IntelMQ** is a solution for CERTs for collecting and processing security -feeds, pastebins, tweets using a message queue protocol. -It's a community driven initiative called **IHAP** (Incident Handling -Automation Project) which was conceptually designed -by European CERTs during several InfoSec events. Its main goal is to -give to incident responders an easy way to collect & process threat -intelligence thus improving the incident handling processes of CERTs. - -IntelMQ's design was influenced by -[AbuseHelper](https://bitbucket.org/clarifiednetworks/abusehelper), -however it was re-written from scratch and aims at: - -* Reduce the complexity of system administration -* Reduce the complexity of writing new bots for new data feeds -* Reduce the probability of events lost in all process with persistence functionality (even system crash) -* Use and improve the existing Data Harmonization Ontology -* Use JSON format for all messages -* Integration of the existing tools (AbuseHelper, CIF) -* Provide easy way to store data into Log Collectors like ElasticSearch, Splunk -* Provide easy way to create your own black-lists -* Provide easy communication with other systems via HTTP RESTFUL API - -It follows the following basic meta-guidelines: - -* Don't break simplicity - KISS -* Keep it open source - forever -* Strive for perfection while keeping a deadline - * Reduce complexity/avoid feature bloat - * Embrace unit testing - * Code readability: test with unexperienced programmers -* Communicate clearly - -Visit [Wiki page](https://github.com/certtools/intelmq/wiki/). +![IntelMQ](http://s28.postimg.org/r2av18a3x/Logo_Intel_MQ.png) + +**IntelMQ** is a solution for CERTs for collecting and processing security +feeds, pastebins, tweets using a message queue protocol. 
+It's a community driven initiative called **IHAP** (Incident Handling +Automation Project) which was conceptually designed +by European CERTs during several InfoSec events. Its main goal is to +give to incident responders an easy way to collect & process threat +intelligence thus improving the incident handling processes of CERTs. + +IntelMQ's design was influenced by +[AbuseHelper](https://bitbucket.org/clarifiednetworks/abusehelper), +however it was re-written from scratch and aims at: + +* Reduce the complexity of system administration +* Reduce the complexity of writing new bots for new data feeds +* Reduce the probability of events lost in all process with persistence functionality (even system crash) +* Use and improve the existing Data Harmonization Ontology +* Use JSON format for all messages +* Integration of the existing tools (AbuseHelper, CIF) +* Provide easy way to store data into Log Collectors like ElasticSearch, Splunk +* Provide easy way to create your own black-lists +* Provide easy communication with other systems via HTTP RESTFUL API + +It follows the following basic meta-guidelines: + +* Don't break simplicity - KISS +* Keep it open source - forever +* Strive for perfection while keeping a deadline + * Reduce complexity/avoid feature bloat + * Embrace unit testing + * Code readability: test with unexperienced programmers +* Communicate clearly + + +## Table of Contents + +1. [How to Install](#how-to-install) +2. [Developers Guide](#dev-guide) +3. [IntelMQ Manager](#control-platform) +4. [Incident Handling Automation Project](#incident-handling-automation-project) +5. [Data Harmonization](#data-harmonization) +6. [Licence](#licence) + + + +## How to Install + +See [UserGuide](docs/User-Guide.md). + + + +## Developers Guide + +See [Developers Guide](docs/Developers-Guide.md). + + +## IntelMQ Manager + +Check the [tool](https://github.com/certtools/intelmq-manager) and manage easily IntelMQ system. 
+ + + +## Incident Handling Automation Project + +* **URL:** http://www.enisa.europa.eu/activities/cert/support/incident-handling-automation +* **Mailing-list:** ihap@lists.trusted-introducer.org + + + +## Data Harmonization + +IntelMQ use the Data Harmonization. Check the following +[document](docs/Data-Harmonization.md). + + +## Licence + +This software is licensed under GNU Affero General Public License version 3 diff --git a/REQUIREMENTS b/REQUIREMENTS new file mode 100644 index 000000000..51be68f4e --- /dev/null +++ b/REQUIREMENTS @@ -0,0 +1,13 @@ +python-dateutil==1.5 +geoip2==0.5.1 +dnspython==1.11.1 +redis==2.10.3 +pymongo==2.7.1 +xmpppy==0.5.0rc1 +imbox==0.5.5 +unicodecsv==0.9.4 +pytz==2012d +psutil==2.1.1 +pyzmq==14.6.0 +pydns==2.3.6 +pycurl==7.19.0 diff --git a/TODO.md b/TODO.md deleted file mode 100644 index a12e2d035..000000000 --- a/TODO.md +++ /dev/null @@ -1,128 +0,0 @@ -## Release 1 - TODO - -(sorted by priority) - -* https://github.com/nicolasff/phpredis - -* Create n6 bot. - -* Create [Shadowserver Bots](http://www.shadowserver.org/wiki/pmwiki.php/Services/Downloads) - -* Create [Malware Hash Registry - Cymru](http://www.team-cymru.org/Services/MHR/#dns) - -* Write docs/eCSIRT-Taxonomy.md based on document from Don Stikvoort, named "Incident Class mkVint" - -* Check [RabbitMQ based fork of CIF v1](https://github.com/cikl), [Warden](https://csirt.cesnet.cz/Warden/Intro) and [Build STIX document from CIF output](http://tools.netsa.cert.org/script-cif2stix/index.html) - -* **[DONE]** General Bots Configuration (bots access the parameters via self.parameters.parameter_name, name can be easily changed) - -* **[DONE]** AbuseHelper Integration (xmpp bot to connect to room) - -* **[DONE]** Splunk Output Bot - -* **[DONE]** TeamCymru Expert - -* **[DONE]** Support multiple destination queues for each bot - -* **[DONE]** Write Bot Architecture (event, cache, utils relations etc...) - -* **[DONE]** Remove Cache initiallization from bot.py. 
Create a 'init' method to all class that ineherit from bot.py. - -* **[DONE]** Remove all self.parameters from bot.py - -* **[DONE]** Create configuration option for logs folder (change in bot.py) - -* **[DONE]** Add in each expert a line to test if the augment keys already exists - -* **[DONE]** Remove "observation time" for event in deduplicator bot - -* **[DONE]** Quality Control: perfomance tests - -* **[DONE]** Create a python package and use this setup.py [example](https://github.com/pika/pika/blob/master/setup.py) - -## Release 2 - TODO - -* Create RabbitMQ queue for bot management and state sharing - -* Create bots for all feeds that are not available in AbuseHelper (INTECO, CERT-EU, etc) - -* New bots: https://github.com/collectiveintel/cif-v1/tree/686c9ac9c34658ccc83d5b9fea97972eeaad0f29/cif-smrt/rules/etc - -* Improve encoding/decoding - -* Add 'requirements.txt' with fixed version numbers for each package -> pip install -r requirements.txt - -* Remove old queues depending of load configuration - -* Restruct repository and may be create python packages: -``` -/src - /intellib -> /usr/local/lib/python..... - /intelmq -> /opt/ - /bots - /confs -/docs -..files... -``` - -* ElasticSearch Output Bot - -* Evaluate how to initiallize bots from command-line: /etc/init.d/arbor-feed start ??? Or just with webinterface? - -* ContactDB Expert (Install ContactDB) - -* Trash Queue when bot do not recognize the message and cant do nothing - -* Create a management interface to give feed access to other people. Good example: HPFrieds/HPFeeds system. 
- -* Evaluate: Python 3 vs Python 2.7 - -* 1-N Queues: support other exchange types (current solution support fanout) - -* Pipeline Management with Web Interface - -* Bots Management with Web Interface - -* Video Tutorial - -* Monitoring with Web Interface - -* JSON Messages - * with tag field (report, abuse-event, pastebin, tweet) - * with syntax abusehelper event-like ( event.add("domain", "example.com") ) - * __hash__ method - * evaluate: http://danielmiessler.com/study/url_vs_uri/ - -* PostgreSQL (Reports, Events) - -* Bots to create: - * MalwareHash - * RT - * SSHKeyScan - * URL2Domain - * isOutCustomer - * CrowdStrike - * DomainTools - * VirusTotal - * Shodan - * PassiveDNS - * [HostFiles](https://bitbucket.org/slingris/abusehelper/src/d5a32b813593/abusehelper/contrib/hostfiles/?at=default) - * [MalwarePatrol](https://bitbucket.org/slingris/abusehelper/src/d5a32b813593/abusehelper/contrib/malwarepatrol/?at=default) - * [n6](https://bitbucket.org/slingris/abusehelper/src/d5a32b813593/abusehelper/contrib/n6/?at=default) - * [OpenBL](https://bitbucket.org/slingris/abusehelper/src/d5a32b813593/abusehelper/contrib/openbl/?at=default) - * [SQLBot](https://bitbucket.org/slingris/abusehelper/src/d5a32b813593/abusehelper/contrib/sqlbot/?at=default) - * [XSSed](https://bitbucket.org/slingris/abusehelper/src/d5a32b813593/abusehelper/contrib/xssed/?at=default) - * **[DONE]** [VXVault](https://bitbucket.org/slingris/abusehelper/src/d5a32b813593/abusehelper/contrib/vxvault/?at=default) - -## Feedback - -### Chris Horsley Feedback - -| Requirement | Reason | Possible Solutions | -|---|-----------------------------------------|------------------------------------------------------------|---|---| -| Can use lightweight threads | Minimal overhead for memory | http://gevent.org/ , https://pypi.python.org/pypi/greenlet | -| Can run distributed over network | Redundancy of system, may want to process event data on separate machine do data storage one day | http://python-rq.org/ , 
http://www.celeryproject.org/ , https://github.com/pika/pika | -| Can support a sequential processing pipeline | Need an API to schedule and execute remote data processing functions in order | http://www.celeryproject.org/ , Custom API | -| Can process events in parallel | Avoids slow, serial processing of a long series of events, parallel processing gives large speed benefits where there are network / database calls | http://python-rq.org/ http://www.celeryproject.org/ | -| Don't reinvent message serialization / scheduling | Remote function execution is a hard problem to solve (e.g. locking, scheduling), use a library that's well tested and supported | http://python-rq.org/ http://www.celeryproject.org/ | - diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 38e36d484..000000000 --- a/docs/README.md +++ /dev/null @@ -1,79 +0,0 @@ -![IntelMQ](http://s28.postimg.org/r2av18a3x/Logo_Intel_MQ.png) - -**IntelMQ** is a solution for CERTs for collecting and processing security -feeds, pastebins, tweets using a message queue protocol. -It's a community driven initiative called **IHAP** (Incident Handling -Automation Project) which was conceptually designed -by European CERTs during several InfoSec events. Its main goal is to -give to incident responders an easy way to collect & process threat -intelligence thus improving the incident handling processes of CERTs. 
- -IntelMQ's design was influenced by -[AbuseHelper](https://bitbucket.org/clarifiednetworks/abusehelper), -however it was re-written from scratch and aims at: - -* Reduce the complexity of system administration -* Reduce the complexity of writing new bots for new data feeds -* Reduce the probability of events lost in all process with persistence functionality (even system crash) -* Use and improve the existing Data Harmonization Ontology -* Use JSON format for all messages -* Integration of the existing tools (AbuseHelper, CIF) -* Provide easy way to store data into Log Collectors like ElasticSearch, Splunk -* Provide easy way to create your own black-lists -* Provide easy communication with other systems via HTTP RESTFUL API - -It follows the following basic meta-guidelines: - -* Don't break simplicity - KISS -* Keep it open source - forever -* Strive for perfection while keeping a deadline - * Reduce complexity/avoid feature bloat - * Embrace unit testing - * Code readability: test with unexperienced programmers -* Communicate clearly - - -## Table of Contents - -1. [How to Install](#how-to-install) -2. [Developers Guide](#dev-guide) -3. [IntelMQ Manager](#control-platform) -4. [Incident Handling Automation Project](#incident-handling-automation-project) -5. [Data Harmonization](#data-harmonization) -6. [Licence](#licence) - - - -## How to Install - -See [UserGuide](docs/User-Guide). - - - -## Developers Guide - -See [Developers Guide](docs/Developers-Guide). - - -## IntelMQ Manager - -Check the [tool](https://github.com/certtools/intelmq-manager) and manage easily IntelMQ system. - - - -## Incident Handling Automation Project - -* **URL:** http://www.enisa.europa.eu/activities/cert/support/incident-handling-automation -* **Mailing-list:** ihap@lists.trusted-introducer.org - - - -## Data Harmonization - -IntelMQ use the Data Harmonization. Check the following -[document](docs/Data-Harmonization). 
- - -## Licence - -This software is licensed under GNU Affero General Public License version 3 diff --git a/docs/User-Guide.md b/docs/User-Guide.md index 1d52d87a6..3cf62ddad 100644 --- a/docs/User-Guide.md +++ b/docs/User-Guide.md @@ -23,7 +23,7 @@ The following instructions assume: ### Install Dependencies ``` -apt-get install python-pip git build-essential python-dev redis-server +apt-get install python-pip git build-essential python-dev redis-server python-zmq ``` @@ -34,6 +34,7 @@ sudo su - git clone https://github.com/certtools/intelmq.git cd intelmq/ +pip install -r REQUIREMENTS python setup.py install useradd -d /opt/intelmq -U -s /bin/bash intelmq chmod -R 0770 /opt/intelmq @@ -170,4 +171,4 @@ pip uninstall intelmq # Frequently Asked Questions Consult the [FAQ](FAQ) -if you encountered any problem. \ No newline at end of file +if you encountered any problem. diff --git a/intelmq/bin/intelmqctl b/intelmq/bin/intelmqctl index e2f4d4dbf..a422c171b 100755 --- a/intelmq/bin/intelmqctl +++ b/intelmq/bin/intelmqctl @@ -10,11 +10,17 @@ import inspect import psutil import signal import argparse -from intelmq.lib.pipeline import Pipeline +from intelmq.lib.pipeline import PipelineFactory, Redis +from intelmq import DEFAULTS_CONF_FILE +from intelmq import STARTUP_CONF_FILE +from intelmq import PIPELINE_CONF_FILE +from intelmq import SYSTEM_CONF_FILE +from intelmq import RUNTIME_CONF_FILE +from intelmq.lib import utils + +class Parameters(object): + pass -SYSTEM_CONFIGURATION_FILE = "/opt/intelmq/etc/system.conf" -STARTUP_CONFIGURATION_FILE = "/opt/intelmq/etc/startup.conf" -PIPELINE_CONFIGURATION_FILE = "/opt/intelmq/etc/pipeline.conf" PIDDIR = "/opt/intelmq/var/run/" @@ -159,15 +165,37 @@ class IntelMQContoller(): RETURN_TYPE = self.args.type - fp = open(STARTUP_CONFIGURATION_FILE, 'r') + fp = open(STARTUP_CONF_FILE, 'r') self.startup = json.load(fp) - fp = open(SYSTEM_CONFIGURATION_FILE, 'r') + fp = open(SYSTEM_CONF_FILE, 'r') self.system = json.load(fp) if not 
os.path.exists(PIDDIR): os.makedirs(PIDDIR) + #stolen functions from the bot file + #this will not work with various instances of REDIS + self.parameters = Parameters() + self.load_system_configuration() + self.load_defaults_configuration() + + def load_system_configuration(self): + + config = utils.load_configuration(SYSTEM_CONF_FILE) + for option, value in config.iteritems(): + setattr(self.parameters, option, value) + + + def load_defaults_configuration(self): + + # Load defaults configuration section + + config = utils.load_configuration(DEFAULTS_CONF_FILE) + + for option, value in config.iteritems(): + setattr(self.parameters, option, value) + def auto_method_call(self, method): inspect_members = inspect.getmembers(self) @@ -307,17 +335,31 @@ class IntelMQContoller(): def list_queues(self): - fp = open(PIPELINE_CONFIGURATION_FILE, 'r') + fp = open(DEFAULTS_CONF_FILE, 'r') + conf = json.load(fp) + #pipeline_host = conf[""] + #pipeline_port = + #pipeline_db = + fp.close() + + fp = open(PIPELINE_CONF_FILE, 'r') conf = json.load(fp) + fp.close() + + source_queues = set() + destination_queues = set() - queues = set() for key, value in conf.iteritems(): if 'source-queue' in value: - queues.add(value['source-queue']) + source_queues.add(value['source-queue']) if 'destination-queues' in value: - queues.update(value['destination-queues']) + destination_queues.update(value['destination-queues']) + + pipeline = PipelineFactory.create(self.parameters) + pipeline.set_queues(source_queues, "source") + pipeline.connect() - pipeline = Pipeline() + queues = source_queues.union(destination_queues) counters = pipeline.count_queued_messages(queues) log_list_queus(counters) diff --git a/intelmq/bots/BOTS b/intelmq/bots/BOTS index fdc985fb6..39ec5a4a7 100644 --- a/intelmq/bots/BOTS +++ b/intelmq/bots/BOTS @@ -1,198 +1,175 @@ { "Collector": { "Taichung City Netflow": { - "module": "intelmq.bots.collectors.url.collector", + "module": "intelmq.bots.collectors.http.collector_http", 
"description": "Taichung City Netflow Collector is the bot responsible to get the report from source of information.", "parameters": { "url": "https://tc.edu.tw/net/netflow/lkout/recent/30", + "feed": "Taichung", "rate_limit": "3600" } }, - "HPFeeds": { - "module": "intelmq.bots.collectors.hpfeeds.collector", - "description": "HPFeeds Collector is the bot responsible to integrate HPFeeds solution with IntelMQ.", + "AnubisNetworks CyberFeed": { + "module": "intelmq.bots.collectors.http.collector_http_stream", + "description": "AnubisNetworks CyberFeed Collector is the bot responsible to get the report from source of information.", "parameters": { - "broker_port": "20000", - "broker_host": "127.0.0.1", - "ident": "hpfeeds", - "secret": "hpfeeds-secret", + "url": "http://prod.cyberfeed.net:8080/stream?key=< key >", + "feed": "AnubisNetworks", "rate_limit": "0" } }, "Arbor": { - "module": "intelmq.bots.collectors.url.collector", + "module": "intelmq.bots.collectors.http.collector_http", "description": "Arbor Collector is the bot responsible to get the report from source of information.", "parameters": { "url": "http://atlas-public.ec2.arbor.net/public/ssh_attackers", + "feed": "Arbor", "rate_limit": "3600" } }, - "BruteForceBlocker": { - "module": "intelmq.bots.collectors.url.collector", - "description": "BruteForceBlocker Collector is the bot responsible to get the report from source of information.", + "HpHosts": { + "module": "intelmq.bots.collectors.http.collector_http", + "description": "HPHost Collector is the bot responsible to get the report from source of information.", + "parameters": { + "url": "http://hosts-file.net/download/hosts.txt", + "feed": "HpHosts", + "rate_limit": "3600" + } + }, + "AlienVault": { + "module": "intelmq.bots.collectors.http.collector_http", + "description": "AlienVault Collector is the bot responsible to get the report from source of information.", + "parameters": { + "url": "https://reputation.alienvault.com/reputation.data", + 
"feed": "AlienVault", + "rate_limit": "3600" + } + }, + "Danger Rulez": { + "module": "intelmq.bots.collectors.http.collector_http", + "description": "Danger Rulez Collector is the bot responsible to get the report from source of information.", "parameters": { "url": "http://danger.rulez.sk/projects/bruteforceblocker/blist.php", + "feed": "Danger Rulez", "rate_limit": "3600" } }, "Dshield": { - "module": "intelmq.bots.collectors.url.collector", + "module": "intelmq.bots.collectors.http.collector_http", "description": "Dshield Collector is the bot responsible to get the report from source of information.", "parameters": { "url": "http://dshield.org/asdetailsascii.html?as=", + "feed": "DShield", "rate_limit": "3600" } }, - "Microsoft DCU": { - "module": "intelmq.bots.collectors.microsoft_dcu.collector", - "description": "Microsoft DCU collector fetches report from an azure account", - "parameters": { - "azure_account_name": "", - "azure_account_key": "", - "date": "yesterday or YYYY-MM-DD date or empty", - "rate_limit": "86400" - } - }, "VXVault": { - "module": "intelmq.bots.collectors.url.collector", + "module": "intelmq.bots.collectors.http.collector_http", "description": "VXVault Collector is the bot responsible to get the report from source of information.", "parameters": { "url": "http://vxvault.siri-urz.net/URL_List.php", + "feed": "VxVault", "rate_limit": "3600" } }, "Malware Domain List": { - "module": "intelmq.bots.collectors.url.collector", + "module": "intelmq.bots.collectors.http.collector_http", "description": "Malware Domain List Collector is the bot responsible to get the report from source of information.", "parameters": { "url": "http://www.malwaredomainlist.com/updatescsv.php", + "feed": "Malware Domain List", "rate_limit": "3600" } }, "Dragon Research Group SSH": { - "module": "intelmq.bots.collectors.url.collector", + "module": "intelmq.bots.collectors.http.collector_http", "description": "Dragon Research Group SSH Collector is the bot responsible 
to get the report from source of information.", "parameters": { "url": "http://dragonresearchgroup.org/insight/sshpwauth.txt", + "feed": "Dragon Research Group", "rate_limit": "3600" } }, "Dragon Research Group VNC": { - "module": "intelmq.bots.collectors.url.collector", + "module": "intelmq.bots.collectors.http.collector_http", "description": "Dragon Research Group VNC Collector is the bot responsible to get the report from source of information.", "parameters": { "url": "https://dragonresearchgroup.org/insight/vncprobe.txt", + "feed": "Dragon Research Group", "rate_limit": "3600" } }, "PhishTank": { - "module": "intelmq.bots.collectors.url.collector", + "module": "intelmq.bots.collectors.http.collector_http", "description": "PhishTank Collector is the bot responsible to get the report from source of information.", "parameters": { "url": "http://data.phishtank.com/data/< API KEY >/online-valid.csv", + "feed": "Phishtank", "rate_limit": "28800" } }, "MalwarePatrol Dans Guardian": { - "module": "intelmq.bots.collectors.url.collector", + "module": "intelmq.bots.collectors.http.collector_http", "description": "MalwarePatrol Dans Guardian Collector is the bot responsible to get the report from source of information.", "parameters": { "url": "https://lists.malwarepatrol.net/cgi/getfile?receipt=< API KEY >&product=8&list=dansguardian", + "feed": "MalwarePatrol", "rate_limit": "180000" } }, "OpenBL": { - "module": "intelmq.bots.collectors.url.collector", + "module": "intelmq.bots.collectors.http.collector_http", "description": "OpenBL Collector is the bot responsible to get the report from source of information.", "parameters": { "url": "http://www.openbl.org/lists/date_all.txt", + "feed": "OpenBL", "rate_limit": "43200" } - }, - "ShadowServer Drone Report": { - "module": "intelmq.bots.collectors.mail.mail-attach", - "description": "ShadowServer Drone Collector is the bot responsible to get the report from source of information.", - "parameters": { - "rate_limit": "3600", 
- "mail_host": "", - "mail_user": "", - "mail_password": "", - "mail_ssl": true, - "folder": "Inbox.shadowserver", - "subject_regex": "Shadowserver [^ ]+ Drone Report", - "attach_regex": "csv.zip", - "attach_unzip": true - } - }, - "ShadowServer Chargen Report": { - "module": "intelmq.bots.collectors.mail.mail-url", - "description": "ShadowServer Chargen Collector is the bot responsible to get the report from source of information.", - "parameters": { - "rate_limit": "3600", - "mail_host": "", - "mail_user": "", - "mail_password": "", - "mail_ssl": true, - "folder": "Inbox.shadowserver", - "subject_regex": "Shadowserver [^ ]+ Chargen Report", - "url_regex": "http://dl.shadowserver.org/[^ ]+" - } - }, - "CERT-EU Malicious URLs": { - "module": "intelmq.bots.collectors.mail.mail-attach", - "description": "CERT-EU Malicious URLs Collector is the bot responsible to get the report from source of information.", - "parameters": { - "rate_limit": "3600", - "mail_host": "", - "mail_user": "", - "mail_password": "", - "mail_ssl": true, - "folder": "Inbox.cert-eu", - "subject_regex": "Malicious URLs", - "attach_regex": "report.csv$", - "attach_unzip": false - } } }, "Parser": { "Taichung City Netflow": { - "module": "intelmq.bots.parsers.taichungcitynetflow.parser", + "module": "intelmq.bots.parsers.taichung_city_netflow.parser", "description": "Taichung City Netflow Parser is the bot responsible to parse the report and sanitize the information.", "parameters": { - "rate_limit": "0" + "error_log_message": false } }, "HPFeeds": { "module": "intelmq.bots.parsers.hpfeeds.parser", "description": "HPFeeds Parser is the bot responsible to parse the report and sanitize the information.", "parameters": { - "rate_limit": "0" } }, "Arbor": { "module": "intelmq.bots.parsers.arbor.parser", "description": "Arbor Parser is the bot responsible to parse the report and sanitize the information.", "parameters": { - "rate_limit": "0" + } + }, + "HpHosts": { + "module": 
"intelmq.bots.parsers.hphosts.parser", + "description": "HpHosts Parser is the bot responsible to parse the report and sanitize the information.", + "parameters": { + "error_log_message": false + } + }, + "AlienVault": { + "module": "intelmq.bots.parsers.alienvault.parser", + "description": "AlienVault Parser is the bot responsible to parse the report and sanitize the information.", + "parameters": { + "error_log_message": false } }, "BruteForceBlocker": { "module": "intelmq.bots.parsers.bruteforceblocker.parser", "description": "BruteForceBlocker Parser is the bot responsible to parse the report and sanitize the information.", "parameters": { - "rate_limit": "0" } }, "Dshield": { - "module": "intelmq.bots.parsers.dshield.parser", + "module": "intelmq.bots.parsers.dshield.parser_asn", "description": "Dshield Parser is the bot responsible to parse the report and sanitize the information.", - "parameters": { - "rate_limit": "0" - } - }, - "Microsoft DCU": { - "module": "intelmq.bots.parsers.microsoft_dcu.parser", - "description": "Parses output created by Microsoft DCU collector into IntelMQ format", "parameters": { } }, @@ -200,79 +177,68 @@ "module": "intelmq.bots.parsers.vxvault.parser", "description": "VXVault Parser is the bot responsible to parse the report and sanitize the information.", "parameters": { - "rate_limit": "0" } }, "Malware Domain List": { "module": "intelmq.bots.parsers.malwaredomainlist.parser", "description": "Malware Domain List Parser is the bot responsible to parse the report and sanitize the information.", "parameters": { - "rate_limit": "0" } }, "Dragon Research Group SSH": { - "module": "intelmq.bots.parsers.dragonresearchgroup.parser-ssh", + "module": "intelmq.bots.parsers.dragonresearchgroup.parser_ssh", "description": "Dragon Research Group SSH Parser is the bot responsible to parse the report and sanitize the information.", "parameters": { - "rate_limit": "0" } }, "Dragon Research Group VNC": { - "module": 
"intelmq.bots.parsers.dragonresearchgroup.parser-vnc", + "module": "intelmq.bots.parsers.dragonresearchgroup.parser_vnc", "description": "Dragon Research Group VNC Parser is the bot responsible to parse the report and sanitize the information.", "parameters": { - "rate_limit": "0" } }, "PhishTank": { "module": "intelmq.bots.parsers.phishtank.parser", "description": "PhishTank Parser is the bot responsible to parse the report and sanitize the information.", "parameters": { - "rate_limit": "0" } }, "MalwarePatrol Dans Guardian": { - "module": "intelmq.bots.parsers.malwarepatrol.parser-dansguardian", + "module": "intelmq.bots.parsers.malwarepatrol.parser_dansguardian", "description": "MalwarePatrol Dans Guardian Parser is the bot responsible to parse the report and sanitize the information.", "parameters": { - "rate_limit": "0" } }, "OpenBL": { "module": "intelmq.bots.parsers.openbl.parser", "description": "OpenBL Parser is the bot responsible to parse the report and sanitize the information.", "parameters": { - "rate_limit": "0" } }, "ShadowServer Drone": { - "module": "intelmq.bots.parsers.shadowserver.drone-parser", + "module": "intelmq.bots.parsers.shadowserver.parser_drone", "description": "ShadowServer Drone Parser is the bot responsible to parse the report and sanitize the information.", "parameters": { - "rate_limit": "0" } }, "ShadowServer Chargen": { - "module": "intelmq.bots.parsers.shadowserver.chargen-parser", + "module": "intelmq.bots.parsers.shadowserver.parser_chargen", "description": "ShadowServer Chargen Parser is the bot responsible to parse the report and sanitize the information.", "parameters": { - "rate_limit": "0" } }, "CERT-EU Malicious URLs": { - "module": "intelmq.bots.parsers.certeu.malicious-urls-parser", + "module": "intelmq.bots.parsers.certeu.parser_malicious_urls", "description": "CERT-EU Malicious URLs Parser is the bot responsible to parse the report and sanitize the information.", "parameters": { - "rate_limit": "0" } } }, "Expert": 
{ - "Cymru": { - "module": "intelmq.bots.experts.cymru.cymru", - "description": "Cymry (IP to ASN) is the bot responsible to add network information to the events (BGP, ASN, AS Name, Country, etc..).", + "Cymru Whois": { + "module": "intelmq.bots.experts.cymru_whois.expert", + "description": "Cymry Whois (IP to ASN) is the bot responsible to add network information to the events (BGP, ASN, AS Name, Country, etc..).", "parameters": { - "rate_limit": "0", "redis_cache_host": "127.0.0.1", "redis_cache_port": "6379", "redis_cache_db": "5", @@ -280,10 +246,9 @@ } }, "RIPENCC": { - "module": "intelmq.bots.experts.ripencc.ripencc", + "module": "intelmq.bots.experts.ripencc.expert", "description": "RIPENCC is the bot resposible to get the correspondent abuse contact from source IP and destination IP of the events", "parameters": { - "rate_limit": "0", "redis_cache_host": "127.0.0.1", "redis_cache_port": "6379", "redis_cache_db": "5", @@ -291,37 +256,33 @@ } }, "Abusix": { - "module": "intelmq.bots.experts.abusix.abusix", + "module": "intelmq.bots.experts.abusix.expert", "description": "Abusix is the bot resposible to get the correspondent abuse contact from source IP and destination IP of the events", "parameters": { - "rate_limit": "0", "redis_cache_host": "127.0.0.1", "redis_cache_port": "6379", "redis_cache_db": "5", "redis_cache_ttl": "86400" } }, - "GeoIP": { - "module": "intelmq.bots.experts.geoip.geoip", - "description": "GeoIP (MaxMind) is the bot responsible to add geolocation information to the events (Country, City, Longitude, Latitude, etc..)", + "MaxMind GeoIP": { + "module": "intelmq.bots.experts.maxmind_geoip.expert", + "description": "MaxMind GeoIP is the bot responsible to add geolocation information to the events (Country, City, Longitude, Latitude, etc..)", "parameters": { - "rate_limit": "0", - "database": "/opt/intelmq/var/lib/bots/geoip/GeoLite2-City.mmdb" + "database": "/opt/intelmq/var/lib/bots/maxmind_geoip/GeoLite2-City.mmdb" } }, "ASN Lookup": { 
- "module": "intelmq.bots.experts.asnlookup.asnlookup", + "module": "intelmq.bots.experts.asn_lookup.expert", "description": "ASN Lookup is the bot responsible to add ASN and BGP information from Route Views Project to the events.", "parameters": { - "rate_limit": "0", - "database": "/opt/intelmq/var/lib/bots/asnlookup/ipasn.dat" + "database": "/opt/intelmq/var/lib/bots/asn_lookup/ipasn.dat" } }, "Deduplicator": { - "module": "intelmq.bots.experts.deduplicator.deduplicator", + "module": "intelmq.bots.experts.deduplicator.expert", "description": "Deduplicator is the bot responsible to detect and remove deduplicated events.", "parameters": { - "rate_limit": "0", "redis_cache_host": "127.0.0.1", "redis_cache_port": "6379", "redis_cache_db": "6", @@ -329,51 +290,47 @@ } }, "Sanitizer": { - "module": "intelmq.bots.experts.sanitizer.sanitizer", + "module": "intelmq.bots.experts.sanitizer.expert", "description": "Sanitizer is the bot responsible to sanitize all events.", "parameters": { - "rate_limit": "0" } }, "Taxonomy": { - "module": "intelmq.bots.experts.taxonomy.taxonomy", + "module": "intelmq.bots.experts.taxonomy.expert", "description": "Taxonomy is the bot responsible to apply the eCSIRT Taxonomy to all events.", "parameters": { - "rate_limit": "0" } }, - "CountryCodeFilter": { - "module": "intelmq.bots.experts.countrycodefilter.countrycodefilter", - "description": "Filters out events which do not match a specific country code. 
Needs: the cymru expert.", + "Filter": { + "module": "intelmq.bots.experts.filter.expert", + "description": "Filters out events depending on bot parameters specification (filter_key, filter_value, filter_action)", "parameters": { - "countrycode": "", - "rate_limit": "0" + "filter_key": "", + "filter_value": "", + "filter_action": "" } } }, "Output": { "File": { - "module": "intelmq.bots.outputs.file.file", + "module": "intelmq.bots.outputs.file.output", "description": "File is the bot responsible to send events to a file.", "parameters": { - "file": "/opt/intelmq/var/lib/bots/file-output/events.txt", - "rate_limit": "0" + "file": "/opt/intelmq/var/lib/bots/file-output/events.txt" } }, - "LogCollector": { - "module": "intelmq.bots.outputs.logcollector.logcollector", - "description": "LogCollector is the bot responsible to send events to a logcollector (Splunk, ElasticSearch, etc..).", + "TCP": { + "module": "intelmq.bots.outputs.tcp.output", + "description": "TCP is the bot responsible to send events to a tcp port (Splunk, ElasticSearch, etc..).", "parameters": { - "rate_limit": "0", "ip": "", "port": "" } }, "MongoDB": { - "module": "intelmq.bots.outputs.mongodb.mongodb", + "module": "intelmq.bots.outputs.mongodb.output", "description": "MongoDB is the bot responsible to send events to a MongoDB database.", "parameters": { - "rate_limit": "0", "host": "", "port": "", "database": "", @@ -381,10 +338,9 @@ } }, "PostgreSQL": { - "module": "intelmq.bots.outputs.postgresql.postgresql", + "module": "intelmq.bots.outputs.postgresql.output", "description": "PostgreSQL is the bot responsible to send events to a PostgreSQL Database.", "parameters": { - "rate_limit": "0", "host": "", "port": "", "database": "intelmq-events", diff --git a/intelmq/bots/collectors/hpfeeds/collector.py b/intelmq/bots/collectors/hpfeeds/collector.py deleted file mode 100644 index ada4505bd..000000000 --- a/intelmq/bots/collectors/hpfeeds/collector.py +++ /dev/null @@ -1,48 +0,0 @@ -from 
intelmq.lib.bot import Bot, sys -import hpfeeds -import redis - -class HpfeedsBot(Bot): - - def process(self): - - self.logger.info('connecting to %s:%s %s %s' % (self.parameters.broker_host, - self.parameters.broker_port, - self.parameters.ident, - self.parameters.secret)) - - redis_host = '127.0.0.1' - redis_port = 6379 - redis_db = 2 - redis_queue = 'hpfeeds' - rc = redis.Redis(host=redis_host, port=redis_port, db=redis_db) - - while rc.llen(redis_queue) > 0: - mesg = rc.rpop(redis_queue) - self.send_message(mesg) - -# hpc = hpfeeds.new(self.parameters.broker_host, int(self.parameters.broker_port), self.parameters.ident, self.parameters.secret) -# hpc.subscribe(self.parameters.channels_subscribe) -# self.logger.info('connected to %s' % hpc.brokername) -# -# def on_message(identifier, channel, payload): -# decoded = '' -# try: decoded = json.loads(str(payload)) -# except: decoded = {'raw': payload} -# self.send_message(json.dumps(decoded)) -# self.logger.info('incoming message from {0} on channel {1}, length {2}'.format(identifier, channel, len(payload))) -# -# def on_error(payload): -# self.logger(' -> errormessage from server: {0}'.format(payload)) -# hpc.stop() -# -# -# hpc.s.settimeout(0.01) -# hpc.run(on_message, on_error) -# hpc.close() - - - -if __name__ == "__main__": - bot = HpfeedsBot(sys.argv[1]) - bot.start() diff --git a/intelmq/__init__.py b/intelmq/bots/collectors/http/__init__.py similarity index 100% rename from intelmq/__init__.py rename to intelmq/bots/collectors/http/__init__.py diff --git a/intelmq/bots/collectors/http/collector_http.py b/intelmq/bots/collectors/http/collector_http.py new file mode 100644 index 000000000..3a7b856f4 --- /dev/null +++ b/intelmq/bots/collectors/http/collector_http.py @@ -0,0 +1,27 @@ +from intelmq.lib.bot import Bot, sys +from intelmq.bots.collectors.http.lib import fetch_url +from intelmq.lib.message import Report + +class URLCollectorBot(Bot): + + def process(self): + self.logger.info("Downloading report from 
%s" % self.parameters.url) + raw_report = fetch_url( + self.parameters.url, + timeout = 60.0, + chunk_size = 16384, + http_proxy=self.parameters.http_proxy, + https_proxy=self.parameters.https_proxy + ) + self.logger.info("Report downloaded.") + + report = Report() + report.add("raw", raw_report, sanitize=True) + report.add("feed.name", self.parameters.feed, sanitize=True) + report.add("feed.url", self.parameters.url, sanitize=True) + self.send_message(report) + + +if __name__ == "__main__": + bot = URLCollectorBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/collectors/http/collector_http_stream.py b/intelmq/bots/collectors/http/collector_http_stream.py new file mode 100644 index 000000000..dd4243c3b --- /dev/null +++ b/intelmq/bots/collectors/http/collector_http_stream.py @@ -0,0 +1,25 @@ +import pycurl +from intelmq.lib.bot import Bot, sys +from intelmq.lib.message import Report + +class HTTPStreamCollectorBot(Bot): + + def init(self): + self.conn = pycurl.Curl() + self.conn.setopt(pycurl.URL, str(self.parameters.url)) + self.conn.setopt(pycurl.WRITEFUNCTION, self.on_receive) + + def process(self): + self.conn.perform() + + def on_receive(self, data): + for line in data.split('\n'): + report = Report() + report.add("raw", str(line), sanitize=True) + report.add("feed.name", self.parameters.feed, sanitize=True) + self.send_message(report) + + +if __name__ == "__main__": + bot = HTTPStreamCollectorBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/collectors/url/lib.py b/intelmq/bots/collectors/http/lib.py similarity index 92% rename from intelmq/bots/collectors/url/lib.py rename to intelmq/bots/collectors/http/lib.py index 24a947cac..a3c76c6b3 100644 --- a/intelmq/bots/collectors/url/lib.py +++ b/intelmq/bots/collectors/http/lib.py @@ -1,72 +1,71 @@ -import re -import ssl -import socket -import shutil -import httplib -import urllib2 -import StringIO -from urlparse import urlparse -from intelmq.lib.utils import decode - -def fetch_url(url, 
timeout=60.0, chunk_size=16384, http_proxy = None, https_proxy = None): - - if http_proxy and https_proxy: - proxy = urllib2.ProxyHandler({'http': http_proxy, 'https': https_proxy }) - opener = urllib2.build_opener(proxy) - urllib2.install_opener(opener) - - req = urllib2.urlopen(url, timeout = timeout) - iostring = StringIO.StringIO() - shutil.copyfileobj(req, iostring, chunk_size) - value = iostring.getvalue() - iostring.close() - return decode(value) - - - -''' - -# https://gist.github.com/zed/1347055 - -def fetch_url_ssl(url, key_file, cert_file, ca_file, timeout=60.0, chunk_size=16384): - regex = '([^:]+)(:([0-9]+))?' - url_parsed = urlparse(url) - - host_port = re.search(regex, url_parsed.netloc) - host = host_port.group(1) - port = host_port.group(3) - if not port: - port = 443 - - connection = HTTPSClientAuthConnection(host, port, key_file=key_file, cert_file=cert_file, ca_file=ca_file, timeout=60.0) - connection.request('GET', url_parsed.path) - - iostring = StringIO.StringIO() - shutil.copyfileobj(connection.getresponse(), iostring, chunk_size) - value = iostring.getvalue() - - iostring.close() - connection.close() - - return decode(value) - - - -class HTTPSClientAuthConnection(httplib.HTTPSConnection): - - def __init__(self, host, port, key_file, cert_file, ca_file, timeout=None): - httplib.HTTPSConnection.__init__(self, host, key_file=key_file, cert_file=cert_file) - self.key_file = key_file - self.cert_file = cert_file - self.ca_file = ca_file - self.timeout = timeout - self.cert_reqs = ssl.CERT_REQUIRED - - def connect(self): - sock = socket.create_connection((self.host, self.port), self.timeout) - - if not self.ca_file: - self.cert_reqs=ssl.CERT_NONE - - self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ca_certs=self.ca_file, cert_reqs=self.cert_reqs) -''' +import re +import ssl +import socket +import shutil +import httplib +import urllib2 +import StringIO +from urlparse import urlparse + +def fetch_url(url, timeout=60.0, 
chunk_size=16384, http_proxy = None, https_proxy = None): + + if http_proxy and https_proxy: + proxy = urllib2.ProxyHandler({'http': http_proxy, 'https': https_proxy }) + opener = urllib2.build_opener(proxy) + urllib2.install_opener(opener) + + req = urllib2.urlopen(url, timeout = timeout) + iostring = StringIO.StringIO() + shutil.copyfileobj(req, iostring, chunk_size) + value = iostring.getvalue() + iostring.close() + return value + + + +''' + +# https://gist.github.com/zed/1347055 + +def fetch_url_ssl(url, key_file, cert_file, ca_file, timeout=60.0, chunk_size=16384): + regex = '([^:]+)(:([0-9]+))?' + url_parsed = urlparse(url) + + host_port = re.search(regex, url_parsed.netloc) + host = host_port.group(1) + port = host_port.group(3) + if not port: + port = 443 + + connection = HTTPSClientAuthConnection(host, port, key_file=key_file, cert_file=cert_file, ca_file=ca_file, timeout=60.0) + connection.request('GET', url_parsed.path) + + iostring = StringIO.StringIO() + shutil.copyfileobj(connection.getresponse(), iostring, chunk_size) + value = iostring.getvalue() + + iostring.close() + connection.close() + + return value + + + +class HTTPSClientAuthConnection(httplib.HTTPSConnection): + + def __init__(self, host, port, key_file, cert_file, ca_file, timeout=None): + httplib.HTTPSConnection.__init__(self, host, key_file=key_file, cert_file=cert_file) + self.key_file = key_file + self.cert_file = cert_file + self.ca_file = ca_file + self.timeout = timeout + self.cert_reqs = ssl.CERT_REQUIRED + + def connect(self): + sock = socket.create_connection((self.host, self.port), self.timeout) + + if not self.ca_file: + self.cert_reqs=ssl.CERT_NONE + + self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ca_certs=self.ca_file, cert_reqs=self.cert_reqs) +''' diff --git a/intelmq/bots/collectors/mail/mail-attach.py b/intelmq/bots/collectors/mail/collector_mail_attach.py similarity index 79% rename from intelmq/bots/collectors/mail/mail-attach.py rename to 
intelmq/bots/collectors/mail/collector_mail_attach.py index 4819612d6..c22b501e5 100755 --- a/intelmq/bots/collectors/mail/mail-attach.py +++ b/intelmq/bots/collectors/mail/collector_mail_attach.py @@ -2,6 +2,7 @@ import imbox import zipfile from intelmq.lib.bot import Bot, sys +from intelmq.lib.message import Report from intelmq.bots.collectors.mail.lib import Mail class MailAttachCollectorBot(Bot): @@ -28,10 +29,14 @@ def process(self): if self.parameters.attach_unzip: zipped = zipfile.ZipFile(attach['content']) - report = zipped.read(zipped.namelist()[0]) + raw_report = zipped.read(zipped.namelist()[0]) else: - report = attach['content'].read() + raw_report = attach['content'].read() + report = Report() + report.add("raw", raw_report, sanitize=True) + report.add("feed.name", self.parameters.feed, sanitize=True) + self.send_message(report) mailbox.mark_seen(uid) diff --git a/intelmq/bots/collectors/mail/mail-url.py b/intelmq/bots/collectors/mail/collector_mail_url.py similarity index 80% rename from intelmq/bots/collectors/mail/mail-url.py rename to intelmq/bots/collectors/mail/collector_mail_url.py index 0edeb03a7..7bd36e8a8 100755 --- a/intelmq/bots/collectors/mail/mail-url.py +++ b/intelmq/bots/collectors/mail/collector_mail_url.py @@ -3,6 +3,7 @@ from intelmq.lib.bot import Bot, sys from intelmq.bots.collectors.mail.lib import Mail from intelmq.bots.collectors.url.lib import fetch_url +from intelmq.lib.message import Report class MailURLCollectorBot(Bot): @@ -24,9 +25,12 @@ def process(self): url = match.group() self.logger.info("Downloading report from %s" % url) - report = fetch_url(url, timeout = 60.0, chunk_size = 16384) + raw_report = fetch_url(url, timeout = 60.0, chunk_size = 16384) self.logger.info("Report downloaded.") - + + report = Report() + report.add("raw", raw_report, sanitize=True) + report.add("feed.name", self.parameters.feed, sanitize=True) self.send_message(report) mailbox.mark_seen(uid) diff --git 
a/intelmq/bots/collectors/microsoft_dcu/README.md b/intelmq/bots/collectors/microsoft_dcu/README.md deleted file mode 100644 index cfb5e60b6..000000000 --- a/intelmq/bots/collectors/microsoft_dcu/README.md +++ /dev/null @@ -1,25 +0,0 @@ - -# Microsoft DCU collector - -## Config parameters - -### Description - * rate_limit: set this to 1 day - * azure_account_name: the login for azure (Microsoft's cloud) - * azure_account_key : a base64 encoded key. Microsoft will provide this for you - * date: what blob to fetch from the Azure cloud. Each blob of data is in a container with a specific date. -Here you can either specify ```yesterday``` or a fixed date. If the value is null, all data will be fetched! -Please note that this can take very long for a year of data. Specifying a fixed date makes sense for debugging. -In most cases in production, you will want to specify ```yesterday```. - * rate_limit: the usual rate limiting parameter. Please note that this must be 1 day (in seconds 86400 seconds) - - - -### Example - "microsoft-dcu-collector": { - "azure_account_name": "azureaccountname", - "azure_account_key": "ABCDEF01234567890abcdef42==", - "date": "yesterday", - "rate_limit": 86400 - } - diff --git a/intelmq/bots/collectors/microsoft_dcu/collector.py b/intelmq/bots/collectors/microsoft_dcu/collector.py deleted file mode 100644 index 8ed2a5d1a..000000000 --- a/intelmq/bots/collectors/microsoft_dcu/collector.py +++ /dev/null @@ -1,72 +0,0 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils - -from azure.storage import BlobService - -import gzip -import StringIO -import datetime - -from urlparse import urlparse - - -class DCUCollectorBot(Bot): - """ - This IntelMQ collector is for getting a blob - from an azure account (Microsoft dcu). - It opens the account and reads all containers. 
- """ - - def process(self): - account_name = self.parameters.azure_account_name - account_key = self.parameters.azure_account_key - - blob_service = BlobService(account_name, account_key, protocol="https") - proxy_setting = self.parameters.https_proxy or "" - date_setting = self.parameters.date or "" - date = None - - if date_setting: - if date_setting != "yesterday": - date = datetime.datetime.strptime(date_setting, "%Y-%m-%d").date() # for debbuging (probably) - elif date_setting == "yesterday": - date = datetime.date.today() - datetime.timedelta(days=1) # for normal usage - - proxy_url = "https://" + proxy_setting if proxy_setting.find("https://") == -1 else proxy_setting - proxy_options = urlparse(proxy_url) - - if date: - self.logger.info("Fetching for date: %s (%s)" % (date, date_setting)) - else: - self.logger.info("No 'date' was specified, fetching ALL") - - if proxy_options.hostname: - self.logger.info("Using https proxy(host=%s, port=%s)" % (proxy_options.hostname, proxy_options.port)) - blob_service.set_proxy(host=proxy_options.hostname, port=proxy_options.port) - else: - if proxy_setting: - self.logger.info("Using NO proxy, couldn't use 'https_proxy' it was: %s" % proxy_setting) - else: - self.logger.info("Using NO proxy, 'https_proxy' was empty") - - for container in blob_service.list_containers(): - container_name = container.name - if container_name == "heartbeat": - continue - - if date and (not container_name == "processed-" + str(date)): - self.logger.info("IGNORING container '%s' didn't match date selection" % container_name) - continue - - for blob in blob_service.list_blobs(container_name): - self.logger.info("Fetching blob %s in container %s" % (container_name, blob.name)) - data = blob_service.get_blob(container_name, blob.name) - cs = StringIO.StringIO(data) - report = gzip.GzipFile(fileobj=cs).read() - - self.send_message(report) - -if __name__ == "__main__": - bot = DCUCollectorBot(sys.argv[1]) - bot.start() diff --git 
a/intelmq/bots/collectors/url/collector.py b/intelmq/bots/collectors/url/collector.py deleted file mode 100644 index 10edd5447..000000000 --- a/intelmq/bots/collectors/url/collector.py +++ /dev/null @@ -1,15 +0,0 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.bots.collectors.url.lib import fetch_url - -class URLCollectorBot(Bot): - - def process(self): - self.logger.info("Downloading report from %s" % self.parameters.url) - report = fetch_url(self.parameters.url, timeout = 60.0, chunk_size = 16384, http_proxy=self.parameters.http_proxy, https_proxy=self.parameters.https_proxy) - self.logger.info("Report downloaded.") - self.send_message(report) - - -if __name__ == "__main__": - bot = URLCollectorBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/collectors/xmpp/collector.py b/intelmq/bots/collectors/xmpp/collector.py deleted file mode 100644 index 03e118560..000000000 --- a/intelmq/bots/collectors/xmpp/collector.py +++ /dev/null @@ -1 +0,0 @@ -''' TBD ''' \ No newline at end of file diff --git a/intelmq/bots/experts/abusix/abusix.py b/intelmq/bots/experts/abusix/expert.py similarity index 64% rename from intelmq/bots/experts/abusix/abusix.py rename to intelmq/bots/experts/abusix/expert.py index 7b33e2d3b..eef8e17aa 100644 --- a/intelmq/bots/experts/abusix/abusix.py +++ b/intelmq/bots/experts/abusix/expert.py @@ -1,27 +1,30 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.bots.experts.abusix.lib import Abusix - -''' -Reference: https://abusix.com/contactdb.html -RIPE abuse contacts resolving through DNS TXT queries -''' - -class AbusixExpertBot(Bot): - - def process(self): - - event = self.receive_message() - - for key in ['source_','destination_']: - if event.contains(key + "ip"): - ip = event.value(key + "ip") - email = Abusix.query(ip) - if email: - event.add(key + "abuse_contact", email) - - self.send_message(event) - self.acknowledge_message() - -if __name__ == "__main__": - bot = AbusixExpertBot(sys.argv[1]) - bot.start() +from 
intelmq.lib.bot import Bot, sys +from intelmq.bots.experts.abusix.lib import Abusix + +''' +Reference: https://abusix.com/contactdb.html +RIPE abuse contacts resolving through DNS TXT queries +''' + + +class AbusixExpertBot(Bot): + + def process(self): + event = self.receive_message() + + for key in ['source.', 'destination.']: + ip_key = key + "ip" + if event.contains(ip_key): + ip = event.value(ip_key) + email = Abusix.query(ip) + if email: + abuse_contact_key = key + "abuse_contact" + event.add(abuse_contact_key, email) + + self.send_message(event) + self.acknowledge_message() + + +if __name__ == "__main__": + bot = AbusixExpertBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/experts/abusix/lib.py b/intelmq/bots/experts/abusix/lib.py index 8b476a8b7..c493a1a88 100755 --- a/intelmq/bots/experts/abusix/lib.py +++ b/intelmq/bots/experts/abusix/lib.py @@ -1,21 +1,25 @@ import re -import dns.resolver +import dns.resolver + +QUERY_TEMPLATE = "%s.%s.%s.%s.abuse-contacts.abusix.org" +REGEX = r"[^@]+@[^@]+\.[^@]+" + class Abusix(): @staticmethod def query(ip): - ipbytes = ip.split('.') - if len(ipbytes) != 4: - return None - - query = ipbytes[3] + '.' + ipbytes[2] + '.' + ipbytes[1] + '.' 
+ ipbytes[0] + '.abuse-contacts.abusix.org' + octets = ip.split('.') + if len(octets) != 4: + return None + + query = QUERY_TEMPLATE % (octets[3], octets[2], octets[1], octets[0]) try: response = dns.resolver.query(query, 'TXT') - if len(response) >= 1 and re.match(r"[^@]+@[^@]+\.[^@]+", str(response[0])): - return str(response[0]).replace("\"", "") + if len(response) >= 1 and re.match(REGEX, str(response[0])): + return str(response[0]).replace("\"", "") else: return None except: diff --git a/intelmq/bots/experts/asnlookup/README.md b/intelmq/bots/experts/asn_lookup/README.md similarity index 100% rename from intelmq/bots/experts/asnlookup/README.md rename to intelmq/bots/experts/asn_lookup/README.md diff --git a/intelmq/bots/collectors/hpfeeds/__init__.py b/intelmq/bots/experts/asn_lookup/__init__.py similarity index 100% rename from intelmq/bots/collectors/hpfeeds/__init__.py rename to intelmq/bots/experts/asn_lookup/__init__.py diff --git a/intelmq/bots/experts/asnlookup/asnlookup.py b/intelmq/bots/experts/asn_lookup/expert.py similarity index 66% rename from intelmq/bots/experts/asnlookup/asnlookup.py rename to intelmq/bots/experts/asn_lookup/expert.py index 9140c25f0..9f06abc82 100644 --- a/intelmq/bots/experts/asnlookup/asnlookup.py +++ b/intelmq/bots/experts/asn_lookup/expert.py @@ -1,38 +1,43 @@ -import pyasn -from intelmq.lib.bot import Bot, sys - -class ASNLookupExpertBot(Bot): - - def init(self): - try: - self.database = pyasn.pyasn(self.parameters.database) - except IOError: - self.logger.error("pyasn data file does not exist or could not be accessed in '%s'" % self.parameters.database) - self.logger.error("Read 'bots/experts/asnlookup/README' and follow the procedure") - self.stop() - - def process(self): - event = self.receive_message() - - keys = ["source_%s", "destination_%s"] - - for key in keys: - ip = event.value(key % "ip") - - if not ip: - continue - - info = self.database.lookup(ip) - - if info: - if info[0]: - event.update(key % "asn", 
unicode(info[0])) - if info[1]: - event.update(key % "bgp_prefix", unicode(info[1])) - - self.send_message(event) - self.acknowledge_message() - -if __name__ == "__main__": - bot = ASNLookupExpertBot(sys.argv[1]) - bot.start() +import pyasn +from intelmq.lib.bot import Bot, sys + + +class ASNLookupExpertBot(Bot): + + def init(self): + try: + self.database = pyasn.pyasn(self.parameters.database) + except IOError: + self.logger.error("pyasn data file does not exist or could not be accessed in '%s'" % self.parameters.database) + self.logger.error("Read 'bots/experts/asnlookup/README' and follow the procedure") + self.stop() + + def process(self): + event = self.receive_message() + + for key in ["source.", "destination."]: + + ip_key = key + "ip" + asn_key = key + "asn" + bgp_key = key + "bgp_prefix" + + if not event.contains(ip_key): + continue + + ip = event.value(ip_key) + + info = self.database.lookup(ip) + + if info: + if info[0]: + event.add(asn_key, unicode(info[0]), sanitize=True, force=True) + if info[1]: + event.add(bgp_key, unicode(info[1]), sanitize=True, force=True) + + self.send_message(event) + self.acknowledge_message() + + +if __name__ == "__main__": + bot = ASNLookupExpertBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/experts/contactdb/contactdb.py b/intelmq/bots/experts/contactdb/contactdb.py deleted file mode 100644 index 8a38e3315..000000000 --- a/intelmq/bots/experts/contactdb/contactdb.py +++ /dev/null @@ -1,52 +0,0 @@ -''' -DEPRECATED | DEPRECATED | DEPRECATED | -DEPRECATED | DEPRECATED | DEPRECATED | -DEPRECATED | DEPRECATED | DEPRECATED | -DEPRECATED | DEPRECATED | DEPRECATED | -DEPRECATED | DEPRECATED | DEPRECATED | -DEPRECATED | DEPRECATED | DEPRECATED | -DEPRECATED | DEPRECATED | DEPRECATED | - - - -import sys - -from intelmq.lib.bot import * -from intelmq.lib.utils import * -from intelmq.lib.message import * - -CONTACTDB_LOCATION = 'http://contactdb.cert.pt:8000/' -MINIMUM_BGP_PREFIX_IPV4 = 24 -MINIMUM_BGP_PREFIX_IPV6 = 120 - 
-def convert_contactdb_result(result): - pass - -class ContactDBExpertBot(Bot): - - def process(self): - event = self.receive_message() - if event: - ip = event.value("ip") - - # WRITE A SPECIFIC GENERIC UTIL for this kind of functionality (Cymru, ContactDB use it ) - (int_ip, ip_size, minimum_bgp) = ipstr_to_int(ip) - binstr_ip = int_to_binstr(int_ip)[:minimum_bgp] - - result = self.cache.get(binstr_ip): - if result is None: - result = self.get_cache_result(ip) - self.cache.set(binstr_ip, result) - - event.add("contactdb_entity", result['entity']['name']) - - self.send_message(event) - self.acknowledge_message() - - def get_cache_result(self, ip): - import urllib2 - -if __name__ == "__main__": - bot = ContactDBExpertBot(sys.argv[1]) - bot.start() -''' \ No newline at end of file diff --git a/intelmq/bots/experts/countrycodefilter/README.md b/intelmq/bots/experts/countrycodefilter/README.md deleted file mode 100644 index eac8a00e3..000000000 --- a/intelmq/bots/experts/countrycodefilter/README.md +++ /dev/null @@ -1,5 +0,0 @@ -### Country Code filter - -A simple filter. will look for a key of name "source_cymru_cc" and if found, will compare if it matches the configured country code (for example: "PT") -All other events will not be passed on. - diff --git a/intelmq/bots/experts/countrycodefilter/countrycodefilter.py b/intelmq/bots/experts/countrycodefilter/countrycodefilter.py deleted file mode 100644 index 83ecdf192..000000000 --- a/intelmq/bots/experts/countrycodefilter/countrycodefilter.py +++ /dev/null @@ -1,33 +0,0 @@ -from copy import copy -from intelmq.lib.bot import Bot, sys -from intelmq.lib.cache import Cache -from intelmq.lib.message import Event - - -class CountryCodeFilterBot(Bot): - - def init(self): - if not self.parameters.countrycode: - self.cc = self.parameters.countrycode - self.logger.warn("no country code found. countrycode_filter = %s" % self.parameters.countrycode) - self.stop() - else: - self.logger.info("country code found. 
countrycode_filter = %s" % self.parameters.countrycode) - - def process(self): - message = self.receive_message() - - if message: - - # Event deduplication - if isinstance(message, Event): - cc = message.contains("source_cymru_cc") - if ( cc == self.cc ): - self.logger.debug("country code found! country = %s" % (cc)) - self.send_message(message) - self.acknowledge_message() - - -if __name__ == "__main__": - bot = CountryCodeFilterBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/experts/cymru/cymru.py b/intelmq/bots/experts/cymru/cymru.py deleted file mode 100644 index 5b14bffc8..000000000 --- a/intelmq/bots/experts/cymru/cymru.py +++ /dev/null @@ -1,92 +0,0 @@ -import json -from intelmq.lib.bot import Bot, sys -from intelmq.lib.cache import Cache -from intelmq.bots import utils -from intelmq.bots.experts.cymru.lib import Cymru - -MINIMUM_BGP_PREFIX_IPV4 = 24 -MINIMUM_BGP_PREFIX_IPV6 = 128 # FIXME - -class CymruExpertBot(Bot): - - def init(self): - self.cache = Cache( - self.parameters.redis_cache_host, - self.parameters.redis_cache_port, - self.parameters.redis_cache_db, - self.parameters.redis_cache_ttl - ) - - - def process(self): - event = self.receive_message() - - if not event: - self.acknowledge_message() - return - - keys = ["source_%s", "destination_%s"] - - for key in keys: - ip = event.value(key % "ip") - - if not ip: - self.send_message(event) - self.acknowledge_message() - return - - elif utils.is_ipv4(ip): - ip_version = 4 - ip_integer = utils.ip_to_int(ip) - cache_key = bin(ip_integer)[2 : MINIMUM_BGP_PREFIX_IPV4 + 2] - - elif utils.is_ipv6(ip): - ip_version = 6 - ip_integer = utils.ip_to_int(ip) - cache_key = bin(ip_integer)[2 : MINIMUM_BGP_PREFIX_IPV6 + 2] - - else: - self.send_message(event) - self.acknowledge_message() - return - - - result_json = self.cache.get(cache_key) - - if result_json: - result = json.loads(result_json) - else: - result = Cymru.query(ip, ip_version) - result_json = json.dumps(result) - self.cache.set(cache_key, 
result_json) - - if "asn" in result: - event.clear(key % 'asn') - event.add(key % 'asn', result['asn']) - - if "bgp_prefix" in result: - event.clear(key % 'bgp_prefix') - event.add(key % 'bgp_prefix', result['bgp_prefix']) - - if "registry" in result: - event.clear(key % 'registry') - event.add(key % 'registry', result['registry']) - - if "allocated" in result: - event.clear(key % 'allocated') - event.add(key % 'allocated', result['allocated']) - - if "as_name" in result: - event.clear(key % 'as_name') - event.add(key % 'as_name', result['as_name']) - - if "cc" in result: - event.clear(key % 'cymru_cc') - event.add(key % 'cymru_cc', result['cc']) - - self.send_message(event) - self.acknowledge_message() - -if __name__ == "__main__": - bot = CymruExpertBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/collectors/microsoft_dcu/__init__.py b/intelmq/bots/experts/cymru_whois/__init__.py similarity index 100% rename from intelmq/bots/collectors/microsoft_dcu/__init__.py rename to intelmq/bots/experts/cymru_whois/__init__.py diff --git a/intelmq/bots/experts/cymru_whois/expert.py b/intelmq/bots/experts/cymru_whois/expert.py new file mode 100644 index 000000000..6cc34a84a --- /dev/null +++ b/intelmq/bots/experts/cymru_whois/expert.py @@ -0,0 +1,83 @@ +import json +from intelmq.lib.bot import Bot, sys +from intelmq.lib.cache import Cache +from intelmq.lib.harmonization import IPAddress +from intelmq.bots.experts.cymru_whois.lib import Cymru + + +MINIMUM_BGP_PREFIX_IPV4 = 24 +MINIMUM_BGP_PREFIX_IPV6 = 128 # FIXME + + +class CymruExpertBot(Bot): + + def init(self): + self.cache = Cache( + self.parameters.redis_cache_host, + self.parameters.redis_cache_port, + self.parameters.redis_cache_db, + self.parameters.redis_cache_ttl + ) + + + def process(self): + event = self.receive_message() + + keys = ["source.%s", "destination.%s"] + + for key in keys: + ip_key = key % "ip" + + if not event.contains(ip_key): + continue + + ip = event.value(ip_key) + ip_version = 
IPAddress.version(ip) + ip_integer = IPAddress.to_int(ip) + + if ip_version == 4: + minimum = MINIMUM_BGP_PREFIX_IPV4 + + elif ip_version == 6: + minimum = MINIMUM_BGP_PREFIX_IPV6 + + else: + self.logger.error("Invalid IP version") + self.send_message(event) + self.acknowledge_message() + + cache_key = bin(ip_integer)[2 : minimum + 2] + result_json = self.cache.get(cache_key) + + if result_json: + result = json.loads(result_json) + else: + result = Cymru.query(ip) + result_json = json.dumps(result) + self.cache.set(cache_key, result_json) + + if "asn" in result: + event.add(key % 'asn', result['asn'], sanitize=True, force=True) + + if "bgp_prefix" in result: + event.add(key % 'bgp_prefix', result['bgp_prefix'], sanitize=True, force=True) + + if "registry" in result: + event.add(key % 'registry', result['registry'], sanitize=True, force=True) + + if "allocated" in result: + event.add(key % 'allocated', result['allocated'], sanitize=True, force=True) + + if "as_name" in result: + event.add(key % 'as_name', result['as_name'], sanitize=True, force=True) + + if "cc" in result: + event.add(key % 'geolocation.cc', result['cc'], sanitize=True, force=True) + + self.send_message(event) + self.acknowledge_message() + + +if __name__ == "__main__": + bot = CymruExpertBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/experts/cymru/lib.py b/intelmq/bots/experts/cymru_whois/lib.py similarity index 85% rename from intelmq/bots/experts/cymru/lib.py rename to intelmq/bots/experts/cymru_whois/lib.py index 70d6cbd5b..a62ceffb7 100755 --- a/intelmq/bots/experts/cymru/lib.py +++ b/intelmq/bots/experts/cymru_whois/lib.py @@ -1,8 +1,7 @@ import binascii import StringIO import dns.resolver -from intelmq.lib.utils import decode -from intelmq.bots import utils +from intelmq.lib.harmonization import IPAddress ''' Reference: http://www.team-cymru.org/Services/ip-to-asn.html#dns @@ -11,13 +10,15 @@ IP_QUERY = "%s.origin%s.asn.cymru.com" ASN_QUERY = "AS%s.asn.cymru.com" + class Cymru(): 
@staticmethod - def query(ip, ip_version): - raw_result = Cymru.__ip_query(ip, ip_version) + def query(ip): + raw_result = Cymru.__ip_query(ip) result = Cymru.__ip_query_parse(raw_result) + if "asn" in result: raw_result = Cymru.__asn_query(result['asn']) extra_info = Cymru.__asn_query_parse(raw_result) @@ -34,20 +35,25 @@ def __query(query): query_result.to_wire(fp) value = fp.getvalue()[1:] # ignore first character fp.close() - return decode(value, force=True) + return value except dns.exception.DNSException: return None @staticmethod - def __ip_query(ip, ip_version): - reversed_ip = utils.get_reverse_ip(ip) - version = "" - if ip_version == 6: + def __ip_query(ip): + ip_version = IPAddress.version(ip) + reverse_ip = IPAddress.to_reverse(ip) + + if ip_version == 4: + reverse = reverse_ip.split('.in-addr.arpa.')[0] + version = "" + else: + reverse = reverse_ip.split('.ip6.arpa.')[0] version = "6" - query = IP_QUERY % (reversed_ip, version) + query = IP_QUERY % (reverse, version) return Cymru.__query(query) diff --git a/intelmq/bots/experts/deduplicator/README.md b/intelmq/bots/experts/deduplicator/README.md index 2edf83e84..e5c8a77b8 100644 --- a/intelmq/bots/experts/deduplicator/README.md +++ b/intelmq/bots/experts/deduplicator/README.md @@ -1,3 +1,3 @@ -### Deduplicator - -System check if it already saw a specific message using Redias as memcache +### Deduplicator + +System check if it already saw a specific message using Redias as memcache diff --git a/intelmq/bots/experts/deduplicator/expert.py b/intelmq/bots/experts/deduplicator/expert.py new file mode 100644 index 000000000..421b22171 --- /dev/null +++ b/intelmq/bots/experts/deduplicator/expert.py @@ -0,0 +1,31 @@ +from intelmq.lib.bot import Bot, sys +from intelmq.lib.cache import Cache +from intelmq.lib.message import Event +from intelmq.lib.harmonization import DateTime +from intelmq.lib import utils + + +class DeduplicatorBot(Bot): + + def init(self): + self.cache = Cache( + 
self.parameters.redis_cache_host, + self.parameters.redis_cache_port, + self.parameters.redis_cache_db, + self.parameters.redis_cache_ttl + ) + + def process(self): + message = self.receive_message() + message_hash = hash(message) + + if not self.cache.exists(message_hash): + self.cache.set(message_hash, 'hash') + self.send_message(message) + + self.acknowledge_message() + + +if __name__ == "__main__": + bot = DeduplicatorBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/experts/filter/README.md b/intelmq/bots/experts/filter/README.md new file mode 100644 index 000000000..d7a446bcd --- /dev/null +++ b/intelmq/bots/experts/filter/README.md @@ -0,0 +1,8 @@ +### Filter Bot + +A simple filter. + +Parameters: +* filter_key - key from data harmonization +* filter_value - value for the key +* filter_action - action when a message match to the criteria (possible actions: keep/drop) diff --git a/intelmq/bots/collectors/url/__init__.py b/intelmq/bots/experts/filter/__init__.py similarity index 100% rename from intelmq/bots/collectors/url/__init__.py rename to intelmq/bots/experts/filter/__init__.py diff --git a/intelmq/bots/experts/filter/expert.py b/intelmq/bots/experts/filter/expert.py new file mode 100644 index 000000000..c94183a71 --- /dev/null +++ b/intelmq/bots/experts/filter/expert.py @@ -0,0 +1,42 @@ +from intelmq.lib.bot import Bot, sys +from intelmq.lib.cache import Cache +from intelmq.lib.message import Event + + +class FilterBot(Bot): + + def init(self): + if not self.parameters.filter_key: + self.logger.warn("No filter_key parameter found.") + self.stop() + elif not self.parameters.filter_value: + self.logger.warn("No filter_value parameter found.") + self.stop() + elif not (self.parameters.filter_action == "drop" or self.parameters.filter_action == "keep"): + self.logger.warn("No filter_action parameter found.") + self.stop() + + def process(self): + event = self.receive_message() + + if event.contains(self.parameters.filter_key): + + if 
self.parameters.filter_action == "drop": + if event.contains(self.parameters.filter_key) == self.parameters.filter_value: + self.acknowledge_message() + else: + self.send_message(message) + self.acknowledge_message() + + if self.parameters.filter_action == "keep": + if event.contains(self.parameters.filter_key) == self.parameters.filter_value: + self.send_message(message) + self.acknowledge_message() + else: + self.acknowledge_message() + + self.acknowledge_message() + +if __name__ == "__main__": + bot = FilterBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/experts/geoip/__init__.py b/intelmq/bots/experts/geoip/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/intelmq/bots/experts/geoip/geoip.py b/intelmq/bots/experts/geoip/geoip.py deleted file mode 100644 index ad1a59023..000000000 --- a/intelmq/bots/experts/geoip/geoip.py +++ /dev/null @@ -1,53 +0,0 @@ -import geoip2.database -from intelmq.lib.bot import Bot, sys - -class GeoIPExpertBot(Bot): - - def init(self): - try: - self.database = geoip2.database.Reader(self.parameters.database) - except IOError: - self.logger.error("GeoIP Database does not exist or could not be accessed in '%s'" % self.parameters.database) - self.logger.error("Read 'bots/experts/geoip/README' and follow the procedure") - self.stop() - - def process(self): - event = self.receive_message() - if event: - - keys = ["source_%s", "destination_%s"] - - for key in keys: - ip = event.value(key % "ip") - - if not ip: - continue - - try: - info = self.database.city(ip) - - if info.country.iso_code: - event.clear(key % "cc") - event.add(key % "cc", unicode(info.country.iso_code)) - - if info.location.latitude: - event.clear(key % "latitude") - event.add(key % "latitude", unicode(info.location.latitude)) - - if info.location.longitude: - event.clear(key % "longitude") - event.add(key % "longitude", unicode(info.location.longitude)) - - if info.city.name: - event.clear(key % "city") - event.add(key % "city", 
unicode(info.city.name)) - - except geoip2.errors.AddressNotFoundError: - pass - - self.send_message(event) - self.acknowledge_message() - -if __name__ == "__main__": - bot = GeoIPExpertBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/experts/geoip/README.md b/intelmq/bots/experts/maxmind_geoip/README.md similarity index 56% rename from intelmq/bots/experts/geoip/README.md rename to intelmq/bots/experts/maxmind_geoip/README.md index ab3a87bf6..220c7e799 100644 --- a/intelmq/bots/experts/geoip/README.md +++ b/intelmq/bots/experts/maxmind_geoip/README.md @@ -1,10 +1,10 @@ * Download database from http://geolite.maxmind.com/download/geoip/database/GeoLite2-City.mmdb.gz * Unzip -* Create geoip folder '/opt/intelmq/var/lib/bots/geoip' -* Copy database to '/opt/intelmq/var/lib/bots/geoip' +* Create geoip folder '/opt/intelmq/var/lib/bots/maxmind_geoip' +* Copy database to '/opt/intelmq/var/lib/bots/maxmind_geoip' * Update the correspondent section in '/opt/intelmq/etc/runtime.conf': ``` - "database": "/opt/intelmq/var/lib/bots/geoip/GeoLite2-City.mmdb" + "database": "/opt/intelmq/var/lib/bots/maxmind_geoip/GeoLite2-City.mmdb" ``` * Update the corresponding 'bot_id' section in '/opt/intelmq/etc/pipeline.conf'. 
diff --git a/intelmq/bots/collectors/xmpp/__init__.py b/intelmq/bots/experts/maxmind_geoip/__init__.py similarity index 100% rename from intelmq/bots/collectors/xmpp/__init__.py rename to intelmq/bots/experts/maxmind_geoip/__init__.py diff --git a/intelmq/bots/experts/maxmind_geoip/expert.py b/intelmq/bots/experts/maxmind_geoip/expert.py new file mode 100644 index 000000000..b0f861bc2 --- /dev/null +++ b/intelmq/bots/experts/maxmind_geoip/expert.py @@ -0,0 +1,50 @@ +import geoip2.database +from intelmq.lib.bot import Bot, sys + + +class GeoIPExpertBot(Bot): + + def init(self): + try: + self.database = geoip2.database.Reader(self.parameters.database) + except IOError: + self.logger.error("GeoIP Database does not exist or could not be accessed in '%s'" % self.parameters.database) + self.logger.error("Read 'bots/experts/geoip/README' and follow the procedure") + self.stop() + + def process(self): + event = self.receive_message() + + for key in ["source.%s", "destination.%s"]: + geo_key = key % "geolocation.%s" + + if not event.contains(key % "ip"): + continue + + ip = event.value(key % "ip") + + try: + info = self.database.city(ip) + + if info.country.iso_code: + event.add(geo_key % "cc", info.country.iso_code, sanitize=True, force=True) + + if info.location.latitude: + event.add(geo_key % "latitude", unicode(info.location.latitude), sanitize=True, force=True) + + if info.location.longitude: + event.add(geo_key % "longitude", unicode(info.location.longitude), sanitize=True, force=True) + + if info.city.name: + event.add(geo_key % "city", info.city.name, sanitize=True, force=True) + + except geoip2.errors.AddressNotFoundError: + pass + + self.send_message(event) + self.acknowledge_message() + + +if __name__ == "__main__": + bot = GeoIPExpertBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/experts/ripencc/ripencc.py b/intelmq/bots/experts/ripencc/expert.py similarity index 72% rename from intelmq/bots/experts/ripencc/ripencc.py rename to 
intelmq/bots/experts/ripencc/expert.py index 8c4a52624..3282bde62 100644 --- a/intelmq/bots/experts/ripencc/ripencc.py +++ b/intelmq/bots/experts/ripencc/expert.py @@ -1,31 +1,32 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.bots.experts.ripencc.lib import RIPENCC - -''' -Reference: https://stat.ripe.net/data/abuse-contact-finder/data.json?resource=1.1.1.1 - -TODO: -Load RIPE networks prefixes into memory. -Compare each IP with networks prefixes loadad. -If ip matchs, query RIPE -''' - -class RIPENCCExpertBot(Bot): - - def process(self): - - event = self.receive_message() - - for key in ['source_','destination_']: - if event.contains(key + "ip"): - ip = event.value(key + "ip") - email = RIPENCC.query(ip) - if email: - event.add(key + "abuse_contact", email) - - self.send_message(event) - self.acknowledge_message() - -if __name__ == "__main__": - bot = RIPENCCExpertBot(sys.argv[1]) - bot.start() +from intelmq.lib.bot import Bot, sys +from intelmq.bots.experts.ripencc.lib import RIPENCC + +''' +Reference: https://stat.ripe.net/data/abuse-contact-finder/data.json?resource=1.1.1.1 + +TODO: +Load RIPE networks prefixes into memory. +Compare each IP with networks prefixes loadad. 
+If ip matchs, query RIPE +''' + +class RIPENCCExpertBot(Bot): + + def process(self): + + event = self.receive_message() + + for key in ['source.','destination.']: + ip_key = key + "ip" + if event.contains(ip_key): + ip = event.value(ip_key) + email = RIPENCC.query(ip) + if email: + event.add(key + "abuse_contact", email, sanitize=True) + + self.send_message(event) + self.acknowledge_message() + +if __name__ == "__main__": + bot = RIPENCCExpertBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/experts/ripencc/lib.py b/intelmq/bots/experts/ripencc/lib.py index 869cb0a63..3d09442da 100755 --- a/intelmq/bots/experts/ripencc/lib.py +++ b/intelmq/bots/experts/ripencc/lib.py @@ -1,13 +1,14 @@ import json import requests + class RIPENCC(): @staticmethod def query(ip): - url = 'https://stat.ripe.net/data/abuse-contact-finder/data.json?resource=' + ip - response = requests.get(url, data="") + url = 'https://stat.ripe.net/data/abuse-contact-finder/data.json?resource=' + ip + response = requests.get(url, data="") try: if (response.json()['data']['anti_abuse_contacts']['abuse_c']): @@ -15,4 +16,4 @@ def query(ip): else: return None except: - return None \ No newline at end of file + return None diff --git a/intelmq/bots/experts/sanitizer/__init__.py b/intelmq/bots/experts/sanitizer/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/intelmq/bots/experts/sanitizer/sanitizer.py b/intelmq/bots/experts/sanitizer/sanitizer.py deleted file mode 100644 index 3dd394e74..000000000 --- a/intelmq/bots/experts/sanitizer/sanitizer.py +++ /dev/null @@ -1,91 +0,0 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.bots import utils - -class SanitizerBot(Bot): - - def process(self): - event = self.receive_message() - - if event: - - keys_pairs = [ - ( - "source_ip", - "source_domain_name", - "source_url", - "source_asn" - ), - ( - "destination_ip", - "destination_domain_name", - "destination_url", - "destination_asn" - ) - ] - - for keys in keys_pairs: - - ip 
= domain_name = url = None - - for key in keys: - - if "asn" in key: - continue - - if not event.contains(key): - continue - - value = event.value(key) - - if len(value) <= 2: # ignore invalid values - continue - - result = utils.is_ip(value) - if result: - ip = result - - result = utils.is_domain_name(value) - if result: - domain_name = result - - result = utils.is_url(value) - if result: - url = result - - if not domain_name and url: - domain_name = utils.get_domain_name_from_url(url) - - if not ip and domain_name: - ip = utils.get_ip_from_domain_name(domain_name) - - if not ip and url: - ip = utils.get_ip_from_url(url) - - for key in keys: - - if "url" in key and url: - event.clear(key) - event.add(key, url) - - if "domain_name" in key and domain_name: - event.clear(key) - event.add(key, domain_name) - - if "ip" in key and ip: - event.clear(key) - event.add(key, ip) - - if "asn" in key: - try: - int(event.value(key)) - except: - event.clear(key) - - self.send_message(event) - self.acknowledge_message() - - -if __name__ == "__main__": - bot = SanitizerBot(sys.argv[1]) - bot.start() - diff --git a/intelmq/bots/experts/taxonomy/taxonomy.py b/intelmq/bots/experts/taxonomy/expert.py similarity index 82% rename from intelmq/bots/experts/taxonomy/taxonomy.py rename to intelmq/bots/experts/taxonomy/expert.py index 509e3e9e9..1ee3b6541 100644 --- a/intelmq/bots/experts/taxonomy/taxonomy.py +++ b/intelmq/bots/experts/taxonomy/expert.py @@ -1,44 +1,45 @@ -from intelmq.lib.bot import Bot, sys - -# FIXME: this dict should be on a sparated file - -TAXONOMY = { - "phishing" : "Fraud", - "ddos" : "Availability", - "spam" : "Abusive Content", - "scanner" : "Information Gathering", - "dropzone" : "Information Content Security", - "malware" : "Malicious Code", - "botnet drone" : "Malicious Code", - "ransomware" : "Malicious Code", - "malware configuration" : "Malicious Code", - "c&c" : "Malicious Code", - "exploit" : "Intrusion Attempts", - "brute-force" : "Intrusion Attempts", - 
"ids alert" : "Intrusion Attempts", - "defacement" : "Intrusions", - "compromised" : "Intrusions", - "backdoor" : "Intrusions", - "vulnerable service" : "Vulnerable", - "blacklist" : "Other", - "unknown" : "Other", - "test" : "Test", - } - - -class TaxonomyExpertBot(Bot): - - def process(self): - event = self.receive_message() - if event: - if not event.contains("taxonomy") and event.contains("type"): - type = event.value("type") - taxonomy = TAXONOMY[type] - event.add("taxonomy", taxonomy) - - self.send_message(event) - self.acknowledge_message() - -if __name__ == "__main__": - bot = TaxonomyExpertBot(sys.argv[1]) - bot.start() +from intelmq.lib.bot import Bot, sys + +# FIXME: this dict should be on a sparated file + +TAXONOMY = { + "phishing" : "Fraud", + "ddos" : "Availability", + "spam" : "Abusive Content", + "scanner" : "Information Gathering", + "dropzone" : "Information Content Security", + "malware" : "Malicious Code", + "botnet drone" : "Malicious Code", + "ransomware" : "Malicious Code", + "malware configuration" : "Malicious Code", + "c&c" : "Malicious Code", + "exploit" : "Intrusion Attempts", + "brute-force" : "Intrusion Attempts", + "ids alert" : "Intrusion Attempts", + "defacement" : "Intrusions", + "compromised" : "Intrusions", + "backdoor" : "Intrusions", + "vulnerable service" : "Vulnerable", + "blacklist" : "Other", + "unknown" : "Other", + "test" : "Test", + } + + +class TaxonomyExpertBot(Bot): + + def process(self): + event = self.receive_message() + + if not event.contains("classification.taxonomy") and event.contains("classification.type"): + type = event.value("classification.type") + taxonomy = TAXONOMY[type] + event.add("classification.taxonomy", taxonomy, sanitize=True) + + self.send_message(event) + self.acknowledge_message() + + +if __name__ == "__main__": + bot = TaxonomyExpertBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/outputs/debug/__init__.py b/intelmq/bots/outputs/debug/__init__.py deleted file mode 100644 index 
e69de29bb..000000000 diff --git a/intelmq/bots/outputs/debug/debug.py b/intelmq/bots/outputs/debug/debug.py deleted file mode 100644 index f40af7c99..000000000 --- a/intelmq/bots/outputs/debug/debug.py +++ /dev/null @@ -1,17 +0,0 @@ -import json -from intelmq.lib.bot import Bot, sys - -class DebugBot(Bot): - - def process(self): - event = self.receive_message() - - if event: - print unicode(event) - self.send_message(event) - self.acknowledge_message() - - -if __name__ == "__main__": - bot = DebugBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/outputs/file/file.py b/intelmq/bots/outputs/file/output.py similarity index 82% rename from intelmq/bots/outputs/file/file.py rename to intelmq/bots/outputs/file/output.py index ff92570bc..651c54151 100644 --- a/intelmq/bots/outputs/file/file.py +++ b/intelmq/bots/outputs/file/output.py @@ -1,5 +1,5 @@ from intelmq.lib.bot import Bot, sys -from intelmq.lib.utils import encode +from intelmq.lib import utils class FileBot(Bot): @@ -12,8 +12,7 @@ def process(self): event = self.receive_message() if event: - event_data = unicode(event) - event_data = encode(event_data) + event_data = event.to_json() self.file.write(event_data) self.file.write("\n") self.file.flush() diff --git a/intelmq/bots/outputs/intelmailer/__init__.py b/intelmq/bots/outputs/intelmailer/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/intelmq/bots/outputs/intelmailer/intelmailer.py b/intelmq/bots/outputs/intelmailer/intelmailer.py deleted file mode 100644 index 165b36aa0..000000000 --- a/intelmq/bots/outputs/intelmailer/intelmailer.py +++ /dev/null @@ -1,24 +0,0 @@ -import datetime, pymongo -from intelmq.lib.bot import Bot, sys - -class IntelMailerBot(Bot): - - def init(self): - client = pymongo.MongoClient(self.parameters.host, int(self.parameters.port)) - db = client[self.parameters.database] - self.collection = db[self.parameters.collection] - - - def process(self): - event = self.receive_message() - - if event: - 
event_dict = event.to_dict() - event_dict['created_at'] = datetime.datetime.utcnow() - self.collection.insert(event_dict) - self.acknowledge_message() - - -if __name__ == "__main__": - bot = IntelMailerBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/outputs/logcollector/__init__.py b/intelmq/bots/outputs/logcollector/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/intelmq/bots/outputs/mongodb/mongodb.py b/intelmq/bots/outputs/mongodb/output.py similarity index 85% rename from intelmq/bots/outputs/mongodb/mongodb.py rename to intelmq/bots/outputs/mongodb/output.py index ea953cb2e..3e4e75774 100644 --- a/intelmq/bots/outputs/mongodb/mongodb.py +++ b/intelmq/bots/outputs/mongodb/output.py @@ -8,12 +8,9 @@ def init(self): db = client[self.parameters.database] self.collection = db[self.parameters.collection] - def process(self): event = self.receive_message() - - if event: - self.collection.insert(event.to_dict()) + self.collection.insert(event.to_dict()) self.acknowledge_message() diff --git a/intelmq/bots/outputs/postgresql/postgresql.py b/intelmq/bots/outputs/postgresql/output.py similarity index 70% rename from intelmq/bots/outputs/postgresql/postgresql.py rename to intelmq/bots/outputs/postgresql/output.py index 2c40142c1..0d2a51b6b 100644 --- a/intelmq/bots/outputs/postgresql/postgresql.py +++ b/intelmq/bots/outputs/postgresql/output.py @@ -17,16 +17,14 @@ def init(self): def process(self): event = self.receive_message() - if event: - evdict = event.to_dict() - keys = ", ".join(evdict.keys()) - values = evdict.values() - fvalues = len(values) * "%s, " - query = "INSERT INTO events (" + keys + ") VALUES (" + fvalues[:-2] + ")" - - self.cur.execute(query, values) - self.con.commit() + keys = ", ".join(event.keys()) + values = event.values() + fvalues = len(values) * "%s, " + query = "INSERT INTO events (" + keys + ") VALUES (" + fvalues[:-2] + ")" + + self.cur.execute(query, values) + self.con.commit() self.acknowledge_message() 
diff --git a/intelmq/bots/experts/asnlookup/__init__.py b/intelmq/bots/outputs/tcp/__init__.py similarity index 100% rename from intelmq/bots/experts/asnlookup/__init__.py rename to intelmq/bots/outputs/tcp/__init__.py diff --git a/intelmq/bots/outputs/logcollector/logcollector.py b/intelmq/bots/outputs/tcp/output.py similarity index 77% rename from intelmq/bots/outputs/logcollector/logcollector.py rename to intelmq/bots/outputs/tcp/output.py index a4628caf6..54a712620 100644 --- a/intelmq/bots/outputs/logcollector/logcollector.py +++ b/intelmq/bots/outputs/tcp/output.py @@ -1,19 +1,13 @@ -import time, socket +import time +import socket from intelmq.lib.bot import Bot, sys -class LogCollectorBot(Bot): +class TCPBot(Bot): def process(self): event = self.receive_message() - - if event: - data = '' - for key, value in event.items(): - data += key.replace(' ','_') + '="' + value + '" ' - data += "\n" - - self.send_data(data) - + data = event.to_json() + self.send_data(data) self.acknowledge_message() @@ -47,5 +41,5 @@ def send_data(self, data): if __name__ == "__main__": - bot = LogCollectorBot(sys.argv[1]) + bot = TCPBot(sys.argv[1]) bot.start() diff --git a/intelmq/bots/parsers/abusehelper/DO_NOT_USE_THIS_CODE b/intelmq/bots/parsers/abusehelper/DO_NOT_USE_THIS_CODE deleted file mode 100644 index e69de29bb..000000000 diff --git a/intelmq/bots/parsers/abusehelper/__init__.py b/intelmq/bots/parsers/abusehelper/__init__.py deleted file mode 100644 index 8b1378917..000000000 --- a/intelmq/bots/parsers/abusehelper/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/intelmq/bots/parsers/abusehelper/abusehelper.py b/intelmq/bots/parsers/abusehelper/abusehelper.py deleted file mode 100644 index b7976c1c7..000000000 --- a/intelmq/bots/parsers/abusehelper/abusehelper.py +++ /dev/null @@ -1,73 +0,0 @@ -import xmpp, time -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event - -# Required parameters: -# - jid -# - password -# - source_room -# - force_tls 
- -# [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] -# [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] -# [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] -# [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] -# [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] -# [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] -# [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] -# [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] -# [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] -# [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] [DEPRECATED] - -class AbuseHelperBot(Bot): - - def handle_message(self, xmpp_connection, message): - try: - event = Event.from_unicode(unicode(message.getBody())) - - for key in event.keys(): - value = event.value(key) - event.clear(key) - key = key.replace(' ','_') - event.add(key, value) - - self.send_message(event) - except: - pass - - def start(self): - jid = xmpp.JID(self.parameters.jid) - - xmpp_connection = xmpp.Client(jid.getDomain(), debug=[]) - connection_result = xmpp_connection.connect() - - if not connection_result: - # TODO: Log error - return - - if self.parameters.force_tls == 'true' and connection_result != 'tls': - # TODO: Log error - return - - authentication_result = xmpp_connection.auth(jid.getNode(), self.parameters.password) - if not authentication_result: - # TODO: Log error - return - - xmpp_connection.RegisterHandler(name='message', handler=self.handle_message) - xmpp_connection.sendInitPresence() - - xmpp_connection.send(xmpp.Presence(to='%s@conference.%s/%s' % (self.parameters.source_room, jid.getDomain(), self.bot_id))) - - 
while True: - if not xmpp_connection.isConnected(): - xmpp_connection.reconnectAndReauth() - else: - xmpp_connection.Process() - - time.sleep(int(self.parameters.rate_limit)) - - -if __name__ == "__main__": - bot = AbuseHelperBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/experts/contactdb/__init__.py b/intelmq/bots/parsers/alienvault/__init__.py similarity index 100% rename from intelmq/bots/experts/contactdb/__init__.py rename to intelmq/bots/parsers/alienvault/__init__.py diff --git a/intelmq/bots/parsers/alienvault/parser.py b/intelmq/bots/parsers/alienvault/parser.py new file mode 100644 index 000000000..dbf19a372 --- /dev/null +++ b/intelmq/bots/parsers/alienvault/parser.py @@ -0,0 +1,79 @@ +from intelmq.lib.bot import Bot, sys +from intelmq.lib.message import Event +from intelmq.lib.harmonization import DateTime +from intelmq.lib import utils + + +CLASSIFICATION = { + "c&c": "c&c", + "scanning host": "scanner", + "malicious host": "malware", + "spamming": "spam", + "malware domain": "malware", + "malware ip": "malware", + "malware distribution": "malware", +} + + +class AlienVaultParserBot(Bot): + + def process(self): + report = self.receive_message() + + if not report.contains("raw"): + self.acknowledge_message() + + if len(report.value("raw").strip()) == 0: + self.acknowledge_message() + + raw_report = utils.base64_decode(report.value("raw")) + + for row in raw_report.split('\n'): + + row = row.strip() + if len(row) == 0: + continue + + values = row.split("#") + + # Send one event per classification + classification_types = list() + if values[3].strip().find(";") > 0: + classification_types.extend(values[3].split(";")) + else: + classification_types.append(values[3]) + + for ctype in classification_types: + + event = Event() + + if ctype.lower() in CLASSIFICATION: + event.add('classification.type', CLASSIFICATION[ctype.lower()], sanitize=True) + else: + event.add('classification.type', u"unknown") + + if len(values[6].strip()) > 0: + 
geo_coordinates = values[6].strip().split(",") + if len(geo_coordinates) == 2: + geo_latitude = geo_coordinates[0] + geo_longitude = geo_coordinates[1] + + event.add('source.ip', values[0].strip(), sanitize=True) + event.add('source.geolocation.cc', values[4].strip(), sanitize=True) + event.add('source.geolocation.city', values[5].strip(), sanitize=True) + event.add('source.geolocation.latitude', geo_latitude.strip(), sanitize=True) + event.add('source.geolocation.longitude', geo_longitude.strip(), sanitize=True) + + time_observation = DateTime().generate_datetime_now() + event.add('time.observation', time_observation, sanitize=True) + event.add('feed.name', report.value("feed.name")) + event.add('feed.url', report.value("feed.url")) + event.add("raw", row, sanitize=True) + + + self.send_message(event) + self.acknowledge_message() + +if __name__ == "__main__": + bot = AlienVaultParserBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/parsers/arbor/parser.py b/intelmq/bots/parsers/arbor/parser.py index 52e0a0152..b5e776b49 100644 --- a/intelmq/bots/parsers/arbor/parser.py +++ b/intelmq/bots/parsers/arbor/parser.py @@ -1,37 +1,43 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils - -class ArborParserBot(Bot): - - def process(self): - report = self.receive_message() - - if report: - for row in report.split('\n'): - row = row.strip() - - if len(row) == 0 or row.startswith('other'): - continue - - row = row.split() - event = Event() - - columns = ["source_ip"] - for key, value in zip(columns, row): - event.add(key, value) - - event.add('feed', 'arbor') - event.add('feed_url', 'http://atlas-public.ec2.arbor.net/public/ssh_attackers') - event.add('type', 'brute-force') - - event = utils.generate_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - self.acknowledge_message() - -if 
__name__ == "__main__": - bot = ArborParserBot(sys.argv[1]) - bot.start() +from intelmq.lib import utils +from intelmq.lib.bot import Bot, sys +from intelmq.lib.message import Event +from intelmq.lib.harmonization import DateTime + + +class ArborParserBot(Bot): + + def process(self): + report = self.receive_message() + + if not report.contains("raw"): + self.acknowledge_message() + + raw_report = utils.base64_decode(report.value("raw")) + for row in raw_report.split('\n'): + row = row.strip() + + if len(row) == 0 or row.startswith('other'): + continue + + event = Event() + + time_observation = DateTime().generate_datetime_now() + event.add('time.observation', time_observation, sanitize=True) + event.add('feed.name', report.value("feed.name")) + event.add('feed.url', report.value("feed.url")) + event.add('classification.type', u'brute-force') + event.add("raw", row, sanitize=True) + + columns = ["source.ip"] + row = row.split() + + for key, value in zip(columns, row): + event.add(key, value, sanitize=True) + + self.send_message(event) + self.acknowledge_message() + + +if __name__ == "__main__": + bot = ArborParserBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/parsers/bruteforceblocker/parser.py b/intelmq/bots/parsers/bruteforceblocker/parser.py index 29ca238b5..e0bb95975 100644 --- a/intelmq/bots/parsers/bruteforceblocker/parser.py +++ b/intelmq/bots/parsers/bruteforceblocker/parser.py @@ -1,43 +1,46 @@ from intelmq.lib.bot import Bot, sys from intelmq.lib.message import Event -from intelmq.bots import utils +from intelmq.lib.harmonization import DateTime +from intelmq.lib import utils import re +REGEX_IP = "^[^ \t]+" +REGEX_TIMESTAMP = "# ([^ \t]+ [^ \t]+)" + + class BruteForceBlockerParserBot(Bot): def process(self): report = self.receive_message() - if report: - regex_ip = "^[^ \t]+" - regex_timestamp = "# ([^ \t]+ [^ \t]+)" - - for row in report.split('\n'): + if not report.contains("raw"): + self.acknowledge_message() - if row.startswith('#'): - 
continue + raw_report = utils.base64_decode(report.value("raw")) + for row in raw_report.split('\n'): - event = Event() + if row.startswith('#'): + continue - match = re.search(regex_ip, row) - if match: - ip = match.group() - - match = re.search(regex_timestamp, row) - if match: - timestamp = match.group(1) + " UTC" - - event.add("source_ip", ip) - event.add("source_time", timestamp) - event.add('feed', 'bruteforceblocker') - event.add('feed_url', 'http://danger.rulez.sk/projects/bruteforceblocker/blist.php') - event.add('type', 'brute-force') - - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) + event = Event() + + match = re.search(REGEX_IP, row) + if match: + ip = match.group() - self.send_message(event) + match = re.search(REGEX_TIMESTAMP, row) + if match: + timestamp = match.group(1) + " UTC" + + time_observation = DateTime().generate_datetime_now() + event.add('time.observation', time_observation, sanitize=True) + event.add('time.source', timestamp, sanitize=True) + event.add('source.ip', ip, sanitize=True) + event.add('feed.name', report.value("feed.name")) + event.add('feed.url', report.value("feed.url")) + event.add('classification.type', u'brute-force') + + self.send_message(event) self.acknowledge_message() if __name__ == "__main__": diff --git a/intelmq/bots/parsers/certeu/__init__.py b/intelmq/bots/parsers/certeu/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/intelmq/bots/parsers/certeu/malicious-urls-parser.py b/intelmq/bots/parsers/certeu/malicious-urls-parser.py deleted file mode 100644 index 5e5f1dffd..000000000 --- a/intelmq/bots/parsers/certeu/malicious-urls-parser.py +++ /dev/null @@ -1,45 +0,0 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils - -class CERTEUMaliciousURLsParserBot(Bot): - - def process(self): - report = 
self.receive_message() - - if report: - for row in report.split('\n'): - - row = row.strip() - - if len(row) == 0: - continue - - row = row.split('|') - event = Event() - - columns = ["source_url", "source_asn", "source_ip", "source_time", "source_reverse_dns", "source_cc", "__IGNORE__", "additional_information"] - - for key, value in zip(columns, row): - value = value.strip() - - if key == "source_time": - value += " UTC" - - if value != "N/A" and key != "__IGNORE__": - event.add(key, value) - - event.add('feed', 'cert-eu') - event.add('type', 'malware') - - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - self.acknowledge_message() - - -if __name__ == "__main__": - bot = CERTEUMaliciousURLsParserBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/parsers/dragonresearchgroup/parser-ssh.py b/intelmq/bots/parsers/dragonresearchgroup/parser-ssh.py deleted file mode 100644 index b7102f6cd..000000000 --- a/intelmq/bots/parsers/dragonresearchgroup/parser-ssh.py +++ /dev/null @@ -1,45 +0,0 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils - -class DragonResearchGroupSSHParserBot(Bot): - - def process(self): - report = self.receive_message() - - if report: - for row in report.split('\n'): - row = row.strip() - - if len(row) == 0 or row.startswith('#'): # ignore all lines starting with comment mark - continue - - row = row.split('|') - event = Event() - - columns = ["source_asn", "source_as_name", "source_ip", "source_time"] - - for key, value in zip(columns, row): - value = value.strip() - - if key == "source_time": - value += " UTC" - - event.add(key, value) - - event.add('feed', 'dragonresearchgroup') - event.add('feed_url', 'http://dragonresearchgroup.org/insight/sshpwauth.txt') - event.add('type', 'brute-force') - event.add('application_protocol', 
'ssh') - - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - self.acknowledge_message() - - -if __name__ == "__main__": - bot = DragonResearchGroupSSHParserBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/parsers/dragonresearchgroup/parser-vnc.py b/intelmq/bots/parsers/dragonresearchgroup/parser-vnc.py deleted file mode 100644 index 309ad85c3..000000000 --- a/intelmq/bots/parsers/dragonresearchgroup/parser-vnc.py +++ /dev/null @@ -1,45 +0,0 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils - -class DragonResearchGroupVNCParserBot(Bot): - - def process(self): - report = self.receive_message() - - if report: - for row in report.split('\n'): - row = row.strip() - - if len(row) == 0 or row.startswith('#'): # ignore all lines starting with comment mark - continue - - row = row.split('|') - event = Event() - - columns = ["source_asn", "source_as_name", "source_ip", "source_time"] - - for key, value in zip(columns, row): - value = value.strip() - - if key == "source_time": - value += " UTC" - - event.add(key, value) - - event.add('feed', 'dragonresearchgroup') - event.add('feed_url', 'http://dragonresearchgroup.org/insight/vncprobe.txt') - event.add('type', 'brute-force') - event.add('application_protocol', 'vnc') - - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - self.acknowledge_message() - - -if __name__ == "__main__": - bot = DragonResearchGroupVNCParserBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/parsers/dragonresearchgroup/parser_ssh.py b/intelmq/bots/parsers/dragonresearchgroup/parser_ssh.py new file mode 100644 index 000000000..9d9a7506a --- /dev/null +++ 
b/intelmq/bots/parsers/dragonresearchgroup/parser_ssh.py @@ -0,0 +1,51 @@ +from intelmq.lib.bot import Bot, sys +from intelmq.lib.message import Event +from intelmq.lib.harmonization import DateTime +from intelmq.lib import utils + +class DragonResearchGroupSSHParserBot(Bot): + + def process(self): + report = self.receive_message() + + if not report.contains("raw"): + self.acknowledge_message() + + raw_report = utils.base64_decode(report.value("raw")) + for row in raw_report.split('\n'): + + row = row.strip() + + self.logger.error("Raw row %s" % row) + + if len(row) == 0 or row.startswith('#'): + continue + + splitted_row = row.split('|') + event = Event() + + columns = ["source.asn", "source.as_name", "source.ip", "time.source"] + + for key, value in zip(columns, splitted_row): + value = value.strip() + + if key == "time.source": + value += " UTC" + + event.add(key, value, sanitize=True) + + time_observation = DateTime().generate_datetime_now() + event.add('time.observation', time_observation, sanitize=True) + event.add('feed.name', report.value("feed.name")) + event.add('feed.url', report.value("feed.url")) + event.add('classification.type', u'brute-force') + event.add('protocol.application', u'ssh') + event.add("raw", row, sanitize=True) + + self.send_message(event) + self.acknowledge_message() + + +if __name__ == "__main__": + bot = DragonResearchGroupSSHParserBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/parsers/dragonresearchgroup/parser_vnc.py b/intelmq/bots/parsers/dragonresearchgroup/parser_vnc.py new file mode 100644 index 000000000..561f62648 --- /dev/null +++ b/intelmq/bots/parsers/dragonresearchgroup/parser_vnc.py @@ -0,0 +1,49 @@ +from intelmq.lib.bot import Bot, sys +from intelmq.lib.message import Event +from intelmq.lib.harmonization import DateTime +from intelmq.lib import utils + +class DragonResearchGroupVNCParserBot(Bot): + + def process(self): + report = self.receive_message() + + if not report.contains("raw"): + 
self.acknowledge_message() + + raw_report = utils.base64_decode(report.value("raw")) + for row in raw_report.split('\n'): + + row = row.strip() + + if len(row) == 0 or row.startswith('#'): + continue + + splitted_row = row.split('|') + event = Event() + + columns = ["source.asn", "source.as_name", "source.ip", "time.source"] + + for key, value in zip(columns, splitted_row): + value = value.strip() + + if key == "time.source": + value += " UTC" + + event.add(key, value, sanitize=True) + + time_observation = DateTime().generate_datetime_now() + event.add('time.observation', time_observation, sanitize=True) + event.add('feed.name', report.value("feed.name")) + event.add('feed.url', report.value("feed.url")) + event.add('classification.type', u'brute-force') + event.add('protocol.application', u'vnc') + event.add("raw", row, sanitize=True) + + self.send_message(event) + self.acknowledge_message() + + +if __name__ == "__main__": + bot = DragonResearchGroupVNCParserBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/parsers/dshield/parser.py b/intelmq/bots/parsers/dshield/parser.py deleted file mode 100644 index 5e6a64bf3..000000000 --- a/intelmq/bots/parsers/dshield/parser.py +++ /dev/null @@ -1,49 +0,0 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils -import re - -class DshieldParserBot(Bot): - - def process(self): - report = self.receive_message() - - if report: - regex_ip = "^(\d+\.\d+\.\d+\.\d+)" # bug: this ignores IPv6 right now - regex_timestamp = "(\d+\-\d+\-\d+\s\d+\:\d+\:\d+)" - - for row in report.split('\n'): - - if row.startswith('#'): - continue - - event = Event() - - match = re.search(regex_ip, row) - if match: - ip = ".".join([octet.lstrip('0') for octet in match.group().split('.')]) - else: - continue # skip lines without IP address - - match = re.search(regex_timestamp, row) - if match: - timestamp = match.group(1) + " UTC" - else: - continue # no timestamp -> no event, skip this line 
- - event.add("source_ip", ip) - event.add("source_time", timestamp) - event.add('feed', 'dshield') - event.add('feed_url', 'http://dshield.org/asdetailsascii.html') - event.add('type', 'brute-force') - - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - self.acknowledge_message() - -if __name__ == "__main__": - bot = DshieldParserBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/parsers/dshield/parser_asn.py b/intelmq/bots/parsers/dshield/parser_asn.py new file mode 100644 index 000000000..c625536af --- /dev/null +++ b/intelmq/bots/parsers/dshield/parser_asn.py @@ -0,0 +1,61 @@ +import re +from intelmq.lib import utils +from intelmq.lib.bot import Bot, sys +from intelmq.lib.message import Event +from intelmq.lib.harmonization import DateTime + + +REGEX_IP = "^(\d+\.\d+\.\d+\.\d+)" # bug: this ignores IPv6 right now +REGEX_TIMESTAMP = "(\d+\-\d+\-\d+\s\d+\:\d+\:\d+)" + + +class DshieldParserBot(Bot): + + + def process(self): + report = self.receive_message() + + if not report.contains("raw"): + self.acknowledge_message() + + raw_report = utils.base64_decode(report.value("raw")) + for row in raw_report.split('\n'): + + if row.startswith('#'): + continue + + octets = list() + match = re.search(REGEX_IP, row) + if match: + for octet in match.group().split('.'): + result = octet.lstrip('0') + if result == "": + result = "0" + octets.append(result) + ip = ".".join(octets) + else: + continue + + match = re.search(REGEX_TIMESTAMP, row) + if match: + timestamp = match.group(1) + " UTC" + else: + continue + + event = Event() + + time_observation = DateTime().generate_datetime_now() + event.add('time.observation', time_observation, sanitize=True) + event.add("time.source", timestamp, sanitize=True) + event.add('feed.name', report.value("feed.name")) + event.add('feed.url', report.value("feed.url")) + 
event.add('classification.type', u'brute-force') + event.add("source.ip", ip, sanitize=True) + event.add("raw", row, sanitize=True) + + self.send_message(event) + self.acknowledge_message() + +if __name__ == "__main__": + bot = DshieldParserBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/parsers/generic/parser.py b/intelmq/bots/parsers/generic/parser.py deleted file mode 100644 index e92cc4066..000000000 --- a/intelmq/bots/parsers/generic/parser.py +++ /dev/null @@ -1,45 +0,0 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils -import re - - -class GenericBot(Bot): -# Generic parser, will simply parse and add named group to event -# for example if you have the regex : -# '^\s*(?P(?:(?:\d){1,3}\.){3}\d{1,3})' -# You will have an item 'ip' in your event. - - def process(self): - report = self.receive_message() - self.logger.debug("Will apply regex %s" % self.parameters.regex) - if report: - rowcount = 0 - for row in report.split('\n'): # For each line - event = Event() - match = re.search(self.parameters.regex, row) - if match: - for key in match.groupdict(): - event.add(key, match.groupdict()[key]) - else: - continue # skip lines without matching regex - rowcount += 1 - # Get detail from parser parameters, will be nice to have it by - # source parameters.. 
Avoid adding if parsed - if not 'feed' in match.groupdict(): - event.add('feed', self.parameters.feed) - if not 'feed_url' in match.groupdict(): - event.add('feed_url', self.parameters.feed_url) - if not 'type' in match.groupdict(): - event.add('type', self.parameters.type) - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, - "observation_time") - event = utils.generate_reported_fields(event) - self.send_message(event) - self.logger.info("Processed %d event" % rowcount) - self.acknowledge_message() - -if __name__ == "__main__": - bot = GenericBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/parsers/hpfeeds/parser.py b/intelmq/bots/parsers/hpfeeds/parser.py deleted file mode 100644 index f4b044c33..000000000 --- a/intelmq/bots/parsers/hpfeeds/parser.py +++ /dev/null @@ -1,33 +0,0 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils -import redis -import json - -class HPFeedsBot(Bot): - - def process(self): - report = self.receive_message() - self.logger.info(report) - if report: - - #m = json.loads(report) - m = report - - event = Event() - for k in m.keys(): - event.add(k, m.value(k)) - - event.add('feed', 'hpfeed') - event.add('feed_url', m.value("sensorname")) - - event = utils.generate_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - self.acknowledge_message() - -if __name__ == "__main__": - bot = HPFeedsBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/experts/countrycodefilter/__init__.py b/intelmq/bots/parsers/hphosts/__init__.py similarity index 100% rename from intelmq/bots/experts/countrycodefilter/__init__.py rename to intelmq/bots/parsers/hphosts/__init__.py diff --git a/intelmq/bots/parsers/hphosts/parser.py b/intelmq/bots/parsers/hphosts/parser.py new file mode 100644 index 000000000..476f8875a 
--- /dev/null +++ b/intelmq/bots/parsers/hphosts/parser.py @@ -0,0 +1,53 @@ +from intelmq.lib.bot import Bot, sys +from intelmq.lib.message import Event +from intelmq.lib.harmonization import DateTime +from intelmq.lib import utils + + +class HpHostsParser(Bot): + + def process(self): + report = self.receive_message() + + if not report.contains("raw"): + self.acknowledge_message() + + if len(report.value("raw").strip()) == 0: + self.acknowledge_message() + + raw_report = utils.base64_decode(report.value("raw")) + + for row in raw_report.split('\n'): + row = row.strip() + + if len(row) == 0 or row.startswith('#'): + continue + + row = row.replace('\r','') + values = row.split('\t') + + # if special char is in string should not be allowed + if "#" in values[1]: + continue + + # if domain name is localhost we are not interested + if values[1].lower().strip() == "localhost": + continue + + event = Event() + + time_observation = DateTime().generate_datetime_now() + event.add('classification.type', u'blacklist') + event.add('time.observation', time_observation, sanitize=True) + event.add('feed.name', report.value("feed.name")) + event.add('feed.url', report.value("feed.url")) + event.add("raw", row, sanitize=True) + + event.add('source.fqdn', values[1], sanitize=True) + + self.send_message(event) + self.acknowledge_message() + +if __name__ == "__main__": + bot = HpHostsParser(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/parsers/malwaredomainlist/parser.py b/intelmq/bots/parsers/malwaredomainlist/parser.py index 780888e75..cf4bc8d0d 100644 --- a/intelmq/bots/parsers/malwaredomainlist/parser.py +++ b/intelmq/bots/parsers/malwaredomainlist/parser.py @@ -1,46 +1,51 @@ import unicodecsv from cStringIO import StringIO +from intelmq.lib import utils from intelmq.lib.bot import Bot, sys from intelmq.lib.message import Event -from intelmq.lib.utils import encode -from intelmq.bots import utils +from intelmq.lib.harmonization import DateTime class 
MalwareDomainListParserBot(Bot): def process(self): report = self.receive_message() - if report: - report = encode(report) + if not report.contains("raw"): + self.acknowledge_message() - columns = ["source_time", "source_url", "source_ip", "source_reverse_dns", "malware", "__IGNORE__", "source_asn"] + columns = [ + "time.source", + "source.url", + "source.ip", + "source.reverse_domain_name", + "description.text", + "__IGNORE__", + "source.asn" + ] - for row in unicodecsv.reader(StringIO(report), encoding='utf-8'): - event = Event() + raw_report = utils.base64_decode(report.value("raw")) + for row in unicodecsv.reader(StringIO(raw_report), encoding='utf-8'): + event = Event() - for key, value in zip(columns, row): - - if key is "__IGNORE__": - continue - - if key is "source_time": - value = value.replace('_',' ') - value += " UTC" - - if key is "malware": - value = value.lower() - - event.add(key, value) - - event.add('feed', 'malwaredomainslist') - event.add('feed_url', 'http://www.malwaredomainlist.com/updatescsv.php') - event.add('type', 'malware') # FIXME + for key, value in zip(columns, row): - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) + if key is "__IGNORE__": + continue + + if key is "time.source": + value = value.replace('_',' ') + value += " UTC" - self.send_message(event) + event.add(key, value, sanitize=True) + + time_observation = DateTime().generate_datetime_now() + event.add('time.observation', time_observation, sanitize=True) + event.add('feed.name', report.value("feed.name")) + event.add('feed.url', report.value("feed.url")) + event.add('classification.type', u'malware') + event.add("raw", ",".join(row), sanitize=True) + + self.send_message(event) self.acknowledge_message() diff --git a/intelmq/bots/parsers/malwarepatrol/parser-dansguardian.py b/intelmq/bots/parsers/malwarepatrol/parser-dansguardian.py deleted file mode 
100644 index 68fd195d1..000000000 --- a/intelmq/bots/parsers/malwarepatrol/parser-dansguardian.py +++ /dev/null @@ -1,36 +0,0 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils - -class DansParserBot(Bot): - - def process(self): - report = self.receive_message() - - if report: - for row in report.split('\n'): - row = row.strip() - - if len(row) == 0 or row.startswith('#'): - continue - - row = row.split() - event = Event() - - columns = ["source_url"] - for key, value in zip(columns, row): - event.add(key, value) - - event.add('feed', 'malwarepatrol-dansguardian') - event.add('type', 'malware') - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - self.acknowledge_message() - - -if __name__ == "__main__": - bot = DansParserBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/parsers/malwarepatrol/parser_dansguardian.py b/intelmq/bots/parsers/malwarepatrol/parser_dansguardian.py new file mode 100644 index 000000000..82c8c5a85 --- /dev/null +++ b/intelmq/bots/parsers/malwarepatrol/parser_dansguardian.py @@ -0,0 +1,42 @@ +from intelmq.lib import utils +from intelmq.lib.bot import Bot, sys +from intelmq.lib.message import Event +from intelmq.lib.harmonization import DateTime + +class DansParserBot(Bot): + + def process(self): + report = self.receive_message() + + if not report.contains("raw"): + self.acknowledge_message() + + raw_report = utils.base64_decode(report.value("raw")) + + for row in raw_report.split('\n'): + row = row.strip() + + if len(row) == 0 or row.startswith('#'): + continue + + event = Event() + splitted_row = row.split() + + columns = ["source.url"] + for key, value in zip(columns, splitted_row): + event.add(key, value, sanitize=True) + + time_observation = DateTime().generate_datetime_now() + event.add('time.observation', 
time_observation, sanitize=True) + event.add('feed.name', report.value("feed.name")) + event.add('feed.url', report.value("feed.url")) + event.add('classification.type', u'malware') + event.add("raw", row, sanitize=True) + + self.send_message(event) + self.acknowledge_message() + + +if __name__ == "__main__": + bot = DansParserBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/parsers/microsoft_dcu/__init__.py b/intelmq/bots/parsers/microsoft_dcu/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/intelmq/bots/parsers/microsoft_dcu/lib.py b/intelmq/bots/parsers/microsoft_dcu/lib.py deleted file mode 100644 index 3fa30a2f1..000000000 --- a/intelmq/bots/parsers/microsoft_dcu/lib.py +++ /dev/null @@ -1,349 +0,0 @@ -from datetime import datetime, timedelta -from dateutil.tz import tzutc - -import re -import sys - -__HEADERS = ["SourcedFrom", "FileTimeUtc", - "Botnet", "SourceIp", - "SourcePort", "SourceIpAsnNr", - "TargetIp", "TargetPort", - "Payload", "SourceIpCountryCode", - "SourceIpRegion", "SourceIpCity", - "SourceIpPostalCode", "SourceIpLatitude", - "SourceIpLongitude", "SourceIpMetroCode", - "SourceIpAreaCode", "HttpRequest", - "HttpReferrer", "HttpUserAgent", - "HttpMethod", "HttpVersion", - "HttpHost", "Custom Field 1", - "Custom Field 2", "Custom Field 3", - "Custom Field 4", "Custom Field 5"] - - -__INTELMQ_TX = {"SourcedFrom": None, # IGNORE - "FileTimeUtc": "source_time", - "Botnet": None, # IS USED - "SourceIp": "source_ip", - "SourcePort": "source_port", - "SourceIpAsnNr": "source_asn", - "TargetIp": "destination_ip", - "TargetPort": "destination_port", - "Payload": None, # IGNORE - "SourceIpCountryCode": "source_cc", - "SourceIpRegion": None, # IGNORE - "SourceIpCity": "source_city", - "SourceIpPostalCode": None, # IGNORE - "SourceIpLatitude": "source_latitude", - "SourceIpLongitude": "source_longitude", - "SourceIpMetroCode": None, # IGNORE - "SourceIpAreaCode": None, # IGNORE - "HttpRequest": None, # IGNORE - 
"HttpReferrer": None, # IGNORE - "HttpUserAgent": "user_agent", - "HttpMethod": None, # IGNORE - "HttpVersion": None, # IGNORE - "HttpHost": None, # IGNORE - "Custom Field 1": None, # IGNORE - "Custom Field 2": None, # IGNORE - "Custom Field 3": None, # IGNORE - "Custom Field 4": None, # IGNORE - "Custom Field 5": None} # IGNORE - -# added with: -# cat ~/dcu-mapping.tsv | cut -f2,3 | sed 's/\t/\\t/g;s/$/\\n"/g;s/^/"/g' -__THREAT_CODES = dict(map(lambda x: x.split("\t"), ( - "B106-Agent\tmalware\n" - "B106-AgentBypass\tmalware\n" - "B106-Ainslot\tbackdoor\n" - "B106-Amighelo\tmalware\n" - "B106-Ardamax\tmalware\n" - "B106-Arhost\tmalware\n" - "B106-Assasin\tbackdoor\n" - "B106-AutInject\tbackdoor\n" - "B106-Autoac\tbackdoor\n" - "B106-Autorun\tmalware\n" - "B106-Balidor\tmalware\n" - "B106-Bancos\tbackdoor\n" - "B106-Bandok\tbackdoor\n" - "B106-Banker\tbackdoor\n" - "B106-Banload\tbackdoor\n" - "B106-Bariori\tbackdoor\n" - "B106-Beastdoor\tbackdoor\n" - "B106-Beaugrit\tbackdoor\n" - "B106-Bexelets\tbackdoor\n" - "B106-Bezigate\tbackdoor\n" - "B106-Bifrose\tbackdoor\n" - "B106-Binder\tvulnerable service\n" - "B106-Bisar\tbackdoor\n" - "B106-Bladabindi\tbackdoor\n" - "B106-Blohi\tbackdoor\n" - "B106-BrowserPassview\tvulnerable service\n" - "B106-Cakl\tbackdoor\n" - "B106-Cashback\tmalware\n" - "B106-CB\tmalware\n" - "B106-Ceatrg\tmalware\n" - "B106-Cechip\tbackdoor\n" - "B106-CeeInject\tmalware\n" - "B106-Chir\tmalware\n" - "B106-Chuchelo\tbackdoor\n" - "B106-Comame\tbackdoor\n" - "B106-Comisproc\tmalware\n" - "B106-Comitsproc\tmalware\n" - "B106-Comrerop\tbackdoor\n" - "B106-Comroki\tbackdoor\n" - "B106-Comsirig\tbackdoor\n" - "B106-Coolvidoor\tbackdoor\n" - "B106-Coremhead\tbackdoor\n" - "B106-Crime\tbackdoor\n" - "B106-Danglo\tmalware\n" - "B106-Darkddoser\tbackdoor\n" - "B106-Darkmoon\tbackdoor\n" - "B106-Decay\tbackdoor\n" - "B106-Defsel\tbackdoor\n" - "B106-Delf\tbackdoor\n" - "B106-DelfInject\tmalware\n" - "B106-Delfsnif\tbackdoor\n" - 
"B106-Dimegup\tbackdoor\n" - "B106-Dokstormac\tbackdoor\n" - "B106-Dooxud\tmalware\n" - "B106-Dorkbot\tmalware\n" - "B106-Dusvext\tmalware\n" - "B106-Dynamer\tbackdoor\n" - "B106-Effbee\tbackdoor\n" - "B106-EyeStye\tbackdoor\n" - "B106-Fareit\tmalware\n" - "B106-Farfli\tbackdoor\n" - "B106-Folyris\tmalware\n" - "B106-Frosparf\tbackdoor\n" - "B106-Fynloski\tmalware\n" - "B106-Gamarue\tmalware\n" - "B106-Gaobot\tbotnet drone\n" - "B106-Geratid\tbotnet drone\n" - "B106-Gernidru\tbackdoor\n" - "B106-Geycript\tmalware\n" - "B106-Gratem\tbackdoor\n" - "B106-GSpot\tbackdoor\n" - "B106-Habbo\tbackdoor\n" - "B106-Hamweq\tbotnet drone\n" - "B106-Hanictik\tbackdoor\n" - "B106-Hiderun\tmalware\n" - "B106-HistBoader\tbackdoor\n" - "B106-Horsamaz\tbackdoor\n" - "B106-Hoygunver\tbackdoor\n" - "B106-Hupigon\tbackdoor\n" - "B106-Inject\tbackdoor\n" - "B106-Injector\tbackdoor\n" - "B106-IRCbot\tbackdoor\n" - "B106-Ircbrute\tbackdoor\n" - "B106-Itsproc\tbackdoor\n" - "B106-Jenxcus\tbackdoor\n" - "B106-Keygen\tvulnerable service\n" - "B106-Keylogger\tbackdoor\n" - "B106-Kilim\tbackdoor\n" - "B106-KKmaka\tmalware\n" - "B106-Klovbot\tbackdoor\n" - "B106-Knowlog\tmalware\n" - "B106-Ldpinch\tbackdoor\n" - "B106-Lenc\tbackdoor\n" - "B106-Leodon\tbackdoor\n" - "B106-Levitiang\tmalware\n" - "B106-Lybsus\tbackdoor\n" - "B106-Lypsacop\tbackdoor\n" - "B106-Mafod\tbackdoor\n" - "B106-Malagent\tbackdoor\n" - "B106-Malex\tbackdoor\n" - "B106-Meredrop\tbackdoor\n" - "B106-Mielit\tbackdoor\n" - "B106-Misbot\tbackdoor\n" - "B106-Mobibez\tbackdoor\n" - "B106-Mosripe\tmalware\n" - "B106-Mosucker\tbackdoor\n" - "B106-Msposer\tbackdoor\n" - "B106-MULTI\tmalware\n" - "B106-Napolar\tmalware\n" - "B106-Naprat\tbackdoor\n" - "B106-Nayrabot\tmalware\n" - "B106-Necast\tbackdoor\n" - "B106-Neeris\tmalware\n" - "B106-Nemim\tbackdoor\n" - "B106-Neop\tbackdoor\n" - "B106-Neshta\tmalware\n" - "B106-Netbot\tbotnet drone\n" - "B106-NetWiredRC\tbackdoor\n" - "B106-Neurevt\tbackdoor\n" - "B106-Nitol\tbackdoor\n" - 
"B106-Noancooe\tbackdoor\n" - "B106-Nosrawec\tbackdoor\n" - "B106-Nuqel\tmalware\n" - "B106-Nusump\tbackdoor\n" - "B106-Obfuscator\tmalware\n" - "B106-Otran\tbackdoor\n" - "B106-Parama\tbackdoor\n" - "B106-Parite\tmalware\n" - "B106-PcClient\tbackdoor\n" - "B106-Pdfjsc\tmalware\n" - "B106-Poison\tbackdoor\n" - "B106-Poisonivy\tbackdoor\n" - "B106-Pontoeb\tbackdoor\n" - "B106-Popiidor\tbackdoor\n" - "B106-PossibleMalware\tmalware\n" - "B106-Prorat\tbackdoor\n" - "B106-Prosti\tbackdoor\n" - "B106-Protos\tbackdoor\n" - "B106-Pushbot\tbackdoor\n" - "B106-Ramnit\tbackdoor\n" - "B106-Ranos\tbackdoor\n" - "B106-Rbot\tbackdoor\n" - "B106-Rebhip\tmalware\n" - "B106-Remhead\tbackdoor\n" - "B106-Rimod\tbackdoor\n" - "B106-Ritros\tmalware\n" - "B106-Runner\tbackdoor\n" - "B106-Sality\tmalware\n" - "B106-Scar\tmalware\n" - "B106-Sdbot\tbackdoor\n" - "B106-Sharke\tbackdoor\n" - "B106-Silby\tbackdoor\n" - "B106-SillyShareCopy\tmalware\n" - "B106-Sisproc\tbackdoor\n" - "B106-Sisron\tbackdoor\n" - "B106-Skypams\tbackdoor\n" - "B106-Small\tmalware\n" - "B106-Smpdoss\tmalware\n" - "B106-Sormoeck\tbackdoor\n" - "B106-Splori\tbackdoor\n" - "B106-Spybot\tbackdoor\n" - "B106-Squida\tmalware\n" - "B106-SSonce\tbackdoor\n" - "B106-Sulunch\tmalware\n" - "B106-Swisyn\tbackdoor\n" - "B106-Swrort\tbackdoor\n" - "B106-SynFlood\tbackdoor\n" - "B106-Tapazom\tbackdoor\n" - "B106-Tawsebot\tmalware\n" - "B106-Tearspear\tbackdoor\n" - "B106-Tendrit\tmalware\n" - "B106-Tenpeq\tbackdoor\n" - "B106-Tobfy\transomware\n" - "B106-Tocofob\tbackdoor\n" - "B106-Toobtox\tbackdoor\n" - "B106-Tumpadex\tmalware\n" - "B106-Turkojan\tbackdoor\n" - "B106-Twores\tbackdoor\n" - "B106-Vahodon\tvulnerable service\n" - "B106-Vake\tbackdoor\n" - "B106-VB\tbackdoor\n" - "B106-Vbcrypt\tmalware\n" - "B106-Vbinder\tmalware\n" - "B106-VBInject\tmalware\n" - "B106-Vburses\tbackdoor\n" - "B106-Vharke\tbackdoor\n" - "B106-Vinject\tbackdoor\n" - "B106-Virut\tbackdoor\n" - "B106-Vobfus\tmalware\n" - "B106-Vonriamt\tmalware\n" - 
"B106-Vtub\tmalware\n" - "B106-Weenkay\tbackdoor\n" - "B106-Wervik\tmalware\n" - "B106-Xtrat\tbackdoor\n" - "B106-Xyligan\tbackdoor\n" - "B106-Yemrok\tbackdoor\n" - "B106-Zacusca\tbackdoor\n" - "B106-Zbot\tbackdoor\n" - "B106-Zegost\tbackdoor\n" - "B58-CODE1\tbotnet drone\n" - "B58-DGA1\tbotnet drone\n" - "B58-NOTC\tbotnet drone\n" - "B58\tmalware\n" - "B58-CHTOZ\tmalware\n" - "B58-CRSH\tmalware\n" - "B58-DGA2\tmalware\n" - "B58-DNS\tmalware\n" - "B58-XPLT\tmalware configuration\n" - "B93-CONFIG\tmalware configuration\n" - "B93-HD\tmalware configuration\n" - "B93-PLUGINS\tmalware configuration\n" - "B54-BASE\tblacklist\n" - "B54-CONFIG\tmalware configuration\n" - "B54-CONFIG\tmalware configuration\n" - "B54-INIT\tmalware\n" - "B54-OLD\tblacklist\n" - "B54-CODE 1\tblacklist\n" - "B54-CODE 1\tblacklist\n" - "B54-DNS\tblacklist\n" - "Conficker\tmalware\n" - "B157-N3\tmalware\n" - "B157-O3\tmalwarew\n" - "B157-R0\tmalware\n" - "B157-R1\tmalware\n" - "B157-RL\tc&c\n" - "B157\tbackdoor\n" - "B157-DGA\tbackdoor\n" - "Rustock\tbotnet drone\n" - "B68-1-32\tbotnet drone\n" - "B68-1-64\tbotnet drone\n" - "B68-2-32\tbotnet drone\n" - "B68-2-64\tbotnet drone\n" - "B68-DNS\tblacklist\n" - "B68-TCP\tblacklist\n" - "B68\tbotnet drone\n" - "B68-2-64\tbotnet drone\n" - "Waledac\tbotnet drone\n" - "zbot\tmalware\n" - "B75-S1\tbotnet drone\n" - "B75-S12\tbotnet drone\n" - "B75-S2\tbotnet drone" -).split("\n"))) - - -class ParsingError(Exception): - pass - -def convert_windows_timestamp(windows_ts): - """Convert a windows file utc timestamp to a datetime object""" - us = int(windows_ts)/10 - dt = datetime(1601, 1, 1, tzinfo=tzutc()) + timedelta(microseconds=us) - return dt - - -def dcu_headers(): - """Preferred way of retrieving the dcu format""" - return __HEADERS - - -def convert_threatcode_to_type(threat_code): - """Converts threat code to IntelMQ type""" - return __THREAT_CODES.get(threat_code, "unknown") - - -def convert_dcu_fields(fields): - """Converts the given dict of 
microsoft dcu fields to new fields""" - converted_fields = [] - for key, value in fields.items(): - converted_key = __INTELMQ_TX.get(key) - converted_value = value - - if key and value: - try: - if key == "Botnet": - converted_value = convert_threatcode_to_type(value) - converted_key = "type" - - if key == "SourceIpAsnNr": - converted_value = re.match("([Aa][Ss])?([0-9]+)", value).group(2) - - if key == "FileTimeUtc": - converted_value = str(convert_windows_timestamp(value)) - - if converted_key and converted_value: - converted_fields.append((converted_key, converted_value)) - - if key == "Botnet": - # FIXME: what do we do with the dcu threat code? At the moment just - # writing it into malware field, so maybe an expert can do something with that - # Another mapping table? Also it isn't conforming to the ontology... - converted_fields.append(("malware", value)) - except: - raise ParsingError("key '%s' and value '%s' made a problem" % (key, value)), None, sys.exc_info()[2] - - return dict(converted_fields) diff --git a/intelmq/bots/parsers/microsoft_dcu/parser.py b/intelmq/bots/parsers/microsoft_dcu/parser.py deleted file mode 100644 index 8b277872f..000000000 --- a/intelmq/bots/parsers/microsoft_dcu/parser.py +++ /dev/null @@ -1,41 +0,0 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils - -import lib - - -class DCUParserBot(Bot): - """ Parses DCU-Collector output. 
""" - - def process(self): - report = self.receive_message() - - if report: - report = report.strip() - headers = lib.dcu_headers() - - rows = report.split("\n") - - for row in rows: - try: - columns = row.strip().split("\t") - fields = dict(zip(headers, columns)) - - event = Event(lib.convert_dcu_fields(fields)) - event.add("feed", "microsoft-dcu") - - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - except lib.ParsingError as exc: - msg = "Got a parsing problem: %s affected row '%s' IGNORING AND CONTINUING" % (exc.message, row.strip()) - self.logger.warning(msg, exc_info=True) - continue - self.acknowledge_message() - - -if __name__ == "__main__": - bot = DCUParserBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/parsers/openbl/parser.py b/intelmq/bots/parsers/openbl/parser.py index a1d4cec6f..9342a3b4b 100644 --- a/intelmq/bots/parsers/openbl/parser.py +++ b/intelmq/bots/parsers/openbl/parser.py @@ -1,42 +1,45 @@ from datetime import datetime +from intelmq.lib import utils from intelmq.lib.bot import Bot, sys from intelmq.lib.message import Event -from intelmq.bots import utils +from intelmq.lib.harmonization import DateTime + class OpenBLParserBot(Bot): def process(self): report = self.receive_message() - if report: - for row in report.split('\n'): - - row = row.strip() - - if len(row) == 0 or row.startswith('#'): - continue - - row = row.split() - event = Event() + if not report.contains("raw"): + self.acknowledge_message() - columns = ["source_ip", "source_time"] + raw_report = utils.base64_decode(report.value("raw")) + for row in raw_report.split('\n'): - for key, value in zip(columns, row): - if key == "source_time": - value = datetime.utcfromtimestamp(int(value)).strftime('%Y-%m-%d %H:%M:%S') + " UTC" - - event.add(key, value.strip()) - - event.add('feed', 'openbl') - event.add('feed_url', 'http://www.openbl.org/lists/date_all.txt') - event.add('type', 
'blacklist') + row = row.strip() + + if len(row) == 0 or row.startswith('#'): + continue + + splitted_row = row.split() + event = Event() + + columns = ["source.ip", "time.source"] + + for key, value in zip(columns, splitted_row): + if key == "time.source": + value = datetime.utcfromtimestamp(int(value)).strftime('%Y-%m-%d %H:%M:%S') + " UTC" - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) + event.add(key, value.strip(), sanitize=True) - self.send_message(event) + time_observation = DateTime().generate_datetime_now() + event.add('time.observation', time_observation, sanitize=True) + event.add('feed.name', report.value("feed.name")) + event.add('feed.url', report.value("feed.url")) + event.add('classification.type', u'blacklist') + event.add("raw", row, sanitize=True) + self.send_message(event) self.acknowledge_message() diff --git a/intelmq/bots/parsers/phishtank/parser.py b/intelmq/bots/parsers/phishtank/parser.py index 1b692d68b..ebf6df477 100644 --- a/intelmq/bots/parsers/phishtank/parser.py +++ b/intelmq/bots/parsers/phishtank/parser.py @@ -1,46 +1,54 @@ import unicodecsv from cStringIO import StringIO +from intelmq.lib import utils from intelmq.lib.bot import Bot, sys from intelmq.lib.message import Event -from intelmq.lib.utils import encode -from intelmq.bots import utils +from intelmq.lib.harmonization import DateTime class PhishTankParserBot(Bot): def process(self): report = self.receive_message() + + if not report.contains("raw"): + self.acknowledge_message() + + raw_report = utils.base64_decode(report.value("raw")) + + columns = [ + "__IGNORE__", + "source.url", + "description.url", + "time.source", + "__IGNORE__", + "__IGNORE__", + "__IGNORE__", + "description.target" + ] - if report: - event = Event() - report = encode(report) + for row in unicodecsv.reader(StringIO(raw_report), encoding='utf-8'): - # colums according to 
https://www.phishtank.com/developer_info.php as of 2015/04/30: - # phish_id,url,phish_detail_url,submission_time,verified,verification_time,online,target - # example: - # 123456,http://www.example.com/,http://www.phishtank.com/phish_detail.php?phish_id=123456,2009-06-19T15:15:47+00:00,yes,2009-06-19T15:37:31+00:00,yes,1st National Example Bank - columns = ["__IGNORE__", "source_url", "description_url", "source_time", "__IGNORE__", "__IGNORE__", "__IGNORE__", "target"] + # ignore headers + if "phish_id" in row: + continue - for row in unicodecsv.reader(StringIO(report), encoding='utf-8'): - - if "phish_id" in row: - continue # skip header - - for key, value in zip(columns, row): + event = Event() + + for key, value in zip(columns, row): - if key == "__IGNORE__": - continue - - event.add(key, value.strip()) + if key == "__IGNORE__": + continue - event.add('feed', 'phishtank') - event.add('type', 'phishing') - - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - + event.add(key, value, sanitize=True) + + time_observation = DateTime().generate_datetime_now() + event.add('time.observation', time_observation, sanitize=True) + event.add('feed.name', report.value("feed.name")) + event.add('feed.url', report.value("feed.url")) + event.add('classification.type', u'phishing') + event.add("raw", ",".join(row), sanitize=True) + + self.send_message(event) self.acknowledge_message() diff --git a/intelmq/bots/parsers/shadowserver/__init__.py b/intelmq/bots/parsers/shadowserver/__init__.py deleted file mode 100755 index e69de29bb..000000000 diff --git a/intelmq/bots/parsers/shadowserver/chargen-parser.py b/intelmq/bots/parsers/shadowserver/chargen-parser.py deleted file mode 100755 index 0742c3ea5..000000000 --- a/intelmq/bots/parsers/shadowserver/chargen-parser.py +++ /dev/null @@ -1,66 +0,0 @@ -import csv -import StringIO -from 
intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils - -class ShadowServerChargenParserBot(Bot): - - def process(self): - report = self.receive_message() - - if report: - report = report.strip() - - columns = { - "timestamp": "source_time", - "ip": "source_ip", - "protocol" : "transport_protocol", - "port" : "source_port", - "hostname": "source_reverse_dns", - "tag" : "__IGNORE__", - "quote" : "__IGNORE__", - "asn": "source_asn", - "geo": "source_cc", - "region" : "source_region", - "city" : "source_city" - } - - rows = csv.DictReader(StringIO.StringIO(report)) - - for row in rows: - event = Event() - - for key, value in row.items(): - - key = columns[key] - - if not value: - continue - - value = value.strip() - - if key is "__IGNORE__" or key is "__TDB__": - continue - - # set timezone explicitly to UTC as it is absent in the input - if key == "source_time": - value += " UTC" - - event.add(key, value) - - event.add('feed', 'shadowserver-chargen') - event.add('type', 'vulnerable service') - event.add('application_protocol', 'chargen') - - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - self.acknowledge_message() - - -if __name__ == "__main__": - bot = ShadowServerChargenParserBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/parsers/shadowserver/drone-parser.py b/intelmq/bots/parsers/shadowserver/drone-parser.py deleted file mode 100755 index 049b90bd1..000000000 --- a/intelmq/bots/parsers/shadowserver/drone-parser.py +++ /dev/null @@ -1,81 +0,0 @@ -import csv -import StringIO -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils - -class ShadowServerDroneParserBot(Bot): - - def process(self): - report = self.receive_message() - - if report: - report = report.strip() - - columns = { - "timestamp": 
"source_time", - "ip": "source_ip", - "port": "source_port", - "asn": "source_asn", - "geo": "source_cc", - "region": "source_region", - "city": "source_city", - "hostname": "source_reverse_dns", - "type": "__IGNORE__", - "infection": "malware", - "url": "__TBD__", - "agent": "__TBD__", - "cc": "destination_ip", - "cc_port": "destination_port", - "cc_asn": "destination_asn", - "cc_geo": "destination_cc", - "cc_dns": "destination_reverse_dns", - "count": "__TBD__", - "proxy": "__TBD__", - "application": "__TBD__", - "p0f_genre": "__TBD__", - "p0f_detail": "__TBD__", - "machine_name": "__TBD__", - "id": "__TBD__" - } - - rows = csv.DictReader(StringIO.StringIO(report)) - - for row in rows: - event = Event() - - for key, value in row.items(): - - key = columns[key] - - if not value: - continue - - value = value.strip() - - if key is "__IGNORE__" or key is "__TBD__": - continue - - if key is "malware": - value = value.strip().lower() - - # set timezone explicitly to UTC as it is absent in the input - if key == "source_time": - value += " UTC" - - event.add(key, value) - - event.add('feed', 'shadowserver-drone') - event.add('type', 'botnet drone') - - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - self.acknowledge_message() - - -if __name__ == "__main__": - bot = ShadowServerDroneParserBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/parsers/shadowserver/microsoft-sinkhole.py b/intelmq/bots/parsers/shadowserver/microsoft-sinkhole.py deleted file mode 100755 index 9ee74847c..000000000 --- a/intelmq/bots/parsers/shadowserver/microsoft-sinkhole.py +++ /dev/null @@ -1,76 +0,0 @@ -import csv -import StringIO -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils - -class ShadowServerMicrosoftSinkholeParserBot(Bot): - - def process(self): - report = 
self.receive_message() - - if report: - report = report.strip() - - columns = { - "timestamp": "source_time", - "ip": "source_ip", - "asn": "source_asn", - "geo": "source_cc", - "url": "__TBD__", - "type": "__IGNORE__", - "http_agent": "__TBD__", - "tor": "__TBD__", - "src_port": "source_port", - "p0f_genre": "__TBD__", - "p0f_detail": "__TBD__", - "hostname": "source_reverse_dns", - "dst_port": "destination_port", - "http_host": "__TBD__", - "http_referer": "__TBD__", - "http_referer_asn": "__TBD__", - "http_referer_ip": "__TBD__", - "http_referer_geo": "__TBD__", - "dst_ip": "destination_ip", - "dst_asn": "destination_asn", - "dst_geo": "destination_cc" - } - - rows = csv.DictReader(StringIO.StringIO(report)) - - for row in rows: - event = Event() - - for key, value in row.items(): - - key = columns[key] - - if not value: - continue - - value = value.strip() - - if key is "__IGNORE__" or key is "__TBD__": - continue - - # set timezone explicitly to UTC as it is absent in the input - if key == "source_time": - value += " UTC" - - event.add(key, value) - - event.add('feed', 'shadowserver-microsoft-sinkhole') - event.add('type', 'botnet drone') - event.add('application_protocol', 'http') - - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - self.acknowledge_message() - - -if __name__ == "__main__": - bot = ShadowServerMicrosoftSinkholeParserBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/parsers/shadowserver/qotd-parser.py b/intelmq/bots/parsers/shadowserver/qotd-parser.py deleted file mode 100755 index 093b0ba0c..000000000 --- a/intelmq/bots/parsers/shadowserver/qotd-parser.py +++ /dev/null @@ -1,66 +0,0 @@ -import csv -import StringIO -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils - -class ShadowServerQotdParserBot(Bot): - - def process(self): - 
report = self.receive_message() - - if report: - report = report.strip() - - columns = { - "timestamp": "source_time", - "ip": "source_ip", - "protocol" : "transport_protocol", - "port" : "source_port", - "hostname": "source_reverse_dns", - "tag" : "__IGNORE__", - "quote" : "__IGNORE__", - "asn": "source_asn", - "geo": "source_cc", - "region" : "source_region", - "city" : "source_city" - } - - rows = csv.DictReader(StringIO.StringIO(report)) - - for row in rows: - event = Event() - - for key, value in row.items(): - - key = columns[key] - - if not value: - continue - - value = value.strip() - - if key is "__IGNORE__" or key is "__TDB__": - continue - - # set timezone explicitly to UTC as it is absent in the input - if key == "source_time": - value += " UTC" - - event.add(key, value) - - event.add('feed', 'shadowserver-qotd') - event.add('type', 'vulnerable service') - event.add('application_protocol', 'qotd') - - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - self.acknowledge_message() - - -if __name__ == "__main__": - bot = ShadowServerQotdParserBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/parsers/shadowserver/snmp-parser.py b/intelmq/bots/parsers/shadowserver/snmp-parser.py deleted file mode 100644 index 3400e9584..000000000 --- a/intelmq/bots/parsers/shadowserver/snmp-parser.py +++ /dev/null @@ -1,67 +0,0 @@ -import csv -import StringIO -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils - -class ShadowServerSNMPParserBot(Bot): - - def process(self): - report = self.receive_message() - - if report: - report = report.strip() - - columns = { - "timestamp": "source_time", - "ip": "source_ip", - "protocol" : "transport_protocol", - "port" : "source_port", - "hostname": "source_reverse_dns", - "sysdesc" : "__TDB__", - "sysname" : "__TDB__", - 
"asn": "source_asn", - "geo": "source_cc", - "region" : "source_region", - "city" : "source_city", - "version" : "__IGNORE__" - } - - rows = csv.DictReader(StringIO.StringIO(report)) - - for row in rows: - event = Event() - - for key, value in row.items(): - - key = columns[key] - - if not value: - continue - - value = value.strip() - - if key is "__IGNORE__" or key is "__TDB__": - continue - - # set timezone explicitly to UTC as it is absent in the input - if key == "source_time": - value += " UTC" - - event.add(key, value) - - event.add('feed', 'shadowserver-snmp') - event.add('type', 'vulnerable service') - event.add('application_protocol', 'snmp') - - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - self.acknowledge_message() - - -if __name__ == "__main__": - bot = ShadowServerSNMPParserBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/parsers/generic/__init__.py b/intelmq/bots/parsers/taichung_city_netflow/__init__.py similarity index 100% rename from intelmq/bots/parsers/generic/__init__.py rename to intelmq/bots/parsers/taichung_city_netflow/__init__.py diff --git a/intelmq/bots/parsers/taichung_city_netflow/parser.py b/intelmq/bots/parsers/taichung_city_netflow/parser.py new file mode 100644 index 000000000..765dbc6bf --- /dev/null +++ b/intelmq/bots/parsers/taichung_city_netflow/parser.py @@ -0,0 +1,70 @@ +import re +from intelmq.lib import utils +from intelmq.lib.bot import Bot, sys +from intelmq.lib.message import Event +from intelmq.lib.harmonization import DateTime + + +CLASSIFICATION = { + "brute-force": [u"brute-force", u"brute force", u"mysql"], + "c&c": [u"c&c server"], + "botnet drone": [u"irc-botnet"], + "malware": [u"malware provider", u"malware website", u'\u60e1\u610f', u"worm"], + "scanner": [u"scan"], + "exploit": [u"bash", u"php-cgi", u"phpmyadmin"], + } + + +class 
TaichungCityNetflowParserBot(Bot): + + + def get_type(self, value): + value = value.lower() + for event_type, keywords in CLASSIFICATION.iteritems(): + for keyword in keywords: + if keyword in value: + return event_type + return "unknown" + + + def process(self): + report = self.receive_message() + + if not report.contains("raw"): + self.acknowledge_message() + + raw_report = utils.base64_decode(report.value("raw")) + for row in raw_report.split(''): + + # Get IP and Type + info1 = re.search(">[\ ]*(\d+\.\d+\.\d+\.\d+)[\ ]*<.*([^<]+)", row) + + if not info1: + continue + + # Get Timestamp + info2 = re.search("[\ ]*(\d{4}-\d{2}-\d{2}\ \d{2}:\d{2}:\d{2})[\ ]*", row) + + event = Event() + + description = info1.group(2) + description = utils.decode(description) + event_type = self.get_type(description) + time_observation = DateTime().generate_datetime_now() + time_source = info2.group(1) + " UTC-8" + + event.add("time.source", time_source, sanitize=True) + event.add('time.observation', time_observation, sanitize=True) + event.add("source.ip", info1.group(1), sanitize=True) + event.add('classification.type', event_type, sanitize=True) + event.add('description.text', description, sanitize=True) + event.add('feed.name', report.value("feed.name")) + event.add('feed.url', report.value("feed.url")) + event.add("raw", row, sanitize=True) + + self.send_message(event) + self.acknowledge_message() + +if __name__ == "__main__": + bot = TaichungCityNetflowParserBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/parsers/taichungcitynetflow/__init__.py b/intelmq/bots/parsers/taichungcitynetflow/__init__.py deleted file mode 100644 index 8d1c8b69c..000000000 --- a/intelmq/bots/parsers/taichungcitynetflow/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/intelmq/bots/parsers/taichungcitynetflow/parser.py b/intelmq/bots/parsers/taichungcitynetflow/parser.py deleted file mode 100644 index 16aa4a098..000000000 --- a/intelmq/bots/parsers/taichungcitynetflow/parser.py +++ 
/dev/null @@ -1,57 +0,0 @@ -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils -import re - -KEYWORDS = { - "brute-force": ["brute-force", "brute force", "mysql"], - "c&c": ["c&c server"], - "botnet drone": ["irc-botnet"], - "malware": ["malware provider", "malware website", u'\u60e1\u610f', "worm"], - "scanner": ["scan"], - "exploit": ["bash", "php-cgi", "phpmyadmin"], - } - -class TaichungCityNetflowParserBot(Bot): - - def get_type(self, value): - value = value.lower() - for event_type, keywords in KEYWORDS.iteritems(): - for keyword in keywords: - if unicode(keyword) in value: - return event_type - return "unknown" - - def process(self): - report = self.receive_message() - - for row in report.split(''): - - # Get IP and Type - info1 = re.search(">[\ ]*(\d+\.\d+\.\d+\.\d+)[\ ]*<.*([^<]+)", row) - - # Get Timestamp - info2 = re.search("[\ ]*(\d{4}-\d{2}-\d{2}\ \d{2}:\d{2}:\d{2})[\ ]*", row) - - if info1: - event = Event() - - event.add("source_ip", info1.group(1)) - description = info1.group(2) - event_type = self.get_type(description) - event.add('type', event_type) - event.add('description', description) - event.add("source_time", info2.group(1) + " UTC-8") - event.add('feed', 'taichungcitynetflow') - event.add('feed_url', 'https://tc.edu.tw/net/netflow/lkout/recent/30') - - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - self.acknowledge_message() - -if __name__ == "__main__": - bot = TaichungCityNetflowParserBot(sys.argv[1]) - bot.start() \ No newline at end of file diff --git a/intelmq/bots/parsers/torexitnode b/intelmq/bots/parsers/torexitnode deleted file mode 100644 index 997456d02..000000000 --- a/intelmq/bots/parsers/torexitnode +++ /dev/null @@ -1,42 +0,0 @@ -""" -Tor Exit Node parser . 
- -Maintainer: Krystian Kochanowski -""" - -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils - -class TorexitnodeParserBot(Bot): - - def process(self): - report = self.receive_message() - - if report: - for row in report.split('\n'): - row = row.strip() - - if len(row) == 0: - continue - - row = row.split() - event = Event() - - columns = ["source_ip"] - for key, value in zip(columns, row): - event.add(key, value) - - event.add('feed', 'torexitnode') - event.add('feed_url', 'https://torstatus.blutmagie.de/ip_list_exit.php/Tor_ip_list_EXIT.csv') - - event = utils.parse_source_time(event, "source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - self.acknowledge_message() - -if __name__ == "__main__": - bot = TorexitnodeParserBot(sys.argv[1]) - bot.start() diff --git a/intelmq/bots/parsers/vxvault/parser.py b/intelmq/bots/parsers/vxvault/parser.py index d230cbbbd..1164ec399 100644 --- a/intelmq/bots/parsers/vxvault/parser.py +++ b/intelmq/bots/parsers/vxvault/parser.py @@ -1,47 +1,51 @@ -import urlparse -from intelmq.lib.bot import Bot, sys -from intelmq.lib.message import Event -from intelmq.bots import utils - -class VXVaultParserBot(Bot): - - def process(self): - report = self.receive_message() - - if report: - for row in report.split('\n'): - row = row.strip() - - if len(row) == 0 or not row.startswith('http'): - continue - - url_object = urlparse.urlparse(row) - - if not url_object: - continue - - url = url_object.geturl() - hostname = url_object.hostname - port = url_object.port - - event = Event() - event.add("source_url", url) - event.add("source_domain_name", hostname) - if port: - event.add("source_port", str(port)) - - event.add('feed', 'vxvault') - event.add('feed_url', 'http://vxvault.siri-urz.net/URL_List.php') - event.add('type', 'malware') - - event = utils.generate_source_time(event, 
"source_time") - event = utils.generate_observation_time(event, "observation_time") - event = utils.generate_reported_fields(event) - - self.send_message(event) - self.acknowledge_message() - - -if __name__ == "__main__": - bot = VXVaultParserBot(sys.argv[1]) - bot.start() +import urlparse +from intelmq.lib import utils +from intelmq.lib.bot import Bot, sys +from intelmq.lib.message import Event +from intelmq.lib.harmonization import DateTime + + +class VXVaultParserBot(Bot): + + def process(self): + report = self.receive_message() + + if not report.contains("raw"): + self.acknowledge_message() + + raw_report = utils.base64_decode(report.value("raw")) + for row in raw_report.split('\n'): + + row = row.strip() + + if len(row) == 0 or not row.startswith('http'): + continue + + url_object = urlparse.urlparse(row) + + if not url_object: + continue + + url = url_object.geturl() + hostname = url_object.hostname + port = url_object.port + + event = Event() + + time_observation = DateTime().generate_datetime_now() + event.add('time.observation', time_observation, sanitize=True) + event.add('feed.name', report.value("feed.name")) + event.add('feed.url', report.value("feed.url")) + event.add('classification.type', u'malware') + event.add("source.url", url, sanitize=True) + event.add("source.domain_name", hostname, sanitize=True) + event.add("source.port", str(port), sanitize=True) + event.add("raw", row, sanitize=True) + + self.send_message(event) + self.acknowledge_message() + + +if __name__ == "__main__": + bot = VXVaultParserBot(sys.argv[1]) + bot.start() diff --git a/intelmq/bots/utils.py b/intelmq/bots/utils.py deleted file mode 100644 index 9ef0e6c45..000000000 --- a/intelmq/bots/utils.py +++ /dev/null @@ -1,176 +0,0 @@ -import re -import dns -import pytz -import socket -import binascii -import datetime -import dateutil.parser -import dateutil.tz -from urlparse import urlparse - - -def get_domain_name_from_url(url): - res = urlparse(url) - if res.netloc != "" and not 
is_ip(res.netloc): - return res.netloc - return None - - -def get_ip_from_url(url): - res = urlparse(url) - if res.netloc != "": - return get_ip_from_domain_name(res.netloc) - return None - - -def get_ip_from_domain_name(domain_name): - try: - socket.setdefaulttimeout(0.5) - return socket.gethostbyname(domain_name) - except: - return None - - -def get_reverse_ip(ip): - result = str(dns.reversename.from_address(ip)) - reverse = result.split('.in-addr.arpa.') - if not reverse: - reverse = result.split('.ip6.arpa.') - return reverse[0] - - -def is_url(url): - - if not "/" in url: - return None - - if "hxxp://" in url: - url = url.replace('hxxp://','http://') - - if "hxxps://" in url: - url = url.replace('hxxps://','https://') - - res = urlparse(url) - if res.netloc != "": - return url - - res = urlparse("http://" + url) - if res.netloc != "": - return "http://" + url - - return None - - -def is_domain_name(domain_name): - - if "/" in domain_name or is_ip(domain_name): - return None - - res = urlparse(domain_name) - if res.netloc != "": - return domain_name - - res = urlparse("http://" + domain_name) - if res.netloc != "": - return domain_name - - return None - - -def is_ip(ip): - if is_ipv4(ip): - return ip - if is_ipv6(ip): - return ip - return None - - -def is_ipv4(ip): - try: - socket.inet_pton(socket.AF_INET, ip) - return ip - except socket.error: - return None - - -def is_ipv6(ip): - try: - socket.inet_pton(socket.AF_INET6, ip) - return ip - except socket.error: - return None - - -def ip_to_int(ip): - try: - ip_integer = socket.inet_pton(socket.AF_INET, ip) - except socket.error: - try: - ip_integer = socket.inet_pton(socket.AF_INET6, ip) - except socket.error: - return None - - ip_integer = int(binascii.hexlify(ip_integer), 16) - return ip_integer - - -def parse_source_time(event, key): - if not event.contains(key): - return generate_source_time(event, key) - - value = event.value(key) - event.discard(key, value) - - new_value = dateutil.parser.parse(value) - 
new_value = new_value.astimezone(pytz.utc) - new_value = new_value.isoformat() - event.add(key, new_value) - return event - - -def generate_source_time(event, key): - value = datetime.datetime.utcnow() - tz = pytz.timezone('UTC') - value = value.replace(hour=0,minute=0,second=0,microsecond=0, tzinfo=tz) - value = value.isoformat() - event.add(key, value) - return event - - -def generate_observation_time(event, key): - value = datetime.datetime.utcnow() - value = value.replace(microsecond=0) - value = value.isoformat() - event.add(key, value) - return event - - -def generate_reported_fields(event): - - keys_pairs = [ - ( - "source_ip", - "source_domain_name", - "source_url", - "source_email_address", - "source_asn", - "source_cc" - ), - ( - "destination_ip", - "destination_domain_name", - "destination_url", - "destination_email_address", - "destination_asn", - "destination_cc" - ) - ] - - for keys in keys_pairs: - for key in keys: - if event.contains(key): - value = event.value(key) - reported_key = "reported_%s" % key - event.add(reported_key, value) - - return event diff --git a/intelmq/conf/defaults.conf b/intelmq/conf/defaults.conf new file mode 100644 index 000000000..922f024bc --- /dev/null +++ b/intelmq/conf/defaults.conf @@ -0,0 +1,17 @@ +{ + "broker": "redis", + "error_procedure": "retry", + "error_max_retries": 3, + "error_log_message": true, + "error_log_exception": true, + "error_dump_message": true, + "error_retry_delay": 15, + "rate_limit": 0, + "load_balance": true, + "source_pipeline_host": "127.0.0.1", + "source_pipeline_port": "6379", + "source_pipeline_db": "2", + "destination_pipeline_host": "127.0.0.1", + "destination_pipeline_port": "6379", + "destination_pipeline_db": "2" +} diff --git a/intelmq/conf/harmonization.conf b/intelmq/conf/harmonization.conf new file mode 100644 index 000000000..4b53b1826 --- /dev/null +++ b/intelmq/conf/harmonization.conf @@ -0,0 +1,130 @@ +{ + "report": { + "raw": { + "type": "Base64", + "description": "test ..." 
+ }, + "feed.name": { + "type": "String", + "description": "test ..." + }, + "feed.url": { + "type": "URL", + "description": "test ..." + } + }, + "event": { + "raw": { + "type": "Base64", + "description": "test ..." + }, + "feed.name": { + "type": "FeedName", + "description": "test ..." + }, + "feed.url": { + "type": "URL", + "description": "test ..." + }, + "time.source": { + "type": "DateTime", + "description": "test ..." + }, + "time.observation": { + "type": "DateTime", + "description": "test ..." + }, + "classification.type": { + "type": "ClassificationType", + "description": "test ..." + }, + "classification.taxonomy": { + "type": "String", + "description": "Enisa threat taxonomy" + }, + "source.ip": { + "type": "IPAddress", + "description": "test ..." + }, + "source.fqdn": { + "type": "DomainName", + "description": "test ..." + }, + "source.reverse_domain_name": { + "type": "DomainName", + "description": "test ..." + }, + "source.url": { + "type": "URL", + "description": "test ..." + }, + "source.port": { + "type": "String", + "description": "test ..." + }, + "source.asn": { + "type": "String", + "description": "test ..." + }, + "source.bgp_prefix": { + "type": "String", + "description": "test ..." + }, + "source.registry": { + "type": "String", + "description": "test ..." + }, + "source.allocated": { + "type": "String", + "description": "test ..." + }, + "source.as_name": { + "type": "String", + "description": "test ..." + }, + "source.geolocation.cc": { + "type": "String", + "description": "test ..." + }, + "source.geolocation.city": { + "type": "String", + "description": "test ..." + }, + "source.geolocation.latitude": { + "type": "String", + "description": "test ..." + }, + "source.geolocation.longitude": { + "type": "String", + "description": "test ..." + }, + "source.abuse_contact": { + "type": "String", + "description": "test ..." + }, + "protocol.application": { + "type": "String", + "description": "test ..." 
+ }, + "malware.name": { + "type": "MalwareName", + "description": "test ..." + }, + "description.target": { + "type": "String", + "description": "test ..." + }, + "description.text": { + "type": "String", + "description": "test ..." + }, + "description.url": { + "type": "URL", + "description": "test ..." + }, + "comment": { + "type": "String", + "description": "test ..." + } + } +} diff --git a/intelmq/conf/pipeline.conf b/intelmq/conf/pipeline.conf index 9152aa104..542cdba56 100644 --- a/intelmq/conf/pipeline.conf +++ b/intelmq/conf/pipeline.conf @@ -1,32 +1,4 @@ { - "vxvault-collector": { - "destination-queues": [ - "vxvault-parser-queue" - ] - }, - "vxvault-parser": { - "source-queue": "vxvault-parser-queue", - "destination-queues": [ - "deduplicator-expert-queue" - ] - }, - "deduplicator-expert": { - "source-queue": "deduplicator-expert-queue", - "destination-queues": [ - "sanitizer-expert-queue" - ] - }, - "malware-domain-list-collector": { - "destination-queues": [ - "malware-domain-list-parser-queue" - ] - }, - "malware-domain-list-parser": { - "source-queue": "malware-domain-list-parser-queue", - "destination-queues": [ - "deduplicator-expert-queue" - ] - }, "arbor-collector": { "destination-queues": [ "arbor-parser-queue" @@ -34,18 +6,6 @@ }, "arbor-parser": { "source-queue": "arbor-parser-queue", - "destination-queues": [ - "deduplicator-expert-queue" - ] - }, - "taxonomy-expert": { - "source-queue": "taxonomy-expert-queue", - "destination-queues": [ - "cymru-expert-queue" - ] - }, - "cymru-expert": { - "source-queue": "cymru-expert-queue", "destination-queues": [ "file-output-queue" ] @@ -53,43 +13,15 @@ "file-output": { "source-queue": "file-output-queue" }, - "dragon-research-group-vnc-collector": { - "destination-queues": [ - "dragon-research-group-vnc-parser-queue" - ] - }, - "dragon-research-group-vnc-parser": { - "source-queue": "dragon-research-group-vnc-parser-queue", - "destination-queues": [ - "deduplicator-expert-queue" - ] - }, - 
"dragon-research-group-ssh-collector": { - "destination-queues": [ - "dragon-research-group-ssh-parser-queue" - ] - }, - "dragon-research-group-ssh-parser": { - "source-queue": "dragon-research-group-ssh-parser-queue", - "destination-queues": [ - "deduplicator-expert-queue" - ] - }, - "openbl-collector": { - "destination-queues": [ - "openbl-parser-queue" - ] - }, - "openbl-parser": { - "source-queue": "openbl-parser-queue", + "malware-domain-list-collector": { "destination-queues": [ - "deduplicator-expert-queue" + "malware-domain-list-parser-queue" ] }, - "sanitizer-expert": { - "source-queue": "sanitizer-expert-queue", + "malware-domain-list-parser": { + "source-queue": "malware-domain-list-parser-queue", "destination-queues": [ - "taxonomy-expert-queue" + "file-output-queue" ] } -} +} \ No newline at end of file diff --git a/intelmq/conf/runtime.conf b/intelmq/conf/runtime.conf index d097991b8..80408bc33 100644 --- a/intelmq/conf/runtime.conf +++ b/intelmq/conf/runtime.conf @@ -1,65 +1,37 @@ { - "__default__": { - "rate_limit": 0, - "retry_delay": 30, - "redis_cache_host": "127.0.0.1", - "redis_cache_port": "6379", - "redis_cache_db": "10", - "redis_cache_ttl": "86400" - }, "arbor-collector": { + "error_procedure": "retry", + "error_retry_delay": 30, + "error_log_message": true, + "error_dump_message": true, "url": "http://atlas-public.ec2.arbor.net/public/ssh_attackers", "rate_limit": 3600 }, - "vxvault-collector": { - "url": "http://vxvault.siri-urz.net/URL_List.php", - "rate_limit": 3600 - }, "malware-domain-list-collector": { + "error_procedure": "retry", + "error_retry_delay": 30, + "error_log_message": true, + "error_dump_message": true, "url": "http://www.malwaredomainlist.com/updatescsv.php", "rate_limit": 3600 }, - "dragon-research-group-ssh-collector": { - "url": "http://dragonresearchgroup.org/insight/sshpwauth.txt", - "rate_limit": 3600 - }, - "dragon-research-group-vnc-collector": { - "url": "https://dragonresearchgroup.org/insight/vncprobe.txt", - 
"rate_limit": 3600 - }, "arbor-parser": { - }, - "vxvault-parser": { + "error_procedure": "pass", + "error_retry_delay": 30, + "error_log_message": true, + "error_dump_message": true }, "malware-domain-list-parser": { - }, - "deduplicator-expert": { - "redis_cache_host": "127.0.0.1", - "redis_cache_port": "6379", - "redis_cache_db": "10", - "redis_cache_ttl": "86400" - }, - "dragon-research-group-ssh-parser": { - }, - "dragon-research-group-vnc-parser": { - }, - "sanitizer-expert": { - }, - "taxonomy-expert": { - }, - "cymru-expert": { - "redis_cache_host": "127.0.0.1", - "redis_cache_port": "6379", - "redis_cache_db": "10", - "redis_cache_ttl": "86400" + "error_procedure": "pass", + "error_retry_delay": 30, + "error_log_message": true, + "error_dump_message": true }, "file-output": { + "error_procedure": "retry", + "error_retry_delay": 30, + "error_log_message": true, + "error_dump_message": true, "file": "/opt/intelmq/var/lib/bots/file-output/events.txt" - }, - "openbl-collector": { - "url": "http://www.openbl.org/lists/date_all.txt", - "rate_limit": 7200 - }, - "openbl-parser": { } } diff --git a/intelmq/conf/startup.conf b/intelmq/conf/startup.conf index 4e1ad2568..816c551ff 100644 --- a/intelmq/conf/startup.conf +++ b/intelmq/conf/startup.conf @@ -2,103 +2,31 @@ "arbor-collector": { "group": "Collector", "name": "Arbor", - "module": "intelmq.bots.collectors.url.collector", + "module": "intelmq.bots.collectors.http.collector_http", "description": "Arbor Collector is the bot responsible to get the report from source of information." }, - "vxvault-collector": { - "group": "Collector", - "name": "VXVault", - "module": "intelmq.bots.collectors.url.collector", - "description": "VXVault Collector is the bot responsible to get the report from source of information." 
- }, "malware-domain-list-collector": { "group": "Collector", "name": "Malware Domain List", - "module": "intelmq.bots.collectors.url.collector", + "module": "intelmq.bots.collectors.http.collector_http", "description": "Malware Domain List Collector is the bot responsible to get the report from source of information." }, - "dragon-research-group-ssh-collector": { - "group": "Collector", - "name": "Dragon Research Group SSH", - "module": "intelmq.bots.collectors.url.collector", - "description": "Dragon Research Group SSH Collector is the bot responsible to get the report from source of information." - }, - "dragon-research-group-vnc-collector": { - "group": "Collector", - "name": "Dragon Research Group VNC", - "module": "intelmq.bots.collectors.url.collector", - "description": "Dragon Research Group VNC Collector is the bot responsible to get the report from source of information." - }, "arbor-parser": { "group": "Parser", "name": "Arbor", "module": "intelmq.bots.parsers.arbor.parser", "description": "Arbor Parser is the bot responsible to parse the report and sanitize the information." }, - "vxvault-parser": { - "group": "Parser", - "name": "VXVault", - "module": "intelmq.bots.parsers.vxvault.parser", - "description": "VXVault Parser is the bot responsible to parse the report and sanitize the information." - }, "malware-domain-list-parser": { "group": "Parser", "name": "Malware Domain List", "module": "intelmq.bots.parsers.malwaredomainlist.parser", "description": "Malware Domain List Parser is the bot responsible to parse the report and sanitize the information." }, - "deduplicator-expert": { - "group": "Expert", - "name": "Deduplicator", - "module": "intelmq.bots.experts.deduplicator.deduplicator", - "description": "Deduplicator is the bot responsible to detect and remove deduplicated events." 
- }, - "dragon-research-group-ssh-parser": { - "group": "Parser", - "name": "Dragon Research Group SSH", - "module": "intelmq.bots.parsers.dragonresearchgroup.parser-ssh", - "description": "Dragon Research Group SSH Parser is the bot responsible to parse the report and sanitize the information." - }, - "dragon-research-group-vnc-parser": { - "group": "Parser", - "name": "Dragon Research Group VNC", - "module": "intelmq.bots.parsers.dragonresearchgroup.parser-vnc", - "description": "Dragon Research Group VNC Parser is the bot responsible to parse the report and sanitize the information." - }, - "sanitizer-expert": { - "group": "Expert", - "name": "Sanitizer", - "module": "intelmq.bots.experts.sanitizer.sanitizer", - "description": "Sanitizer is the bot responsible to sanitize all events." - }, - "taxonomy-expert": { - "group": "Expert", - "name": "Taxonomy", - "module": "intelmq.bots.experts.taxonomy.taxonomy", - "description": "Taxonomy is the bot responsible to apply the eCSIRT Taxonomy to all events." - }, - "cymru-expert": { - "group": "Expert", - "name": "Cymru", - "module": "intelmq.bots.experts.cymru.cymru", - "description": "Cymry (IP to ASN) is the bot responsible to add network information to the events (BGP, ASN, AS Name, Country, etc..)." - }, "file-output": { "group": "Output", "name": "File", - "module": "intelmq.bots.outputs.file.file", + "module": "intelmq.bots.outputs.file.output", "description": "File is the bot responsible to send events to a file." - }, - "openbl-collector": { - "group": "Collector", - "name": "OpenBL", - "module": "intelmq.bots.collectors.url.collector", - "description": "OpenBL Collector is the bot responsible to get the report from source of information." - }, - "openbl-parser": { - "group": "Parser", - "name": "OpenBL", - "module": "intelmq.bots.parsers.openbl.parser", - "description": "OpenBL Parser is the bot responsible to parse the report and sanitize the information." 
} } diff --git a/intelmq/conf/system.conf b/intelmq/conf/system.conf index ab517962e..1849531c8 100644 --- a/intelmq/conf/system.conf +++ b/intelmq/conf/system.conf @@ -1,6 +1,6 @@ -{ - "logging_level": "DEBUG", - "logging_path": "/opt/intelmq/var/log/", - "http_proxy": null, - "https_proxy": null -} +{ + "logging_level": "DEBUG", + "logging_path": "/opt/intelmq/var/log/", + "http_proxy": null, + "https_proxy": null +} diff --git a/intelmq/lib/__init__.py b/intelmq/lib/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/intelmq/lib/cache.py b/intelmq/lib/cache.py deleted file mode 100644 index f67695311..000000000 --- a/intelmq/lib/cache.py +++ /dev/null @@ -1,33 +0,0 @@ -import redis - -""" Cache is a set with information already seen by the system. - This provides a way, for example, to remove duplicated events - and reports in system or cache some results from experts like - Cymru Whois. It's possible to define a TTL value in each information - inserted in cache. This TTL means how much time the system will keep an - information in the cache. 
-""" - - -class Cache(): - def __init__(self, host, port, db, ttl): - self.redis = redis.Redis(host=host, - port=int(port), - db=db, - socket_timeout=5) - - self.ttl = ttl - - def exists(self, key): - """Returns True if key exists in the cache""" - return self.redis.exists(key) - - def get(self, key): - """Returns the value of key in cache""" - return self.redis.get(key) - - def set(self, key, value): - """Sets key to value in cache""" - # backward compatibility (Redis v2.2) - self.redis.setnx(key, value) - self.redis.expire(key, self.ttl) diff --git a/intelmq/lib/message.py b/intelmq/lib/message.py deleted file mode 100644 index 6b89ad25f..000000000 --- a/intelmq/lib/message.py +++ /dev/null @@ -1,99 +0,0 @@ -import json -import hashlib - - -class Event(object): - - def __init__(self, event=None): - if event: - self.event = event - else: - self.event = dict() - - def add(self, key, value): - """Add key with specified value""" - if not value or key in self.event: - return False - - self.event[key] = value - return True - - def update(self, key, new_value): - """Updates key with new value""" - if not new_value: - return False - - self.event[key] = new_value - return True - - def discard(self, key, value): - """Discards the given key, value pair""" - self.clear(key) - - def clear(self, key): - """Clears the given key from event""" - if key in self.event: - return self.event.pop(key) - else: - return None - - def value(self, key): - """Returns value of key or None if it doesn't exist""" - if key in self.event: - return self.event.get(key) - else: - return None - - def keys(self): - """Return contained keys of event""" - return self.event.keys() - - def items(self): - """Return contained items of event""" - return self.event.items() - - def contains(self, key): - """Returns key in event""" - if key in self.event: - return self.event.get(key) - else: - return None - - def to_dict(self): - """Converts event to a dict""" - return dict(self.event) - - def to_unicode(self): 
- - return unicode(json.dumps(self.event)) - - @staticmethod - def from_unicode(event_string): - return Event(json.loads(event_string)) - - def __hash__(self): - """Returns this event as a hash""" - evhash = hashlib.sha1() - - for key, value in sorted(self.items()): - evhash.update(key.encode("utf-8")) - evhash.update("\xc0") - evhash.update(value.encode("utf-8")) - evhash.update("\xc0") - - # FIXME: the int stuff should be done by cache - return int(evhash.hexdigest(), 16) - - def __eq__(self, other_event): - """Return self == other_event""" - return self.event == other_event - - def __unicode__(self): - return self.to_unicode() - - def __repr__(self): - return repr(self.event) - - def __str__(self): - """Returns self (the event itself) as a string""" - return str(self.event) diff --git a/intelmq/lib/pipeline.py b/intelmq/lib/pipeline.py deleted file mode 100644 index b06ee5e80..000000000 --- a/intelmq/lib/pipeline.py +++ /dev/null @@ -1,66 +0,0 @@ -import redis -import time - - -class Pipeline(): - def __init__(self, host="127.0.0.1", port="6379", db=2): - self.host = host - self.port = port - self.db = db - - self.redis = redis.Redis(host=self.host, - port=int(self.port), - db=self.db, - socket_timeout=50000) - - def source_queues(self, source_queue): - """Sets the source queue of this pipeline""" - self.source_queue = source_queue - if source_queue: - self.internal_queue = source_queue + "-internal" - - def destination_queues(self, destination_queues): - """Sets the destination queues of this pipeline object""" - if destination_queues and type(destination_queues) is not list: - destination_queues = destination_queues.split() - self.destination_queues = destination_queues - - def disconnect(self): - """Disconnects from pipeline provider""" - pass - - def sleep(self, interval): - """Requests the pipeline to sleep for the given interval""" - time.sleep(interval) - - def send(self, message): - """Send given message on this pipeline object""" - for destination_queue 
in self.destination_queues: - self.redis.lpush(destination_queue, message) - - def receive(self): - """Returns the last received message or - any last message which wasn't yet acknowledged""" - if self.redis.llen(self.internal_queue) > 0: - return self.redis.lindex(self.internal_queue, -1) - return self.redis.brpoplpush(self.source_queue, self.internal_queue, 0) - - def acknowledge(self): - """Acknowledges the last received message - and removes it from input queue """ - return self.redis.rpop(self.internal_queue) - - def count_queued_messages(self, queues): - """Returns the amount of queued messages - over all given queue names""" - qdict = dict() - for queue in queues: - qdict[queue] = self.redis.llen(queue) - return qdict - - -# Algorithm -# --------- -# [Receive] B RPOP LPUSH source_queue -> internal_queue -# [Send] LPUSH message -> destination_queue -# [Acknowledge] RPOP message <- internal_queue diff --git a/intelmq/lib/utils.py b/intelmq/lib/utils.py deleted file mode 100644 index cc5b40159..000000000 --- a/intelmq/lib/utils.py +++ /dev/null @@ -1,62 +0,0 @@ -import logging -import hashlib - - -def decode(text, encodings=["utf-8", "ISO-8859-15"], force=False): - """Returns the given text with the given encodings, - else it raises an exception""" - for encoding in encodings: - try: - return text.decode(encoding) - except ValueError as e: - pass - - if force: - for encoding in encodings: - try: - return text.decode(encoding, 'ignore') - except ValueError as e: - pass - - raise Exception("Found a problem when decoding.") - - -def encode(text, encodings=["utf-8"], force=False): - """Returns the given text encoded using the given encodings, - else it raises an exception""" - for encoding in encodings: - try: - return text.encode(encoding) - except ValueError as e: - pass - - if force: - for encoding in encodings: - try: - return text.decode(encoding, 'ignore') - except ValueError as e: - pass - - raise Exception("Found a problem when encoding.") - - -def 
log(logs_path, name, loglevel="DEBUG"): - """Creates a new logger with given logs_path/name""" - logger = logging.getLogger(name) - logger.setLevel(loglevel) - - handler = logging.FileHandler("%s/%s.log" % (logs_path, name)) - handler.setLevel(loglevel) - - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') - handler.setFormatter(formatter) - - logger.addHandler(handler) - return logger - - -def hashgen(data, func=hashlib.sha1): - """Generates a hash string based on the given data""" - result = func() - result.update(data) - return result.hexdigest() diff --git a/intelmq/tests/__init__.py b/intelmq/tests/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/intelmq/tests/bots/__init__.py b/intelmq/tests/bots/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/intelmq/tests/bots/parsers/__init__.py b/intelmq/tests/bots/parsers/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/intelmq/tests/bots/parsers/dcu/__init__.py b/intelmq/tests/bots/parsers/dcu/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/intelmq/tests/bots/parsers/dcu/lib.py b/intelmq/tests/bots/parsers/dcu/lib.py deleted file mode 100644 index 77b6cdb25..000000000 --- a/intelmq/tests/bots/parsers/dcu/lib.py +++ /dev/null @@ -1,83 +0,0 @@ -from intelmq.bots.parsers.dcu import lib -import unittest - - -class TestDCUParserLib(unittest.TestCase): - """Tests if the dcu-parser library works as expected""" - - def setUp(self): - # fake dcu records for later testing - self.test_dcu = ["SinkHoleMessage", - "130366192837417292", - "B54-BASE", - "1.2.3.4", - "51762", - "AS0123", - "1.2.3.6", - "53", - "", - "US", - "", - "FakeCity", - "", - "10.99", - "11.23", - "0", - "0", - "/images/file.php", - "", - "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)", - "POST", - "1.1", - "", - "", - "", - "", - "", - ""] - - def test_windows_filetime(self): - """Tests if the 
filetime conversion works correctly""" - - dcu_ts1 = "130366708739532588" - dcu_ts2 = "130366708907647925" - converted_ts1 = lib.convert_windows_timestamp(dcu_ts1) - converted_ts2 = lib.convert_windows_timestamp(dcu_ts2) - - self.assertEqual('2014-02-12 09:27:53.953258+00:00', str(converted_ts1)) - self.assertEqual('2014-02-12 09:28:10.764792+00:00', str(converted_ts2)) - - def test_dcu_conversion(self): - """Tests if a dcu line is correctly converted to IntelMQ fields""" - fields = dict(zip(lib.dcu_headers(), self.test_dcu)) - converted = lib.convert_dcu_fields(fields) - - self.assertEqual('2014-02-11 19:08:03.741729+00:00', converted["source_time"]) - self.assertEqual('blacklist', converted["type"]) - self.assertEqual('1.2.3.4', converted["source_ip"]) - self.assertEqual('51762', converted["source_port"]) - self.assertEqual('0123', converted["source_asn"]) - self.assertEqual('FakeCity', converted["source_city"]) - self.assertEqual('B54-BASE', converted["malware"]) - - self.assertEqual('1.2.3.6', converted["destination_ip"]) - self.assertEqual('53', converted["destination_port"]) - self.assertEqual('10.99', converted["source_latitude"]) - self.assertEqual('11.23', converted["source_longitude"]) - - self.assertEqual('Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)', converted["user_agent"]) - - def test_threat_code_conversion(self): - """Tests if a dcu threat code is converted correctly to an IntelMQ type""" - - # some known, relatively obvious dcu threat code to type conversions - self.assertEqual("backdoor", lib.convert_threatcode_to_type("B106-Tapazom")) - self.assertEqual("malware", lib.convert_threatcode_to_type("B106-Vobfus")) - self.assertEqual("blacklist", lib.convert_threatcode_to_type("B54-BASE")) - self.assertEqual("malware configuration", lib.convert_threatcode_to_type("B54-CONFIG")) - - # if we find something we don't know, then add unknown - self.assertEqual("unknown", lib.convert_threatcode_to_type("Foobar")) - -if __name__ == 
'__main__': - unittest.main() diff --git a/scripts/prettyprint.sh b/scripts/prettyprint.sh new file mode 100644 index 000000000..06b01e843 --- /dev/null +++ b/scripts/prettyprint.sh @@ -0,0 +1,4 @@ +while read i; +do + echo "$i" | python -m json.tool ; +done < $1 diff --git a/scripts/prettyprint.txt b/scripts/prettyprint.txt new file mode 100644 index 000000000..a5e982805 --- /dev/null +++ b/scripts/prettyprint.txt @@ -0,0 +1 @@ +sh scripts/prettyprint.sh /opt/intelmq/var/lib/bots/file-output/events.txt diff --git a/scripts/vagrant/README.md b/scripts/vagrant/README.md new file mode 100644 index 000000000..efb0d6e9e --- /dev/null +++ b/scripts/vagrant/README.md @@ -0,0 +1,47 @@ +# Virtual Machine provision for testing IntelMQ and IntelMQ-Manager +*** + +## Information +This directory contains a Vagrantfile to create an ubuntu/trusty64 virtualbox Virtual Machine (VM); the *bootstrap.sh* script will install IntelMQ (v1.0-beta branch) and IntelMQ-Manager. + +The VM can be accessed from the host machine on the IP: 192.168.33.10 . + +## Dependencies +This project requires that [VirtualBox][vb] and [Vagrant][vg] are installed on the host machine. + +## How to use it +* If you have not yet cloned this repository: + +git clone https://github.com/certtools/intelmq.git + +* Then go to this script directory: + +cd intelmq/scripts/vagrant + +* The script is configured to clone the v1.0-beta branch. If you want another branch, you should change the variable *INTELMQ_BRANCH* on the *bootstrap.sh* script. +* Fire up vagrant +vagrant up + +* During this process the base image is downloaded and the provision script *bootstrap.sh* is run to install IntelMQ and IntelMQ-Manager. 
+ +* Access virtual machine via ssh: + +vagrant ssh + +* Test access to the IntelMQ-Manager + - Point the browser on the host machine to http://192.168.33.10 + +* To halt the machine you can use the command: + + vagrant halt + +* To delete the machine: + + vagrant destroy + +## Credits + +[vb]: https://www.virtualbox.org/wiki/Downloads "VirtualBox" +[vg]: http://www.vagrantup.com/downloads.html "Vagrant" + +## Todos diff --git a/scripts/vagrant/Vagrantfile b/scripts/vagrant/Vagrantfile new file mode 100644 index 000000000..64fddb0a3 --- /dev/null +++ b/scripts/vagrant/Vagrantfile @@ -0,0 +1,72 @@ +# -*- mode: ruby -*- +# vi: set ft=ruby : + +# All Vagrant configuration is done below. The "2" in Vagrant.configure +# configures the configuration version (we support older styles for +# backwards compatibility). Please don't change it unless you know what +# you're doing. +Vagrant.configure(2) do |config| + # The most common configuration options are documented and commented below. + # For a complete reference, please see the online documentation at + # https://docs.vagrantup.com. + + # Every Vagrant development environment requires a box. You can search for + # boxes at https://atlas.hashicorp.com/search. + config.vm.box = "ubuntu/trusty64" + + # Disable automatic box update checking. If you disable this, then + # boxes will only be checked for updates when the user runs + # `vagrant box outdated`. This is not recommended. + # config.vm.box_check_update = false + + # Create a forwarded port mapping which allows access to a specific port + # within the machine from a port on the host machine. In the example below, + # accessing "localhost:8080" will access port 80 on the guest machine. + # config.vm.network "forwarded_port", guest: 80, host: 8080 + + # Create a private network, which allows host-only access to the machine + # using a specific IP. + config.vm.network "private_network", ip: "192.168.33.10" + + # Create a public network, which generally matched to bridged network. 
+ # Bridged networks make the machine appear as another physical device on + # your network. + # config.vm.network "public_network" + + # Share an additional folder to the guest VM. The first argument is + # the path on the host to the actual folder. The second argument is + # the path on the guest to mount the folder. And the optional third + # argument is a set of non-required options. + # config.vm.synced_folder "../data", "/vagrant_data" + + # Provider-specific configuration so you can fine-tune various + # backing providers for Vagrant. These expose provider-specific options. + # Example for VirtualBox: + # + config.vm.provider "virtualbox" do |vb| + # # Display the VirtualBox GUI when booting the machine + # vb.gui = true + # + # # Customize the amount of memory on the VM: + vb.memory = "4096" + end + # + # View the documentation for the provider you are using for more + # information on available options. + + # Define a Vagrant Push strategy for pushing to Atlas. Other push strategies + # such as FTP and Heroku are also available. See the documentation at + # https://docs.vagrantup.com/v2/push/atlas.html for more information. + # config.push.define "atlas" do |push| + # push.app = "YOUR_ATLAS_USERNAME/YOUR_APPLICATION_NAME" + # end + + # Enable provisioning with a shell script. Additional provisioners such as + # Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the + # documentation for more information about their specific syntax and use. 
+ # config.vm.provision "shell", inline: <<-SHELL + # sudo apt-get update + # sudo apt-get install -y apache2 + # SHELL + config.vm.provision :shell, path: "bootstrap.sh" +end diff --git a/scripts/vagrant/bootstrap.sh b/scripts/vagrant/bootstrap.sh new file mode 100755 index 000000000..dd0e93e10 --- /dev/null +++ b/scripts/vagrant/bootstrap.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +#Declare Variables +#IntelMQ +INTELMQ_REPO="https://github.com/certtools/intelmq.git" +#BRANCH="master" +INTELMQ_BRANCH="v1.0-beta" +#IntelMQ-Manager +INTELMQ_MANAGER_REPO="https://github.com/certtools/intelmq-manager.git" + +function install_intelmq { + #Install Dependencies + apt-get update + apt-get -y install python-pip git build-essential python-dev redis-server + #Requires for installing pyzmq with accelaration + apt-get -y install libzmq3-dev + + #Install IntelMQ + #sudo su - + git clone -b $INTELMQ_BRANCH $INTELMQ_REPO + cd intelmq/ + # If branch v1.0-beta install deps using REQUIREMENTS file + if [[ $INTELMQ_BRANCH == "v1.0-beta" ]] + then + pip install -r REQUIREMENTS; + fi + #Install + python setup.py install + useradd -d /opt/intelmq -U -s /bin/bash intelmq + chmod -R 0770 /opt/intelmq + chown -R intelmq.intelmq /opt/intelmq +} + +function install_intelmq_manager { + #Install Dependencies + apt-get -y install git apache2 php5 libapache2-mod-php5 + #Install Manager + git clone $INTELMQ_MANAGER_REPO /tmp/intelmq-manager + cp -R /tmp/intelmq-manager/intelmq-manager/* /var/www/ + chown -R www-data.www-data /var/www/ + #Configure + usermod -a -G intelmq www-data + echo "www-data ALL=(intelmq) NOPASSWD: /opt/intelmq/bin/intelmqctl" >> /etc/sudoers + sed -i -e 's#DocumentRoot /var/www/html#DocumentRoot /var/www#' /etc/apache2/sites-available/000-default.conf + /etc/init.d/apache2 restart + +} +install_intelmq +install_intelmq_manager \ No newline at end of file diff --git a/tests/README.md b/tests/README.md deleted file mode 100644 index 4bbe948c9..000000000 --- a/tests/README.md 
+++ /dev/null @@ -1,3 +0,0 @@ -# tests - -This directory contains testing code, obviously diff --git a/intelmq/bots/experts/deduplicator/deduplicator.py b/tests/error_generator/deduplicator.py similarity index 54% rename from intelmq/bots/experts/deduplicator/deduplicator.py rename to tests/error_generator/deduplicator.py index d0cf785d9..84a9a9c63 100644 --- a/intelmq/bots/experts/deduplicator/deduplicator.py +++ b/tests/error_generator/deduplicator.py @@ -1,7 +1,8 @@ -from copy import deepcopy from intelmq.lib.bot import Bot, sys from intelmq.lib.cache import Cache from intelmq.lib.message import Event +from intelmq.lib.harmonization import DateTime +from intelmq.bots import utils class DeduplicatorBot(Bot): @@ -13,26 +14,26 @@ def init(self): self.parameters.redis_cache_db, self.parameters.redis_cache_ttl ) - + self.counter = 0 + self.lol_message = None def process(self): message = self.receive_message() + message_hash = hash(message) + + if self.lol_message != message: + self.counter += 1 + + self.lol_message = message + + if self.counter == 10 or self.counter == 20: + x = 10 + y = "20" + z = x-y - if message: - - # Event deduplication - if isinstance(message, Event): - event = deepcopy(message) - event.clear("observation_time") - message_hash = hash(event) - - # Generic message deduplication - else: - message_hash = hash(message) - - if not self.cache.exists(message_hash): - self.cache.set(message_hash, 'hash') - self.send_message(message) + if not self.cache.exists(message_hash): + self.cache.set(message_hash, 'hash') + self.send_message(message) self.acknowledge_message() diff --git a/intelmq/lib/bot.py b/tests/log-procedure/bot.py similarity index 70% rename from intelmq/lib/bot.py rename to tests/log-procedure/bot.py index f0648b733..eae9ba783 100644 --- a/intelmq/lib/bot.py +++ b/tests/log-procedure/bot.py @@ -4,7 +4,7 @@ import time import ConfigParser -from intelmq.lib.message import Event +from intelmq.lib.message import * # FIXME from 
intelmq.lib.pipeline import Pipeline from intelmq.lib.utils import decode, log @@ -19,7 +19,7 @@ class Bot(object): def __init__(self, bot_id): self.parameters = Parameters() - + self.current_message = None self.last_message = None self.message_counter = 0 @@ -28,8 +28,12 @@ def __init__(self, bot_id): self.bot_id = bot_id self.load_system_configurations() - - self.logger = log(self.parameters.logging_path, self.bot_id, self.parameters.logging_level) + + self.logger = log( + self.parameters.logging_path, + self.bot_id, + self.parameters.logging_level + ) self.logger.info('Bot is starting') self.load_runtime_configurations() @@ -37,17 +41,17 @@ def __init__(self, bot_id): self.init() + def init(self): - """Initializes a bot""" pass + def start(self): - """Starts a bot""" self.source_pipeline = None self.destination_pipeline = None local_retry_delay = 0 self.parameters.retry_delay = 30 # Temporary fix. Need to add to BOTS conf - + self.logger.info('Bot start processing') while True: @@ -66,10 +70,15 @@ def start(self): self.destination_pipeline.destination_queues(self.destination_queues) self.logger.info("Connected to destination pipeline") + self.logger.info("Start processing") self.process() - self.logger.info("Bot stops processing. Sleeps for 'rate_limit' = %ds" % self.parameters.rate_limit) self.source_pipeline.sleep(self.parameters.rate_limit) + except IntelMQPipeline, ex: + # NAO LOGAR A MENSAGEM PQ O PROBLEMA É NA PIPELINE + # na excepção em baixo deve estar o caso de não ser um erro de pipeline e assim + # deve loggar consoante o parametro do bot. 
+ except Exception, ex: local_retry_delay = self.parameters.retry_delay self.logger.info("Last Correct Message(event): %r" % self.last_message) @@ -79,7 +88,7 @@ def start(self): self.logger.info('Pipeline will reconnect in %s seconds' % local_retry_delay) self.source_pipeline = None self.destination_pipeline = None - + except KeyboardInterrupt as e: if self.source_pipeline: self.source_pipeline.disconnect() @@ -91,8 +100,8 @@ def start(self): self.logger.info("Bot is shutting down") break + def stop(self): - """Stops a bot""" try: self.logger.error("Bot found an error. Exiting") except: @@ -101,104 +110,120 @@ def stop(self): print "Bot found an error. Exiting" exit(-1) + def check_bot_id(self, str): - """Returns True if the given str is a valid bot id""" res = re.search('[^0-9a-zA-Z\-]+', str) if res: print "Invalid bot id." self.stop() - def load_system_configurations(self): - """Instructs a bot to load the system configuration (json)""" + def load_system_configurations(self): + with open(SYSTEM_CONF_FILE, 'r') as fpconfig: config = json.loads(fpconfig.read()) - - setattr(self.parameters, 'logging_path', DEFAULT_LOGGING_PATH) - setattr(self.parameters, 'logging_level', DEFAULT_LOGGING_LEVEL) - + + setattr(self.parameters, 'logging_path' , DEFAULT_LOGGING_PATH) + setattr(self.parameters, 'logging_level' , DEFAULT_LOGGING_LEVEL) + for option, value in config.iteritems(): setattr(self.parameters, option, value) + def load_runtime_configurations(self): - """Load runtime json configuration for a bot""" with open(RUNTIME_CONF_FILE, 'r') as fpconfig: config = json.loads(fpconfig.read()) # Load __default__ runtime configuration section - self.logger.debug("Runtime configuration: loading '%s' section from '%s' file" % ("__default__", RUNTIME_CONF_FILE)) + self.logger.debug("Runtime configuration: loading '%s' section" \ + " from '%s' file" % ("__default__", RUNTIME_CONF_FILE)) + if "__default__" in config.keys(): for option, value in config["__default__"].iteritems(): 
setattr(self.parameters, option, value) - self.logger.debug("Runtime configuration: parameter '%s' loaded with the value '%s'" % (option, value)) - + self.logger.debug("Runtime configuration: parameter '%s' " \ + "loaded with value '%s'" % (option, value)) + # Load bot runtime configuration section + + self.logger.debug("Runtime configuration: loading '%s' section from" \ + " '%s' file" % (self.bot_id, RUNTIME_CONF_FILE)) - self.logger.debug("Runtime configuration: loading '%s' section from '%s' file" % (self.bot_id, RUNTIME_CONF_FILE)) if self.bot_id in config.keys(): for option, value in config[self.bot_id].iteritems(): setattr(self.parameters, option, value) - self.logger.debug("Runtime configuration: parameter '%s' loaded with the value '%s'" % (option, value)) + self.logger.debug("Runtime configuration: parameter '%s' " \ + "loaded with value '%s'" % (option, value)) - def load_pipeline_configurations(self): - """Load pipeline json configuration file""" + def load_pipeline_configurations(self): with open(PIPELINE_CONF_FILE, 'r') as fpconfig: config = json.loads(fpconfig.read()) - - self.logger.debug("Pipeline configuration: loading '%s' section from '%s' file" % (self.bot_id, PIPELINE_CONF_FILE)) + + self.logger.debug("Pipeline configuration: loading '%s' section" \ + " from '%s' file" % (self.bot_id, PIPELINE_CONF_FILE)) self.source_queues = None self.destination_queues = None - + if self.bot_id in config.keys(): - + if 'source-queue' in config[self.bot_id].keys(): self.source_queues = config[self.bot_id]['source-queue'] - self.logger.debug("Pipeline configuration: parameter 'source-queue' loaded with the value '%s'" % self.source_queues) - + self.logger.debug("Pipeline configuration: parameter " \ + "'source-queue' loaded with the value '%s'" % self.source_queues) + if 'destination-queues' in config[self.bot_id].keys(): self.destination_queues = config[self.bot_id]['destination-queues'] - self.logger.debug("Pipeline configuration: parameter 
'destination-queues' loaded with the value '%s'" % ", ".join(self.destination_queues)) + self.logger.debug("Pipeline configuration: parameter" \ + "'destination-queues' loaded with the value" \ + " '%s'" % ", ".join(self.destination_queues)) else: self.logger.error("Pipeline configuration: failed to load configuration") self.stop() + def send_message(self, message): - """Sends a given message with configured destination pipeline""" - if not message: self.logger.warning("Empty message found.") return False - - if isinstance(message, Event): - message = unicode(message) # convert Event Object to string (UTF-8) - + self.message_counter += 1 if self.message_counter % 500 == 0: self.logger.info("Processed %s messages" % self.message_counter) - self.destination_pipeline.send(message) + message = message.serialize() + + try: + self.destination_pipeline.send() + except: + raise exceptions.IntelMQPipeline + def receive_message(self): - """Receive a message from the configured source queue""" - self.current_message = self.source_pipeline.receive() + try: + self.current_message = self.source_pipeline.receive() + except: + raise exceptions.IntelMQPipeline + print self.current_message + if not self.current_message: return None - - message = self.current_message.decode('utf-8') - - try: # Event Object - return Event.from_unicode(message) - except: # Report Object - return message + + # REWRITE ME - begin + message = Message.unserialize(self.current_message) + if message["__type"] == "event": + return Event(self.current_message) + if message["__type"] == "report": + return Report(self.current_message) + raise Exception + # REWRITE ME - end def acknowledge_message(self): - """Acknowledge a message and remove it from the queue system permanently""" self.last_message = self.current_message self.source_pipeline.acknowledge() diff --git a/tests/message-factory/code.py b/tests/message-factory/code.py new file mode 100644 index 000000000..9163335ad --- /dev/null +++ 
b/tests/message-factory/code.py @@ -0,0 +1,34 @@ + +message = MessageFactory.unserialize(self.current_message) + + +class MessageFactory(object): + + @staticmethod + def unserialize(raw_message): + message = Message.unserialize(raw_message) + class_reference = get_class_refence(message["__type"], intelmq.lib.message) + return class_reference(message) + + @staticmethod + def serialize(message): + message.add("__type", message.__class__.__name__) + raw_message = Message.serialize(message) + return raw_message + + +class Message(dict): + + harmonization_config = utils.load_configuration(HARMONIZATION_CONF_FILE) + + def __init__(self, message=None): + if message: + super(Message, self).__init__(message) + else: + super(Message, self).__init__() + + self.harmonization_config = self.harmonization_config[self.__class__.__name__] + + +class Report(Message): + pass diff --git a/tests/pipeline-rabbitmq.py b/tests/old/pipeline-rabbitmq.py similarity index 100% rename from tests/pipeline-rabbitmq.py rename to tests/old/pipeline-rabbitmq.py diff --git a/tests/pipeline-redis.py b/tests/old/pipeline-redis.py similarity index 100% rename from tests/pipeline-redis.py rename to tests/old/pipeline-redis.py diff --git a/tests/pipeline.py b/tests/old/pipeline.py similarity index 100% rename from tests/pipeline.py rename to tests/old/pipeline.py diff --git a/tests/redis.conf b/tests/old/redis.conf similarity index 100% rename from tests/redis.conf rename to tests/old/redis.conf diff --git a/tests/scripts/cleanup.sh b/tests/scripts/cleanup.sh new file mode 100644 index 000000000..b1e89e622 --- /dev/null +++ b/tests/scripts/cleanup.sh @@ -0,0 +1,4 @@ +redis-cli FLUSHALL +rm -rf /opt/intelmq/var/lib/bots/file-output/events.txt +rm -rf /opt/intelmq/var/log/* +killall -s 9 python diff --git a/tests/split-pipeline/bot.py b/tests/split-pipeline/bot.py new file mode 100644 index 000000000..c8a0996a9 --- /dev/null +++ b/tests/split-pipeline/bot.py @@ -0,0 +1,208 @@ +import re +import sys +import 
json +import time +import ConfigParser + +from intelmq.lib.message import Event +from intelmq.lib.pipeline import Pipeline +from intelmq.lib.utils import decode, log + +SYSTEM_CONF_FILE = "/opt/intelmq/etc/system.conf" +PIPELINE_CONF_FILE = "/opt/intelmq/etc/pipeline.conf" +RUNTIME_CONF_FILE = "/opt/intelmq/etc/runtime.conf" +DEFAULT_LOGGING_PATH = "/opt/intelmq/var/log/" +DEFAULT_LOGGING_LEVEL = "INFO" + + +class Bot(object): + + def __init__(self, bot_id): + self.parameters = Parameters() + + self.current_message = None + self.last_message = None + self.message_counter = 0 + + self.check_bot_id(bot_id) + self.bot_id = bot_id + + self.load_system_configurations() + + self.logger = log(self.parameters.logging_path, self.bot_id, self.parameters.logging_level) + self.logger.info('Bot is starting') + + self.load_runtime_configurations() + + self.source_queue, self.destination_queues = self.load_pipeline() + self.parameters.rate_limit = float(self.parameters.rate_limit) + self.parameters.retry_delay = int(self.parameters.retry_delay) + + self.init() + + + def init(self): + pass + + + def start(self): + self.logger.info('Bot start processing') + self.source_pipeline = None + self.destination_pipeline = None + local_retry_delay = 0 + + while True: + try: + if self.parameters.source_pipeline_enable and not self.source_pipeline: + time.sleep(local_retry_delay) + self.logger.info("Connecting to source pipeline") + self.source_pipeline = Pipeline( + self.parameters.source_pipeline_host, + self.parameters.source_pipeline_port, + self.parameters.source_pipeline_db + ) + self.source_pipeline.source_queues(self.source_queue) + self.logger.info("Connected to source pipeline") + + if self.parameters.destination_pipeline_enable and not self.destination_pipeline: + time.sleep(local_retry_delay) + self.logger.info("Connecting to destination pipeline") + self.destination_pipeline = Pipeline( + self.parameters.destination_pipeline_host, + self.parameters.destination_pipeline_port, + 
self.parameters.destination_pipeline_db + ) + self.destination_pipeline.destination_queues(self.destination_queues) + self.logger.info("Connected to destination pipeline") + + self.logger.info("Start processing") + self.process() + if self.source_pipeline: + self.source_pipeline.sleep(self.parameters.rate_limit) + + except Exception, ex: + local_retry_delay = self.parameters.retry_delay + self.logger.info("Last Correct Message(event): %r" % self.last_message) + self.logger.info("Current Message(event): %r" % self.current_message) + self.logger.exception("Check the following exception:") + self.logger.error('Pipeline connection failed (%r)' % ex) + self.logger.info('Pipeline will reconnect in %s seconds' % local_retry_delay) + self.source_pipeline = None + self.destination_pipeline = None + + except KeyboardInterrupt as e: + if self.source_pipeline: + self.source_pipeline.disconnect() + self.logger.info("Disconnecting from source pipeline") + if self.destination_pipeline: + self.destination_pipeline.disconnect() + self.logger.info("Disconnecting from destination pipeline") + + self.logger.info("Bot is shutting down") + break + + + def stop(self): + try: + self.logger.error("Bot found an error. Exiting") + except: + pass + finally: + print "Bot found an error. Exiting" + exit(-1) + + + def check_bot_id(self, str): + res = re.search('[^0-9a-zA-Z\-]+', str) + if res: + print "Invalid bot id." 
+ self.stop() + + + def load_runtime_configurations(self): + + with open(RUNTIME_CONF_FILE, 'r') as fpconfig: + config = json.loads(fpconfig.read()) + + self.logger.debug("Loading configuration in %s section from '%s' file" % (self.bot_id, RUNTIME_CONF_FILE)) + + if self.bot_id in config.keys(): + for option, value in config[self.bot_id].iteritems(): + setattr(self.parameters, option, value) + self.logger.debug("Parameter '%s' loaded with the value '%s'" % (option, value)) + + + def load_system_configurations(self): + + with open(SYSTEM_CONF_FILE, 'r') as fpconfig: + config = json.loads(fpconfig.read()) + + setattr(self.parameters, 'logging_path' , DEFAULT_LOGGING_PATH) + setattr(self.parameters, 'logging_level' , DEFAULT_LOGGING_LEVEL) + + for option, value in config.iteritems(): + setattr(self.parameters, option, value) + + + def load_pipeline(self): + with open(PIPELINE_CONF_FILE, 'r') as fpconfig: + config = json.loads(fpconfig.read()) + + self.logger.debug("Loading pipeline queues from '%s' file" % PIPELINE_CONF_FILE) + + source_queue = None + destination_queues = None + + if self.bot_id in config.keys(): + + if 'source-queue' in config[self.bot_id].keys(): + source_queue = config[self.bot_id]['source-queue'] + self.logger.info("Source queue '%s'" % source_queue) + + if 'destination-queues' in config[self.bot_id].keys(): + destination_queues = config[self.bot_id]['destination-queues'] + self.logger.info("Destination queues '%s'" % ", ".join(destination_queues)) + + return [source_queue, destination_queues] + + self.logger.error("Failed to load queues") + self.stop() + + + def send_message(self, message): + if not message: + self.logger.warning("Empty message found.") + return False + + if isinstance(message, Event): + message = unicode(message) # convert Event Object to string (UTF-8) + + self.message_counter += 1 + if self.message_counter % 500 == 0: + self.logger.info("Processed %s messages" % self.message_counter) + + self.destination_pipeline.send(message) 
+ + + def receive_message(self): + self.current_message = self.source_pipeline.receive() + + if not self.current_message: + return None + + message = self.current_message.decode('utf-8') + + try: # Event Object + return Event.from_unicode(message) + except: # Report Object + return message + + + def acknowledge_message(self): + self.last_message = self.current_message + self.source_pipeline.acknowledge() + + +class Parameters(object): + pass + diff --git a/tests/threads-test-poc/base.py b/tests/threads-test-poc/base.py new file mode 100644 index 000000000..7aa0a27f4 --- /dev/null +++ b/tests/threads-test-poc/base.py @@ -0,0 +1,76 @@ +import sys +import time +import redis +import threading + +THREADS_NUMBER = 4 +MESSAGES_NUMBER = 200 + + +def fill_it(xredis, queue): + print "Fill source queue with %s messages." % MESSAGES_NUMBER + for i in range(0, MESSAGES_NUMBER): + message = "unknown message" + str(i) + xredis.lpush(queue, message) + + + +class Pipeline(): + + def __init__(self): + self.xredis = redis.Redis() + self.source_queue = "mysource" + self.internal_queue = "myinternal" + self.destination_queue = "mydestination" + + def fill(self): + fill_it(self.xredis, self.source_queue) + + def flush(self): + self.xredis.flushall() + + def receive(self): + #print "[+] Get message" + return self.xredis.brpoplpush(self.source_queue, self.internal_queue, 0) + + def acknowledge(self): + #print "[+] Acknowledge message" + self.xredis.rpop(self.internal_queue) + + def send(self, message): + #print "[+] Send message" + self.xredis.lpush(self.destination_queue, message) + + +class Bot(): + + def __init__(self, flush=False): + if flush: + cleaner = Pipeline() + cleaner.flush() + cleaner.fill() + + def start(self): + for thread_id in range(0, THREADS_NUMBER): + thread = threading.Thread(target = Bot.process, args = (thread_id,)) + thread.start() + + @staticmethod + def process(thread_id): + pipe = Pipeline() + while True: + message = pipe.receive() + #time.sleep(0.5) + 
pipe.send(message) + print "Thread '%s' is sending message '%s'" % (thread_id, message) + pipe.acknowledge() + + + +if __name__ == "__main__": + flush = False + if len(sys.argv) == 2: + flush = True + bot = Bot(flush) + bot.start() + diff --git a/tests/threads-test-poc/base2.py b/tests/threads-test-poc/base2.py new file mode 100644 index 000000000..c02577ea5 --- /dev/null +++ b/tests/threads-test-poc/base2.py @@ -0,0 +1,154 @@ +import sys +import time +import redis +import threading +import Queue +from intelmq.bots.collectors.url.lib import fetch_url + + +# begin - IGNORE THIS CODE +def fill_it(xredis, queue): + print "Fill source queue with %s messages." % MESSAGES_NUMBER + for i in range(0, MESSAGES_NUMBER): + message = "unknown message" + str(i) + xredis.lpush(queue, message) + +class StupidPipeline(): + + def __init__(self): + self.xredis = redis.Redis() + self.source_queue = "mysource" + + def fill(self): + fill_it(self.xredis, self.source_queue) + + def flush(self): + self.xredis.flushall() + + +def get_url_content(url): + try: + return fetch_url( + url, + timeout = 60.0, + chunk_size = 16384, + http_proxy=None, + https_proxy=None + ) + except Exception, e: + print e + +# end - IGNORE THIS CODE + + + + + + + +class Pipeline(): + + def __init__(self): + pool = redis.ConnectionPool(max_connections=10) + self.xredis = redis.Redis(connection_pool=pool) + + self.source_queue = "mysource" + self.internal_queue = "myinternal" + self.destination_queue = "mydestination" + + def set_internal_queue(self): + self.internal_queue = self.internal_queue + + def get_internal_queue(self): + return self.internal_queue + + def fill(self): + fill_it(self.xredis, self.source_queue) + + def flush(self): + self.xredis.flushall() + + def receive(self): + return self.xredis.brpop(self.destination_queue) + + def acknowledge(self): + self.xredis.rpop(self.destination_queue) + + def send(self, message): + self.xredis.lpush(self.destination_queue, message) + +class BotQueue(): + + def 
start(self): + pipe = Queue.Queue() + for thread_id in range(0, THREADS_NUMBER): + thread = threading.Thread(target = self.process, args = (thread_id, pipe)) + thread.setDaemon(True) + thread.start() + + size = 0 + while True: + print pipe.get() + size += 1 + if size == MESSAGES_NUMBER: + break + sys.exit(1) + + def process(self, thread_id, pipe): + count = 1 + while True: + data = None + while not data: + try: + data = get_url_content("http://127.0.0.1/head.txt") + pipe.put("thread:%s message-num:%s [%s]" % (str(thread_id), count, data.split()[0])) + except: + data = None + time.sleep(0.2) + count += 1 + +class BotRedis(): + + def start(self): + pipe = Pipeline() + for thread_id in range(0, THREADS_NUMBER): + thread = threading.Thread(target = self.process, args = (thread_id, pipe)) + thread.setDaemon(True) + thread.start() + + size = 0 + while True: + print pipe.receive()[1] + size += 1 + if size == MESSAGES_NUMBER: + break + sys.exit(1) + + def process(self, thread_id, pipe): + count = 1 + while True: + data = None + while not data: + try: + data = get_url_content("http://127.0.0.1/sample.txt") + pipe.send("thread:%s message-num:%s [%s]" % (str(thread_id), count, data.split()[0])) + except: + data = None + time.sleep(1) + count += 1 + + + + +THREADS_NUMBER = 2 +MESSAGES_NUMBER = 50 + + +if __name__ == "__main__": + if sys.argv[1] == "redis": + bot = BotRedis() + elif sys.argv[1] == "queue": + bot = BotQueue() + + bot.start() + diff --git a/tests/threads-test-poc/notes.txt b/tests/threads-test-poc/notes.txt new file mode 100644 index 000000000..41c73d461 --- /dev/null +++ b/tests/threads-test-poc/notes.txt @@ -0,0 +1,13 @@ +01:05 - 50,000 eventos - 20 threads por expert +01:05 - 50,000 eventos - 50 threads por expert +01:05 - 50,000 eventos - 1 thread por expert + + +Conclusões: +================================== +Das duas uma: + * o IntelMQ é já muito rápido usando apenas uma thread e portanto o Redis é que está a limitar + * a implementação de threads no 
IntelMQ está mal feita e portanto é como se apenas estivesse a ser usada uma thread + +Nota: +* por outro lado, os testsbots vem provar que pelo menos as threads trabalham em simultaneo, havendo a possibilidade de o bottlenetck ser a propria conexão TCP devido ao blocking mode. diff --git a/tests/threading-tests/bot.py b/tests/threads-test/bot.py similarity index 100% rename from tests/threading-tests/bot.py rename to tests/threads-test/bot.py diff --git a/tests/threading-tests/conf/BOTS b/tests/threads-test/conf/BOTS similarity index 100% rename from tests/threading-tests/conf/BOTS rename to tests/threads-test/conf/BOTS diff --git a/tests/threading-tests/conf/pipeline.conf b/tests/threads-test/conf/pipeline.conf similarity index 100% rename from tests/threading-tests/conf/pipeline.conf rename to tests/threads-test/conf/pipeline.conf diff --git a/tests/threading-tests/conf/runtime.conf b/tests/threads-test/conf/runtime.conf similarity index 100% rename from tests/threading-tests/conf/runtime.conf rename to tests/threads-test/conf/runtime.conf diff --git a/tests/threading-tests/conf/startup.conf b/tests/threads-test/conf/startup.conf similarity index 100% rename from tests/threading-tests/conf/startup.conf rename to tests/threads-test/conf/startup.conf diff --git a/tests/threading-tests/conf/system.conf b/tests/threads-test/conf/system.conf similarity index 100% rename from tests/threading-tests/conf/system.conf rename to tests/threads-test/conf/system.conf diff --git a/tests/threading-tests/pipeline.py b/tests/threads-test/pipeline.py similarity index 100% rename from tests/threading-tests/pipeline.py rename to tests/threads-test/pipeline.py diff --git a/tests/translation_problems/harmonization.conf b/tests/translation_problems/harmonization.conf new file mode 100644 index 000000000..f90053aa4 --- /dev/null +++ b/tests/translation_problems/harmonization.conf @@ -0,0 +1,122 @@ +{ + "report": { + "raw": { + "type": "Base64", + "description": "test ..." 
+ }, + "feed.name": { + "type": "String", + "description": "test ..." + }, + "feed.url": { + "type": "URL", + "description": "test ..." + } + }, + "event": { + "raw": { + "type": "Base64", + "description": "test ..." + }, + "feed.name": { + "type": "FeedName", + "description": "test ..." + }, + "feed.url": { + "type": "URL", + "description": "test ..." + }, + "time.source": { + "type": "DateTime", + "description": "test ..." + }, + "time.observation": { + "type": "DateTime", + "description": "test ..." + }, + "classification.type": { + "type": "ClassificationType", + "description": "test ..." + }, + "source.ip": { + "type": "IPAddress", + "description": "test ..." + }, + "source.fqdn": { + "type": "DomainName", + "description": "test ..." + }, + "source.reverse_domain_name": { + "type": "DomainName", + "description": "test ..." + }, + "source.url": { + "type": "URL", + "description": "test ..." + }, + "source.port": { + "type": "String", + "description": "test ..." + }, + "source.asn": { + "type": "String", + "description": "test ..." + }, + "source.bgp_prefix": { + "type": "String", + "description": "test ..." + }, + "source.registry": { + "type": "String", + "description": "test ..." + }, + "source.allocated": { + "type": "String", + "description": "test ..." + }, + "source.as_name": { + "type": "String", + "description": "test ..." + }, + "source.geolocation.cc": { + "type": "String", + "description": "test ..." + }, + "source.geolocation.city": { + "type": "String", + "description": "test ..." + }, + "source.geolocation.latitude": { + "type": "String", + "description": "test ..." + }, + "source.geolocation.longitude": { + "type": "String", + "description": "test ..." + }, + "source.abuse_contact": { + "type": "String", + "description": "test ..." + }, + "protocol.application": { + "type": "String", + "description": "test ..." + }, + "malware.name": { + "type": "MalwareName", + "description": "test ..." 
import sys
import socket  # bug fix: IPAddress.to_int used socket without importing it
import dns
import DNS
import pytz
import time
import json
import ipaddr
import base64
import inspect
import urlparse
import binascii
import datetime
import dateutil.parser


class GenericType():
    """Base harmonization type.

    Validates and sanitizes plain unicode string values; concrete field
    types subclass this and tighten the rules.
    """

    @staticmethod
    def is_valid(key, value):
        """Return True if *value* is a non-empty unicode string.

        *key* is accepted for interface uniformity with subclasses and is
        not inspected here.
        """
        if not value:
            return False

        # NOTE: the empty case is already rejected by the truthiness check
        # above, so only the type check remains.
        if type(value) is not unicode:
            return False

        return True

    @staticmethod
    def sanitize(value):
        """Return a stripped unicode copy of *value*, or None.

        Byte strings are decoded as UTF-8, silently dropping undecodable
        bytes on a second attempt (best-effort by design).
        """
        if not value:
            return None

        if type(value) is unicode:
            return value.strip()

        if type(value) is str:
            try:
                value = value.decode('utf-8')
            except Exception:
                value = value.decode('utf-8', 'ignore')
            return value.strip()

        return None


class String(GenericType):
    """A non-empty unicode string field."""

    @staticmethod
    def is_valid(key, value):
        # GenericType already enforces non-empty unicode; kept explicit
        # for parity with the original contract.
        if not GenericType().is_valid(key, value):
            return False

        if type(value) is not unicode:
            return False

        if len(value) == 0:
            return False

        return True

    @staticmethod
    def sanitize(value):
        return GenericType().sanitize(value)


class FeedName(GenericType):
    """A feed identifier; must be entirely lower-case."""

    @staticmethod
    def is_valid(key, value):
        if not GenericType().is_valid(key, value):
            return False

        if value != value.lower():
            return False

        return True

    @staticmethod
    def sanitize(value):
        value = value.lower()
        return GenericType().sanitize(value)


class DateTime(GenericType):
    """An ISO-8601 UTC timestamp field."""

    @staticmethod
    def is_valid(key, value):
        if not GenericType().is_valid(key, value):
            return False

        # Valid only when already in the canonical form __parse produces.
        if value != DateTime.__parse(value):
            return False

        return True

    @staticmethod
    def sanitize(value):
        value = DateTime.__parse(value)
        return GenericType().sanitize(value)

    @staticmethod
    def __parse(value):
        """Normalise *value* to an ISO-8601 UTC string, or None on failure."""
        try:
            value = dateutil.parser.parse(value)
            value = value.astimezone(pytz.utc)
            value = value.isoformat()
        except Exception:
            return None
        return value.decode("utf-8")

    @staticmethod
    def generate_datetime_now():
        """Return the current UTC time as an ISO-8601 unicode string."""
        value = datetime.datetime.now(pytz.timezone('UTC'))
        value = value.replace(microsecond=0)
        value = value.isoformat()
        return value.decode("utf-8")


class IPAddress(GenericType):
    """An IPv4 or IPv6 address field."""

    @staticmethod
    def is_valid(key, value):
        if not GenericType().is_valid(key, value):
            return False

        try:
            ipaddr.IPAddress(value)
        except Exception:
            return False

        return True

    @staticmethod
    def sanitize(value):
        """Accept a single host; reject networks with more than one host."""
        try:
            network = ipaddr.IPNetwork(value)
        except Exception:
            return None

        if network.numhosts == 1:
            value = str(network.network)
        else:
            return None

        return GenericType().sanitize(value)

    @staticmethod
    def to_int(value):
        """Return the integer form of an IP address, or None if unparsable.

        Bug fix: the original referenced an undefined name ``ip`` and
        never imported ``socket``, so every call raised NameError.
        """
        try:
            ip_integer = socket.inet_pton(socket.AF_INET, value)
        except socket.error:
            try:
                ip_integer = socket.inet_pton(socket.AF_INET6, value)
            except socket.error:
                return None

        return int(binascii.hexlify(ip_integer), 16)

    @staticmethod
    def version(value):
        """Return u'4' or u'6' for the given address."""
        return unicode(ipaddr.IPAddress(value).version)

    @staticmethod
    def to_reverse(ip):
        """Return the reverse-DNS name (in-addr.arpa / ip6.arpa) for *ip*."""
        return unicode(dns.reversename.from_address(ip))


class DomainName(GenericType):
    """A bare domain name: not an IP, not a URL, at least two labels."""

    @staticmethod
    def is_valid(key, value):
        if not GenericType().is_valid(key, value):
            return False

        if IPAddress().is_valid(key, value):
            return False

        if URL().is_valid(key, value):
            return False

        if not len(value.split('.')) > 1:
            return False

        return True

    @staticmethod
    def sanitize(value):
        return GenericType().sanitize(value)

    @staticmethod
    def to_ip(value):
        """Resolve *value*'s A records, or None on lookup failure.

        Bug fix: the original ignored its argument and always looked up
        the hard-coded host 'www.google.com'.
        """
        try:
            value = DNS.dnslookup(value, 'A')
        except Exception:
            value = None
        return value


class MalwareName(GenericType):
    """A malware family name; must be entirely lower-case."""

    @staticmethod
    def is_valid(key, value):
        if not GenericType().is_valid(key, value):
            return False

        if value != value.lower():
            return False

        return True

    @staticmethod
    def sanitize(value):
        value = value.lower()
        return GenericType().sanitize(value)


class Base64(GenericType):
    """A base64-encoded payload field."""

    @staticmethod
    def is_valid(key, value):
        if not GenericType().is_valid(key, value):
            return False

        try:
            base64.b64decode(value)
        except Exception:
            return False

        return True

    @staticmethod
    def sanitize(value):
        # Strip first, then encode the cleaned value.
        value = GenericType().sanitize(value)
        value = base64.b64encode(value)
        return GenericType().sanitize(value)


class URL(GenericType):
    """A URL field; must carry a network location."""

    @staticmethod
    def is_valid(key, value):
        if not GenericType().is_valid(key, value):
            return False

        result = urlparse.urlparse(value)
        if result.netloc == "":
            return False

        return True

    @staticmethod
    def sanitize(value):
        """De-fang hxxp(s) URLs and coerce bare hosts into http:// URLs."""
        if "hxxp://" in value:
            value = value.replace('hxxp://', 'http://')

        if "hxxps://" in value:
            value = value.replace('hxxps://', 'https://')

        tests = [
            value,
            "http://" + value,
            "http://" + value + "/"
        ]

        for value in tests:
            result = urlparse.urlparse(value)
            if result.netloc != "":
                return GenericType().sanitize(value)

        return None

    @staticmethod
    def to_ip(url):
        """Resolve the URL's host to IP addresses, or None."""
        value = urlparse.urlparse(url)
        if value.netloc != "":
            return DomainName().to_ip(value.netloc)
        return None

    @staticmethod
    def to_domain_name(url):
        """Return the URL's hostname unless it is an IP literal.

        Bug fixes: the original called ``urlparse(url)`` on the *module*
        (TypeError) and called ``IPAddress.is_valid`` with one argument
        instead of two.
        """
        value = urlparse.urlparse(url)
        if value.netloc != "" and not IPAddress.is_valid(None, value.netloc):
            return value.netloc
        return None


class ClassificationType(GenericType):
    """A taxonomy label drawn from a fixed vocabulary."""

    __allowed_values = ['spam',
                        'malware',
                        'botnet drone',
                        'ransomware',
                        'malware configuration',
                        'c&c',
                        'scanner',
                        'exploit',
                        'brute-force',
                        'ids alert',
                        'defacement',
                        'compromised',
                        'backdoor',
                        'ddos',
                        'dropzone',
                        'phishing',
                        'vulnerable service',
                        'blacklist',
                        'unknown'
                        ]

    @staticmethod
    def is_valid(key, value):
        if not GenericType().is_valid(key, value):
            return False

        if type(value) is not unicode:
            return False

        if value not in ClassificationType().__allowed_values:
            return False

        return True

    @staticmethod
    def sanitize(value):
        return GenericType().sanitize(value)
import unicodecsv
from cStringIO import StringIO
from intelmq.lib import utils
from intelmq.lib.bot import Bot, sys
from intelmq.lib.message import Event
from intelmq.lib.harmonization import DateTime


class PhishTankParserBot(Bot):
    """Parse the PhishTank 'online-valid' CSV feed into IntelMQ events."""

    def process(self):
        report = self.receive_message()

        # Bug fix: the original acknowledged the empty report but did NOT
        # return, so it fell through and crashed decoding a missing "raw".
        if not report.contains("raw"):
            self.acknowledge_message()
            return

        # PhishTank CSV layout; "__IGNORE__" marks columns we discard.
        columns = ["__IGNORE__",
                   "source.url",
                   "description",
                   "time.source",
                   "__IGNORE__",
                   "__IGNORE__",
                   "__IGNORE__",
                   "target"]

        raw_report = utils.base64_decode(report.value("raw"))
        for row in unicodecsv.reader(StringIO(raw_report),
                                     encoding='utf-8', errors='ignore'):

            # Skip the CSV header line.
            if "phish_id" in row:
                continue

            event = Event()

            for key, value in zip(columns, row):
                if key == "__IGNORE__":
                    continue

                event.add(key, value, sanitize=True)

            time_observation = DateTime().generate_datetime_now()
            event.add('time.observation', time_observation, sanitize=True)
            event.add('feed.name', u'phishtank')
            event.add('feed.url', u'http://data.phishtank.com/data/< API KEY >/online-valid.csv')
            event.add('classification.type', u'phishing')
            # Leftover ERROR-level debug logging and the commented-out
            # duplicate add("raw", ...) line were removed.
            event.add("raw", ",".join(row), sanitize=True)

            self.send_message(event)
        self.acknowledge_message()


if __name__ == "__main__":
    bot = PhishTankParserBot(sys.argv[1])
    bot.start()