Skip to content

Commit

Permalink
FIX/ENH: HttpMixin refactored and various fixes
Browse files Browse the repository at this point in the history
**General**
Removed the 'requests' MissingDependencyError checks, because requests is a core dependency of intelmq
Removed HTTP variables from Bot class in favor of HttpMixin
Removed the attempt to import requests in pipeline; it's a core dependency of intelmq
Added additional configuration variables to HttpMixin (from the Bot class)

**Bots**
GitHub API is now using HttpMixin
MS Azure Collector is now using HttpMixin
DO-Portal Expert is now using HttpMixin
GeoHash is now using MissingDependencyError instead of ValueError (consistency)
HttpContentExpert is now using HttpMixin
HttpStatusExpert is now using HttpMixin
NationalCERTContactCertATExpert is now using HttpMixin
RDAPExpert is now using HttpMixin
RIPEExpert is now using HttpMixin
SplunkSavedSearchExpert is now using HttpMixin
TuencyExpert is now using HttpMixin
RestAPIOutput is now using HttpMixin

**Bot tests**
GitHub API Collector is now using requests_mock instead of MagicMock (consistency)
RestAPI Output is now using correct headers

Fixes #2150
Fixes #2137

Signed-off-by: Sebastian Waldbauer <waldbauer@cert.at>
  • Loading branch information
waldbauer-certat committed Feb 4, 2022
1 parent 8d1c926 commit 60181b1
Show file tree
Hide file tree
Showing 21 changed files with 95 additions and 241 deletions.
4 changes: 0 additions & 4 deletions intelmq/bots/collectors/github_api/REQUIREMENTS.txt

This file was deleted.

16 changes: 5 additions & 11 deletions intelmq/bots/collectors/github_api/_collector_github_api.py
Expand Up @@ -7,13 +7,10 @@
GITHUB API Collector bot
"""
import base64
from requests import exceptions

from intelmq.lib.bot import CollectorBot

try:
import requests
except ImportError:
requests = None
from intelmq.lib.mixins import HttpMixin

static_params = {
'headers': {
Expand All @@ -22,14 +19,11 @@
}


class GithubAPICollectorBot(CollectorBot):
class GithubAPICollectorBot(CollectorBot, HttpMixin):
basic_auth_username = None
basic_auth_password = None

def init(self):
if requests is None:
raise ValueError('Could not import requests. Please install it.')

self.__user_headers = static_params['headers']
if self.basic_auth_username is not None and self.basic_auth_password is not None:
self.__user_headers.update(self.__produce_auth_header(self.basic_auth_username, self.basic_auth_password))
Expand All @@ -47,13 +41,13 @@ def process_request(self):

def github_api(self, api_path: str, **kwargs) -> dict:
try:
response = requests.get(f"{api_path}", params=kwargs, headers=self.__user_headers)
response = self.http_get(api_path, headers=self.__user_headers, params=kwargs)
if response.status_code == 401:
# bad credentials
raise ValueError(response.json()['message'])
else:
return response.json()
except requests.RequestException:
except exceptions.RequestException:
raise ValueError(f"Unknown repository {api_path!r}.")

@staticmethod
Expand Down
Expand Up @@ -14,17 +14,14 @@
'regex': file regex (DEFAULT = '*.json')
"""
import re
from requests import exceptions

from intelmq.lib.exceptions import InvalidArgument
from intelmq.bots.collectors.github_api._collector_github_api import GithubAPICollectorBot
from intelmq.lib.mixins import HttpMixin

try:
import requests
except ImportError:
requests = None


class GithubContentsAPICollectorBot(GithubAPICollectorBot):
class GithubContentsAPICollectorBot(GithubAPICollectorBot, HttpMixin):
"Collect files from a GitHub repository via the API. Optionally with GitHub credentials."
regex: str = None # TODO: could be re
repository: str = None
Expand Down Expand Up @@ -62,7 +59,7 @@ def process_request(self):
if item['extra'] != {}:
report.add('extra.file_metadata', item['extra'])
self.send_message(report)
except requests.RequestException as e:
except exceptions.RequestException as e:
raise ConnectionError(e)

def __recurse_repository_files(self, base_api_url: str, extracted_github_files: list = None) -> list:
Expand All @@ -75,7 +72,7 @@ def __recurse_repository_files(self, base_api_url: str, extracted_github_files:
elif github_file['type'] == 'file' and bool(re.search(self.regex, github_file['name'])):
extracted_github_file_data = {
'download_url': github_file['download_url'],
'content': requests.get(github_file['download_url']).content,
'content': self.http_get(github_file['download_url']).content,
'extra': {}
}
for field_name in self.__extra_fields:
Expand Down
3 changes: 2 additions & 1 deletion intelmq/bots/collectors/mail/collector_mail_url.py
Expand Up @@ -8,6 +8,7 @@
"""
import io
import re
from requests import exceptions

from intelmq.lib.mixins import HttpMixin
from intelmq.lib.splitreports import generate_reports
Expand Down Expand Up @@ -50,7 +51,7 @@ def process_message(self, uid, message):
self.logger.info("Downloading report from %r.", url)
try:
resp = self.http_get(url)
except requests.exceptions.Timeout:
except exceptions.Timeout:
self.logger.error("Request timed out %i times in a row." %
self.http_timeout_max_tries)
erroneous = True
Expand Down
4 changes: 2 additions & 2 deletions intelmq/bots/collectors/microsoft/collector_azure.py
Expand Up @@ -11,7 +11,7 @@

from intelmq.lib.bot import CollectorBot
from intelmq.lib.exceptions import MissingDependencyError
from intelmq.lib.mixins import CacheMixin
from intelmq.lib.mixins import CacheMixin, HttpMixin

try:
from azure.storage.blob import ContainerClient
Expand All @@ -23,7 +23,7 @@
create_configuration = None # noqa


class MicrosoftAzureCollectorBot(CollectorBot, CacheMixin):
class MicrosoftAzureCollectorBot(CollectorBot, CacheMixin, HttpMixin):
"Fetch data blobs from a Microsoft Azure container"
connection_string: str = "<insert your connection string here>"
container_name: str = "<insert the container name>"
Expand Down
20 changes: 3 additions & 17 deletions intelmq/bots/experts/do_portal/expert.py
Expand Up @@ -8,40 +8,26 @@
a "502 Bad Gateway" status code is treated the same as a timeout,
i.e. will be retried instead of a fail.
"""
try:
import requests
except ImportError:
requests = None

from intelmq.lib.mixins import HttpMixin
import intelmq.lib.utils as utils
from intelmq.lib.bot import ExpertBot


class DoPortalExpertBot(ExpertBot):
class DoPortalExpertBot(ExpertBot, HttpMixin):
"""Retrieve abuse contact information for the source IP address from a do-portal instance"""
mode: str = "append"
portal_api_key: str = None
portal_url: str = None

def init(self):
if requests is None:
raise ValueError("Library 'requests' could not be loaded. Please install it.")

self.set_request_parameters()

self.url = self.portal_url + '/api/1.0/ripe/contact?cidr=%s'
self.http_header.update({
"Content-Type": "application/json",
"Accept": "application/json",
"API-Authorization": self.portal_api_key
})

self.session = utils.create_request_session(self)
retries = requests.urllib3.Retry.from_int(self.http_timeout_max_tries)
retries.status_forcelist = [502]
adapter = requests.adapters.HTTPAdapter(max_retries=retries)
self.session.mount('http://', adapter)
self.session.mount('https://', adapter)
self.session = self.http_session()

def process(self):
event = self.receive_message()
Expand Down
3 changes: 2 additions & 1 deletion intelmq/bots/experts/geohash/expert.py
Expand Up @@ -9,6 +9,7 @@
https://github.com/joyanujoy/geolib
'''
from intelmq.lib.bot import ExpertBot
from intelmq.lib.exceptions import MissingDependencyError

try:
from geolib import geohash
Expand All @@ -23,7 +24,7 @@ class GeohashExpertBot(ExpertBot):

def init(self):
if not geohash:
raise ValueError("Library 'geolib' is required, please install it.")
raise MissingDependencyError("geolib")

def process(self):
event = self.receive_message()
Expand Down
7 changes: 3 additions & 4 deletions intelmq/bots/experts/http/expert_content.py
Expand Up @@ -7,10 +7,10 @@
from typing import List

from intelmq.lib.bot import ExpertBot
from intelmq.lib.utils import create_request_session
from intelmq.lib.mixins import HttpMixin


class HttpContentExpertBot(ExpertBot):
class HttpContentExpertBot(ExpertBot, HttpMixin):
"""
Test if a given string is part of the content for a given URL
Expand All @@ -29,8 +29,7 @@ class HttpContentExpertBot(ExpertBot):
__session = None

def init(self):
self.set_request_parameters()
self.__session = create_request_session(self)
self.__session = self.http_session()

def process(self):
event = self.receive_message()
Expand Down
6 changes: 3 additions & 3 deletions intelmq/bots/experts/http/expert_status.py
Expand Up @@ -8,9 +8,10 @@

from intelmq.lib.bot import ExpertBot
from intelmq.lib.utils import create_request_session
from intelmq.lib.mixins import HttpMixin


class HttpStatusExpertBot(ExpertBot):
class HttpStatusExpertBot(ExpertBot, HttpMixin):
"""
Fetch the HTTP Status for a given URL
Expand All @@ -31,8 +32,7 @@ def process(self):
event = self.receive_message()

if self.field in event:
self.set_request_parameters()
session = create_request_session(self)
session = self.http_session()

try:
response = session.get(event[self.field])
Expand Down
14 changes: 3 additions & 11 deletions intelmq/bots/experts/national_cert_contact_certat/expert.py
Expand Up @@ -20,30 +20,22 @@
"""

from intelmq.lib.bot import ExpertBot
from intelmq.lib.mixins import HttpMixin
from intelmq.lib.utils import create_request_session
from intelmq.lib.exceptions import MissingDependencyError

try:
import requests
except ImportError:
requests = None


URL = 'https://contacts.cert.at/cgi-bin/abuse-nationalcert.pl'


class NationalCERTContactCertATExpertBot(ExpertBot):
class NationalCERTContactCertATExpertBot(ExpertBot, HttpMixin):
"""Add country and abuse contact information from the CERT.at national CERT Contact Database. Set filter to true if you want to filter out events for Austria. Set overwrite_cc to true if you want to overwrite an existing country code value"""
filter: bool = False
http_verify_cert: bool = True
overwrite_cc: bool = False

def init(self):
if requests is None:
raise MissingDependencyError("requests")

self.set_request_parameters()
self.session = create_request_session(self)
self.session = self.http_session()

def process(self):
event = self.receive_message()
Expand Down
19 changes: 5 additions & 14 deletions intelmq/bots/experts/rdap/expert.py
Expand Up @@ -3,18 +3,13 @@
# SPDX-License-Identifier: AGPL-3.0-or-later

# -*- coding: utf-8 -*-
import requests
from intelmq.lib.bot import ExpertBot
from intelmq.lib.utils import create_request_session
from intelmq.lib.exceptions import MissingDependencyError
from intelmq.lib.mixins import CacheMixin
from intelmq.lib.mixins import CacheMixin, HttpMixin

try:
import requests
except ImportError:
requests = None


class RDAPExpertBot(ExpertBot, CacheMixin):
class RDAPExpertBot(ExpertBot, CacheMixin, HttpMixin):
""" Get RDAP data"""
rdap_order: list = ['abuse', 'technical', 'administrative', 'registrant', 'registrar']
rdap_bootstrapped_servers: dict = {}
Expand All @@ -30,11 +25,7 @@ class RDAPExpertBot(ExpertBot, CacheMixin):
__session: requests.Session

def init(self):
if requests is None:
raise MissingDependencyError("requests")

self.set_request_parameters()
self.__session = create_request_session(self)
self.__session = self.http_session()

# get overall rdap data from iana
resp = self.__session.get('https://data.iana.org/rdap/dns.json')
Expand Down Expand Up @@ -73,7 +64,7 @@ def process(self):
if result:
event.add('source.abuse_contact', result, overwrite=self.overwrite)
else:
self.__session = create_request_session(self)
self.__session = self.http_session()
domain_parts = url.split('.')
domain_suffix = None
while domain_suffix is None:
Expand Down
23 changes: 5 additions & 18 deletions intelmq/bots/experts/ripe/expert.py
Expand Up @@ -15,12 +15,7 @@
import intelmq.lib.utils as utils
from intelmq.lib.bot import ExpertBot
from intelmq.lib.exceptions import MissingDependencyError
from intelmq.lib.mixins import CacheMixin

try:
import requests
except ImportError:
requests = None
from intelmq.lib.mixins import CacheMixin, HttpMixin


STATUS_CODE_ERROR = 'HTTP status code was {}. Possible problem at the connection endpoint or network issue.'
Expand All @@ -41,7 +36,7 @@ def clean_geo(geo_data):
return geo_data


class RIPEExpertBot(ExpertBot, CacheMixin):
class RIPEExpertBot(ExpertBot, CacheMixin, HttpMixin):
"""Fetch abuse contact and/or geolocation information for the source and/or destination IP addresses and/or ASNs of the events"""
mode: str = "append"
query_ripe_db_asn: bool = True
Expand Down Expand Up @@ -77,22 +72,14 @@ class RIPEExpertBot(ExpertBot, CacheMixin):
}

def init(self):
if requests is None:
raise MissingDependencyError("requests")

self.__query = {
"db_asn": self.query_ripe_db_asn,
"db_ip": self.query_ripe_db_ip,
"stat_asn": self.query_ripe_stat_asn,
"stat_ip": self.query_ripe_stat_ip,
"stat_geo": self.query_ripe_stat_geolocation,
}

self.__initialize_http_session()

def __initialize_http_session(self):
self.set_request_parameters()
self.http_session = utils.create_request_session(self)
self.session = self.http_session()

def process(self):
event = self.receive_message()
Expand Down Expand Up @@ -134,8 +121,8 @@ def __perform_cached_query(self, type, resource):
else:
return json.loads(cached_value)
else:
response = self.http_session.get(self.QUERY[type].format(resource),
data="", timeout=self.http_timeout_sec)
response = self.session.get(self.QUERY[type].format(resource),
data="", timeout=self.http_timeout_sec)

if response.status_code != 200:
if type == 'db_asn' and response.status_code == 404:
Expand Down

0 comments on commit 60181b1

Please sign in to comment.