Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Plugin, Connector, controllers, dataclasses, tons of refactoring #524

Merged
merged 46 commits into from Jul 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
a1b3b59
Refactor BaseAnalyzerMixin to add connectors base class
sp35 Jul 3, 2021
8dd57cf
Fix: remove unique constraint from connector name
sp35 Jul 6, 2021
f1b8849
Add tasks for triggering connectors
sp35 Jul 6, 2021
9a40b78
Add start connectors method
sp35 Jul 6, 2021
e723dad
Add dummy misp connector
sp35 Jul 6, 2021
064812a
core: Plugin class, _read_secrets in serializer
eshaan7 Jul 8, 2021
90262b0
analyzers_manager: controller.py, modify classes, use Plugin class
eshaan7 Jul 8, 2021
b781b37
connectors_manager: controller.py, modify classes.py, use Plugin class
eshaan7 Jul 8, 2021
bd5d484
intelowl.tasks: use controller modules, refactor
eshaan7 Jul 8, 2021
c9e0e14
api_app: use controller module, refactor
eshaan7 Jul 8, 2021
b8e08d8
test_files: small adjust
eshaan7 Jul 8, 2021
7eb3fea
moved runtime_config field to AnalyzerReport, updated migrations
m0mosenpai Jul 9, 2021
94f60eb
misc fixes and refactoring (moved filter_analyzers() to controller.py)
m0mosenpai Jul 9, 2021
9b7bd6e
misc fixes
m0mosenpai Jul 10, 2021
e33afa0
removed extra general.py file
m0mosenpai Jul 10, 2021
450d46b
fixes for observable analyzers
m0mosenpai Jul 11, 2021
3f404fe
fixes for file analyzers
m0mosenpai Jul 11, 2021
ec540fe
misc fixes and refactoring
m0mosenpai Jul 11, 2021
0a2721c
api_app: fix runtime_configuration, check_stuck_analysis cron
eshaan7 Jul 12, 2021
ad787b6
small fixes
m0mosenpai Jul 12, 2021
4b79907
replaced hardcoded observable_classification types with Enum values
m0mosenpai Jul 12, 2021
807b2b8
Fix classes, controller, move tasks into controller
eshaan7 Jul 12, 2021
93ef85b
fixip
eshaan7 Jul 12, 2021
aa51209
adjusts to analyzer_config.json
eshaan7 Jul 12, 2021
7d057b1
New way to run analyzer tests
eshaan7 Jul 12, 2021
9a02295
[fixes] analyzer: darksearch
eshaan7 Jul 12, 2021
6e9b66f
fix merge conflicts
eshaan7 Jul 12, 2021
2587e7e
api_app.core: set_param not abstract_method
eshaan7 Jul 12, 2021
c545d9e
analyzer_manager: fix ObservableTypes import in scripts
eshaan7 Jul 12, 2021
5263ee9
fixup: New way to run analyzer tests
eshaan7 Jul 12, 2021
9990820
fixup: New way to run analyzer tests
eshaan7 Jul 12, 2021
6fe7a0e
api_app.core: set_param not abstract_method, fixup
eshaan7 Jul 12, 2021
ee468ad
analyzer: mb_google refactor
eshaan7 Jul 12, 2021
1f9cd48
analyzer: Pulsedive, fixes & refactor
eshaan7 Jul 12, 2021
8aad3c2
Fix: invalidate cache before status check
sp35 Jul 12, 2021
60e2560
Add connectors called in job list serializer
sp35 Jul 12, 2021
da1c4a2
fix formatting
m0mosenpai Jul 12, 2021
a3b1767
Fix tests and linting
sp35 Jul 13, 2021
aab76b0
Fix import in darksearch
sp35 Jul 13, 2021
9042261
unique_together constrait in report models
eshaan7 Jul 13, 2021
9739307
fixes(analyzer): in honeydb and mb_get modules
eshaan7 Jul 13, 2021
a1ea786
core: AbstractConfig and other dataclasses
eshaan7 Jul 13, 2021
aea2f49
small adjusts to analyzer_config.json
eshaan7 Jul 13, 2021
b25b000
analyzers_manager: AnalyzerConfig dataclass, refactor controller, ser…
eshaan7 Jul 13, 2021
69e611c
left over changes
eshaan7 Jul 13, 2021
b388e58
analyzers_manager: fixup
eshaan7 Jul 13, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
172 changes: 67 additions & 105 deletions api_app/analyzers_manager/classes.py
@@ -1,68 +1,70 @@
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
# See the file 'LICENSE' for copying permission.

from django.utils import timezone
import traceback
import time
import logging
import requests
import json

from abc import ABCMeta, abstractmethod
from celery.exceptions import SoftTimeLimitExceeded
from abc import ABCMeta

from api_app.exceptions import (
AnalyzerRunNotImplemented,
AnalyzerRunException,
AnalyzerConfigurationException,
)
from api_app.models import Job
from api_app.core.classes import Plugin
from api_app.helpers import generate_sha256

from .models import AnalyzerReport
from .serializers import AnalyzerConfigSerializer

logger = logging.getLogger(__name__)


class BaseAnalyzerMixin(metaclass=ABCMeta):
class BaseAnalyzerMixin(Plugin):
"""
Abstract Base class for Analyzers.
Never inherit from this branch,
always use either one of ObservableAnalyzer or FileAnalyzer classes.
"""

__job_id: int
analyzer_name: str
report: dict

@property
def job_id(self):
return self.__job_id
def analyzer_name(self) -> str:
return self._config_dict["name"]

@abstractmethod
def before_run(self):
def init_report_object(self) -> AnalyzerReport:
"""
function called directly before run function.
Returns report object set in *start* fn
"""
return AnalyzerReport.objects.create(
job_id=self.job_id,
analyzer_name=self.analyzer_name,
report={},
errors=[],
status=AnalyzerReport.Statuses.PENDING.name,
runtime_configuration=self.kwargs.get("runtime_conf", {}),
)

@abstractmethod
def run(self):
def get_exceptions_to_catch(self):
"""
Called from *start* fn and wrapped in a try-catch block.
Should be overwritten in child class
:returns report: JSON
Returns additional exceptions to catch when running *start* fn
"""
raise AnalyzerRunNotImplemented(self.analyzer_name)
return (
AnalyzerConfigurationException,
AnalyzerRunException,
)

@abstractmethod
def after_run(self):
"""
function called after run function.
"""
def get_serializer_class(self) -> AnalyzerConfigSerializer:
return AnalyzerConfigSerializer

def set_config(self, additional_config_params):
def get_error_message(self, err, is_base_err=False):
"""
function to parse additional_config_params.
verify params, API keys, etc.
In most cases, this would be overwritten.
Returns error message for
*_handle_analyzer_exception* and *_handle_base_exception* fn
"""
return (
f"{self.__repr__()}."
f" {'Unexpected error' if is_base_err else 'Analyzer error'}: '{err}'"
)

def _validate_result(self, result, level=0, max_recursion=190):
"""
Expand Down Expand Up @@ -94,60 +96,11 @@ def _validate_result(self, result, level=0, max_recursion=190):
result = 9223372036854775807
return result

def start(self):
"""
Entrypoint function to execute the analyzer.
calls `before_run`, `run`, `after_run`
in that order with exception handling.
"""
try:
self.before_run()
self.report = Job.init_report(self.analyzer_name, self.job_id)
result = self.run()
result = self._validate_result(result)
self.report.report = result
except (
AnalyzerConfigurationException,
AnalyzerRunException,
SoftTimeLimitExceeded,
) as e:
self._handle_analyzer_exception(e)
except Exception as e:
self._handle_base_exception(e)
else:
self.report.status = self.report.Statuses.SUCCESS.name

# add end time of process
self.report.end_time = timezone.now()

self.after_run()
self.report.save()

return self.report

def _handle_analyzer_exception(self, err):
error_message = (
f"job_id:{self.job_id}, analyzer: '{self.analyzer_name}'."
f" Analyzer error: '{err}'"
)
logger.error(error_message)
self.report.errors.append(str(err))
self.report.status = self.report.Statuses.FAILED.name

def _handle_base_exception(self, err):
traceback.print_exc()
error_message = (
f"job_id:{self.job_id}, analyzer:'{self.analyzer_name}'."
f" Unexpected error: '{err}'"
)
logger.exception(error_message)
self.report.errors.append(str(err))
self.report.status = self.report.Statuses.FAILED.name
def before_run(self):
self.report.update_status(status=self.report.Statuses.RUNNING.name)

def __init__(self, analyzer_name, job_id, additional_config_params):
self.analyzer_name = analyzer_name
self.__job_id = job_id
self.set_config(additional_config_params) # lgtm [py/init-calls-subclass]
def after_run(self):
self.report.report = self._validate_result(self.report.report)

def __repr__(self):
return f"({self.analyzer_name}, job_id: #{self.job_id})"
Expand All @@ -157,32 +110,39 @@ class ObservableAnalyzer(BaseAnalyzerMixin):
"""
Abstract class for Observable Analyzers.
Inherit from this branch when defining a IP, URL or domain analyzer.
Need to overrwrite `set_config(self, additional_config_params)`
Need to overwrite `set_params(self, params)`
and `run(self)` functions.
"""

observable_name: str
observable_classification: str

def __init__(
self,
analyzer_name,
job_id,
obs_name,
obs_classification,
additional_config_params,
):
self.observable_name = obs_name
self.observable_classification = obs_classification
super().__init__(analyzer_name, job_id, additional_config_params)
def __init__(self, config_dict: dict, job_id: int, **kwargs):
super(ObservableAnalyzer, self).__init__(config_dict, job_id, **kwargs)
# check if we should run the hash instead of the binary
if self._job.is_sample and config_dict.get("run_hash", False):
self.observable_classification = (
AnalyzerConfigSerializer.self._serializer.ObservableTypes.HASH
)
# check which kind of hash the analyzer needs
run_hash_type = config_dict["run_hash_type"]
if run_hash_type == AnalyzerConfigSerializer.HashChoices.MD5:
self.observable_name = self._job.md5
else:
self.observable_name = generate_sha256(self.job_id)
else:
self.observable_name = self._job.observable_name
self.observable_classification = self._job.observable_classification

def before_run(self):
super().before_run()
logger.info(
f"STARTED analyzer: {self.__repr__()} -> "
f"Observable: {self.observable_name}."
)

def after_run(self):
super().after_run()
logger.info(
f"FINISHED analyzer: {self.__repr__()} -> "
f"Observable: {self.observable_name}."
Expand All @@ -193,29 +153,31 @@ class FileAnalyzer(BaseAnalyzerMixin):
"""
Abstract class for File Analyzers.
Inherit from this branch when defining a file analyzer.
Need to overrwrite `set_config(self, additional_config_params)`
Need to overwrite `set_params(self, params)`
and `run(self)` functions.
"""

md5: str
filepath: str
filename: str
file_mimetype: str

def __init__(
self, analyzer_name, job_id, fpath, fname, md5, additional_config_params
):
self.md5 = md5
self.filepath = fpath
self.filename = fname
super().__init__(analyzer_name, job_id, additional_config_params)
def __init__(self, config_dict: dict, job_id: int, **kwargs):
super(FileAnalyzer, self).__init__(config_dict, job_id, **kwargs)
self.md5 = self._job.md5
self.filepath = self._job.file.path
self.filename = self._job.filename
self.file_mimetype = self._job.file_mimetype

def before_run(self):
super().before_run()
logger.info(
f"STARTED analyzer: {self.__repr__()} -> "
f"File: ({self.filename}, md5: {self.md5})"
)

def after_run(self):
super().after_run()
logger.info(
f"FINISHED analyzer: {self.__repr__()} -> "
f"File: ({self.filename}, md5: {self.md5})"
Expand Down
34 changes: 34 additions & 0 deletions api_app/analyzers_manager/constants.py
@@ -0,0 +1,34 @@
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
# See the file 'LICENSE' for copying permission.

from enum import Enum


class TypeChoices(Enum):
    """
    Closed set of type values: ``"file"`` and ``"observable"``.
    """

    FILE = "file"
    OBSERVABLE = "observable"

    @classmethod
    def aslist(cls) -> list:
        """
        Return the raw string value of every member, in definition order.
        """
        # Iterating an Enum class yields its members in the order they
        # were defined, so the resulting list is stable.
        return [member.value for member in cls]


class HashChoices(Enum):
    """
    Closed set of hash type values: ``"md5"`` and ``"sha256"``.
    """

    MD5 = "md5"
    SHA256 = "sha256"

    @classmethod
    def aslist(cls) -> list:
        """
        Return the raw string value of every member, in definition order.
        """
        # Iterating an Enum class yields its members in the order they
        # were defined, so the resulting list is stable.
        return [member.value for member in cls]


class ObservableTypes(Enum):
    """
    Closed set of observable classification values:
    ``"ip"``, ``"url"``, ``"domain"``, ``"hash"`` and ``"generic"``.
    """

    IP = "ip"
    URL = "url"
    DOMAIN = "domain"
    HASH = "hash"
    GENERIC = "generic"

    @classmethod
    def aslist(cls) -> list:
        """
        Return the raw string value of every member, in definition order.
        """
        # Iterating an Enum class yields its members in the order they
        # were defined, so the resulting list is stable.
        return [member.value for member in cls]