Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Malware bazaar ingestor #2259

Merged
merged 39 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
e4f450f
added malware bazaar ingestor
federicofantini Apr 4, 2024
8ed18f1
typo
federicofantini Apr 4, 2024
1d27bf5
added support to delayed celery jobs startup for ingestors
federicofantini Apr 9, 2024
6497a98
moved url to config parameter
federicofantini Apr 10, 2024
5ac7d17
fixed wrong access to observable name
federicofantini Apr 10, 2024
b3d20a3
changed timedelta from class to object
federicofantini Apr 11, 2024
e359d83
added _monkeypatch()
federicofantini Apr 11, 2024
87d9a3d
omitted full_name field and generate ingestors plugin config
federicofantini Apr 11, 2024
570c62e
added threatfox url migration
federicofantini Apr 11, 2024
e636124
Merge remote-tracking branch 'public/develop' into malware_bazaar_inj…
federicofantini Apr 11, 2024
90e30e6
fixed linter
federicofantini Apr 11, 2024
9913052
fixed linter
federicofantini Apr 11, 2024
30ca00a
fixed linter
federicofantini Apr 11, 2024
8088cf4
fixed linter
federicofantini Apr 11, 2024
5eda409
fixed linter
federicofantini Apr 11, 2024
718ff36
fixed linter
federicofantini Apr 11, 2024
ee48aaf
fixed linter
federicofantini Apr 11, 2024
01bf119
fixed linter
federicofantini Apr 11, 2024
0682935
updated threatfox migration
federicofantini Jun 3, 2024
bb6e436
Merge remote-tracking branch 'public/develop' into malware_bazaar_inj…
federicofantini Jun 3, 2024
ff7cf12
changed migration order
federicofantini Jun 3, 2024
a60eab2
fixed reverse migrations
federicofantini Jun 3, 2024
570b047
fixed default signatures
federicofantini Jun 3, 2024
33fc56a
fixed default signatures
federicofantini Jun 3, 2024
bb5e1c6
added malware bazaar userprofile
federicofantini Jun 5, 2024
2164e00
isort
federicofantini Jun 5, 2024
a44d70e
added default value to timedelta
federicofantini Jun 5, 2024
d530b74
fixed delay parameter default value and int conversion
federicofantini Jun 5, 2024
63544a4
fixed userprofile dumpplugin
federicofantini Jun 5, 2024
33ffb01
reduced code complexity and fixed generator job creation
federicofantini Jun 10, 2024
8d946bb
fixed deepsource warnings
federicofantini Jun 10, 2024
ff7fa2a
Merge remote-tracking branch 'public/develop' into malware_bazaar_inj…
federicofantini Jun 10, 2024
f419a5a
fixed deepsoruce cyclic import
federicofantini Jun 10, 2024
9e7e6b3
changed order PivotConfigurationException
federicofantini Jun 10, 2024
1892e4d
Merge branch 'develop' into malware_bazaar_injestor
federicofantini Jul 1, 2024
7f7b814
made code review changes
federicofantini Jul 1, 2024
75a450c
fixed errors
federicofantini Jul 1, 2024
ad63bc0
Merge branch 'develop' into malware_bazaar_injestor
federicofantini Jul 1, 2024
7edd869
fixed errors
federicofantini Jul 1, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion api_app/classes.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import logging
import traceback
import typing
Expand All @@ -7,6 +8,7 @@
import requests
from billiard.exceptions import SoftTimeLimitExceeded
from django.conf import settings
from django.core.files import File
from django.utils import timezone
from django.utils.functional import cached_property
from requests import HTTPError
Expand Down Expand Up @@ -121,9 +123,22 @@
self.report.save()

def after_run_success(self, content: typing.Any):
    """Store ``content`` as the report body and mark the report as successful.

    Generators are materialized into a list first. A non-empty list made
    entirely of ``bytes`` or entirely of ``File`` objects is stored as a list
    of base64 strings, because neither type is JSON serializable. Any other
    content is stored unchanged.

    Args:
        content: value (or generator of values) produced by the plugin run.
    """
    # exhaust generator
    if isinstance(content, typing.Generator):
        content = list(content)

    # avoiding JSON serialization errors for types: File and bytes
    report_content = content
    # an empty list needs no encoding, and all() would be vacuously true on it
    if isinstance(report_content, list) and report_content:
        if all(isinstance(n, bytes) for n in report_content):
            report_content = [
                base64.b64encode(b).decode("utf-8") for b in report_content
            ]
        elif all(isinstance(n, File) for n in report_content):
            # NOTE(review): read() leaves every File positioned at its end;
            # a caller that reuses the same File objects afterwards may need
            # to rewind them -- confirm against the callers.
            report_content = [
                base64.b64encode(f.read()).decode("utf-8") for f in report_content
            ]

    # only the serializable form is ever assigned to the report
    self.report.report = report_content
    self.report.status = self.report.Status.SUCCESS.value
    self.report.save(update_fields=["status", "report"])

Expand Down Expand Up @@ -269,7 +284,7 @@
logger.info(f"healthcheck url {url} for {self}")
try:
# momentarily set this to False to
# avoid fails for https services

Check failure on line 287 in api_app/classes.py

View check run for this annotation

codefactor.io / CodeFactor

api_app/classes.py#L287

Call to requests with verify=False disabling SSL certificate checks, security issue. (B501)
response = requests.head(url, timeout=10, verify=False)
response.raise_for_status()
except (
Expand Down
5 changes: 3 additions & 2 deletions api_app/ingestors_manager/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,16 @@ def before_run(self):
self._config.validate_playbook_to_execute(self._user)

def after_run_success(self, content):
    """Save the report, then create one job for every item the ingestor produced.

    Args:
        content: value (or generator of values) returned by the ingestor run.
    """
    from collections.abc import Generator

    # Materialize a generator exactly once, up front. Keeping only an alias
    # to it is not enough: the after_run_success shown in api_app/classes.py
    # drains generators into a local list, so the alias would be exhausted by
    # the time the jobs are created.
    if isinstance(content, Generator):
        content = list(content)
    super().after_run_success(content)
    self._config: IngestorConfig
    # create_jobs is consumed only for its side effects; deque(maxlen=0)
    # drains it without keeping the results
    deque(
        self._config.create_jobs(
            # every job created from an ingestor
            content,
            TLP.CLEAR.value,
            self._user,
            self._config.delay,
        ),
        maxlen=0,
    )
Expand Down
138 changes: 138 additions & 0 deletions api_app/ingestors_manager/ingestors/malware_bazaar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import io
import logging
from datetime import datetime, timedelta, timezone
from typing import Any, Iterable
from unittest.mock import patch

import pyzipper
import requests

from api_app.ingestors_manager.classes import Ingestor
from api_app.ingestors_manager.exceptions import IngestorRunException
from tests.mock_utils import MockUpResponse, if_mock_connections

logger = logging.getLogger(__name__)


class MalwareBazaar(Ingestor):
# API endpoint
url: str
# Download samples that are up to X hours old
hours: int
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add, as a comment, the same description that you added in the Parameter model?
That way, someone who reads only this code can understand what this parameter is for.

# Download samples from chosen signatures (aka malware families)
signatures: str
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the Parameter called signatures is actually a secret, the config() function would have assigned it to _signatures in this class.
I don't know how this is working on your side.


def run(self) -> Iterable[Any]:
    """Yield the raw bytes of every recent sample for the configured signatures.

    All signature queries are performed first; the matching samples are then
    downloaded one at a time. An archive that does not contain exactly one
    file is skipped instead of yielding a stale or undefined sample.

    Raises:
        IngestorRunException: a signature query returned an unexpected
            status or payload.
        requests.HTTPError: an HTTP call returned an error status.
    """
    for sha256 in self._recent_hashes():
        sample = self._download_sample(sha256)
        if sample is not None:
            yield sample


def _post(self, payload: dict) -> requests.Response:
    """POST ``payload`` as form data to ``self.url`` and fail on HTTP errors."""
    # requests waits forever unless a timeout is given; bound the wait so a
    # stalled connection cannot block the ingestor (30s is a judgment call)
    response = requests.post(self.url, data=payload, timeout=30)
    response.raise_for_status()
    return response


def _signature_data(self, signature: str) -> list:
    """Return the list of sample entries reported for ``signature``."""
    content = self._post(
        {"query": "get_siginfo", "signature": signature, "limit": 100}
    ).json()
    logger.info(f"Malware bazaar data for signature {signature} is {content}")
    if content["query_status"] != "ok":
        raise IngestorRunException(
            f"Query status is invalid: {content['query_status']}"
        )
    if not isinstance(content["data"], list):
        raise IngestorRunException(f"Content {content} not expected")
    return content["data"]


def _recent_hashes(self) -> set:
    """Collect the sha256 of samples first seen within the last ``self.hours`` hours."""
    hashes = set()
    max_age = timedelta(hours=self.hours)
    # first_seen is parsed as a naive datetime and MalwareBazaar documents it
    # as UTC, so compare against a naive UTC "now", not local time
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    last_hours_str = "Last hour" if self.hours == 1 else f"Last {self.hours} hours"

    for signature in self.signatures:
        data = self._signature_data(signature)
        recent = set()
        for elem in data:
            first_seen = datetime.strptime(elem["first_seen"], "%Y-%m-%d %H:%M:%S")
            # compare the exact age: flooring to whole hours would also let
            # through samples almost one hour older than requested
            if elem["signature"] == signature and now - first_seen <= max_age:
                recent.add(elem["sha256_hash"])
        # report the count for this signature only, not the running total
        logger.info(f"{last_hours_str} {signature} samples: {len(recent)}/{len(data)}")
        hashes |= recent
    return hashes


def _download_sample(self, sha256: str):
    """Download and unpack one sample; return its bytes, or None if unusable."""
    logger.info(f"Downloading sample {sha256}")
    sample_archive = self._post({"query": "get_file", "sha256_hash": sha256})
    logger.info(f"Correctly downloaded sample {sha256}")
    with pyzipper.AESZipFile(io.BytesIO(sample_archive.content)) as zf:
        zf.setpassword(b"infected")
        files = zf.namelist()
        if len(files) != 1:
            logger.warning(
                f"Skipping sample {sha256}: expected 1 file in the archive, "
                f"found {len(files)}"
            )
            return None
        return zf.read(files[0])

@classmethod
def _monkeypatch(cls):
    """Register the canned ``requests.post`` responses with the parent class.

    Two patches are built: one whose response carries a signature-info JSON
    payload, and one whose response carries raw sample bytes. Both are passed
    through ``if_mock_connections`` and handed to ``super()._monkeypatch``.
    """
    # NOTE(review): both patches target "requests.post"; which response is
    # actually served depends on how if_mock_connections applies them -- confirm.
    siginfo_entry = {
        "sha256_hash": "c5c810beaf075f8fee52146b381b0f94a6"
        "e303fada3bce12bcc07fbfa07ba07e",
        "sha3_384_hash": "bdd25a594b5a5d8ab14b00c04ee75d6a"
        "476bf2a7df49223284eebfac82be107a"
        "b94ffaae294ef4cf0a1c23a206e1fbd9",
        "sha1_hash": "3fea40223c02a15678912a29147d2b32d05c" "46df",
        "md5_hash": "dc591fd6d108b50bd9aa1f3dce2f3fe4",
        "first_seen": "2024-04-11 12:35:10",
        "last_seen": None,
        "file_name": "17128389081d4616ae42b2693f5ea6783112"
        "f41cb2ee5184f49d983f8bf833df0b0e97b4"
        "29449.dat-decoded",
        "file_size": 240128,
        "file_type_mime": "application/x-dosexec",
        "file_type": "exe",
        "reporter": "abuse_ch",
        "anonymous": 0,
        "signature": "AgentTesla",
        "imphash": "f34d5f2d4577ed6d9ceec516c1f5a744",
        "tlsh": "T17534FD037E88EB15E5A87E3782EF6C2413B2B0C"
        "71633C60B6F49AF6518516426D7E72D",
        "telfhash": None,
        "gimphash": None,
        "ssdeep": "3072:z+ymieCL2QfOdb/TmqtbqRFP55EMX+CWQ:"
        "z+ymieCLPfOdbqq9qRFvXJW",
        "dhash_icon": None,
        "tags": ["AgentTesla", "base64-decoded", "exe"],
        "code_sign": [],
        "intelligence": {
            "clamav": None,
            "downloads": "338",
            "uploads": "1",
            "mail": None,
        },
    }
    siginfo_response = MockUpResponse(
        {"query_status": "ok", "data": [siginfo_entry]},
        200,
    )
    download_response = MockUpResponse(
        {}, content=b"AgentTesla malware downloaded!", status_code=200
    )

    mocked_connections = if_mock_connections(
        patch("requests.post", return_value=siginfo_response),
        patch("requests.post", return_value=download_response),
    )
    return super()._monkeypatch(patches=[mocked_connections])
11 changes: 5 additions & 6 deletions api_app/ingestors_manager/ingestors/threatfox.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,16 @@


class ThreatFox(Ingestor):
# API endpoint
url: str
federicofantini marked this conversation as resolved.
Show resolved Hide resolved
# Days to check. From 1 to 7
days: int

BASE_URL = "https://threatfox-api.abuse.ch/api/v1/"

def run(self) -> Iterable[Any]:
result = requests.post(
self.BASE_URL, json={"query": "get_iocs", "days": self.days}
)
result = requests.post(self.url, json={"query": "get_iocs", "days": self.days})
result.raise_for_status()
content = result.json()
logger.info(f"Threatfox data is {content}")
logger.info(f"ThreatFox data is {content}")
if content["query_status"] != "ok":
raise IngestorRunException(
f"Query status is invalid: {content['query_status']}"
Expand Down
22 changes: 22 additions & 0 deletions api_app/ingestors_manager/migrations/0018_ingestorconfig_delay.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 4.2.11 on 2024-04-09 15:19

import datetime

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``delay`` duration field to the ``ingestorconfig`` model."""

    dependencies = [("ingestors_manager", "0017_4_change_primary_key")]

    operations = [
        migrations.AddField(
            "ingestorconfig",
            "delay",
            # the default is the timedelta class itself, called with no
            # arguments whenever a default value is needed
            models.DurationField(
                help_text="Expects data in the format 'DD HH:MM:SS'",
                default=datetime.timedelta,
            ),
        ),
    ]
Loading
Loading