Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Malware bazaar ingestor #2259

Merged
merged 39 commits into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
e4f450f
added malware bazaar ingestor
federicofantini Apr 4, 2024
8ed18f1
typo
federicofantini Apr 4, 2024
1d27bf5
added support to delayed celery jobs startup for ingestors
federicofantini Apr 9, 2024
6497a98
moved url to config parameter
federicofantini Apr 10, 2024
5ac7d17
fixed wrong access to observable name
federicofantini Apr 10, 2024
b3d20a3
changed timedelta from class to object
federicofantini Apr 11, 2024
e359d83
added _monkeypatch()
federicofantini Apr 11, 2024
87d9a3d
omitted full_name field and generate ingestors plugin config
federicofantini Apr 11, 2024
570c62e
added threatfox url migration
federicofantini Apr 11, 2024
e636124
Merge remote-tracking branch 'public/develop' into malware_bazaar_inj…
federicofantini Apr 11, 2024
90e30e6
fixed linter
federicofantini Apr 11, 2024
9913052
fixed linter
federicofantini Apr 11, 2024
30ca00a
fixed linter
federicofantini Apr 11, 2024
8088cf4
fixed linter
federicofantini Apr 11, 2024
5eda409
fixed linter
federicofantini Apr 11, 2024
718ff36
fixed linter
federicofantini Apr 11, 2024
ee48aaf
fixed linter
federicofantini Apr 11, 2024
01bf119
fixed linter
federicofantini Apr 11, 2024
0682935
updated threatfox migration
federicofantini Jun 3, 2024
bb6e436
Merge remote-tracking branch 'public/develop' into malware_bazaar_inj…
federicofantini Jun 3, 2024
ff7cf12
changed migration order
federicofantini Jun 3, 2024
a60eab2
fixed reverse migrations
federicofantini Jun 3, 2024
570b047
fixed default signatures
federicofantini Jun 3, 2024
33fc56a
fixed default signatures
federicofantini Jun 3, 2024
bb5e1c6
added malware bazaar userprofile
federicofantini Jun 5, 2024
2164e00
isort
federicofantini Jun 5, 2024
a44d70e
added default value to timedelta
federicofantini Jun 5, 2024
d530b74
fixed delay parameter default value and int conversion
federicofantini Jun 5, 2024
63544a4
fixed userprofile dumpplugin
federicofantini Jun 5, 2024
33ffb01
reduced code complexity and fixed generator job creation
federicofantini Jun 10, 2024
8d946bb
fixed deepsource warnings
federicofantini Jun 10, 2024
ff7fa2a
Merge remote-tracking branch 'public/develop' into malware_bazaar_inj…
federicofantini Jun 10, 2024
f419a5a
fixed deepsoruce cyclic import
federicofantini Jun 10, 2024
9e7e6b3
changed order PivotConfigurationException
federicofantini Jun 10, 2024
1892e4d
Merge branch 'develop' into malware_bazaar_injestor
federicofantini Jul 1, 2024
7f7b814
made code review changes
federicofantini Jul 1, 2024
75a450c
fixed errors
federicofantini Jul 1, 2024
ad63bc0
Merge branch 'develop' into malware_bazaar_injestor
federicofantini Jul 1, 2024
7edd869
fixed errors
federicofantini Jul 1, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions api_app/classes.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import logging
import traceback
import typing
Expand All @@ -7,6 +8,7 @@
import requests
from billiard.exceptions import SoftTimeLimitExceeded
from django.conf import settings
from django.core.files import File
from django.utils import timezone
from django.utils.functional import cached_property
from requests import HTTPError
Expand Down Expand Up @@ -121,9 +123,18 @@
self.report.save()

def after_run_success(self, content: typing.Any):
if isinstance(content, typing.Generator):
content = list(content)
self.report.report = content
# avoiding JSON serialization errors for types: File and bytes
report_content = []
if isinstance(content, typing.List):
for n in content:
if isinstance(n, File):
report_content.append(base64.b64encode(n.read()).decode("utf-8"))
elif isinstance(n, bytes):
report_content.append(base64.b64encode(n).decode("utf-8"))
else:
report_content.append(n)

self.report.report = report_content
self.report.status = self.report.Status.SUCCESS.value
self.report.save(update_fields=["status", "report"])

Expand Down Expand Up @@ -273,7 +284,7 @@
response = requests.head(url, timeout=10, verify=False)
response.raise_for_status()
except (
requests.exceptions.ConnectionError,

Check failure on line 287 in api_app/classes.py

View check run for this annotation

codefactor.io / CodeFactor

api_app/classes.py#L287

Call to requests with verify=False disabling SSL certificate checks, security issue. (B501)
requests.exceptions.Timeout,
requests.exceptions.HTTPError,
) as e:
Expand Down
6 changes: 5 additions & 1 deletion api_app/ingestors_manager/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,19 @@ def before_run(self):
self._config.validate_playbook_to_execute(self._user)

def after_run_success(self, content):
# exhaust generator
if isinstance(content, typing.Generator):
content = list(content)

super().after_run_success(content)
self._config: IngestorConfig
# exhaust generator
deque(
self._config.create_jobs(
# every job created from an ingestor
content,
TLP.CLEAR.value,
self._user,
self._config.delay,
),
maxlen=0,
)
Expand Down
160 changes: 160 additions & 0 deletions api_app/ingestors_manager/ingestors/malware_bazaar.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
import io
import logging
import time
from typing import Any, Iterable
from unittest.mock import patch

import pyzipper
import requests
from django.utils import timezone

from api_app.ingestors_manager.classes import Ingestor
from api_app.ingestors_manager.exceptions import IngestorRunException
from tests.mock_utils import MockUpResponse, if_mock_connections

logger = logging.getLogger(__name__)


class MalwareBazaar(Ingestor):
# API endpoint
url: str
# Download samples that are up to X hours old
hours: int
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add as comment the same description that you added in the Parameter model?
Just so that if someone only reads this code, understand what is this parameter for

# Download samples from chosen signatures (aka malware families)
signatures: str
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if the Parameter called signatures it is actually a secret, the config() function would have assigned it to _signatures in this class.
Idk how this is working on your side


@classmethod
def update(cls) -> bool:
pass

# retrieve information about the given signature
def get_signature_information(self, signature):
result = requests.post(
self.url,
data={"query": "get_siginfo", "signature": signature, "limit": 100},
timeout=30,
)
result.raise_for_status()
content = result.json()
logger.info(f"Malware bazaar data for signature {signature} is {content}")
if content["query_status"] != "ok":
raise IngestorRunException(
f"Query status is invalid: {content['query_status']}"
)
if not isinstance(content["data"], list):
raise IngestorRunException(f"Content {content} not expected")
return content["data"]

# extract file hashes per signature
def get_recent_samples(self):
hashes = set()
current_time = timezone.now()
for signature in self.signatures:
data = self.get_signature_information(signature)
for elem in data:
first_seen = timezone.make_aware(
timezone.datetime.strptime(elem["first_seen"], "%Y-%m-%d %H:%M:%S")
)
diff = int((current_time - first_seen).total_seconds()) // 3600
if elem["signature"] == signature and diff <= self.hours:
hashes.add(elem["sha256_hash"])

last_hours_str = (
"Last hour" if self.hours == 1 else f"Last {self.hours} hours"
)
logger.info(
f"{last_hours_str} {signature} samples: " f"{len(hashes)}/{len(data)}"
)
return hashes

def download_sample(self, h):
logger.info(f"Downloading sample {h}")
sample_archive = requests.post(
self.url,
data={
"query": "get_file",
"sha256_hash": h,
},
timeout=60,
)
sample_archive.raise_for_status()
logger.info(f"Correctly downloaded sample {h}")
logger.info("Sleeping for 1 second")
time.sleep(1)
with pyzipper.AESZipFile(io.BytesIO(sample_archive.content)) as zf:
zf.setpassword(b"infected")
files = zf.namelist()
# expected only one file
if files and len(files) == 1:
return zf.read(files[0])

def run(self) -> Iterable[Any]:
hashes = self.get_recent_samples()
hashes_len = len(hashes)
# download sample and create new analysis
for idx, h in enumerate(hashes):
logger.info(f"Downloading sample {idx+1}/{hashes_len}")
sample = self.download_sample(h)
yield sample

@classmethod
def _monkeypatch(cls):
patches = [
if_mock_connections(
patch(
"requests.post",
return_value=MockUpResponse(
{
"query_status": "ok",
"data": [
{
"sha256_hash": "c5c810beaf075f8fee52146b381b0f94a6"
"e303fada3bce12bcc07fbfa07ba07e",
"sha3_384_hash": "bdd25a594b5a5d8ab14b00c04ee75d6a"
"476bf2a7df49223284eebfac82be107a"
"b94ffaae294ef4cf0a1c23a206e1fbd9",
"sha1_hash": "3fea40223c02a15678912a29147d2b32d05c"
"46df",
"md5_hash": "dc591fd6d108b50bd9aa1f3dce2f3fe4",
"first_seen": "2024-04-11 12:35:10",
"last_seen": None,
"file_name": "17128389081d4616ae42b2693f5ea6783112"
"f41cb2ee5184f49d983f8bf833df0b0e97b4"
"29449.dat-decoded",
"file_size": 240128,
"file_type_mime": "application/x-dosexec",
"file_type": "exe",
"reporter": "abuse_ch",
"anonymous": 0,
"signature": "AgentTesla",
"imphash": "f34d5f2d4577ed6d9ceec516c1f5a744",
"tlsh": "T17534FD037E88EB15E5A87E3782EF6C2413B2B0C"
"71633C60B6F49AF6518516426D7E72D",
"telfhash": None,
"gimphash": None,
"ssdeep": "3072:z+ymieCL2QfOdb/TmqtbqRFP55EMX+CWQ:"
"z+ymieCLPfOdbqq9qRFvXJW",
"dhash_icon": None,
"tags": ["AgentTesla", "base64-decoded", "exe"],
"code_sign": [],
"intelligence": {
"clamav": None,
"downloads": "338",
"uploads": "1",
"mail": None,
},
}
],
},
200,
),
),
patch(
"requests.post",
return_value=MockUpResponse(
{}, content=b"AgentTesla malware downloaded!", status_code=200
),
),
)
]
return super()._monkeypatch(patches=patches)
13 changes: 8 additions & 5 deletions api_app/ingestors_manager/ingestors/threatfox.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,20 @@


class ThreatFox(Ingestor):
# API endpoint
url = "https://threatfox-api.abuse.ch/api/v1/"
# Days to check. From 1 to 7
days: int

BASE_URL = "https://threatfox-api.abuse.ch/api/v1/"
@classmethod
def update(cls) -> bool:
pass

def run(self) -> Iterable[Any]:
result = requests.post(
self.BASE_URL, json={"query": "get_iocs", "days": self.days}
)
result = requests.post(self.url, json={"query": "get_iocs", "days": self.days})
result.raise_for_status()
content = result.json()
logger.info(f"Threatfox data is {content}")
logger.info(f"ThreatFox data is {content}")
if content["query_status"] != "ok":
raise IngestorRunException(
f"Query status is invalid: {content['query_status']}"
Expand Down
22 changes: 22 additions & 0 deletions api_app/ingestors_manager/migrations/0018_ingestorconfig_delay.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 4.2.11 on 2024-04-09 15:19

import datetime

from django.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
("ingestors_manager", "0017_4_change_primary_key"),
]

operations = [
migrations.AddField(
model_name="ingestorconfig",
name="delay",
field=models.DurationField(
default=datetime.timedelta,
help_text="Expects data in the format 'DD HH:MM:SS'",
),
),
]
Loading
Loading