Skip to content

Commit

Permalink
Extracts web domain and IP address, implements rendering functions an…
Browse files Browse the repository at this point in the history
…d tests

This PR partially resolves mandiant#1907. It extracts web domains and IP addresses, and implements rendering functions and tests.

These changes likely don't require updates to the documentation. Users who want to reuse this work should be able to repurpose many of the extraction functions without too much trouble.

Unfortunately, I'll probably be unavailable during the next few days, but this weekend, I'll ensure the PR passes the CI tests.

I'll probably also add some more tests for the rendering functions.

Please let me know if you have any questions or suggestions!

Below is example output for the default mode:

        +------------------------------+
        | IP addresses and web domains |
        |------------------------------+
        | google.com                   |
        | 192.123.232.08               |
        | my-w3bs1te.net               |
        | maliciooous.r4ndom-site.uhoh |
        | whoops.net                   |
        +------------------------------+

Here is example output for verbose and vverbose modes:

        +-----------------------------------------------------------+
        | IP addresses and web domains                              |
        |-----------------------------------------------------------+
        | google.com                                                |
        |    |----IP address:                                       |
        |            |----192.0.0.1                                 |
        |    |----Functions used to communicate with google.com:    |
        |            |----InternetConnectA                          |
        |            |----HttpOpenRequestA                          |
        |            |----FtpGetFileA                               |
        |    |----3 occurrances                                     |
        |                                                           |
        | 192.123.232.08                                            |
        |    |----Functions used to communicate with 192.123.232.08:|
        |            |----...                                       |
        |                                                           |
        +-----------------------------------------------------------+
  • Loading branch information
aaronatp committed Jan 24, 2024
1 parent 1c89d01 commit 93af89e
Show file tree
Hide file tree
Showing 7 changed files with 591 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## master (unreleased)

### New Features
- output IP addresses and domain names #1907 @aaronatp
- add Ghidra backend #1770 #1767 @colton-gabertan @mike-hunhoff
- add dynamic analysis via CAPE sandbox reports #48 #1535 @yelhamer
- add call scope #771 @yelhamer
Expand Down
110 changes: 110 additions & 0 deletions capa/capabilities/domain_ip_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from typing import Iterator, Tuple, List
from pathlib import Path

from capa.helpers import is_runtime_ida, get_auto_format, is_runtime_ghidra
from capa.exceptions import UnsupportedFormatError
from capa.features.common import FORMAT_PE, FORMAT_ELF, FORMAT_CAPE, String
from capa.features.address import Address
from capa.features.extractors import (
ida,
ghidra,
elffile,
viv,
pefile,
binja,
dnfile,
cape,
)

from capa.render.result_document import ResultDocument
from capa.features.extractors.base_extractor import FeatureExtractor

# directory three levels above this file; get_extractor_from_doc joins it with
# "sigs" and "tests/data/sigs" to locate signature files
# NOTE(review): presumably the repository root - confirm this still holds for installed packages
CD = Path(__file__).resolve().parent.parent.parent

# these constants are also defined in capa.main
# defined here to avoid a circular import
BACKEND_VIV = "vivisect"
BACKEND_DOTNET = "dotnet"
BACKEND_BINJA = "binja"
BACKEND_PEFILE = "pefile"


def get_file_strings(doc: ResultDocument) -> List[str]:
    """
    extract the file-level strings of the sample described by a result document

    inside IDA or Ghidra the strings come from the respective helper modules.
    otherwise the sample is read from disk and the extraction routine is chosen
    from its auto-detected format.

    args:
      doc: result document; its metadata supplies the sample path.

    returns:
      list with the first element of every (feature, address) pair yielded by
      the selected `extract_file_strings` routine (see `fix_up`).

    raises:
      UnsupportedFormatError: the detected format matches none of the handled cases.
    """
    if is_runtime_ida():
        return fix_up(ida.helpers.extract_file_strings())
    if is_runtime_ghidra():
        return fix_up(ghidra.helpers.extract_file_strings())

    file = get_file_path(doc)
    format_ = get_auto_format(file)
    buf = file.read_bytes()

    # NOTE(review): the result of get_auto_format is compared against BACKEND_* names
    # as well as FORMAT_* values; the comparisons are kept as-is - confirm which of
    # them can actually match.
    if format_ == FORMAT_ELF:
        return fix_up(elffile.extract_file_strings(buf))
    if format_ == BACKEND_VIV:
        return fix_up(viv.file.extract_file_strings(buf))
    if format_ in (BACKEND_PEFILE, FORMAT_PE):
        return fix_up(pefile.extract_file_strings(buf))

    # only the remaining formats need a feature extractor, so it is built on demand
    # instead of up front for every call (including the IDA/Ghidra and PE/ELF paths
    # that never touch it).
    if format_ == BACKEND_BINJA:
        return fix_up(binja.file.extract_file_strings(get_extractor_from_doc(doc).bv))
    if format_ == BACKEND_DOTNET:
        return fix_up(dnfile.file.extract_file_strings(get_extractor_from_doc(doc).pe))
    if format_ == FORMAT_CAPE:
        return fix_up(cape.file.extract_file_strings(get_extractor_from_doc(doc).report))

    raise UnsupportedFormatError(f"Unknown file format! Format: {format_}")


def fix_up(obj: Iterator[Tuple[String, Address]]) -> List[str]:
    """
    collect the first element of every item produced by an
    'extract_file_strings' call into a list

    the second element of each item (the address, per the annotation) is dropped.

    NOTE(review): the return annotation says List[str], while the parameter
    annotation says the first elements are String features - confirm which
    one callers rely on.
    """
    return [entry[0] for entry in obj]


def get_file_path(doc: ResultDocument) -> Path:
    """return the sample path recorded in the result document's metadata as a Path"""
    sample_path = doc.meta.sample.path
    return Path(sample_path)


def get_extractor_from_doc(doc: ResultDocument) -> FeatureExtractor:
    """
    build a feature extractor for the sample that a result document describes

    the sample path, format and OS are taken from the document's metadata.
    the backend is the auto-detected value when it equals one of the known
    backend names, and vivisect otherwise. signature files are looked up
    under `CD`.

    args:
      doc: result document whose metadata identifies the sample.

    returns:
      whatever `capa.main.get_extractor` returns for these arguments.
    """
    # imported inside the function; the note beside the BACKEND_* constants says a
    # module-level dependency on capa.main would be a circular import
    import capa.main

    sample_path = get_file_path(doc)
    analysis = doc.meta.analysis
    file_format = analysis.format
    target_os = analysis.os

    detected = get_auto_format(sample_path)
    known_backends = (BACKEND_VIV, BACKEND_PEFILE, BACKEND_BINJA, BACKEND_DOTNET)
    backend = next(
        (name for name in known_backends if detected == name),
        BACKEND_VIV,  # according to capa.main this is the default
    )

    test_sigs_dir = CD / "tests" / "data" / "sigs"
    sigs_dir = CD / "sigs"
    sigpaths = [
        test_sigs_dir / "test_aulldiv.pat",
        test_sigs_dir / "test_aullrem.pat.gz",
        sigs_dir / "1_flare_msvc_rtf_32_64.sig",
        sigs_dir / "2_flare_msvc_atlmfc_32_64.sig",
        sigs_dir / "3_flare_common_libs.sig",
    ]

    return capa.main.get_extractor(
        path=sample_path,
        format_=file_format,
        os_=target_os,
        backend=backend,
        sigpaths=sigpaths,
    )
Loading

0 comments on commit 93af89e

Please sign in to comment.