Skip to content

Commit

Permalink
Merge f338b9b into e6bfb28
Browse files Browse the repository at this point in the history
  • Loading branch information
mcdonnnj committed Sep 20, 2019
2 parents e6bfb28 + f338b9b commit 81ed795
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 29 deletions.
90 changes: 61 additions & 29 deletions src/ioc_scan/ioc_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,10 @@

from collections import defaultdict
from datetime import datetime
import hashlib
import logging
import platform
import os
import re
import subprocess # nosec
from string import Template
import sys

# just paste the text that has the indicators into BLOB.
Expand All @@ -35,6 +34,11 @@
MDS: 2a2410cef5497cbd3f6c13eaff9619da
MDS: 3e7eb6abcce304de0822a618de756fd2
MDS: 350cba65e28c723cbf0724c19bd7ee69
SHA256: b509f8545501588ecd828f970d91afc7c4aa6e238e838bd6a08ee2cd920fbe98
SHA-1: 31B54AEBDAF5FBC73A66AC41CCB35943CC9B7F72
SHA-1: 50973A3FC57D70C7911F7A952356188B9939E56B
SHA-1: 244EB62B9AC30934098CA4204447440D6FC4E259
SHA-1: 5C8F83CC4FF57E7C67925DF4D9DAABE5D0CC07E2
few things that should hit:
GNU bash, version 3.2.57(1)-release (x86_64-apple-darwin18)
0313fd399b143fc40cd52a1679018305
Expand All @@ -44,11 +48,30 @@
69630e4574ec6798239b091cda43dca0
"""

# Pattern capturing a run of exactly 32 hexadecimal characters. It has no
# anchors or word boundaries, so it also matches a 32-character window inside
# a longer hex string.
MD5_RE = r"([a-fA-F\d]{32})"
# Shell command templates keyed by the platform name ("Linux" / "Darwin").
# Each runs `find` from $root, stays on one filesystem (-xdev), and executes
# the platform's MD5 tool on every regular file; $root is filled in via
# Template.substitute().
COMMANDS = {
"Linux": Template(r"find $root -xdev -type f -exec md5sum {} \;"),
"Darwin": Template(r"find $root -xdev -type f -exec md5 -r {} \;"),
}
# Use word boundaries ('\b') to bracket the specific hash lengths so that a
# longer hex string is never reported as a shorter hash.  The digit class is
# spelled 0-9 rather than \d because, for str patterns, \d also matches
# non-ASCII Unicode decimal digits, which are not valid hexadecimal characters.
MD5_RE = r"\b([a-fA-F0-9]{32})\b"  # 32 hex characters
SHA1_RE = r"\b([a-fA-F0-9]{40})\b"  # 40 hex characters
SHA256_RE = r"\b([a-fA-F0-9]{64})\b"  # 64 hex characters


def hash_file(file):
    """Generate MD5, SHA-1, and SHA-256 hashes for a given file.

    The file is opened in binary mode and read once, in fixed-size chunks;
    every chunk is fed to all three digests so the file is never held in
    memory as a whole.

    Args:
        file: Path of the file to hash (anything ``open()`` accepts).

    Returns:
        A ``(md5, sha1, sha256)`` tuple of hexadecimal digest strings.  This
        is best-effort: if the file cannot be opened or read, the error is
        logged at debug level and the digests cover only the bytes read before
        the failure.  For a file that could not be opened at all, these are
        the digests of empty input.
    """
    # Order matters: callers index the result as (md5, sha1, sha256).
    digests = (
        hashlib.md5(),  # nosec
        hashlib.sha1(),  # nosec
        hashlib.sha256(),
    )

    # Filesystem errors such as "Permission denied" must not abort the caller,
    # so they are recorded for debugging rather than raised.
    try:
        with open(file, "rb") as f:
            # read it in chunks so memory use isn't outlandish
            for chunk in iter(lambda: f.read(4096), b""):
                for digest in digests:
                    digest.update(chunk)
    except OSError as err:
        logging.debug("Unable to hash %s: %s", file, err)

    return tuple(digest.hexdigest() for digest in digests)


def main(blob=None, root="/"):
Expand All @@ -60,39 +83,48 @@ def main(blob=None, root="/"):
blob = BLOB

# get a list of all the MD5, SHA-1, and SHA-256 hashes from some inconsiderate source.
indicators = re.findall(MD5_RE, blob.lower())
indicators_md5 = re.findall(MD5_RE, blob.lower())
indicators_sha1 = re.findall(SHA1_RE, blob.lower())
indicators_sha256 = re.findall(SHA256_RE, blob.lower())
indicators = indicators_md5 + indicators_sha1 + indicators_sha256

logging.debug(f"Scan will search for {len(indicators)} indicators")

# compile a regular expression to search for all indicators
indicators_re = re.compile("|".join(indicators))

# choose the correct command based on the platform, and apply root to template
command = COMMANDS.get(platform.system()).substitute(root=root)
logging.debug(f"Scan command: {command}")

# start hashing files
logging.debug(f"Starting scan with root: {root}")
p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE) # nosec
logging.debug("Scan completed")

# store an array of ioc hits
ioc_list = []
# keep a tally of the hits
tallies = defaultdict(lambda: 0)
# walk the filesystem starting at root
for rootdir, subdirs, files in os.walk(root):
# find -xdev equivalent
subdirs[:] = [
d for d in subdirs if not os.path.ismount(os.path.join(rootdir, d))
]

# check each file in the current directory
for file in [os.path.join(rootdir, f) for f in files]:
# get hashes for the current file
hashes = hash_file(file)

for hash in hashes:
matches = indicators_re.findall(hash)

# tally it up and report if we get a hit
if matches:
ioc_list.append(f"{hash} {file}")
tallies[hash] += 1

for line in p.stdout:
line = line.decode("utf-8")
# a line looks like this:
# 0313fd399b143fc40cd52a1679018305 /bin/bash

# save just the hash
file_hash = line.split()[0]

# check the line for matches
matches = indicators_re.findall(file_hash)
logging.debug("Scan completed")

# tally it up and report if we get a hit
if matches:
print(line)
tallies[matches[0]] += 1
# print all indicators that were found
for ioc in ioc_list:
print(ioc)

# stop the clock
end_time = datetime.utcnow()
Expand Down
22 changes: 22 additions & 0 deletions tests/test_ioc_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import ioc_scan
from ioc_scan import ioc_scan_cli
from ioc_scan import ioc_scanner


log_levels = (
Expand Down Expand Up @@ -50,6 +51,27 @@ def test_log_levels(level):
assert return_code == 0, "main() should return success (0)"


def test_hash_file_hashing():
    """Verify hash_file() yields the known digests for the eicar.txt target."""
    expected_md5 = "69630e4574ec6798239b091cda43dca0"
    expected_sha1 = "cf8bd9dfddff007f75adf4c2be48005cea317c62"
    expected_sha256 = (
        "131f95c51cc819465fa1797f6ccacf9d494aaaff46fa3eac73ae63ffbdfd8267"
    )

    # hash_file() results are indexed in the order md5, sha1, sha256
    digests = ioc_scanner.hash_file("tests/targets/eicar.txt")

    assert digests[0] == expected_md5
    assert digests[1] == expected_sha1
    assert digests[2] == expected_sha256


def test_hash_file_except():
    """Verify hash_file() absorbs the OSError raised for a missing file."""
    # digests of zero bytes of input, which is what remains when nothing is read
    empty_md5 = "d41d8cd98f00b204e9800998ecf8427e"
    empty_sha1 = "da39a3ee5e6b4b0d3255bfef95601890afd80709"
    empty_sha256 = (
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
    )

    digests = ioc_scanner.hash_file("tests/targets/doesnotexist.txt")

    assert digests[0] == empty_md5
    assert digests[1] == empty_sha1
    assert digests[2] == empty_sha256


def test_scan_file(capsys):
"""Test running the scanner with an input target file."""
with patch.object(
Expand Down

0 comments on commit 81ed795

Please sign in to comment.