Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable downloading of external rulesets (NBS, Magemojo, Byte) #37

Merged
merged 7 commits into from Jan 17, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -9,3 +9,4 @@ __pycache__
.pybuild
build/lib*
dist/
rulesets*.sqlite
4 changes: 2 additions & 2 deletions .travis.yml
Expand Up @@ -29,8 +29,8 @@ before_script:
- bash tools/travis/prepare_magento.sh

script:
- pip install yara-python
- nosetests -v mwscan
- pip install yara-python requests-cache
- PYTHONPATH=mwscan nosetests -v mwscan/tests/test_mwscan.py
- python tools/validate_signatures.py
- bash tools/travis/test_magento.sh

4 changes: 2 additions & 2 deletions docs/usage.md
Expand Up @@ -13,7 +13,7 @@ For faster scanning and more features, you can install the scanner from this rep

```bash
# Install prerequisites on Debian/Ubuntu flavoured server
sudo apt install -qy python-pip
sudo apt install -qy python-pip gcc python-dev
sudo pip install --no-cache-dir --upgrade mwscan
```

Expand All @@ -24,7 +24,7 @@ sudo pip install --no-cache-dir --upgrade mwscan
wget https://dl.fedoraproject.org/pub/epel/epel-release-latest-6.noarch.rpm
sudo rpm -ivh epel-release-latest-6.noarch.rpm

sudo yum install yara python-pip
sudo yum -y install python-pip python-devel gcc
sudo pip install --no-cache-dir --upgrade mwscan
```

Expand Down
57 changes: 14 additions & 43 deletions mwscan/mwscan.py
Expand Up @@ -29,31 +29,25 @@
"""

import os
import re
import sys
import argparse
import logging
import hashlib
import json
import time
from os import path

try:
import yara
except ImportError:
yara = None
from ruleset import providers

try:
import psutil
except ImportError:
psutil = None


DEFAULT_RULES_FILE = path.join(path.dirname(__file__), 'data', 'all-confirmed.yar')
LAST_RUN_FILE = path.expanduser('~/.mwscan_last_run')
CODE_EXT = ('php', 'phtml', 'js', 'jsx', 'html', 'php3', 'php4', 'php5', 'php7', 'sh')

logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%c', level=logging.DEBUG)
logging.getLogger('requests').setLevel(logging.WARNING)


def parse_args():
Expand All @@ -62,16 +56,23 @@ def parse_args():
description="Scan webfolders for hidden malware.",
)
parser.add_argument('path', help='File or dir to scan.')
parser.add_argument('-r', '--rules', help='Yara rules file.', default=DEFAULT_RULES_FILE)
parser.add_argument('-n', '--newonly', help='Only consider files that were modified/created since last succesful run.'.format(LAST_RUN_FILE), action='store_true')
parser.add_argument('-d', '--deep', action='store_true', help='Scan all files, not just code.')
parser.add_argument('-q', '--quiet', action='store_true', help='Supress output except for matches.')
parser.add_argument('-r', '--rules', help='Yara rules file.', default=DEFAULT_RULES_FILE)
parser.add_argument('-s', '--ruleset', choices=sorted(providers.keys()), help='Download and use from upstream')

args = parser.parse_args()

if args.quiet:
logging.root.setLevel(logging.WARNING)

if (args.ruleset and args.ruleset != 'file'):
args.rules = None

if args.rules:
args.ruleset = 'file'

if args.newonly and not os.path.isdir(args.path):
logging.error('I can only use --newonly on a directory!')
sys.exit(1)
Expand All @@ -85,7 +86,7 @@ def parse_args():
else:
args.req_ext = CODE_EXT

if not os.path.isfile(args.rules):
if args.rules and not os.path.isfile(args.rules):
logging.error("Rules file {0} does not exist. Use --rules <path>".format(args.rules))
sys.exit(1)

Expand Down Expand Up @@ -137,8 +138,7 @@ def find_targets(root_path, newer_than=None, req_ext=None):

if os.path.isfile(root_path):
yield root_path

if not os.path.isdir(root_path):
elif not os.path.isdir(root_path):
raise RuntimeError("%s is not a file or directory" % root_path)

for root, dirs, files in os.walk(root_path):
Expand All @@ -159,26 +159,6 @@ def find_targets(root_path, newer_than=None, req_ext=None):
yield path


def load_rules(path):
    """Compile the yara rules stored in ``path`` and collect its whitelist.

    The whitelist is not evaluated by yara itself. It is read from a
    ``/* ... WHITELIST = {...} */`` comment in the rules file, whose braces
    must contain a JSON object; the keys of that object become the
    whitelist entries.

    Args:
        path: location of the yara rules file.

    Returns:
        A ``(rules, whitelist)`` tuple: the compiled yara rules and a set
        of whitelist entries (empty when the file has no such comment).
    """
    with open(path) as fh:
        rawrules = fh.read()

    # Find whitelist hashes from comments, because yara whitelist
    # hashing is too slow. See https://github.com/VirusTotal/yara/issues/592
    # Raw string: the pattern contains backslash escapes meant for the regex
    # engine, not for the Python string parser.
    m = re.search(r'/\*[^*]*WHITELIST = (\{.*?\})\s*\*/', rawrules, flags=re.DOTALL)
    whitelist = set(json.loads(m.group(1)) if m else [])
    rules = yara.compile(source=rawrules)

    logging.debug("Loaded {0}: {1} yara rules and {2} whitelist entries".format(
        path,
        sum(1 for _ in rules),
        len(whitelist),
    ))

    return rules, whitelist


def scan_files(files, rules, whitelist):
num_files = 0
num_malware = 0
Expand All @@ -193,7 +173,6 @@ def scan_files(files, rules, whitelist):
sha1sum = hashlib.sha1(data).hexdigest()
if sha1sum in whitelist:
num_whitelisted += 1
logging.debug("Whitelisted: {0}".format(path))
continue

matches = rules.match(data=data)
Expand All @@ -208,22 +187,15 @@ def scan_files(files, rules, whitelist):

def main():

if not yara:
logging.warning("You need to install python(3)-yara. Try one of these\n\n"
"\tsudo apt-get install python-yara\n"
"\tsudo pip install yara")
return 1

args = parse_args()

# don't swamp the machine
if psutil:
mylife = psutil.Process()
mylife.ionice(psutil.IOPRIO_CLASS_IDLE)
else:
logging.warning("Missing psutil, not adjusting IO priority.")

rules, whitelist = load_rules(args.rules)
provider = providers[args.ruleset]
rules, whitelist = provider(args=args).get()

try:
files = find_targets(args.path,
Expand All @@ -241,6 +213,5 @@ def main():
return 1



if __name__ == '__main__':
sys.exit(main())
149 changes: 149 additions & 0 deletions mwscan/ruleset.py
@@ -0,0 +1,149 @@
import requests
import re
import json
import logging
import yara


def strip_last_url_path(url):
    """Return ``url`` without its final ``/``-separated segment.

    Everything in front of the last ``/`` is returned. A string that
    contains no ``/`` at all yields the empty string.
    """
    return url.rpartition('/')[0]


def last_url_path(url):
    """Return the part of ``url`` that follows its last ``/``.

    A string that contains no ``/`` is returned unchanged.
    """
    _, _, tail = url.rpartition('/')
    return tail


class RulesProvider:
    """Base class for a source of yara rules and whitelisted file hashes.

    Subclasses normally only set ``rules_url`` (and optionally
    ``whitelist_url``). They may override ``get_rules`` to obtain the raw
    rules some other way, or ``transform_rules`` to rewrite upstream rules
    before they are compiled. Callers use ``get()``.
    """

    # URL of the top-level yara rules file.
    rules_url = None
    # Optional URL of a document listing whitelisted sha1 hashes.
    whitelist_url = None
    # Seconds to wait on upstream before a download is abandoned.
    http_timeout = 30

    # Raw strings: these patterns contain backslash escapes that are meant
    # for the regex engine, not for the Python string parser.
    _WHITELIST_COMMENT_RE = re.compile(
        r'/\*[^*]*WHITELIST = (\{.*?\})\s*\*/', flags=re.DOTALL)
    _INCLUDE_RE = re.compile(r'include "([^"]+?)"\s+')
    _SHA1_RE = re.compile(r'[a-f0-9]{40}')  # assume sha1 hex hash

    def __init__(self, **kwargs):
        """Remember the optional ``args`` keyword (parsed CLI arguments)."""
        logging.info("Using {0} rules.".format(self.__class__.__name__))
        self._args = kwargs.get('args')

    def find_whitelist_in_rawrules(self, rawrules):
        """Return the whitelist embedded in a comment of ``rawrules``.

        Looks for a ``/* ... WHITELIST = {...} */`` comment whose braces hold
        a JSON object and returns the set of that object's keys, or an empty
        set when no such comment exists.
        """
        # Find whitelist hashes from comments, because yara whitelist
        # hashing is too slow. See https://github.com/VirusTotal/yara/issues/592
        m = self._WHITELIST_COMMENT_RE.search(rawrules)
        return set(json.loads(m.group(1)) if m else [])

    def get_rules(self):
        """Return the raw rules text fetched from ``rules_url``."""
        return self._recursive_fetch(self.rules_url)

    def get_whitelist(self):
        """Return the set of sha1 hex hashes found at ``whitelist_url``.

        Returns an empty set when the provider has no ``whitelist_url``.
        """
        if not self.whitelist_url:
            return set()

        data = self._httpget(self.whitelist_url)
        return set(self._SHA1_RE.findall(data))

    def transform_rules(self, rawrules):
        """For specific rules providers, to mangle into mwscan compatible form

        Returns a ``(rawrules, whitelist)`` tuple. The base implementation
        returns the rules unchanged together with an empty set.
        """
        whitelist = set()
        return rawrules, whitelist

    def _httpget(self, url):
        """Download ``url`` and return the response body as text.

        Raises:
            requests.exceptions.RequestException: on connection problems,
                timeouts, or when the server answers with an HTTP error
                status.
        """
        filename = last_url_path(url)
        logging.debug("Fetching {0}".format(filename))

        response = requests.get(url, timeout=self.http_timeout)
        # Fail here rather than hand an HTML error page to the yara compiler.
        response.raise_for_status()
        return response.text

    def get(self):
        """Returns rules, whitelist

        ``rules`` is the compiled yara ruleset. ``whitelist`` is the union of
        the hashes produced by ``transform_rules``, ``get_whitelist`` and
        ``find_whitelist_in_rawrules``.
        """
        rawrules = self.get_rules()

        # provider specific transformation
        rawrules, whitelist = self.transform_rules(rawrules)

        # if alternative whitelist method is required
        whitelist.update(self.get_whitelist())
        whitelist.update(self.find_whitelist_in_rawrules(rawrules))

        rules = yara.compile(source=rawrules)

        logging.debug("Loaded {0} yara rules and {1} whitelist entries".format(
            len(list(iter(rules))),
            len(whitelist),
        ))

        return rules, whitelist

    def _recursive_fetch(self, url):
        """Fetch ``url`` and inline every ``include "..."`` it contains.

        Each include path is resolved relative to the location of ``url``.
        The include statement is replaced by a marker comment followed by
        the content of the included file, which is itself processed the
        same way.
        """

        def include(match):
            relpath = match.group(1)
            newurl = strip_last_url_path(url) + '/' + relpath
            return "/* included from {0} */\n".format(newurl) + self._recursive_fetch(newurl)

        data = self._httpget(url)
        return self._INCLUDE_RE.sub(include, data)


class Files(RulesProvider):
    """Rules provider that reads the yara rules from a local file.

    Create it as ``Files(args=args)``; the file location is taken from
    ``args.rules``.
    """

    def get_rules(self):
        """Return the text content of the file named by ``args.rules``."""
        rules_path = self._args.rules
        logging.info("Loading {0}".format(rules_path))
        with open(rules_path) as handle:
            content = handle.read()
        return content


class NBS(RulesProvider):
    """Rules from the nbs-system php-malware-finder project."""

    rules_url = 'https://raw.githubusercontent.com/nbs-system/php-malware-finder/master/php-malware-finder/php.yar'

    def transform_rules(self, rawrules):
        """Split the upstream rules into scan rules and a hash whitelist.

        The raw text is cut into individual ``rule`` blocks. Blocks are then
        handled as follows:

        * the ``IsWhitelisted`` rule itself is dropped;
        * blocks that compare against sha1 hashes (or mention
          ``hash.sha1``) are dropped, and their hashes are added to the
          whitelist;
        * every other block is kept, with its ``and not IsWhitelisted``
          condition removed.

        Returns:
            A ``(rules, whitelist)`` tuple: the kept rule blocks joined by
            newlines, and the set of collected sha1 hex hashes.
        """
        whitelist = set()
        rules = []

        # Raw string: ``\{`` and ``\}`` are regex escapes, not string escapes.
        tokens = re.findall(
            r'\n(?:global )?(?:private )?rule .+?\n\{\n.+?\n\}',
            rawrules,
            flags=re.DOTALL,
        )

        for token in tokens:
            if 'rule IsWhitelisted' in token:
                continue

            hashes = re.findall(r'== "([a-f0-9]{40})"', token)
            if hashes or 'hash.sha1' in token:
                whitelist.update(hashes)
                continue

            token = re.sub(' and not IsWhitelisted', '', token.strip())
            rules.append(token.strip())

        return '\n'.join(rules), whitelist


class Magemojo(RulesProvider):
    """Rules and sha1 whitelist from the magesec/magesecurityscanner repository."""

    # Top-level yara rules file.
    rules_url = 'https://raw.githubusercontent.com/magesec/magesecurityscanner/master/yararules.yar'
    # Document that is searched for whitelisted sha1 hashes.
    whitelist_url = 'https://raw.githubusercontent.com/magesec/magesecurityscanner/master/magesecurityscan/sha1whitelist.json'


class Magesec(RulesProvider):
    """Rules and whitelist downloaded from magesec.org."""

    # Top-level yara rules file.
    rules_url = 'https://magesec.org/download/yara-standard.yar'
    # Document that is searched for whitelisted sha1 hashes.
    whitelist_url = 'https://magesec.org/download/whitelist.json'


class Byte(RulesProvider):
    """Rules from the gwillem/magento-malware-scanner repository."""

    # Top-level yara rules file; no separate whitelist URL is configured.
    rules_url = 'https://raw.githubusercontent.com/gwillem/magento-malware-scanner/master/build/all-confirmed.yar'


# Registry of available rule sources, keyed by the short name used to
# select a provider class.
providers = {
    'nbs': NBS,
    'byte': Byte,
    'magemojo': Magemojo,
    'magesec': Magesec,
    'file': Files,
}
15 changes: 12 additions & 3 deletions mwscan/tests/test_mwscan.py
@@ -1,11 +1,18 @@
import os
from unittest import TestCase
from mwscan.mwscan import find_targets, load_rules, find_last_run_timestamp, write_last_run_timestamp, \
from mwscan.mwscan import find_targets, find_last_run_timestamp, write_last_run_timestamp, \
scan_files, path_to_timestamp_file
from mwscan.ruleset import Files
from collections import namedtuple


class TestWebMalwareScanner(TestCase):

def _load_file_rules(self, path):
    """Load rules and whitelist from the rules file at ``path``.

    Builds a minimal stand-in for the parsed command line arguments,
    carrying only the ``rules`` attribute, and passes it to ``Files``.
    """
    args_type = namedtuple('Args', 'rules')
    provider = Files(args=args_type(rules=path))
    return provider.get()


def setUp(self):
self.fixture_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'fixtures')
self.rules_path = os.path.join(self.fixture_path, 'rules.yar')
Expand All @@ -20,7 +27,8 @@ def setUp(self):
except OSError:
pass

self.rules, self.whitelist = load_rules(self.rules_path)
self.rules, self.whitelist = self._load_file_rules(self.rules_path)


def test_normal_scan(self):
files = find_targets(self.target_path)
Expand Down Expand Up @@ -62,7 +70,8 @@ def test_filter_extensions(self):
def test_external_rule_file(self):
files = find_targets(self.target_path)

self.rules, self.whitelist = load_rules(os.path.join(self.fixture_path, 'rules-vanilla.yar'))
rules_path = os.path.join(self.fixture_path, 'rules-vanilla.yar')
self.rules, self.whitelist = self._load_file_rules(rules_path)

total, malware, whitelisted = scan_files(files, self.rules, self.whitelist)

Expand Down