From f7deb31dc1f08ed130192ce18a16502d8c5b9182 Mon Sep 17 00:00:00 2001 From: Robert Harris Date: Fri, 25 Jan 2019 14:52:55 -0500 Subject: [PATCH 1/8] fix for --- build_lambda.sh | 4 ++- clamav.py | 65 +++++++++++++++++++++++++++++++------------------ common.py | 3 ++- 3 files changed, 46 insertions(+), 26 deletions(-) diff --git a/build_lambda.sh b/build_lambda.sh index 456600e5..51eadaeb 100755 --- a/build_lambda.sh +++ b/build_lambda.sh @@ -24,7 +24,9 @@ yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarc pip install --no-cache-dir virtualenv virtualenv env . env/bin/activate -pip install --no-cache-dir -r requirements.txt +# This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel +pip install -r requirements.txt +rm -rf /root/.cache/pip pushd /tmp yumdownloader -x \*i686 --archlist=x86_64 clamav clamav-lib clamav-update json-c pcre2 diff --git a/clamav.py b/clamav.py index 219899c1..5fcd25d3 100644 --- a/clamav.py +++ b/clamav.py @@ -29,34 +29,51 @@ def current_library_search_path(): def update_defs_from_s3(bucket, prefix): create_dir(AV_DEFINITION_PATH) - for filename in AV_DEFINITION_FILENAMES: - s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename) - local_path = os.path.join(AV_DEFINITION_PATH, filename) - s3_md5 = md5_from_s3_tags(bucket, s3_path) - if os.path.exists(local_path) and md5_from_file(local_path) == s3_md5: - print("Not downloading %s because local md5 matches s3." % filename) - continue - if s3_md5: - print("Downloading definition file %s from s3://%s" % (filename, os.path.join(bucket, prefix))) - s3.Bucket(bucket).download_file(s3_path, local_path) + to_download = {} + for file_prefix in AV_DEFINITION_FILE_PREFIXES: + s3_time_previous = None + for file_suffix in AV_DEFINITION_FILE_SUFFIXES: + filename = file_prefix + '.' + file_suffix + s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename) + local_path = os.path.join(AV_DEFINITION_PATH, filename) + s3_md5 = md5_from_s3_tags(bucket, s3_path) + s3_time = s3.Object(bucket, s3_path).last_modified + + if s3_time_previous is not None and s3_time < s3_time_previous: + print("Not downloading older file in series: %s" % filename) + continue + if os.path.exists(local_path) and md5_from_file(local_path) == s3_md5: + print("Not downloading %s because local md5 matches s3." % filename) + continue + if s3_md5: + s3_time_previous = s3_time + print("Downloading definition file %s from s3://%s" % (filename, os.path.join(bucket, prefix))) + to_download[file_prefix] = {"s3_path": s3_path, "local_path": local_path} + + for file in to_download.values(): + s3.Bucket(bucket).download_file(file["s3_path"], file["local_path"]) def upload_defs_to_s3(bucket, prefix, local_path): - for filename in AV_DEFINITION_FILENAMES: - local_file_path = os.path.join(local_path, filename) - if os.path.exists(local_file_path): - local_file_md5 = md5_from_file(local_file_path) - if local_file_md5 != md5_from_s3_tags(bucket, os.path.join(prefix, filename)): - print("Uploading %s to s3://%s" % (local_file_path, os.path.join(bucket, prefix, filename))) - s3_object = s3.Object(bucket, os.path.join(prefix, filename)) - s3_object.upload_file(os.path.join(local_path, filename)) - s3_client.put_object_tagging( - Bucket=s3_object.bucket_name, - Key=s3_object.key, - Tagging={"TagSet": [{"Key": "md5", "Value": local_file_md5}]} - ) + for file_prefix in AV_DEFINITION_FILE_PREFIXES: + for file_suffix in AV_DEFINITION_FILE_SUFFIXES: + filename = file_prefix + '.' + file_suffix + local_file_path = os.path.join(local_path, filename) + if os.path.exists(local_file_path): + local_file_md5 = md5_from_file(local_file_path) + if local_file_md5 != md5_from_s3_tags(bucket, os.path.join(prefix, filename)): + print("Uploading %s to s3://%s" % (local_file_path, os.path.join(bucket, prefix, filename))) + s3_object = s3.Object(bucket, os.path.join(prefix, filename)) + s3_object.upload_file(os.path.join(local_path, filename)) + s3_client.put_object_tagging( + Bucket=s3_object.bucket_name, + Key=s3_object.key, + Tagging={"TagSet": [{"Key": "md5", "Value": local_file_md5}]} + ) + else: + print("Not uploading %s because md5 on remote matches local." % filename) else: - print("Not uploading %s because md5 on remote matches local." % filename) + print("File does not exist: %s" % filename) def update_defs_from_freshclam(path, library_path=""): diff --git a/common.py b/common.py index d45189c7..1c488633 100644 --- a/common.py +++ b/common.py @@ -32,7 +32,8 @@ AV_PROCESS_ORIGINAL_VERSION_ONLY = os.getenv("AV_PROCESS_ORIGINAL_VERSION_ONLY", "False") AV_DELETE_INFECTED_FILES = os.getenv("AV_DELETE_INFECTED_FILES", "False") -AV_DEFINITION_FILENAMES = ["main.cvd","daily.cvd", "daily.cud", "bytecode.cvd", "bytecode.cud"] +AV_DEFINITION_FILE_PREFIXES = ["main", "daily", "bytecode"] +AV_DEFINITION_FILE_SUFFIXES = ["cld", "cvd"] s3 = boto3.resource('s3') s3_client = boto3.client('s3') From 8958b59aaa94a9bdc7e50409d405a892ee8a90b7 Mon Sep 17 00:00:00 2001 From: Robert Harris Date: Mon, 28 Jan 2019 10:18:33 -0500 Subject: [PATCH 2/8] fix for #51, #11 --- clamav.py | 14 +++++++++++++- requirements.txt | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/clamav.py b/clamav.py index 5fcd25d3..e194c987 100644 --- a/clamav.py +++ b/clamav.py @@ -19,6 +19,8 @@ import re from common import * from subprocess import check_output, Popen, PIPE, STDOUT +from pytz import utc +from datetime import datetime def current_library_search_path(): @@ -37,7 +39,7 @@ def update_defs_from_s3(bucket, prefix): s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename) local_path = os.path.join(AV_DEFINITION_PATH, filename) s3_md5 = md5_from_s3_tags(bucket, s3_path) - s3_time = s3.Object(bucket, s3_path).last_modified + s3_time = time_from_s3(bucket, s3_path) if s3_time_previous is not None and s3_time < s3_time_previous: print("Not downloading older file in series: %s" % filename) @@ -122,6 +124,16 @@ def md5_from_s3_tags(bucket, key): return tag["Value"] return "" +def time_from_s3(bucket, key): + try: + time = s3.Object(bucket, key).last_modified + except botocore.exceptions.ClientError as e: + expected_errors = {'404', 'AccessDenied', 'NoSuchKey'} + if e.response['Error']['Code'] in expected_errors: + return datetime.fromtimestamp(0, utc) + else: + raise + return time def scan_file(path): av_env = os.environ.copy() diff --git a/requirements.txt b/requirements.txt index db53d65c..bf5297bf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ idna==2.8 requests==2.21 simplejson==3.16 urllib3==1.24.1 +pytz==2018.9 \ No newline at end of file From cf5ef9bd3f3d97acce1675bebc351fc6d5ab2b7c Mon Sep 17 00:00:00 2001 From: Robert Harris Date: Mon, 28 Jan 2019 10:53:40 -0500 Subject: [PATCH 3/8] some time logic errors --- clamav.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/clamav.py b/clamav.py index e194c987..66bbd962 100644 --- a/clamav.py +++ b/clamav.py @@ -33,7 +33,7 @@ def update_defs_from_s3(bucket, prefix): create_dir(AV_DEFINITION_PATH) to_download = {} for file_prefix in AV_DEFINITION_FILE_PREFIXES: - s3_time_previous = None + s3_best_time = None for file_suffix in AV_DEFINITION_FILE_SUFFIXES: filename = file_prefix + '.' + file_suffix s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename) @@ -41,14 +41,16 @@ def update_defs_from_s3(bucket, prefix): s3_md5 = md5_from_s3_tags(bucket, s3_path) s3_time = time_from_s3(bucket, s3_path) - if s3_time_previous is not None and s3_time < s3_time_previous: + if s3_best_time is not None and s3_time < s3_best_time: print("Not downloading older file in series: %s" % filename) continue + else: + s3_best_time = s3_time + if os.path.exists(local_path) and md5_from_file(local_path) == s3_md5: print("Not downloading %s because local md5 matches s3." % filename) continue if s3_md5: - s3_time_previous = s3_time print("Downloading definition file %s from s3://%s" % (filename, os.path.join(bucket, prefix))) to_download[file_prefix] = {"s3_path": s3_path, "local_path": local_path} From 16669c81081bd95424084785ee3e4d4fbe657546 Mon Sep 17 00:00:00 2001 From: Robert Harris Date: Tue, 29 Jan 2019 15:42:36 -0500 Subject: [PATCH 4/8] FNM-7193 - incorporate option from pull 62 --- build_lambda.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/build_lambda.sh b/build_lambda.sh index 51eadaeb..11d0ab46 100755 --- a/build_lambda.sh +++ b/build_lambda.sh @@ -39,6 +39,7 @@ popd mkdir -p bin cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* bin/. echo "DatabaseMirror database.clamav.net" > bin/freshclam.conf +echo "CompressLocalDatabase yes" >> bin/freshclam.conf mkdir -p build zip -r9 $lambda_output_file *.py bin From 3ada65e15910323c3ae4a46750327009e7ec96f3 Mon Sep 17 00:00:00 2001 From: Robert Harris Date: Tue, 29 Jan 2019 15:42:36 -0500 Subject: [PATCH 5/8] incorporate option from pull 62 --- build_lambda.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/build_lambda.sh b/build_lambda.sh index 51eadaeb..11d0ab46 100755 --- a/build_lambda.sh +++ b/build_lambda.sh @@ -39,6 +39,7 @@ popd mkdir -p bin cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* bin/. echo "DatabaseMirror database.clamav.net" > bin/freshclam.conf +echo "CompressLocalDatabase yes" >> bin/freshclam.conf mkdir -p build zip -r9 $lambda_output_file *.py bin From 49e57a13e18fc25536e90f9d14b9bf197f5f3f7b Mon Sep 17 00:00:00 2001 From: Robert Harris Date: Wed, 30 Jan 2019 15:37:41 -0500 Subject: [PATCH 6/8] using the client only requires the GetObject Action --- clamav.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clamav.py b/clamav.py index 66bbd962..75b6fc73 100644 --- a/clamav.py +++ b/clamav.py @@ -128,7 +128,7 @@ def md5_from_s3_tags(bucket, key): def time_from_s3(bucket, key): try: - time = s3.Object(bucket, key).last_modified + time = s3_client.head_object(Bucket=bucket, Key=key)["LastModified"] except botocore.exceptions.ClientError as e: expected_errors = {'404', 'AccessDenied', 'NoSuchKey'} if e.response['Error']['Code'] in expected_errors: From f0f6283d18831f0630fac2f257cd49fcbfae5315 Mon Sep 17 00:00:00 2001 From: Chris Gilmer Date: Fri, 18 Oct 2019 13:56:39 -0700 Subject: [PATCH 7/8] Update pytz --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index bf5297bf..f6514d22 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,4 @@ idna==2.8 requests==2.21 simplejson==3.16 urllib3==1.24.1 -pytz==2018.9 \ No newline at end of file +pytz==2019.3 From 6ab1836b3aaf0d05d2fb59f61be43e1f05181175 Mon Sep 17 00:00:00 2001 From: Chris Gilmer Date: Fri, 18 Oct 2019 15:56:12 -0700 Subject: [PATCH 8/8] Add tests for update_defs_from_s3 and related functions --- clamav.py | 22 ++-- clamav_test.py | 262 +++++++++++++++++++++++++++++++++++++++++++ common.py | 1 + common_test.py | 62 ++++++++++ requirements-dev.txt | 1 + scan.py | 4 +- update.py | 17 ++- 7 files changed, 352 insertions(+), 17 deletions(-) create mode 100644 common_test.py diff --git a/clamav.py b/clamav.py index f64683cd..0e68a36a 100644 --- a/clamav.py +++ b/clamav.py @@ -45,7 +45,7 @@ def current_library_search_path(): return rd_ld.findall(ld_verbose) -def update_defs_from_s3(bucket, prefix): +def update_defs_from_s3(s3_client, bucket, prefix): create_dir(AV_DEFINITION_PATH) to_download = {} for file_prefix in AV_DEFINITION_FILE_PREFIXES: @@ -54,8 +54,8 @@ def update_defs_from_s3(bucket, prefix): filename = file_prefix + "." + file_suffix s3_path = os.path.join(AV_DEFINITION_S3_PREFIX, filename) local_path = os.path.join(AV_DEFINITION_PATH, filename) - s3_md5 = md5_from_s3_tags(bucket, s3_path) - s3_time = time_from_s3(bucket, s3_path) + s3_md5 = md5_from_s3_tags(s3_client, bucket, s3_path) + s3_time = time_from_s3(s3_client, bucket, s3_path) if s3_best_time is not None and s3_time < s3_best_time: print("Not downloading older file in series: %s" % filename) @@ -75,14 +75,10 @@ def update_defs_from_s3(bucket, prefix): "s3_path": s3_path, "local_path": local_path, } + return to_download - s3 = boto3.resource("s3") - for file in to_download.values(): - s3.Bucket(bucket).download_file(file["s3_path"], file["local_path"]) - -def upload_defs_to_s3(bucket, prefix, local_path): - s3_client = boto3.client("s3") +def upload_defs_to_s3(s3_client, bucket, prefix, local_path): for file_prefix in AV_DEFINITION_FILE_PREFIXES: for file_suffix in AV_DEFINITION_FILE_SUFFIXES: filename = file_prefix + "." + file_suffix @@ -90,7 +86,7 @@ def upload_defs_to_s3(bucket, prefix, local_path): if os.path.exists(local_file_path): local_file_md5 = md5_from_file(local_file_path) if local_file_md5 != md5_from_s3_tags( - bucket, os.path.join(prefix, filename) + s3_client, bucket, os.path.join(prefix, filename) ): print( "Uploading %s to s3://%s" @@ -148,8 +144,7 @@ def md5_from_file(filename): return hash_md5.hexdigest() -def md5_from_s3_tags(bucket, key): - s3_client = boto3.client("s3") +def md5_from_s3_tags(s3_client, bucket, key): try: tags = s3_client.get_object_tagging(Bucket=bucket, Key=key)["TagSet"] except botocore.exceptions.ClientError as e: @@ -164,8 +159,7 @@ def md5_from_s3_tags(bucket, key): return "" -def time_from_s3(bucket, key): - s3_client = boto3.client("s3") +def time_from_s3(s3_client, bucket, key): try: time = s3_client.head_object(Bucket=bucket, Key=key)["LastModified"] except botocore.exceptions.ClientError as e: diff --git a/clamav_test.py b/clamav_test.py index f1db7dc7..0ffbbf96 100644 --- a/clamav_test.py +++ b/clamav_test.py @@ -13,16 +13,41 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime +import os import re import textwrap import unittest +import boto3 +import botocore.session +from botocore.stub import Stubber +import mock + from clamav import RE_SEARCH_DIR from clamav import scan_output_to_json +from clamav import md5_from_s3_tags +from clamav import time_from_s3 +from clamav import update_defs_from_s3 +from common import AV_DEFINITION_FILE_PREFIXES +from common import AV_DEFINITION_FILE_SUFFIXES +from common import AV_DEFINITION_S3_PREFIX from common import AV_SIGNATURE_OK class TestClamAV(unittest.TestCase): + def setUp(self): + # Common data + self.s3_bucket_name = "test_bucket" + self.s3_key_name = "test_key" + + # Clients and Resources + self.s3 = boto3.resource("s3") + self.s3_client = botocore.session.get_session().create_client("s3") + self.sns_client = botocore.session.get_session().create_client( + "sns", region_name="us-west-2" + ) + def test_current_library_search_path(self): # Calling `ld --verbose` returns a lot of text but the line to check is this one: search_path = """SEARCH_DIR("=/usr/x86_64-redhat-linux/lib64"); SEARCH_DIR("=/usr/lib64"); SEARCH_DIR("=/usr/local/lib64"); SEARCH_DIR("=/lib64"); SEARCH_DIR("=/usr/x86_64-redhat-linux/lib"); SEARCH_DIR("=/usr/local/lib"); SEARCH_DIR("=/lib"); SEARCH_DIR("=/usr/lib");""" # noqa @@ -88,3 +113,240 @@ def test_scan_output_to_json_infected(self): summary = scan_output_to_json(output) self.assertEqual(summary[file_path], signature) self.assertEqual(summary["Infected files"], "1") + + def test_md5_from_s3_tags_no_md5(self): + tag_set = {"TagSet": []} + + s3_stubber = Stubber(self.s3_client) + get_object_tagging_response = tag_set + get_object_tagging_expected_params = { + "Bucket": self.s3_bucket_name, + "Key": self.s3_key_name, + } + s3_stubber.add_response( + "get_object_tagging", + get_object_tagging_response, + get_object_tagging_expected_params, + ) + with s3_stubber: + md5_hash = md5_from_s3_tags( + self.s3_client, self.s3_bucket_name, self.s3_key_name + ) + self.assertEquals("", md5_hash) + + def test_md5_from_s3_tags_has_md5(self): + expected_md5_hash = "d41d8cd98f00b204e9800998ecf8427e" + tag_set = {"TagSet": [{"Key": "md5", "Value": expected_md5_hash}]} + + s3_stubber = Stubber(self.s3_client) + get_object_tagging_response = tag_set + get_object_tagging_expected_params = { + "Bucket": self.s3_bucket_name, + "Key": self.s3_key_name, + } + s3_stubber.add_response( + "get_object_tagging", + get_object_tagging_response, + get_object_tagging_expected_params, + ) + with s3_stubber: + md5_hash = md5_from_s3_tags( + self.s3_client, self.s3_bucket_name, self.s3_key_name + ) + self.assertEquals(expected_md5_hash, md5_hash) + + def test_time_from_s3(self): + + expected_s3_time = datetime.datetime(2019, 1, 1) + + s3_stubber = Stubber(self.s3_client) + head_object_response = {"LastModified": expected_s3_time} + head_object_expected_params = { + "Bucket": self.s3_bucket_name, + "Key": self.s3_key_name, + } + s3_stubber.add_response( + "head_object", head_object_response, head_object_expected_params + ) + with s3_stubber: + s3_time = time_from_s3( + self.s3_client, self.s3_bucket_name, self.s3_key_name + ) + self.assertEquals(expected_s3_time, s3_time) + + @mock.patch("clamav.md5_from_file") + @mock.patch("common.os.path.exists") + def test_update_defs_from_s3(self, mock_exists, mock_md5_from_file): + expected_md5_hash = "d41d8cd98f00b204e9800998ecf8427e" + different_md5_hash = "d41d8cd98f00b204e9800998ecf8427f" + + mock_md5_from_file.return_value = different_md5_hash + + tag_set = {"TagSet": [{"Key": "md5", "Value": expected_md5_hash}]} + expected_s3_time = datetime.datetime(2019, 1, 1) + + s3_stubber = Stubber(self.s3_client) + + key_names = [] + side_effect = [] + for file_prefix in AV_DEFINITION_FILE_PREFIXES: + for file_suffix in AV_DEFINITION_FILE_SUFFIXES: + side_effect.extend([True, True]) + filename = file_prefix + "." + file_suffix + key_names.append(os.path.join(AV_DEFINITION_S3_PREFIX, filename)) + mock_exists.side_effect = side_effect + + for s3_key_name in key_names: + get_object_tagging_response = tag_set + get_object_tagging_expected_params = { + "Bucket": self.s3_bucket_name, + "Key": s3_key_name, + } + s3_stubber.add_response( + "get_object_tagging", + get_object_tagging_response, + get_object_tagging_expected_params, + ) + head_object_response = {"LastModified": expected_s3_time} + head_object_expected_params = { + "Bucket": self.s3_bucket_name, + "Key": s3_key_name, + } + s3_stubber.add_response( + "head_object", head_object_response, head_object_expected_params + ) + + expected_to_download = { + "bytecode": { + "local_path": "/tmp/clamav_defs/bytecode.cvd", + "s3_path": "clamav_defs/bytecode.cvd", + }, + "daily": { + "local_path": "/tmp/clamav_defs/daily.cvd", + "s3_path": "clamav_defs/daily.cvd", + }, + "main": { + "local_path": "/tmp/clamav_defs/main.cvd", + "s3_path": "clamav_defs/main.cvd", + }, + } + with s3_stubber: + to_download = update_defs_from_s3( + self.s3_client, self.s3_bucket_name, AV_DEFINITION_S3_PREFIX + ) + self.assertEquals(expected_to_download, to_download) + + @mock.patch("clamav.md5_from_file") + @mock.patch("common.os.path.exists") + def test_update_defs_from_s3_same_hash(self, mock_exists, mock_md5_from_file): + expected_md5_hash = "d41d8cd98f00b204e9800998ecf8427e" + different_md5_hash = expected_md5_hash + + mock_md5_from_file.return_value = different_md5_hash + + tag_set = {"TagSet": [{"Key": "md5", "Value": expected_md5_hash}]} + expected_s3_time = datetime.datetime(2019, 1, 1) + + s3_stubber = Stubber(self.s3_client) + + key_names = [] + side_effect = [] + for file_prefix in AV_DEFINITION_FILE_PREFIXES: + for file_suffix in AV_DEFINITION_FILE_SUFFIXES: + side_effect.extend([True, True]) + filename = file_prefix + "." + file_suffix + key_names.append(os.path.join(AV_DEFINITION_S3_PREFIX, filename)) + mock_exists.side_effect = side_effect + + for s3_key_name in key_names: + get_object_tagging_response = tag_set + get_object_tagging_expected_params = { + "Bucket": self.s3_bucket_name, + "Key": s3_key_name, + } + s3_stubber.add_response( + "get_object_tagging", + get_object_tagging_response, + get_object_tagging_expected_params, + ) + head_object_response = {"LastModified": expected_s3_time} + head_object_expected_params = { + "Bucket": self.s3_bucket_name, + "Key": s3_key_name, + } + s3_stubber.add_response( + "head_object", head_object_response, head_object_expected_params + ) + + expected_to_download = {} + with s3_stubber: + to_download = update_defs_from_s3( + self.s3_client, self.s3_bucket_name, AV_DEFINITION_S3_PREFIX + ) + self.assertEquals(expected_to_download, to_download) + + @mock.patch("clamav.md5_from_file") + @mock.patch("common.os.path.exists") + def test_update_defs_from_s3_old_files(self, mock_exists, mock_md5_from_file): + expected_md5_hash = "d41d8cd98f00b204e9800998ecf8427e" + different_md5_hash = "d41d8cd98f00b204e9800998ecf8427f" + + mock_md5_from_file.return_value = different_md5_hash + + tag_set = {"TagSet": [{"Key": "md5", "Value": expected_md5_hash}]} + expected_s3_time = datetime.datetime(2019, 1, 1) + + s3_stubber = Stubber(self.s3_client) + + key_names = [] + side_effect = [] + for file_prefix in AV_DEFINITION_FILE_PREFIXES: + for file_suffix in AV_DEFINITION_FILE_SUFFIXES: + side_effect.extend([True, True]) + filename = file_prefix + "." + file_suffix + key_names.append(os.path.join(AV_DEFINITION_S3_PREFIX, filename)) + mock_exists.side_effect = side_effect + + count = 0 + for s3_key_name in key_names: + get_object_tagging_response = tag_set + get_object_tagging_expected_params = { + "Bucket": self.s3_bucket_name, + "Key": s3_key_name, + } + s3_stubber.add_response( + "get_object_tagging", + get_object_tagging_response, + get_object_tagging_expected_params, + ) + head_object_response = { + "LastModified": expected_s3_time - datetime.timedelta(hours=count) + } + head_object_expected_params = { + "Bucket": self.s3_bucket_name, + "Key": s3_key_name, + } + s3_stubber.add_response( + "head_object", head_object_response, head_object_expected_params + ) + count += 1 + + expected_to_download = { + "bytecode": { + "local_path": "/tmp/clamav_defs/bytecode.cld", + "s3_path": "clamav_defs/bytecode.cld", + }, + "daily": { + "local_path": "/tmp/clamav_defs/daily.cld", + "s3_path": "clamav_defs/daily.cld", + }, + "main": { + "local_path": "/tmp/clamav_defs/main.cld", + "s3_path": "clamav_defs/main.cld", + }, + } + with s3_stubber: + to_download = update_defs_from_s3( + self.s3_client, self.s3_bucket_name, AV_DEFINITION_S3_PREFIX + ) + self.assertEquals(expected_to_download, to_download) diff --git a/common.py b/common.py index 0de4db08..9e95af96 100644 --- a/common.py +++ b/common.py @@ -16,6 +16,7 @@ import errno import datetime import os +import os.path AV_DEFINITION_S3_BUCKET = os.getenv("AV_DEFINITION_S3_BUCKET") AV_DEFINITION_S3_PREFIX = os.getenv("AV_DEFINITION_S3_PREFIX", "clamav_defs") diff --git a/common_test.py b/common_test.py new file mode 100644 index 00000000..cfe89b9e --- /dev/null +++ b/common_test.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# Upside Travel, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import errno +import unittest + +import mock + +from common import create_dir + + +class TestCommon(unittest.TestCase): + @mock.patch("common.os.path") + @mock.patch("common.os") + def test_create_dir_already_exists(self, mock_os, mock_path): + mock_path.exists.return_value = True + create_dir("testpath") + self.assertFalse( + mock_os.makedirs.called, "Failed to not make directories if path present." + ) + + @mock.patch("common.os.path") + @mock.patch("common.os") + def test_create_dir_doesnt_exist(self, mock_os, mock_path): + mock_path.exists.return_value = False + create_dir("testpath") + self.assertTrue( + mock_os.makedirs.called, "Failed to make directories if path not present." + ) + + @mock.patch("common.os.path") + @mock.patch("common.os") + def test_create_dir_doesnt_exist_no_raises(self, mock_os, mock_path): + mock_path.exists.return_value = False + mock_os.makedirs.side_effect = OSError(errno.EEXIST, "exists") + create_dir("testpath") + self.assertTrue( + mock_os.makedirs.called, "Failed to make directories if path not present." + ) + + @mock.patch("common.os.path") + @mock.patch("common.os") + def test_create_dir_doesnt_exist_but_raises(self, mock_os, mock_path): + mock_path.exists.return_value = False + mock_os.makedirs.side_effect = OSError(errno.ENAMETOOLONG, "nametoolong") + with self.assertRaises(OSError): + create_dir("testpath") + self.assertTrue( + mock_os.makedirs.called, "Failed to make directories if path not present." + ) diff --git a/requirements-dev.txt b/requirements-dev.txt index 65e15f7e..b6f1ae9a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,4 +3,5 @@ boto3 # Test requirements coverage +mock==3.0.5 nose diff --git a/scan.py b/scan.py index a6d2784c..df267827 100644 --- a/scan.py +++ b/scan.py @@ -222,7 +222,9 @@ def lambda_handler(event, context): file_path = get_local_path(s3_object, "/tmp") create_dir(os.path.dirname(file_path)) s3_object.download_file(file_path) - clamav.update_defs_from_s3(AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX) + clamav.update_defs_from_s3( + s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX + ) scan_result, scan_signature = clamav.scan_file(file_path) print( "Scan of s3://%s resulted in %s\n" diff --git a/update.py b/update.py index 37fdf1aa..02bdeede 100644 --- a/update.py +++ b/update.py @@ -15,6 +15,8 @@ import os +import boto3 + import clamav from common import AV_DEFINITION_PATH from common import AV_DEFINITION_S3_BUCKET @@ -24,8 +26,19 @@ def lambda_handler(event, context): + s3 = boto3.resource("s3") + s3_client = boto3.client("s3") + print("Script starting at %s\n" % (get_timestamp())) - clamav.update_defs_from_s3(AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX) + to_download = clamav.update_defs_from_s3( + s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX + ) + + for download in to_download.values(): + s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file( + download["s3_path"], download["local_path"] + ) + clamav.update_defs_from_freshclam(AV_DEFINITION_PATH, CLAMAVLIB_PATH) # If main.cvd gets updated (very rare), we will need to force freshclam # to download the compressed version to keep file sizes down. @@ -36,6 +49,6 @@ def lambda_handler(event, context): os.remove(os.path.join(AV_DEFINITION_PATH, "main.cvd")) clamav.update_defs_from_freshclam(AV_DEFINITION_PATH, CLAMAVLIB_PATH) clamav.upload_defs_to_s3( - AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX, AV_DEFINITION_PATH + s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX, AV_DEFINITION_PATH ) print("Script finished at %s\n" % get_timestamp())