diff --git a/README.md b/README.md index e0ce5cb6..821171ab 100644 --- a/README.md +++ b/README.md @@ -264,6 +264,7 @@ the table below for reference. | AV_SIGNATURE_METADATA | The tag/metadata name representing file's AV type | av-signature | No | | AV_STATUS_CLEAN | The value assigned to clean items inside of tags/metadata | CLEAN | No | | AV_STATUS_INFECTED | The value assigned to clean items inside of tags/metadata | INFECTED | No | +| AV_STATUS_FAILED | The value assigned to items when scanning fails to complete | SCAN_FAILED | No | | AV_STATUS_METADATA | The tag/metadata name representing file's AV status | av-status | No | | AV_STATUS_SNS_ARN | SNS topic ARN to publish scan results (optional) | | No | | AV_STATUS_SNS_PUBLISH_CLEAN | Publish AV_STATUS_CLEAN results to AV_STATUS_SNS_ARN | True | No | diff --git a/clamav.py b/clamav.py index 8042e739..8cdc0418 100644 --- a/clamav.py +++ b/clamav.py @@ -32,6 +32,7 @@ from common import AV_SIGNATURE_UNKNOWN from common import AV_STATUS_CLEAN from common import AV_STATUS_INFECTED +from common import AV_STATUS_FAILED from common import CLAMAVLIB_PATH from common import CLAMSCAN_PATH from common import FRESHCLAM_PATH @@ -207,4 +208,4 @@ def scan_file(path): else: msg = "Unexpected exit code from clamscan: %s.\n" % av_proc.returncode print(msg) - raise Exception(msg) + return AV_STATUS_FAILED, AV_SIGNATURE_UNKNOWN diff --git a/common.py b/common.py index 9e95af96..d04fb140 100644 --- a/common.py +++ b/common.py @@ -28,6 +28,7 @@ AV_SIGNATURE_UNKNOWN = "UNKNOWN" AV_STATUS_CLEAN = os.getenv("AV_STATUS_CLEAN", "CLEAN") AV_STATUS_INFECTED = os.getenv("AV_STATUS_INFECTED", "INFECTED") +AV_STATUS_FAILED = os.getenv("AV_STATUS_FAILED", "SCAN_FAILED") AV_STATUS_METADATA = os.getenv("AV_STATUS_METADATA", "av-status") AV_STATUS_SNS_ARN = os.getenv("AV_STATUS_SNS_ARN") AV_STATUS_SNS_PUBLISH_CLEAN = os.getenv("AV_STATUS_SNS_PUBLISH_CLEAN", "True") diff --git a/metrics.py b/metrics.py index 412a4997..2f3ced39 100644 --- a/metrics.py +++ b/metrics.py @@ -18,6 
+18,7 @@ import datadog from common import AV_STATUS_CLEAN from common import AV_STATUS_INFECTED +from common import AV_STATUS_FAILED def send(env, bucket, key, status): @@ -37,6 +38,8 @@ def send(env, bucket, key, status): text="Virus found in s3://%s/%s." % (bucket, key), tags=metric_tags, ) + elif status == AV_STATUS_FAILED: + result_metric_name = "scan_failed" scanned_metric = { "metric": "s3_antivirus.scanned", diff --git a/scan.py b/scan.py index c55716d4..03d84bc1 100644 --- a/scan.py +++ b/scan.py @@ -32,6 +32,7 @@ from common import AV_SIGNATURE_METADATA from common import AV_STATUS_CLEAN from common import AV_STATUS_INFECTED +from common import AV_STATUS_FAILED from common import AV_STATUS_METADATA from common import AV_STATUS_SNS_ARN from common import AV_STATUS_SNS_PUBLISH_CLEAN @@ -198,31 +199,9 @@ def sns_scan_results( ) -def lambda_handler(event, context): - s3 = boto3.resource("s3") +def download_clamav_databases(): s3_client = boto3.client("s3") - sns_client = boto3.client("sns") - - # Get some environment variables - ENV = os.getenv("ENV", "") - EVENT_SOURCE = os.getenv("EVENT_SOURCE", "S3") - - start_time = get_timestamp() - print("Script starting at %s\n" % (start_time)) - s3_object = event_object(event, event_source=EVENT_SOURCE) - - if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY): - verify_s3_object_version(s3, s3_object) - - # Publish the start time of the scan - if AV_SCAN_START_SNS_ARN not in [None, ""]: - start_scan_time = get_timestamp() - sns_start_scan(sns_client, s3_object, AV_SCAN_START_SNS_ARN, start_scan_time) - - file_path = get_local_path(s3_object, "/tmp") - create_dir(os.path.dirname(file_path)) - s3_object.download_file(file_path) - + s3 = boto3.resource("s3") to_download = clamav.update_defs_from_s3( s3_client, AV_DEFINITION_S3_BUCKET, AV_DEFINITION_S3_PREFIX ) @@ -233,13 +212,20 @@ def lambda_handler(event, context): print("Downloading definition file %s from s3://%s" % (local_path, s3_path)) 
s3.Bucket(AV_DEFINITION_S3_BUCKET).download_file(s3_path, local_path) print("Downloading definition file %s complete!" % (local_path)) - scan_result, scan_signature = clamav.scan_file(file_path) - print( - "Scan of s3://%s resulted in %s\n" - % (os.path.join(s3_object.bucket_name, s3_object.key), scan_result) - ) + +def remove_file(file_path): + try: + os.remove(file_path) + except OSError: + pass + + +def publish_results(s3_object, scan_result, scan_signature): result_time = get_timestamp() + sns_client = boto3.client("sns") + s3_client = boto3.client("s3") + ENV = os.getenv("ENV", "") # Set the properties on the object with the scan results if "AV_UPDATE_METADATA" in os.environ: set_av_metadata(s3_object, scan_result, scan_signature, result_time) @@ -259,11 +245,52 @@ def lambda_handler(event, context): metrics.send( env=ENV, bucket=s3_object.bucket_name, key=s3_object.key, status=scan_result ) - # Delete downloaded file to free up room on re-usable lambda function container + + +def lambda_handler(event, context): + s3 = boto3.resource("s3") + sns_client = boto3.client("sns") + + # Get some environment variables + EVENT_SOURCE = os.getenv("EVENT_SOURCE", "S3") + + start_time = get_timestamp() + print("Script starting at %s\n" % (start_time)) + s3_object = event_object(event, event_source=EVENT_SOURCE) + + if str_to_bool(AV_PROCESS_ORIGINAL_VERSION_ONLY): + verify_s3_object_version(s3, s3_object) + + # Publish the start time of the scan + if AV_SCAN_START_SNS_ARN not in [None, ""]: + start_scan_time = get_timestamp() + sns_start_scan(sns_client, s3_object, AV_SCAN_START_SNS_ARN, start_scan_time) + + file_path = get_local_path(s3_object, "/tmp") + create_dir(os.path.dirname(file_path)) try: - os.remove(file_path) - except OSError: - pass + s3_object.download_file(file_path) + except OSError as e: + remove_file(file_path) + if e.errno == 28: + print("Ran out of disk space. 
Scan failed") + publish_results(s3_object, AV_STATUS_FAILED, "File too large to scan") + return + else: + raise + + download_clamav_databases() + + scan_result, scan_signature = clamav.scan_file(file_path) + print( + "Scan of s3://%s resulted in %s\n" + % (os.path.join(s3_object.bucket_name, s3_object.key), scan_result) + ) + + publish_results(s3_object, scan_result, scan_signature) + + # Delete downloaded file to free up room on re-usable lambda function container + remove_file(file_path) if str_to_bool(AV_DELETE_INFECTED_FILES) and scan_result == AV_STATUS_INFECTED: delete_s3_object(s3_object) stop_scan_time = get_timestamp() diff --git a/scripts/run-scan-lambda b/scripts/run-scan-lambda index c70e1e41..c5c9363b 100755 --- a/scripts/run-scan-lambda +++ b/scripts/run-scan-lambda @@ -35,6 +35,7 @@ docker run --rm \ -e AV_SIGNATURE_METADATA \ -e AV_STATUS_CLEAN \ -e AV_STATUS_INFECTED \ + -e AV_STATUS_FAILED \ -e AV_STATUS_METADATA \ -e AV_STATUS_SNS_ARN \ -e AV_STATUS_SNS_PUBLISH_CLEAN \