In [2]:
import sagemaker
from sagemaker import get_execution_role
import json
import boto3

# Name of the deployed SageMaker endpoint that the query cells send requests to.
ENDPOINT_NAME = "test4"

# Execution role as reported by the SageMaker SDK, plus an SDK session object.
role = get_execution_role()
sess = sagemaker.Session()

# Runtime client; its invoke_endpoint() call is what the query cells use.
runtime = boto3.Session().client('runtime.sagemaker')

# Using the Endpoint API

In this part we will see how to query the endpoint API with a simple JSON request.

In [3]:
# Two sample e-mail texts; they are sent to the endpoint together as one JSON list.
potential_phishing1 = "Dear Info, Please confirm account password to enable a better service communication, and avoid mail delivery malfunction."
potential_phishing2 = "Major Update: General Availability Based on your feedback, we’re making some updates to the plan for users to receive helpful product training and tips via email. Thank you for taking time to share your thoughts. We want to take time to review your suggestions, so we are pausing the release of this feature. How does this affect me? There will be no impact at this time. We will provide information on an updated plan via a new Message Center post. What do I need to do to prepare for this change? There is no need to take any..."
payload = [potential_phishing1, potential_phishing2]

# A single-argument print(...) with str.format produces the same text under
# the Python 2 print statement and the Python 3 print function, so this cell
# runs on either kernel.
print("Request:\n{}".format(payload))
response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME,
                                   ContentType='application/json',
                                   Body=json.dumps(payload))
# The response body is a stream: read it, decode it and parse the JSON text.
result = json.loads(response['Body'].read().decode())
print("Response:\n{}".format(result))

Request:
['Dear Info, Please confirm account password to enable a better service communication, and avoid mail delivery malfunction.', 'Major Update: General Availability Based on your feedback, we\xe2\x80\x99re making some updates to the plan for users to receive helpful product training and tips via email. Thank you for taking time to share your thoughts. We want to take time to review your suggestions, so we are pausing the release of this feature. How does this affect me? There will be no impact at this time. We will provide information on an updated plan via a new Message Center post. What do I need to do to prepare for this change? There is no need to take any...']
Response:
[{u'probability': u'1.00', u'label': [u'__label__malicious']}, {u'probability': u'1.00', u'label': [u'__label__other']}]


# Using with Batch Transform

To use the Demisto phishing classifier via Batch Transform, you will need to create a JSON file that contains the email contents, and then perform the transform job. Here is a simple example:

In [None]:
import boto3
import sagemaker
import json, os

# Job settings: the bucket, the input object, the model to run and the
# output prefix inside the bucket.
BUCKET_NAME = 'demisto-sagemaker-phishing-ml'
JSON_FILE_INPUT_KEY = 'input/sample.json'
MODEL_NAME = '25e48e3b-65d9-438b-82b8-ae2864a0892b'
OUTPUT_DIRECTORY = 'output'

# S3 URIs built from the bucket name plus the input key / output prefix.
input_location = 's3://{}/{}'.format(BUCKET_NAME, JSON_FILE_INPUT_KEY)
output_location = 's3://{}/{}'.format(BUCKET_NAME, OUTPUT_DIRECTORY)

# S3 client, used further down to download the job's output file.
s3_client = boto3.client('s3')

# Transformer for the model named above, writing under output_location.
transformer = sagemaker.transformer.Transformer(
    model_name=MODEL_NAME,
    base_transform_job_name='Batch-Transform',
    instance_type='ml.c4.xlarge',
    instance_count=1,
    output_path=output_location,
)

# Start the transform job on the input file, then block until it completes.
transformer.transform(
    input_location,
    content_type='application/json',
    split_type='Line',
)
transformer.wait()

# Download "<output directory>/<input file name>.out" to the local file
# 'valid-result'.
input_file_basename = os.path.basename(JSON_FILE_INPUT_KEY)
output_key = '{}/{}.out'.format(OUTPUT_DIRECTORY, input_file_basename)
s3_client.download_file(BUCKET_NAME, output_key, 'valid-result')

# Load the downloaded lines and show the first one as a sample.
with open('valid-result') as result_file:
    results = list(result_file)
print("Sample transform result: {}".format(results[0]))


INFO:sagemaker:Creating transform job with name: Batch-Transform-2018-11-08-10-24-45-835


......................

# Perform prediction on EML files

In this part we will see an example of how to handle multiple email files (EML format).
We will read the EML files from S3, extract the email subject and body with the mail-parser package, and query the API.

In [None]:
try:
    import mailparser
except Exception:
    !pip install mail-parser

In [None]:
import boto3

BUCKET_NAME = 'demisto-sagemaker-phishing-ml'
EMLS_FOLDER_KEY = 'emls'

# Collect the keys of all EML objects stored under the prefix in S3.
emls_keys = []
s3_client = boto3.client('s3')
paginator = s3_client.get_paginator('list_objects_v2')
# print(...) with one argument behaves the same on Python 2 and Python 3.
print("EML files:")
for page in paginator.paginate(Bucket=BUCKET_NAME, Prefix=EMLS_FOLDER_KEY):
    # A page may carry no "Contents" entry; treat that as an empty listing.
    for obj in page.get("Contents", []):
        # Keep non-empty objects whose key has the .eml extension
        # (case-insensitive). Matching on '.eml' rather than 'eml' avoids
        # picking up keys that merely end with those three letters.
        if obj['Size'] > 0 and obj['Key'].lower().endswith('.eml'):
            print(obj["Key"])
            emls_keys.append(obj["Key"])
            

In [None]:
def read_file_as_string(s3_client, bucket, object_key):
    """Download one S3 object and return its content as a string.

    Args:
        s3_client: client object exposing
            ``download_file(bucket, key, filename)``.
        bucket: name of the bucket that holds the object.
        object_key: key of the object inside ``bucket``.

    Returns:
        The content of the downloaded file, read in text mode.

    Note:
        The object is written to ``file.eml`` in the current working
        directory; every call overwrites that file.
    """
    filename = 'file.eml'
    # Download from the bucket passed by the caller. The previous version
    # ignored this parameter and always read the notebook-level BUCKET_NAME.
    s3_client.download_file(bucket, object_key, filename)
    with open(filename) as f:
        data = f.read()
    return data

def get_email_as_text(raw_email_string):
    """Reduce a raw e-mail string to a single "<subject> <body>" text.

    Args:
        raw_email_string: the raw message, handed to
            ``mailparser.parse_from_string``.

    Returns:
        The parsed message's ``subject`` and ``body`` joined by one space.
    """
    parsed = mailparser.parse_from_string(raw_email_string)
    return ' '.join((parsed.subject, parsed.body))

# Download every listed EML file, then reduce each one to its text.
# List comprehensions are used instead of map() so that both names hold real
# lists on Python 3 too; map() there returns a one-shot iterator, which
# json.dumps cannot serialise.
eml_as_strings = [read_file_as_string(s3_client, BUCKET_NAME, eml_key) for eml_key in emls_keys]
eml_as_texts = [get_email_as_text(raw_eml) for raw_eml in eml_as_strings]

In [None]:
def encode_to_json(s):
    """Serialise ``s`` to JSON and return the result encoded as UTF-8.

    Non-ASCII characters are kept as-is in the JSON text
    (``ensure_ascii=False``); characters that cannot be encoded to UTF-8
    are dropped (``'ignore'`` error handler).
    """
    json_text = json.dumps(s, ensure_ascii=False)
    return json_text.encode('utf-8', 'ignore')

# Query the endpoint with the texts extracted from the EML files.
# The extracted texts are stored in `eml_as_texts`; the previous name
# `email_texts` was never defined and raised NameError. list() guarantees
# a concrete list is serialised even if that name holds a lazy iterator.
response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME,
                                   ContentType='application/json',
                                   Body=encode_to_json(list(eml_as_texts)))
# The response body is a stream: read it, decode it and parse the JSON text.
result = json.loads(response['Body'].read().decode())
# Single-argument print(...) renders identically on Python 2 and Python 3.
print("Response:\n{}".format(result))