## Run config.sh script

In [None]:
# Download requirements

import subprocess

# Run ./config.sh. Its stdout is discarded to keep the notebook output short;
# stderr is not redirected, so error output from the script stays visible.
print("Running config.sh")
# check=True raises CalledProcessError on a non-zero exit status, so "DONE!"
# is only printed when the script actually succeeded.
subprocess.run(["./config.sh"], stdout=subprocess.DEVNULL, check=True)

print("DONE!")

## Init boto3 resources

In [None]:
import boto3
from sagemaker import get_execution_role
import sagemaker

# Low-level SageMaker client.
sm_boto3 = boto3.client(service_name="sagemaker")

# SageMaker session: source of the region name and of the default bucket.
sess = sagemaker.Session()
region = sess.boto_session.region_name

# A hard-coded bucket name would work here as well.
bucket = sess.default_bucket()
print(f"Using bucket {bucket}")

In [None]:
# List the key of every object currently stored in the bucket.

s3_boto = boto3.client("s3")

print("Current files in bucket")
# A single list_objects_v2 call returns at most 1000 keys, so walk every page.
paginator = s3_boto.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=bucket):
    # "Contents" is absent from the response when there is nothing to list.
    for obj in page.get("Contents", []):
        print(obj["Key"])
    


In [None]:
# Download data.
import tarfile 

print("Download data")
s3_boto.download_file(bucket, "data.tar.gz", "downloaded_data.tar.gz")

print("Extractintg data")
# The context manager closes the archive even if extraction fails.
# NOTE(review): extractall() trusts the member paths stored in the archive;
# only use it on archives you control (Python 3.12+ also offers filter="data").
with tarfile.open('downloaded_data.tar.gz') as archive:
    archive.extractall('./')


### Optional: Predict sample

In [None]:
# Just an example to predict
import tensorflow as tf
import os
import pickle
import numpy as np
from sentence_transformers import SentenceTransformer

categorical_encoder_folder = "./data/categorical_encoders/"


def _load_encoder(file_name):
    """Unpickle one encoder object stored under ``categorical_encoder_folder``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes instead of being left open.
    """
    # NOTE(review): pickle can execute arbitrary code while loading; only
    # unpickle encoder files that come from a trusted source.
    with open(categorical_encoder_folder + file_name, "rb") as encoder_file:
        return pickle.load(encoder_file)


# Sample e-mail used for the example prediction.
subject = "Email subject"
sender_name = "test"
sender_email = "test@booking.com"
sender_domain = "booking.com"
description = "Example desription"
sender_string = f"{sender_name} ({sender_email})"

sender_name_one_hot_encoder = _load_encoder("sender_name_one_hot_encoder.pkl")
sender_email_one_hot_encoder = _load_encoder("sender_email_one_hot_encoder.pkl")
sender_domain_one_hot_encoder = _load_encoder("sender_domain_one_hot_encoder.pkl")

# Each value is reshaped to a single-row column before being transformed; the
# three encodings are then joined side by side (axis=1).
input_categorical_fea = np.concatenate([
    sender_name_one_hot_encoder.transform(np.array(sender_name).reshape(-1, 1)),
    sender_email_one_hot_encoder.transform(np.array(sender_email).reshape(-1, 1)),
    sender_domain_one_hot_encoder.transform(np.array(sender_domain).reshape(-1, 1)),
], axis=1)


transformer_path = "./data/distiluse-base-multilingual-cased-v2"
transformer = SentenceTransformer(transformer_path)

# Each text embedding is reshaped into a single-row 2-D array.
subject_fea = transformer.encode(subject).reshape(1, -1)
description_fea = transformer.encode(description).reshape(1, -1)
sender_fea = transformer.encode(sender_string).reshape(1, -1)

model = tf.keras.models.load_model('./data/phishing_classifier_simple.h5')

# NOTE(review): presumably the input order must match the order the saved
# model was built with - confirm against the training code.
pred = model.predict([subject_fea, description_fea, sender_fea, input_categorical_fea])
pred

## Writing a *Script Mode* script

In [None]:
%%writefile script.py

import argparse
import joblib
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor


# inference functions ---------------
def model_fn(model_dir):
    """Load the Keras classifier used for inference.

    Args:
        model_dir: Directory that is searched first for the model file.

    Returns:
        The object returned by ``tf.keras.models.load_model``.
    """
    # script.py has no module-level TensorFlow import, so import it here;
    # without this, the ``tf`` reference below raises NameError.
    import tensorflow as tf

    # Prefer the artifact inside model_dir and fall back to the relative path
    # when it is not there.
    # NOTE(review): assumes the file keeps the name
    # "phishing_classifier_simple.h5" inside model_dir - TODO confirm.
    model_path = os.path.join(model_dir, "phishing_classifier_simple.h5")
    if not os.path.isfile(model_path):
        model_path = './data/phishing_classifier_simple.h5'
    return tf.keras.models.load_model(model_path)


## Deploy to a real-time endpoint

An `Estimator` can be deployed directly after training with `Estimator.deploy()`, but here we showcase the more extensive process of creating a model from S3 artifacts, which can be used to deploy a model that was trained in a different session or even outside of SageMaker.

In [None]:
from sagemaker.sklearn.model import SKLearnModel

# Location of the stored model artifact.
# NOTE(review): this is an HTTPS URL to a .h5 file, while SageMaker documents
# model_data as an S3 location of a .tar.gz archive - confirm it is accepted.
artifact = "https://sagemaker-eu-west-1-889192205753.s3.eu-west-1.amazonaws.com/phishing_classifier_simple.h5"

# Combine the artifact, the execution role and the script.py entry point into
# a deployable model object.
model_kwargs = {
    "model_data": artifact,
    "role": get_execution_role(),
    "entry_point": "script.py",
}
model = SKLearnModel(**model_kwargs)

In [None]:
# Deploy the model to an endpoint backed by a single ml.c5.large instance.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.c5.large",
)

### Alternative: invoke with `boto3`

In [None]:
# boto3 client for the "sagemaker-runtime" service, used to invoke endpoints.
runtime = boto3.client(service_name="sagemaker-runtime")

In [None]:
# CSV serialization: send the selected feature columns as header-less,
# index-less CSV encoded as UTF-8.
# NOTE(review): `testX` and `data` are not defined in any cell visible here -
# confirm where they come from before running this cell.
target_endpoint = predictor.endpoint
csv_payload = testX[data.feature_names].to_csv(header=False, index=False).encode("utf-8")
response = runtime.invoke_endpoint(
    EndpointName=target_endpoint,
    Body=csv_payload,
    ContentType="text/csv",
)

# The response body is a stream; read() returns its raw content.
raw_prediction = response["Body"].read()
print(raw_prediction)

In [None]:
# Delete the endpoint referenced by `predictor`.
endpoint_to_delete = predictor.endpoint
sm_boto3.delete_endpoint(EndpointName=endpoint_to_delete)