## Initialise bucket & session

In [None]:
import boto3
from sagemaker import get_execution_role
import sagemaker

# Low-level SageMaker client, kept around for control-plane calls later on.
sm_boto3 = boto3.client("sagemaker")

# High-level SDK session; the region and default bucket are read from it.
sess = sagemaker.Session()
region = sess.boto_session.region_name

# A hard-coded bucket name could be used here instead of the session default.
bucket = sess.default_bucket()
print(f"Using bucket {bucket}")

### Optional: Predict sample

In [14]:
# TODO Move me below.
# Local smoke test: build the four model inputs for one sample e-mail and
# run them through the saved Keras classifier.
import os
import pickle

import numpy as np
import tensorflow as tf
from sentence_transformers import SentenceTransformer

categorical_encoder_folder = "categorical_encoders/"

# Sample e-mail fields fed to the classifier.
subject = "Email subject"
sender_name = "test"
sender_email = "test@booking.com"
sender_domain = "booking.com"
description = "Example desription"
sender_string = f"{sender_name} ({sender_email})"


def _load_encoder(filename):
    """Unpickle one encoder stored under ``categorical_encoder_folder``.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the object has been loaded.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(os.path.join(categorical_encoder_folder, filename), "rb") as encoder_file:
        return pickle.load(encoder_file)


sender_name_one_hot_encoder = _load_encoder("sender_name_one_hot_encoder.pkl")
sender_email_one_hot_encoder = _load_encoder("sender_email_one_hot_encoder.pkl")
sender_domain_one_hot_encoder = _load_encoder("sender_domain_one_hot_encoder.pkl")

# Each scalar is reshaped to a (1, 1) column before being encoded; the three
# encoded blocks are then joined side by side into one feature row.
input_categorical_fea = np.concatenate([
    sender_name_one_hot_encoder.transform(np.array(sender_name).reshape(-1, 1)),
    sender_email_one_hot_encoder.transform(np.array(sender_email).reshape(-1, 1)),
    sender_domain_one_hot_encoder.transform(np.array(sender_domain).reshape(-1, 1)),
], axis=1)

# Sentence embeddings for the free-text fields, each reshaped to a single row.
transformer_path = "./distiluse-base-multilingual-cased-v2"
transformer = SentenceTransformer(transformer_path)

subject_fea = transformer.encode(subject).reshape(1, -1)
description_fea = transformer.encode(description).reshape(1, -1)
sender_fea = transformer.encode(sender_string).reshape(1, -1)

model = tf.keras.models.load_model('./phishing_classifier_simple.h5')

# The model takes the four inputs in this order.
pred = model.predict([subject_fea, description_fea, sender_fea, input_categorical_fea])
pred



array([[2.492732e-12]], dtype=float32)

## Writing a *Script Mode* script
The script below contains both training and inference functionality and can run both on SageMaker Training hardware and locally (desktop, SageMaker notebook, on-prem, etc.). Detailed guidance is available here: https://sagemaker.readthedocs.io/en/stable/using_sklearn.html#preparing-the-scikit-learn-training-script

In [None]:
%%writefile script.py

import argparse
import joblib
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor


# inference functions ---------------
def model_fn(model_dir):
    """Load the Keras phishing classifier used for inference.

    Args:
        model_dir: Directory in which the serving container places the
            model artifacts. May be empty/None when run ad hoc.

    Returns:
        The model returned by ``tf.keras.models.load_model``.
    """
    # Imported here because the script's top-level imports never bring `tf`
    # into scope, which would otherwise raise NameError on the first call.
    import tensorflow as tf

    model_filename = "phishing_classifier_simple.h5"

    # Prefer the artifact inside `model_dir`; fall back to the original
    # relative location so existing local layouts keep working.
    model_path = os.path.join("phishing_model", model_filename)
    if model_dir:
        in_model_dir = os.path.join(model_dir, model_filename)
        if os.path.isfile(in_model_dir):
            model_path = in_model_dir

    return tf.keras.models.load_model(model_path)


## Deploy to a real-time endpoint

### Deploy with Python SDK

An `Estimator` can be deployed directly after training with `Estimator.deploy()`, but here we showcase the more extensive process of creating a model from S3 artifacts. This approach can also be used to deploy a model that was trained in a different session, or even outside of SageMaker.

In [None]:
from sagemaker.sklearn.model import SKLearnModel

# URL of the saved .h5 model file that the SageMaker model object points at.
artifact = "https://sagemaker-eu-west-1-889192205753.s3.eu-west-1.amazonaws.com/phishing_classifier_simple.h5"

# NOTE(review): model_data is an HTTPS object URL to a bare .h5 file, and no
# framework_version is passed; SageMaker model artifacts are usually an
# s3:// URI to a model.tar.gz — confirm this deploys with the SDK in use.
model_settings = {
    "model_data": artifact,
    "role": get_execution_role(),
    "entry_point": "script.py",
}
model = SKLearnModel(**model_settings)

In [None]:
# Deploy the model to an endpoint backed by a single ml.c5.large instance.
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.c5.large",
)

### Alternative: invoke with `boto3`

In [None]:
# Client for the SageMaker runtime service, used to invoke the endpoint.
runtime = boto3.client(service_name="sagemaker-runtime")

In [None]:
# Send the feature columns to the endpoint as header-less CSV.
# NOTE(review): `testX` and `data` are not defined in the visible cells —
# confirm they exist before running this one.
target_endpoint = predictor.endpoint
csv_text = testX[data.feature_names].to_csv(header=False, index=False)
csv_payload = csv_text.encode("utf-8")

response = runtime.invoke_endpoint(
    EndpointName=target_endpoint,
    Body=csv_payload,
    ContentType="text/csv",
)

# The response body is a stream; read it to get the raw bytes.
raw_result = response["Body"].read()
print(raw_result)

In [None]:
# Delete the endpoint once it is no longer needed.
endpoint_to_delete = predictor.endpoint
sm_boto3.delete_endpoint(EndpointName=endpoint_to_delete)