## Run config.sh script

In [None]:
# Download requirements by running the project's config.sh setup script.

import subprocess

print("Running config.sh")
# check=True raises CalledProcessError on a non-zero exit status, so a failed
# setup is not silently reported as "DONE!". stdout is discarded; stderr is
# left attached so error output from the script stays visible.
subprocess.run(["./config.sh"], stdout=subprocess.DEVNULL, check=True)

print("DONE!")

## Init boto3 resources

In [None]:
import boto3
import sagemaker
from sagemaker import get_execution_role

# SageMaker session: source of the region name and the default bucket below.
sess = sagemaker.Session()
region = sess.boto_session.region_name

# Low-level SageMaker client (used later to delete the endpoint).
sm_boto3 = boto3.client("sagemaker")

# A hard-coded bucket name would work here as well.
bucket = sess.default_bucket()
print(f"Using bucket {bucket}")

In [None]:
# List S3 bucket objects

s3_boto = boto3.client("s3")

print("Current files in bucket")
# Use the paginator so every key is printed rather than only the first
# response page of list_objects_v2.
paginator = s3_boto.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=bucket):
    # "Contents" is missing from the response when there are no objects,
    # so fall back to an empty list instead of raising KeyError.
    for obj in page.get("Contents", []):
        print(obj["Key"])
    


In [None]:
# Download the data archive from the bucket and unpack it into the working directory.
import tarfile

print("Download data")
s3_boto.download_file(bucket, "data.tar.gz", "downloaded_data.tar.gz")

print("Extractintg data")
# The context manager closes the archive handle even if extraction fails.
with tarfile.open('downloaded_data.tar.gz') as archive:
    archive.extractall('./')


### Predict Example

In [None]:
# Just an example to predict: build the four model inputs for one e-mail and score it.
import tensorflow as tf
import os
import pickle
import numpy as np
from sentence_transformers import SentenceTransformer

categorical_encoder_folder = "./data/categorical_encoders/"


def _load_encoder(file_name):
    """Unpickle one encoder file from ``categorical_encoder_folder``.

    The file is opened in a ``with`` block so its handle is closed after loading.
    """
    # NOTE: pickle can execute arbitrary code while loading; only use encoder
    # files that come from a trusted source.
    with open(categorical_encoder_folder + file_name, "rb") as encoder_file:
        return pickle.load(encoder_file)


subject = "Email subject"
sender_name = "test"
sender_email = "test@booking.com"
sender_domain = "booking.com"
description = "Example desription"
sender_string = f"{sender_name} ({sender_email})"

# Step 1: Encode the sender fields with the custom (TK) categorical encoders.
sender_name_one_hot_encoder = _load_encoder("sender_name_one_hot_encoder.pkl")
sender_email_one_hot_encoder = _load_encoder("sender_email_one_hot_encoder.pkl")
sender_domain_one_hot_encoder = _load_encoder("sender_domain_one_hot_encoder.pkl")
# Each scalar value is reshaped to a (1, 1) array before being transformed;
# the three encodings are then joined column-wise into one row.
input_categorical_fea = np.concatenate([
    sender_name_one_hot_encoder.transform(np.array(sender_name).reshape(-1, 1)),
    sender_email_one_hot_encoder.transform(np.array(sender_email).reshape(-1, 1)),
    sender_domain_one_hot_encoder.transform(np.array(sender_domain).reshape(-1, 1))
], axis=1)

# Step 2: Encode the free-text fields with the sentence-transformer model
# stored under ./data.
transformer_path = "./data/distiluse-base-multilingual-cased-v2"
transformer = SentenceTransformer(transformer_path)

# reshape(1, -1) turns each embedding into a single-row 2-D array.
subject_fea = transformer.encode(subject).reshape(1, -1)
description_fea = transformer.encode(description).reshape(1, -1)
sender_fea = transformer.encode(sender_string).reshape(1, -1)

# Predict using the features.
model = tf.keras.models.load_model('./data/phishing_classifier_simple.h5')
# The inputs have to be passed in this same order.
pred = model.predict([subject_fea, description_fea, sender_fea, input_categorical_fea])
print("The possiblity of a phishing 0(phishing) -> 1(phishing)")
# Bare expression: the notebook displays the first value of the first prediction row.
pred[0][0]

In [None]:
import os.path
import tarfile

# Serialize the in-memory Keras model to a JSON string.
saved_model = model.to_json()

# Write the JSON string to S3 as "tk-phishing-model-v1.json"
client = boto3.client('s3')
client.put_object(Body=saved_model,
                  Bucket=bucket,
                  Key='tk-phishing-model-v1.json')

artificate_upload_file_name = "tk-phishing-model-v1-artifacts.tar.gz"

# Pack the .h5 file into a gzip-compressed tar archive; arcname stores it
# under its base name only, without the ./data/ prefix.
with tarfile.open(artificate_upload_file_name, "w:gz") as tar:
    source_dir = "./data/phishing_classifier_simple.h5"
    tar.add(source_dir, arcname=os.path.basename(source_dir))

# Reuse the variable for both the local path and the S3 key so the archive
# that was just written is always the one that gets uploaded.
client.upload_file(artificate_upload_file_name,
                   bucket,
                   artificate_upload_file_name)


## Writing a *Script Mode* script

In [None]:
%%writefile script.py

import boto3


# inference functions ---------------
def model_fn(model_dir):
    """Download the model JSON from S3 and rebuild the Keras model from it.

    Args:
        model_dir: Directory supplied by the caller; not used by this
            implementation, which fetches the definition from S3 instead.

    Returns:
        The model object returned by ``model_from_json``.
    """
    # Imported locally because this script only imports boto3 at module level.
    from tensorflow.keras.models import model_from_json

    bucket = "sagemaker-eu-west-1-889192205753" # hardcoded
    # script.py runs as its own module, so it must create its own S3 client
    # rather than rely on a notebook variable.
    s3_boto = boto3.client("s3")
    s3_boto.download_file(bucket, "tk-phishing-model-v1.json", "tk-phishing-model-v1.json")

    # Read the downloaded JSON file
    with open('tk-phishing-model-v1.json', 'r') as model_file:
        loaded_model = model_file.read()

    # NOTE(review): the JSON was produced with `model.to_json()`, which
    # presumably carries the architecture only -- confirm whether the weights
    # from the .h5 artifact must be loaded as well.
    clf = model_from_json(loaded_model)
    return clf


## Deploy to a real-time endpoint

An `Estimator` can be deployed directly after training with `Estimator.deploy()`, but here we showcase the more involved process of creating a model from S3 artifacts. This approach can also be used to deploy a model that was trained in a different session or even outside of SageMaker.

In [None]:
from sagemaker.tensorflow import TensorFlowModel

# Wrap the uploaded artifact archive in a SageMaker TensorFlow model.
# Note: this rebinds `model`, which earlier held the Keras model.
model = TensorFlowModel(
    role='AmazonSageMaker-ExecutionRole-20230719T105672',
    framework_version="2.13",
    model_data='https://sagemaker-eu-west-1-889192205753.s3.eu-west-1.amazonaws.com/tk-phishing-model-v1-artifacts.tar.gz',
)

# Deploy on a single ml.c5.xlarge instance and keep the returned predictor.
predictor = model.deploy(
    instance_type='ml.c5.xlarge',
    initial_instance_count=1,
)

In [None]:
# Deploy on a single ml.c5.large instance.
# NOTE(review): the previous cell already calls model.deploy(...) and binds
# `predictor`; running this cell as well rebinds `predictor`, presumably
# leaving the first endpoint running -- confirm only one of the two is intended.
predictor = model.deploy(initial_instance_count=1, instance_type="ml.c5.large")

### Alternative: invoke with `boto3`

In [None]:
# Client for the SageMaker runtime API, used below to invoke the endpoint.
runtime = boto3.client(service_name="sagemaker-runtime")

In [None]:
# csv serialization: send the selected feature columns as header-less CSV bytes.
# NOTE(review): `testX` and `data` are not defined in any cell visible here --
# confirm where they come from before running this cell.
response = runtime.invoke_endpoint(
    # `endpoint_name` replaces the deprecated `predictor.endpoint` attribute.
    EndpointName=predictor.endpoint_name,
    Body=testX[data.feature_names].to_csv(header=False, index=False).encode("utf-8"),
    ContentType="text/csv",
)

# The response body is a stream; read() returns its raw content.
print(response["Body"].read())

In [None]:
# Delete the endpoint referenced by `predictor`.
# `endpoint_name` replaces the deprecated `predictor.endpoint` attribute.
sm_boto3.delete_endpoint(EndpointName=predictor.endpoint_name)