## Run config.sh script

In [None]:
# Download requirements by running the project's config.sh setup script.

import subprocess

print("Running config.sh")
# check=True raises CalledProcessError when config.sh exits with a non-zero
# status, so a failed setup is not silently reported as "DONE!".
# Only stdout is discarded; stderr still appears in the cell output.
subprocess.run(["./config.sh"], stdout=subprocess.DEVNULL, check=True)

print("DONE!")

## Init boto3 resources

In [None]:
import boto3
import sagemaker

# Low-level SageMaker client from boto3.
sm_boto3 = boto3.client("sagemaker")

# SageMaker SDK session, plus the values derived from it / from the environment.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
region = sess.boto_session.region_name
bucket = sess.default_bucket()  # a hard-coded bucket name would work here too

print("Using bucket %s" % bucket)

In [None]:
# List S3 bucket objects

s3_boto = boto3.client("s3")
objects = s3_boto.list_objects_v2(Bucket=bucket)

print("Current files in bucket")
# list_objects_v2 omits the "Contents" key when the bucket holds no objects,
# so fall back to an empty list instead of raising KeyError.
# NOTE: a single call returns at most 1000 keys; switch to a paginator if the
# bucket can hold more than that.
for obj in objects.get('Contents', []):
    print(obj['Key'])
    


In [None]:
# Download the data archive from the bucket and unpack it into the working directory.
import tarfile

print("Download data")
s3_boto.download_file(bucket, "data.tar.gz", "downloaded_data.tar.gz")

print("Extractintg data")
# Open the archive in a context manager so its file handle is closed as soon
# as extraction finishes instead of staying open for the kernel's lifetime.
# NOTE(review): extractall() trusts the member paths stored in the archive;
# only run this against archives from a trusted bucket.
with tarfile.open('downloaded_data.tar.gz') as data_archive:
    data_archive.extractall('./')


### Load the Keras model and predict a sample

In [None]:
# Example: build the four model inputs for one hand-written email and predict.
import tensorflow as tf
import os
import pickle
import numpy as np
from sentence_transformers import SentenceTransformer

categorical_encoder_folder = "./data/categorical_encoders/"


def _load_pickle(path):
    """Unpickle and return the object stored at `path`, closing the file afterwards.

    NOTE(review): unpickling can execute arbitrary code; only load files that
    come from a trusted source.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


subject = "Email subject"
sender_name = "test"
sender_email = "test@booking.com"
sender_domain = "booking.com"
description = "Example desription"
sender_string = "%s (%s)"%(sender_name, sender_email)

# Step 1: encode the categorical sender fields with the encoders loaded from disk.
sender_name_one_hot_encoder = _load_pickle(categorical_encoder_folder + "sender_name_one_hot_encoder.pkl")
sender_email_one_hot_encoder = _load_pickle(categorical_encoder_folder + "sender_email_one_hot_encoder.pkl")
sender_domain_one_hot_encoder = _load_pickle(categorical_encoder_folder + "sender_domain_one_hot_encoder.pkl")
# Each value is reshaped to a single-row, single-column array before transform();
# the three encodings are then joined column-wise into one feature row.
input_categorical_fea = np.concatenate([
    sender_name_one_hot_encoder.transform(np.array(sender_name).reshape(-1, 1)),
    sender_email_one_hot_encoder.transform(np.array(sender_email).reshape(-1, 1)),
    sender_domain_one_hot_encoder.transform(np.array(sender_domain).reshape(-1, 1))
], axis=1)

# Step 2: embed the free-text fields with the sentence transformer stored locally.
transformer_path = "./data/distiluse-base-multilingual-cased-v2"
transformer = SentenceTransformer(transformer_path)

# reshape(1, -1) turns each embedding into a single-row batch.
subject_fea = transformer.encode(subject).reshape(1, -1)
description_fea = transformer.encode(description).reshape(1, -1)
sender_fea = transformer.encode(sender_string).reshape(1, -1)

# Step 3: predict using the features.
model = tf.keras.models.load_model('./data/phishing_classifier_simple.h5')
# The inputs have to be passed in this exact order.
pred = model.predict([subject_fea, description_fea, sender_fea, input_categorical_fea])
# NOTE(review): the label below names both ends of the range "phishing";
# confirm what 0 and 1 mean for this classifier and correct the message.
print("The possiblity of a phishing 0(phishing) -> 1(phishing)")
pred[0][0]

### Export the model in a SageMaker-compatible format and upload it to S3

In [None]:
import os.path
import tarfile

# Export the loaded Keras model in TensorFlow SavedModel format.
# The trailing "1" is presumably the version directory expected by the
# serving container - TODO confirm.
tf.saved_model.save(model, "./model/phishing/1")

artificate_upload_file_name = "model.tar.gz"

# Use the variable (not a repeated literal) so the archive that is written is
# guaranteed to be the archive that is uploaded.
with tarfile.open(artificate_upload_file_name, mode='w:gz') as archive:
    archive.add("./model/phishing")

# The original called an undefined `client`; the S3 client created earlier in
# this notebook is `s3_boto`. Arguments: local file, bucket, object key.
s3_boto.upload_file(artificate_upload_file_name,
                    bucket,
                    artificate_upload_file_name)


## Deploy to a real-time endpoint

An `Estimator` can be deployed directly after training with `Estimator.deploy()`. Here, however, we showcase the more general process of creating a model from artifacts stored in S3, which can also be used to deploy a model that was trained in a different session, or even outside of SageMaker.

In [None]:
%%time

from sagemaker.tensorflow import TensorFlowModel

model_data = 's3://sagemaker-eu-west-1-889192205753/model.tar.gz'

model = TensorFlowModel(
    entry_point="inference.py",
    source_dir="code",
    model_data=model_data,
    name=name_from_base("model"),
    framework_version="2.2.0",
    role="AmazonSageMaker-ExecutionRole-20230719T105672")

predictor = model.deploy(initial_instance_count=1, instance_type='ml.c5.xlarge')

### Alternative: invoke with `boto3`

In [None]:
# boto3 client for the "sagemaker-runtime" service, used below to invoke the endpoint.
runtime = boto3.client(service_name="sagemaker-runtime")

In [None]:
import json

# TODO: the payload is an empty JSON object; fill in the fields that
# code/inference.py expects before relying on the response.
payload = json.dumps({
})

response = runtime.invoke_endpoint(
    # `predictor.endpoint` is the deprecated pre-v2 attribute name; the
    # SageMaker Python SDK v2 attribute is `endpoint_name`.
    EndpointName=predictor.endpoint_name,
    Body=payload,
    ContentType="application/json",
)

# The body is a stream; read() returns the raw response bytes.
print(response["Body"].read())

In [None]:
# Tear down the endpoint created above.
# `predictor.endpoint` is the deprecated pre-v2 attribute name; use `endpoint_name`.
sm_boto3.delete_endpoint(EndpointName=predictor.endpoint_name)