# Creating a requirements.txt file to install missing dependencies in the SageMaker container

In [30]:
%%writefile requirements.txt
# Extra packages to be installed in the SageMaker container for inference.py.
# NOTE(review): versions are unpinned — consider pinning them to the versions that
# produced model.pkl / tfidf_vectorizer.pkl (TODO confirm which versions those were).
joblib
scipy
numpy
scikit-learn

Overwriting requirements.txt


# Saving the models and zipping them

In [31]:
!mkdir -p opt/ml/model  
!cp model.pkl opt/ml/model/model.pkl
!cp tfidf_vectorizer.pkl opt/ml/model/tfidf_vectorizer.pkl

# Writing Inference Script
This script processes the input data on the SageMaker endpoint and returns the predictions.

In [32]:
%%writefile inference.py
import joblib
import os
import json
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import pickle
import boto3

def model_fn(model_dir):
    """Deserialize the fitted model.

    Args:
        model_dir: Directory that contains the serialized ``model.pkl``.

    Returns:
        The object that ``joblib.load`` produces for ``<model_dir>/model.pkl``.
    """
    model_path = os.path.join(model_dir, "model.pkl")
    return joblib.load(model_path)

def input_fn(request_body, request_content_type):
    """Decode the body of a request sent to the model.

    Args:
        request_body: The body of the request sent to the model (JSON text).
        request_content_type: (string) format of the request. Letter case and
            media-type parameters are ignored, so a value such as
            ``"application/json; charset=utf-8"`` is treated as JSON.

    Returns:
        The Python object decoded from the JSON body.

    Raises:
        ValueError: If the content type is not JSON. Malformed JSON also raises
            a ``ValueError`` subclass (``json.JSONDecodeError``).
    """
    # Compare only the bare media type: drop parameters such as "; charset=utf-8"
    # and normalise case. A missing content type (None) is rejected like any
    # other unsupported value.
    media_type = (request_content_type or "").split(";", 1)[0].strip().lower()
    if media_type != "application/json":
        raise ValueError("This model only supports application/json input")
    return json.loads(request_body)

def predict_fn(input_data, model):
    """Run the model on one decoded request.

    Args:
        input_data: The value returned by ``input_fn``.
        model: The (sklearn) model returned by ``model_fn``.

    Returns:
        Whatever ``model.predict`` returns for the processed input.
    """
    # Turn the raw request into model features first, then predict on them.
    features = process_input(input_data, model)
    return model.predict(features)

# Directories searched for the fitted vectorizer, in order: the relative path
# this script has always used (depends on the working directory), then the
# absolute /opt/ml/model directory. The archive built in the notebook carries
# tfidf_vectorizer.pkl both at its root and under opt/ml/model/.
_VECTORIZER_DIRS = ("opt/ml/model", "/opt/ml/model")
_VECTORIZER_FILE = "tfidf_vectorizer.pkl"
_vectorizer = None  # populated on first use, then reused for later requests


def _get_vectorizer():
    """Load the fitted TF-IDF vectorizer once and return the cached instance."""
    global _vectorizer
    if _vectorizer is None:
        candidates = [os.path.join(d, _VECTORIZER_FILE) for d in _VECTORIZER_DIRS]
        for path in candidates:
            if os.path.isfile(path):
                # joblib.load un-pickles the file; only load artifacts you built.
                _vectorizer = joblib.load(path)
                break
        else:
            raise FileNotFoundError(
                "tfidf_vectorizer.pkl not found in any of: " + ", ".join(candidates)
            )
    return _vectorizer


def process_input(input_data, model):
    """Vectorize the URLs of a request so the model can score them.

    Args:
        input_data: Mapping with a ``'url'`` entry holding either a list of URL
            strings or a single URL string.
        model: Unused; kept so the signature stays compatible with callers.

    Returns:
        The result of ``vectorizer.transform`` applied to the URLs.

    Raises:
        KeyError: If ``input_data`` has no ``'url'`` entry.
        FileNotFoundError: If the vectorizer file cannot be located.
    """
    urls = input_data['url']
    # A bare string is a single document, not an iterable of documents.
    if isinstance(urls, str):
        urls = [urls]
    return _get_vectorizer().transform(urls)

def output_fn(prediction, content_type):
    """Serialize the prediction into the JSON response body.

    Args:
        prediction: The value returned by ``predict_fn`` (a NumPy array or any
            other sequence of labels).
        content_type: The content type the endpoint expects to be returned.
            Currently unused: the response is always a JSON string.

    Returns:
        A JSON string ``{"type": <first label>}``. When more than one label was
        predicted, a ``"types"`` list with every label is included as well, so
        batched requests no longer lose all results but the first.

    Raises:
        IndexError: If ``prediction`` is empty.
    """
    # NumPy scalars such as int64 are not JSON serializable; tolist() converts
    # the whole array to built-in Python values.
    labels = prediction.tolist() if hasattr(prediction, "tolist") else list(prediction)
    response = {"type": labels[0]}
    if len(labels) > 1:
        response["types"] = labels
    return json.dumps(response)


Overwriting inference.py


In [34]:
# Bundle the model, the vectorizer, the inference script, requirements.txt and the opt/ staging tree into model.tar.gz.
!tar -cvpzf model.tar.gz model.pkl inference.py tfidf_vectorizer.pkl requirements.txt opt

model.pkl
inference.py
tfidf_vectorizer.pkl
requirements.txt
opt/
opt/ml/
opt/ml/model/
opt/ml/model/model.pkl
opt/ml/model/tfidf_vectorizer.pkl


# Deployment
Importing libraries

In [35]:
import boto3
import json
import os
import joblib
import pickle
import tarfile
import sagemaker
from sagemaker.estimator import Estimator
import time
from time import gmtime, strftime
import subprocess

# Setting up the boto3 clients and the SageMaker session

In [36]:
# Setup: AWS clients, session objects and the execution role used by the later cells.
client = boto3.client(service_name="sagemaker")
runtime = boto3.client(service_name="sagemaker-runtime")
boto_session = boto3.session.Session()
s3 = boto_session.resource('s3')
region = boto_session.region_name
print(region)
if region is None:
    # Fail here with a clear message instead of later inside the image lookup.
    raise RuntimeError("No AWS region is configured for this boto3 session")
sagemaker_session = sagemaker.Session()
# The role ARN is account-specific. SAGEMAKER_ROLE_ARN overrides it when set,
# otherwise the original value is used unchanged.
role = os.environ.get("SAGEMAKER_ROLE_ARN", "arn:aws:iam::502575812615:role/LabRole")

us-east-1


# Retrieving a SKLearn image uri

In [37]:
# Look up the scikit-learn container image URI for the current region.
sklearn_image_params = {
    "framework": "sklearn",
    "region": region,
    "version": "1.2-1",
    "py_version": "py3",
    "instance_type": "ml.t2.medium",
}
image_uri = sagemaker.image_uris.retrieve(**sklearn_image_params)

# Uploading the previously zipped model archive to the default S3 bucket (the cell below performs the upload, so no manual upload is needed)

In [38]:
# Bucket for model artifacts
default_bucket = sagemaker_session.default_bucket()
print(default_bucket)

# Upload tar.gz to bucket. The archive name doubles as the S3 object key and is
# defined once, so the URI handed to SageMaker always matches what was uploaded.
model_archive = "model.tar.gz"
model_artifacts = f"s3://{default_bucket}/{model_archive}"
# upload_file returns None, so there is nothing useful to capture.
s3.meta.client.upload_file(model_archive, default_bucket, model_archive)

sagemaker-us-east-1-502575812615


# Creating model

In [39]:
# Step 1: Model Creation — register the container image and artifact location.
model_name = "sklearn-test{}".format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
print(f"Model name: {model_name}")

# The same S3 archive provides both the model data and the inference script.
primary_container = {
    "Image": image_uri,
    "Mode": "SingleModel",
    "ModelDataUrl": model_artifacts,
    "Environment": {
        "SAGEMAKER_SUBMIT_DIRECTORY": model_artifacts,
        "SAGEMAKER_PROGRAM": "inference.py",
    },
}
create_model_response = client.create_model(
    ModelName=model_name,
    Containers=[primary_container],
    ExecutionRoleArn=role,
)
print(f"Model Arn: {create_model_response['ModelArn']}")

Model name: sklearn-test2024-05-01-18-14-01
Model Arn: arn:aws:sagemaker:us-east-1:502575812615:model/sklearn-test2024-05-01-18-14-01


# Configuring the Endpoint

In [40]:
# Step 2: EPC Creation — one production variant serving the model created above.
sklearn_epc_name = "sklearn-epc{}".format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
sklearn_variant = {
    "VariantName": "sklearnvariant",
    "ModelName": model_name,
    "InstanceType": "ml.t2.medium",
    "InitialInstanceCount": 1,
}
endpoint_config_response = client.create_endpoint_config(
    EndpointConfigName=sklearn_epc_name,
    ProductionVariants=[sklearn_variant],
)
print(f"Endpoint Configuration Arn: {endpoint_config_response['EndpointConfigArn']}")

Endpoint Configuration Arn: arn:aws:sagemaker:us-east-1:502575812615:endpoint-config/sklearn-epc2024-05-01-18-14-03


# Creating the Endpoint on SageMaker

In [41]:
# Step 3: EP Creation — start the endpoint from the configuration created above.
endpoint_name = "sklearn-local-ep{}".format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
create_endpoint_response = client.create_endpoint(
    EndpointConfigName=sklearn_epc_name,
    EndpointName=endpoint_name,
)
print(f"Endpoint Arn: {create_endpoint_response['EndpointArn']}")

Endpoint Arn: arn:aws:sagemaker:us-east-1:502575812615:endpoint/sklearn-local-ep2024-05-01-18-14-07


# Monitoring the endpoint deployment

In [42]:
# Monitor creation: poll until the endpoint leaves the "Creating" state.
POLL_INTERVAL_SECONDS = 15
MAX_WAIT_SECONDS = 30 * 60  # give up instead of looping forever

wait_deadline = time.monotonic() + MAX_WAIT_SECONDS
describe_endpoint_response = client.describe_endpoint(EndpointName=endpoint_name)
while describe_endpoint_response["EndpointStatus"] == "Creating":
    if time.monotonic() >= wait_deadline:
        raise TimeoutError(
            f"Endpoint {endpoint_name} was still 'Creating' after {MAX_WAIT_SECONDS} seconds"
        )
    # Sleep before re-polling, so there is no immediate duplicate request and
    # no pointless sleep once the final status has been observed.
    time.sleep(POLL_INTERVAL_SECONDS)
    describe_endpoint_response = client.describe_endpoint(EndpointName=endpoint_name)
    print(describe_endpoint_response["EndpointStatus"])
print(describe_endpoint_response)

# Stop here if the deployment did not succeed, rather than letting the
# invocation cell fail later with a less helpful error.
if describe_endpoint_response["EndpointStatus"] != "InService":
    raise RuntimeError(
        "Endpoint {} ended in status {}: {}".format(
            endpoint_name,
            describe_endpoint_response["EndpointStatus"],
            describe_endpoint_response.get("FailureReason", "no failure reason reported"),
        )
    )

Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
Creating
InService
{'EndpointName': 'sklearn-local-ep2024-05-01-18-14-07', 'EndpointArn': 'arn:aws:sagemaker:us-east-1:502575812615:endpoint/sklearn-local-ep2024-05-01-18-14-07', 'EndpointConfigName': 'sklearn-epc2024-05-01-18-14-03', 'ProductionVariants': [{'VariantName': 'sklearnvariant', 'DeployedImages': [{'SpecifiedImage': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:1.2-1-cpu-py3', 'ResolvedImage': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn@sha256:f2af579f989b682271f6f791aa44b6a755d08003d9c8f81be16e4a1e93141291', 'ResolutionTime': datetime.datetime(2024, 5, 1, 18, 14, 8, 23000, tzinfo=tzlocal())}], 'CurrentWeight': 1.0, 'DesiredWeight': 1.0, 'CurrentInstanceCount': 1, 'DesiredInstanceCount': 1}], 'EndpointStatus': 'InService', 'CreationTime': datetime.datetime(2024, 5, 1,

# Testing whether the endpoint is working

In [43]:
import json
import boto3

# Initialize the SageMaker runtime client
runtime = boto3.client('sagemaker-runtime')

# Labelled sample URLs that can be used as test inputs (URL -> expected type):
#   vanderbilt.rivals.com/viewcoach.asp?coach=2079&sport=1&year=2011 -> benign
#   http://peluqueriadeautor.com/index.php?option=com_virtuemart&page=shop.browse&category_id=31&Itemid=70 -> defacement
#   movies.yahoo.com/shop?d=hv&cf=info&id=1800340831 -> benign
#   cyndislist.com/us/pa/counties -> benign
#   http://www.824555.com/app/member/SportOption.php?uid=guest&langx=gb -> malware
#   http://www.raci.it/component/user/reset.html -> defacement
#   https://docs.google.com/spreadsheet/viewform?formkey=dGg2Z1lCUHlSdjllTVNRUW50TFIzSkE6MQ -> phishing
#   psychology.wikia.com/wiki/Phonemes -> benign
input_data = {
    'url': ["http://peluqueriadeautor.com/index.php?option=com_virtuemart&page=shop.browse&category_id=31&Itemid=70"]
}

# Convert input data to JSON string
payload = json.dumps(input_data)

# Use the endpoint created earlier in this session when there is one. The
# timestamped name changes on every run, so overwriting it with a fixed name
# would target a stale endpoint. The fixed name is only a fallback for running
# this cell on its own.
endpoint_name = globals().get("endpoint_name", 'sklearn-local-ep2024-05-01-18-14-07')

# Call the endpoint
response = runtime.invoke_endpoint(EndpointName=endpoint_name,
                                   ContentType='application/json',
                                   Body=payload)

# Decode and print the response
result = json.loads(response['Body'].read().decode())
print(result)

{'type': 'defacement'}
