<a href="https://colab.research.google.com/github/amitchug/ALMlops/blob/main/Invoke_SageMaker_Endpoint.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Invoke SageMaker Endpoint

In [None]:
!pip -q install boto3      # Python SDK for interacting with AWS services

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.3/13.3 MB[0m [31m88.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.2/83.2 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import boto3

In [None]:
# AWS credentials setup.
#
# Prerequisite (AWS console): create an IAM user (rather than using the root user),
# attach the policies it needs, then create an access key / secret access key pair for it.
# Both values are stored as Colab secrets named `access_key` and `secret_key`.

import os
from google.colab import userdata

# Copy each Colab secret into the corresponding AWS environment variable,
# so the keys never appear in the notebook itself.
for env_var, secret_name in (
    ('AWS_ACCESS_KEY_ID', 'access_key'),
    ('AWS_SECRET_ACCESS_KEY', 'secret_key'),
):
    os.environ[env_var] = userdata.get(secret_name)


In [None]:
import boto3
import pandas as pd

# Runtime client used to call deployed SageMaker endpoints for model inference.
# "sagemaker-runtime" is the service name documented by boto3;
# "runtime.sagemaker" is a legacy alias for the same service.
client = boto3.client("sagemaker-runtime", region_name='ap-south-1')

# One sample passenger. The values are in the same order as the Gradio callback
# further down: Pclass, Sex, Age, Fare, Embarked, FamilySize, Has_cabin, Title.
sample_passenger = [[1, 'female', 30, 8.05, 'S', 3, 1, 'Mrs']]

# Serialise the row as CSV without header or index, then encode it as UTF-8
# because the request body is sent as bytes.
body = pd.DataFrame(sample_passenger).to_csv(header=False, index=False).encode("utf-8")

# Invoke the deployed endpoint to get a prediction for the sample row.
response = client.invoke_endpoint(
    EndpointName="canvas-titanic-deployment",
    ContentType="text/csv",        # format of the data in `body`
    Body=body,
    Accept="application/json",     # ask for the response in JSON format
)

In [None]:
# Show the raw response: request metadata plus a 'Body' stream that is read in the next cell.
response

{'ResponseMetadata': {'RequestId': '02986b11-194c-4925-a615-7cce8902a337',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '02986b11-194c-4925-a615-7cce8902a337',
   'x-amzn-invoked-production-variant': 'canvas-model-variant-2024-12-21-06-20-17-052055',
   'date': 'Sat, 21 Dec 2024 07:02:38 GMT',
   'content-type': 'application/json',
   'content-length': '165',
   'connection': 'keep-alive'},
  'RetryAttempts': 0},
 'ContentType': 'application/json',
 'InvokedProductionVariant': 'canvas-model-variant-2024-12-21-06-20-17-052055',
 'Body': <botocore.response.StreamingBody at 0x7870fffbb610>}

In [None]:
# Read the streamed response payload, then decode the bytes into text.
payload_bytes = response['Body'].read()
output = payload_bytes.decode('utf-8')
output

'{"predictions": [{"predicted_label": "1", "probability": 0.9447951912879944, "probabilities": "[0.055204808712005615, 0.9447951912879944]", "labels": "[\'0\', \'1\']"}]}'

In [None]:
import json

# Parse the JSON text into Python objects.
# The guard keeps this cell safe to re-run: once `output` has been parsed it is no
# longer a string, and passing it to json.loads again would raise a TypeError.
if isinstance(output, str):
    output = json.loads(output)
output

{'predictions': [{'predicted_label': '1',
   'probability': 0.9447951912879944,
   'probabilities': '[0.055204808712005615, 0.9447951912879944]',
   'labels': "['0', '1']"}]}

In [None]:
# Map the predicted class ("0" / "1") of the first prediction to a readable answer.
first_prediction = output['predictions'][0]
label_index = int(first_prediction['predicted_label'])
pred_label = ["No", "Yes"][label_index]
pred_label

'Yes'

# Gradio Implementation

In [None]:
!pip -q install gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.2/57.2 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.4/320.4 kB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.8/94.8 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.2/11.2 MB[0m [31m56.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.2/73.2 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import gradio
import gradio as gr

In [None]:

# UI - Input components
# One widget per model feature; each widget supplies one argument of the prediction callback.
in_Title = gradio.Radio(
    choices=["Mrs", "Master", "Miss", "Mr", "Other"],
    type="value",
    label='Title',
)
in_Pclass = gradio.Radio(
    choices=[1, 2, 3],
    type="value",
    label='Passenger class',
)
in_Sex = gradio.Radio(
    choices=["male", "female"],
    type="value",
    label='Gender',
)
in_Age = gradio.Textbox(
    lines=1,
    placeholder=None,
    value="14",
    label='Age of the passenger in yrs',
)
in_Fare = gradio.Textbox(
    lines=1,
    placeholder=None,
    value="29",
    label='Passenger fare',
)
in_Embarked = gradio.Radio(
    choices=["Southampton", "Cherbourg", "Queenstown"],
    type="value",
    label='Port of Embarkation',
)
in_FamilySize = gradio.Textbox(
    lines=1,
    placeholder=None,
    value="0",
    label='Family Size',
)
# type="index" passes the position of the selected choice (0 for "No", 1 for "Yes").
in_Has_cabin = gradio.Radio(
    choices=["No", "Yes"],
    type="index",
    label='Has Cabin',
)


# UI - Output component
out_label = gradio.Textbox(
    type="text",
    label='Survived',
    elem_id="out_textbox",
)


# Label prediction function
def _numeric_text(raw_value, field_label):
    """Return ``raw_value`` as stripped text after checking it is a finite number.

    Raises ``gradio.Error`` (shown to the user in the UI) when the value is
    missing, blank, or not numeric.
    """
    import math

    text = "" if raw_value is None else str(raw_value).strip()
    try:
        is_finite = math.isfinite(float(text))
    except ValueError:
        is_finite = False
    if not is_finite:
        raise gradio.Error(f"{field_label} must be a number, got {text!r}.")
    return text


def get_output_label(in_Pclass, in_Sex, in_Age, in_Fare, in_Embarked, in_FamilySize, in_Has_cabin, in_Title):
    """Predict survival for one passenger by calling the SageMaker endpoint.

    The inputs are validated, written as a single header-less CSV row and sent
    to the ``canvas-titanic-deployment`` endpoint through the module-level
    ``client``.

    Args:
        in_Pclass: Selected passenger class.
        in_Sex: Selected gender.
        in_Age: Age as entered in the text box; must be numeric.
        in_Fare: Fare as entered in the text box; must be numeric.
        in_Embarked: Full port name; only its first letter is sent.
        in_FamilySize: Family size as entered in the text box; must be numeric.
        in_Has_cabin: Index of the "Has Cabin" choice (0 = "No", 1 = "Yes").
        in_Title: Selected title.

    Returns:
        "Yes" or "No", taken from the ``predicted_label`` of the first prediction.

    Raises:
        gradio.Error: If a choice was left unselected or a numeric field is invalid.
    """
    # An unselected Radio arrives as None; report it clearly instead of failing
    # on `in_Embarked[0]` or sending an empty field to the endpoint.
    selections = {
        "Passenger class": in_Pclass,
        "Gender": in_Sex,
        "Port of Embarkation": in_Embarked,
        "Has Cabin": in_Has_cabin,
        "Title": in_Title,
    }
    missing = [label for label, value in selections.items() if value in (None, "")]
    if missing:
        raise gradio.Error("Please select a value for: " + ", ".join(missing))

    age = _numeric_text(in_Age, "Age")
    fare = _numeric_text(in_Fare, "Fare")
    family_size = _numeric_text(in_FamilySize, "Family size")

    # Same feature order as the sample request earlier in the notebook.
    features = [[in_Pclass, in_Sex, age, fare, in_Embarked[0], family_size, in_Has_cabin, in_Title]]
    body = pd.DataFrame(features).to_csv(header=False, index=False).encode("utf-8")

    response = client.invoke_endpoint(
        EndpointName="canvas-titanic-deployment",
        ContentType="text/csv",
        Body=body,
        Accept="application/json"
    )

    output = json.loads(response['Body'].read().decode('utf-8'))
    pred_label = ["No", "Yes"][int(output['predictions'][0]['predicted_label'])]

    return pred_label


# Create Gradio interface object
# The component order must match the parameter order of get_output_label.
input_components = [
    in_Pclass,
    in_Sex,
    in_Age,
    in_Fare,
    in_Embarked,
    in_FamilySize,
    in_Has_cabin,
    in_Title,
]

iface = gradio.Interface(
    fn=get_output_label,
    inputs=input_components,
    outputs=[out_label],
    title="Titanic Survival Prediction API ⛴",
    description="Predictive model that answers the question: “What sort of people were more likely to survive?”",
    flagging_mode='never',
)

# Launch gradio interface
# - set debug=True for debugging.
# - set server_name = "0.0.0.0" and server_port = 7860 while launching it inside a container.
# - default server_name = "127.0.0.1", and server_port = 7860
iface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d6c0661d0fcc34c800.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


