# Imports

#### Update SageMaker

In [2]:
!pip install -U sagemaker --quiet

[notice] A new release of pip available: 22.3.1 -> 23.1.2
[notice] To update, run: pip install --upgrade pip


## Import Libraries

In [45]:
import json
import boto3
import pickle
import sagemaker as sm

from sagemaker import image_uris
from sagemaker import model_uris
from sagemaker import script_uris
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker.utils import name_from_base

from sagemaker.jumpstart.notebook_utils import list_jumpstart_models

### SageMaker Configuration Parameters

In [22]:
# Execution role and session shared by the SageMaker calls in the rest of the notebook.
role               = sm.get_execution_role()
sagemaker_session  = sm.session.Session()
# Read the region through the public accessor instead of the private `_region_name` attribute.
region             = sagemaker_session.boto_region_name

# Resource metadata file (presumably provided by SageMaker Studio -- confirm when running elsewhere).
# The context manager closes the file handle as soon as the JSON has been parsed.
with open('/opt/ml/metadata/resource-metadata.json', 'r') as metadata_file:
    sagemaker_metadata = json.load(metadata_file)
domain_id          = sagemaker_metadata['DomainId']
resource_name      = sagemaker_metadata['ResourceName']

# Model

## Select Model

In [7]:
# Ask JumpStart for every model whose task is text2text and print one id per line.
filter_value = "task == text2text"
text_generation_models = list_jumpstart_models(filter=filter_value)

print('Available text2text Models:\n--------------------------------')
for available_model_id in text_generation_models:
    print(available_model_id)

Available text2text Models:
--------------------------------
huggingface-text2text-bart4csc-base-chinese
huggingface-text2text-bigscience-t0pp
huggingface-text2text-bigscience-t0pp-bnb-int8
huggingface-text2text-bigscience-t0pp-fp16
huggingface-text2text-flan-t5-base
huggingface-text2text-flan-t5-base-samsum
huggingface-text2text-flan-t5-large
huggingface-text2text-flan-t5-small
huggingface-text2text-flan-t5-xl
huggingface-text2text-flan-t5-xxl
huggingface-text2text-flan-t5-xxl-bnb-int8
huggingface-text2text-flan-t5-xxl-fp16
huggingface-text2text-flan-ul2-bf16
huggingface-text2text-pegasus-paraphrase
huggingface-text2text-qcpg-sentences
huggingface-text2text-t5-one-line-summary


In [23]:
# Select the model by its JumpStart id rather than by list position: the catalogue
# changes over time, so a fixed index can silently start pointing at a different model.
model_id = 'huggingface-text2text-flan-t5-small'
if model_id not in text_generation_models:
    raise ValueError(f'{model_id} is not among the available text2text models')

# Wildcard version; see the JumpStart documentation for how '*' is resolved.
model_version = '*'
print(f'The model that will be deployed is: {model_id}')

The model that will be deployed is: huggingface-text2text-flan-t5-small


## Deploy Model

In [24]:
# Prefix that identifies this demo's endpoint; name_from_base turns it into the final
# endpoint name (the recorded output shows a timestamp suffix being appended).
endpoint_base_name = f"LLM-in-a-box-{model_id}"
endpoint_name = name_from_base(endpoint_base_name)
print(f'Endpoint: {endpoint_name}')

Endpoint: LLM-in-a-box-huggingface-text2text-flan-2023-05-03-18-12-21-023


### Collect the containers required to deploy the model

In [51]:
instance_type = "ml.p2.xlarge"

# The model id and version are common to all three JumpStart lookups below.
jumpstart_model = {"model_id": model_id, "model_version": model_version}

# URI of the model artifact to serve.
model_data = model_uris.retrieve(model_scope="inference", **jumpstart_model)

# URI of the inference script bundle.
source_dir = script_uris.retrieve(script_scope="inference", **jumpstart_model)

# URI of the inference container image for the chosen instance type.
# region and framework are passed as None, exactly as before.
image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    image_scope="inference",
    instance_type=instance_type,
    **jumpstart_model,
)

### Define Model

In [26]:
# Describe the model: container image, inference script bundle and model artifact.
# The model is registered under the same name that the endpoint will use.
model = Model(
    name=endpoint_name,
    role=role,
    image_uri=image_uri,
    model_data=model_data,
    source_dir=source_dir,
    entry_point="inference.py",  # entry point file inside source_dir
    predictor_cls=Predictor,
    sagemaker_session=sagemaker_session,
)

# Deploy the model. The Predictor class is passed again at deploy time so that the
# returned object can run inference through the SageMaker API.
deploy_settings = {
    "initial_instance_count": 1,
    "instance_type": instance_type,
    "predictor_cls": Predictor,
    "endpoint_name": endpoint_name,
}
model_predictor = model.deploy(**deploy_settings)

-----------!

#### Test that the model is correctly deployed

In [32]:
# Smoke test: send one plain-text prompt to the endpoint through the runtime client
# and print the text it generates.
sagemaker = boto3.client('sagemaker-runtime', region_name=region)
input_question = 'The World Cup is '

response = sagemaker.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/x-text",
    Body=input_question.encode('utf-8'),
)

# The response body is a stream: read it once, decode it, then parse the JSON document.
response_text = response['Body'].read().decode('utf-8')
output_answer = json.loads(response_text)["generated_text"]
print(output_answer)

the first time the world cup has been held in the world.


### Export any variables needed for the application

In [48]:
# Persist the values app.py needs to reach the endpoint.
application_metadata = {'endpoint_name': endpoint_name, 'region': region}
# The context manager flushes and closes the file before app.py tries to read it.
with open('application_metadata.pkl', 'wb') as metadata_file:
    pickle.dump(application_metadata, metadata_file)

# Create Question-Answer Interface
This cell writes `app.py`, a Streamlit app whose UI lets users send a question to the deployed model and displays the generated text.

In [50]:
%%writefile app.py
import os
import json
import boto3
import pickle
import streamlit as st
from collections import deque
from datetime import datetime

# 
application_metadata = pickle.load(open('application_metadata.pkl','rb'))
sagemaker = boto3.client('sagemaker-runtime', region_name=application_metadata['region'])
endpoint_name = application_metadata['endpoint_name']

# 
st.title('Amazon SageMaker + Generative AI (QnA)')
st.write('This demo uses a simple UI to provide access to SageMaker JumpStart and Foundation Models. Type in a question you are itersted in learning more about abd see what is returned. The most recent questions are displayed below.')

# 
input_question = st.text_input('**Please ask a question:**', '')

# 
previous_questions_filename = 'previous_questions.txt'
if os.path.isfile(previous_questions_filename):
    with open(previous_questions_filename, 'r') as fp:
        previous_questions = deque([line.strip() for line in fp.readlines()], maxlen=10)
else:
    previous_questions = deque(maxlen=10)

# 
if st.button('Send Question') and len(input_question) > 3:
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
    payload = {
        "text_inputs": input_question,
        "max_length": 150,
        "max_time": 50,
        "num_return_sequences": 1,
        "top_k": 50,
        "top_p": 0.95,
        "do_sample": True,
    }
    
    if True:
        response = sagemaker.invoke_endpoint(
            EndpointName=endpoint_name,
            ContentType='application/json',
            Body=json.dumps(payload).encode('utf-8')
        )
        output_answer = json.loads(response['Body'].read().decode('utf-8'))["generated_texts"][0]
    else:
        output_answer = 'I am not currently connected to an endpoint'
    st.text_area('Response:', output_answer)
    
    # 
    previous_questions.append('**' + timestamp + ':** ' + input_question)
    with open('previous_questions.txt', 'w') as fp:
        fp.write('\n'.join(previous_questions))

# 
st.subheader('Recent Questions:')
_ = [st.write(f'{q}') for i,q in enumerate(reversed(previous_questions))]

Overwriting app.py


# Start App

### Run Streamlit
To run the application:
1. Select File > New > Terminal
2. In the terminal, use the command: `streamlit run app.py --server.runOnSave true`
3. If this is successful, you will be able to interact with the app by using the web address below.

#### Display Link to Application

In [13]:
# Build the Studio proxy link from the configured region instead of a hard-coded
# 'us-east-1', so the link is also correct when the notebook runs in another region.
print(f'http://{domain_id}.studio.{region}.sagemaker.aws/jupyter/default/proxy/8501/')

http://d-qxdwe39zkab0.studio.us-east-1.sagemaker.aws/jupyter/default/proxy/8501/
