# Where to use this app
1. Create a SageMaker Studio Domain if you don't have one
2. Open SageMaker Studio under the user you plan to launch this applicatio
3. Either upload this notebook, or clone the repository: [repo](https://github.com/chaeAclark/literate-eureka.git)
4. Open the notebook `LLM in a box.ipynb`
5. You can run the entire notebook by clicking Run > Run All Cells
6. Alternatively, you can run the cells individually

# Imports

#### Update SageMaker

In [2]:
!pip install -U sagemaker --quiet

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


## Import Libraries

In [3]:
import os
import json
import boto3
import pickle
import sagemaker as sm

from sagemaker import image_uris
from sagemaker import model_uris
from sagemaker import script_uris
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker.utils import name_from_base

from sagemaker.jumpstart.notebook_utils import list_jumpstart_models

### Setup a Temporary Directory 

In [4]:
def get_sagemaker_session(local_download_dir) -> sm.Session:
    sagemaker_client = boto3.client(service_name="sagemaker", region_name=boto3.Session().region_name)
    session_settings = sm.session_settings.SessionSettings(local_download_dir=local_download_dir)
    session = sm.session.Session(sagemaker_client=sagemaker_client, settings=session_settings)
    return session

In [5]:
model_path = './download_dir'
if not os.path.exists(model_path):
    os.mkdir(model_path)

### SageMaker Configuration Parameters

In [6]:
role               = sm.get_execution_role()
sagemaker_session  = get_sagemaker_session(model_path) # sm.session.Session()
region             = sagemaker_session._region_name
sagemaker_metadata = json.load(open('/opt/ml/metadata/resource-metadata.json', 'r'))
domain_id          = sagemaker_metadata['DomainId']
resource_name      = sagemaker_metadata['ResourceName']

# Model

## Select Model

In [7]:
filter_value = "task == text2text"
text_generation_models = list_jumpstart_models(filter=filter_value)
print('Available text2text Models:\n--------------------------------')
_ = [print(m) for m in text_generation_models]

Available text2text Models:
--------------------------------
huggingface-text2text-bart4csc-base-chinese
huggingface-text2text-bigscience-t0pp
huggingface-text2text-bigscience-t0pp-bnb-int8
huggingface-text2text-bigscience-t0pp-fp16
huggingface-text2text-flan-t5-base
huggingface-text2text-flan-t5-base-samsum
huggingface-text2text-flan-t5-large
huggingface-text2text-flan-t5-small
huggingface-text2text-flan-t5-xl
huggingface-text2text-flan-t5-xxl
huggingface-text2text-flan-t5-xxl-bnb-int8
huggingface-text2text-flan-t5-xxl-fp16
huggingface-text2text-flan-ul2-bf16
huggingface-text2text-pegasus-paraphrase
huggingface-text2text-qcpg-sentences
huggingface-text2text-t5-one-line-summary


In [8]:
model_id = text_generation_models[9]
model_version = '*'
#model_id = 'huggingface-textgeneration1-bloomz-7b1-fp16'
print(f'The model that will be deployed is: {model_id}')

The model that will be deployed is: huggingface-text2text-flan-t5-xxl


## Deploy Model

In [9]:
endpoint_name = name_from_base(f"LLM-Context-in-a-box-{model_id}")
print(f'Endpoint: {endpoint_name}')

Endpoint: LLM-Context-in-a-box-huggingface-text2t-2023-05-11-18-48-58-636


### Collect the containers required to deploy the model

In [10]:
instance_type = "ml.g5.12xlarge"

image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    image_scope="inference",
    model_id=model_id,
    model_version=model_version,
    instance_type=instance_type,
)

model_data = model_uris.retrieve(
    model_id=model_id,
    model_version=model_version,
    model_scope="inference"
)

### Define Model

In [11]:
model = Model(
    image_uri=image_uri,
    model_data=model_data,
    role=role,
    predictor_cls=Predictor,
    name=endpoint_name,
    sagemaker_session=sagemaker_session,
    env={"TS_DEFAULT_WORKERS_PER_MODEL": "1"}
)

### Deploy

In [12]:
# deploy the Model. Note that we need to pass Predictor class when we deploy model through Model class,
# for being able to run inference through the sagemaker API.
model_predictor = model.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    predictor_cls=Predictor,
    endpoint_name=endpoint_name,
)

--------------!

#### Test that the model is correctly deployed

In [25]:
sagemaker = boto3.client('sagemaker-runtime', region_name=region)
input_question = 'Tell me the steps to make a pizza:'
payload = {
    "text_inputs": input_question,
    "max_length": 50,
    "max_time": 50,
    "num_return_sequences": 1,
    "top_k": 50,
    "top_p": 0.95,
    "do_sample": True,
}


response = sagemaker.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Body=json.dumps(payload).encode('utf-8')
)
output_answer = json.loads(response['Body'].read().decode('utf-8'))["generated_texts"][0]
print(output_answer)

Place your pizza dough on a flat baking pan. Top with sauce and cheese. Place your toppings on the pizza and cover your pizza with another layer of the pizza sauce and cheese. Top with mozzarella cheese and bake it in the oven.


### Export any variables needed for the application

In [14]:
application_metadata = {'endpoint_name':endpoint_name, 'region':region}
json.dump(application_metadata, open('application_metadata.json', 'w'))

# Streamlit UI for PDF upload and summarize

In [15]:
%%writefile app_textract.py
import os
import io
import ai21
import time
import json
import boto3
import pickle
import streamlit as st
st.set_page_config(layout="wide")

from PIL import Image
from io import BytesIO
from pdf2image import convert_from_bytes

application_metadata = json.load(open('application_metadata.json', 'r'))
endpoint_name = application_metadata['endpoint_name']
s3_client = boto3.client('s3')
textract_client = boto3.client('textract', region_name='us-east-1')
sagemaker = boto3.client('sagemaker-runtime', region_name=application_metadata['region'])

s3_bucket = 'sagemaker-studio-nh1d3ueatt'
filepath = 'textract/doc.pdf'

def display_image(pdf_bytes):
    images = convert_from_bytes(pdf_bytes)
    image_page_1 = images[0].convert('RGB')
    st.image(image_page_1)


def extract_text(s3_bucket, filepath):
    response = textract_client.start_document_text_detection(DocumentLocation={'S3Object': {'Bucket':s3_bucket, 'Name':filepath}})
    text = textract_client.get_document_text_detection(JobId=response['JobId'])
    i = 0
    while text['JobStatus'] != 'SUCCEEDED':
        time.sleep(5)
        i += 1
        text = textract_client.get_document_text_detection(JobId=response['JobId'])
        if i >= 10:
            text = ''
            break
    text = '\n'.join([t['Text'] for t in text['Blocks'] if t['BlockType']=='LINE'])
    return text


def summarize_text(text):
    response = ai21.Summarize.execute(
        source=text,
        sourceType="TEXT",
        sm_endpoint='summarize'
    )
    st.write(response.summary)


def main():
    st.title('Ask Questions of your Document')
    description = """Welcome to our PDF extraction and query answering application. With this app, you can upload a PDF document that is processed using Amazon Textract. 
    After the text is extracted, you can use the attached LLM to ask specific 
    queries and get a response in natural language."""
    st.write(description)
    text = ''
    col1, col2 = st.columns(2)
    
    # The Sidebar holds information and frequently asked questions
    with st.sidebar:
        st.write('## How to use:')
        st.write('First upload a PDF that you would like to analyze. After uploading, an image of the first page will be displayed and behind the scenes, Amazon Textract is used to extract any detected text. This context is passed to an LLM when making a specific query.')
        st.write('---')
        st.write('## Model Parameters:')
        max_len = st.slider('Max Length', min_value=50, max_value=250, value=150, step=10)
        top_p = st.slider('Top p', min_value=0., max_value=1., value=1., step=.01)
        st.write('---')
        st.write('## FAQ:')
        st.write(f'**1. Where is the model stored?**\n\nThere are two models in use that are run on SageMaker endpoints within your account:\n\nSummarization: `summarize` and\n\nQnA: `{endpoint_name}`')
        st.write(f'**2. Where is my data stored?**\n\nAny data you upload is stored into your S3 bucket: `{s3_bucket}`. Currently the queries you make to the endpoint are not stored, but you can enaable this by capturing data from your endpoint.')
        st.write('---')
    
    # First column handles loading the PDF and displaying the first image
    with col1:
        file = st.file_uploader('Upload a PDF file', type=['pdf'])
        if file is not None:
            pdf_bytes = file.read()
            with open('doc.pdf', 'wb') as fp:
                fp.write(pdf_bytes)
            with open('doc.pdf', 'rb') as fp:
                s3_client.upload_fileobj(fp, s3_bucket, filepath)
            display_image(pdf_bytes)
            time.sleep(2)
            text = text + extract_text(s3_bucket, filepath)
    
    # The second column allows users to query the document and communicates with teh SageMaker endpoint
    with col2:
        if file is not None:
            st.write('**Summary:**')
            st.write(summarize_text(text))
        input_question = st.text_input('**Please ask a question of a loaded document:**', '')
        if st.button('Send Question') and len(input_question) > 3:
            payload = {
                "text_inputs": text + '\n' + input_question,
                "max_length": max_len,
                "max_time": 60,
                "num_return_sequences": 1,
                "top_k": 50,
                "top_p": top_p,
                "do_sample": True,
            }
            if True:
                response = sagemaker.invoke_endpoint(
                    EndpointName=endpoint_name,
                    ContentType='application/json',
                    Body=json.dumps(payload).encode('utf-8')
                )
                output_answer = json.loads(response['Body'].read().decode('utf-8'))["generated_texts"][0]
            else:
                output_answer = 'I am not currently connected to an endpoint'
            st.text_area('Response:', output_answer)

if __name__ == '__main__':
    main()

Overwriting app_textract.py


# Start App

### Run Streamlit
To run the application:
1. Select File > New > Terminal
2. In the terminal, use the command: `pip install streamlit boto3`
3. In the terminal, use the command: `streamlit run app.py --server.runOnSave true`
4. If this is successful, you will be able to interact with the app by using the web address below

#### Display Link to Application

In [16]:
print(f'http://{domain_id}.studio.{region}.sagemaker.aws/jupyter/default/proxy/8501/')

http://d-qxdwe39zkab0.studio.us-east-1.sagemaker.aws/jupyter/default/proxy/8501/
