# LLMs in a Box
1. Create a SageMaker Studio Domain if you don't have one
2. Open SageMaker Studio under the user you plan to launch this applicatio
3. Either upload this notebook, or clone the repository: [repo](https://github.com/chaeAclark/literate-eureka.git)
4. Open the notebook `LLM in a box.ipynb`
5. You can run the entire notebook by clicking Run > Run All Cells
6. Alternatively, you can run the cells individually
7. NOTE: To display the image of an uploaded document, you must have poppler utils installed
    1. `sudo yum install poppler-utils`

### Terminal Installation
You need to ensure you have installed all needed packages in the terminal you are using.
1. boto3
2. streamlit
3. pdf2image
4. ai21[SM]
5. Pillow
6. pandas

In [None]:
%%writefile requirements.txt
boto3
streamlit
pdf2image
ai21[SM]
Pillow
pandas

# Imports

#### Update SageMaker

In [2]:
!pip install -U sagemaker --quiet

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


# Imports

### General Libraries

In [None]:
import os
import json
import boto3

### SageMaker Libraries

In [7]:
import os
import json
import boto3
import pickle
import sagemaker as sm

from sagemaker import image_uris
from sagemaker import model_uris
from sagemaker import script_uris
from sagemaker.model import Model
from sagemaker.predictor import Predictor
from sagemaker.utils import name_from_base

from sagemaker.jumpstart.notebook_utils import list_jumpstart_models

### Deploy and Directory Setup

In [8]:
def get_sagemaker_session(local_download_dir) -> sm.Session:
    """
    # Create a SageMaker Session
    # This function is used to create a SageMaker Session object.
    # The SageMaker Session object is used to create a SageMaker Endpoint,
    # SageMaker Model, and SageMaker Endpoint Config.
    """
    sagemaker_client = boto3.client(service_name="sagemaker", region_name=boto3.Session().region_name)
    session_settings = sm.session_settings.SessionSettings(local_download_dir=local_download_dir)
    session = sm.session.Session(sagemaker_client=sagemaker_client, settings=session_settings)
    return session

In [9]:
model_path = './download_dir'
if not os.path.exists(model_path):
    os.mkdir(model_path)

### SageMaker Configuration Parameters

In [10]:
role               = sm.get_execution_role()
sagemaker_session  = get_sagemaker_session(model_path) # sm.session.Session()
region             = sagemaker_session._region_name

# These are needed to show where the streamlit app is hosted
sagemaker_metadata = json.load(open('/opt/ml/metadata/resource-metadata.json', 'r'))
domain_id          = sagemaker_metadata['DomainId']
resource_name      = sagemaker_metadata['ResourceName']

### Boto Configurations

In [11]:
bucket     = 'sagemaker-studio-nh1d3ueatt'
prefix     = 'textract'

# Model
The following section will deploy the JumpStart model `flan-###`. There are additional steps required if launching 3rd-party proprietary models. These steps are detailed in another section.

### Select Model

In [7]:
filter_value = "task == text2text"
text_generation_models = list_jumpstart_models(filter=filter_value)
print('Available text2text Models:\n--------------------------------')
_ = [print(m) for m in text_generation_models]

Available text2text Models:
--------------------------------
huggingface-text2text-bart4csc-base-chinese
huggingface-text2text-bigscience-t0pp
huggingface-text2text-bigscience-t0pp-bnb-int8
huggingface-text2text-bigscience-t0pp-fp16
huggingface-text2text-flan-t5-base
huggingface-text2text-flan-t5-base-samsum
huggingface-text2text-flan-t5-large
huggingface-text2text-flan-t5-small
huggingface-text2text-flan-t5-xl
huggingface-text2text-flan-t5-xxl
huggingface-text2text-flan-t5-xxl-bnb-int8
huggingface-text2text-flan-t5-xxl-fp16
huggingface-text2text-flan-ul2-bf16
huggingface-text2text-pegasus-paraphrase
huggingface-text2text-qcpg-sentences
huggingface-text2text-t5-one-line-summary


In [8]:
model_id = text_generation_models[7]
model_version = '*'
print(f'The model that will be deployed is: {model_id}')

The model that will be deployed is: huggingface-text2text-flan-t5-xxl


### Deploy

In [9]:
endpoint_name = name_from_base(f"LLM-Context-in-a-box-{model_id}")
print(f'Endpoint: {endpoint_name}')

Endpoint: LLM-Context-in-a-box-huggingface-text2t-2023-05-11-18-48-58-636


#### Collect Model Containers

In [10]:
instance_type = "ml.g5.2xlarge"

image_uri = image_uris.retrieve(
    region=None,
    framework=None,
    image_scope="inference",
    model_id=model_id,
    model_version=model_version,
    instance_type=instance_type,
)

model_data = model_uris.retrieve(
    model_id=model_id,
    model_version=model_version,
    model_scope="inference"
)

print(f'The image URI is:  {image_uri}')
print(f'The model data is: {model_data}')

#### Define Model

In [11]:
model = Model(
    image_uri=image_uri,
    model_data=model_data,
    role=role,
    predictor_cls=Predictor,
    name=endpoint_name,
    sagemaker_session=sagemaker_session,
    env={"TS_DEFAULT_WORKERS_PER_MODEL": "1"}
)

#### Deploy Model

In [12]:
model_predictor = model.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    predictor_cls=Predictor,
    endpoint_name=endpoint_name,
)

--------------!

#### Test that the model is correctly deployed

In [25]:
sagemaker = boto3.client('sagemaker-runtime', region_name=region)
input_question = 'Tell me the steps to make a pizza:'
payload = {
    "text_inputs": input_question,
    "max_length": 50,
    "max_time": 50,
    "num_return_sequences": 1,
    "top_k": 50,
    "top_p": 0.95,
    "do_sample": True,
}


response = sagemaker.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Body=json.dumps(payload).encode('utf-8')
)
output_answer = json.loads(response['Body'].read().decode('utf-8'))["generated_texts"][0]
print(output_answer)

Place your pizza dough on a flat baking pan. Top with sauce and cheese. Place your toppings on the pizza and cover your pizza with another layer of the pizza sauce and cheese. Top with mozzarella cheese and bake it in the oven.


## How to deploy 3rd-party Foundation Models
1. Gain access to the foundation models
    1. Go to the SageMaker Console
    2. There will be a tab for JumpStart > Foundation Models
    3. You must request access if you do not already have it
2. Select the Foundation you would like to deploy
3. Click `Subscribe` in the top-right corner
4. After completing, this will allow you to open a notebook that lets you deploy the model
5. Open the notebook
6. You run this notebook to deploy the model, the caveat is that you must have access to any instance you choose to run.
    1. For AI21 Summarization model, you can use something like: ml.g4dn.12xlarge
    2. For AI21 Grande Instruct, you can use: ml.g5.24xlarge
    3. For AI21 Jumbo Instruct, you can use: ml.g5.48xlarge
    4. These were tested to work as of 2023-05-16
    5. Collect these endpoint names and use them in the application_metadata JSON

# Streamlit UI

### Record any parameters that need to be passed to the Streamlit app
App Metadata Structure:
#### application_metadata
 - models: a dictionary that contains the model display name, SageMaker endpoint name, and the model type (Currently 'sm' or 'ai21')
   - name
   - endpoint
   - type
 - summary_model: the summary model endpoint name
 - region: the region (us-east-1 etc)
 - role: the permissions for the application. it should include (SageMaker, Textract, and Kendra access)
 - datastore: a dictionary that contains the bucket and folder prefix used to store document data
   - bucket
   - prefix
 - kendra: a dictionary that contains information on the Kendra index to be used when searching
   - index_id
   - index_name
   - index_description

In [17]:
application_metadata = {
    'models':[
        {'name':'FLAN-Small', 'endpoint':'LLM-in-a-box-huggingface-text2text-flan-2023-05-18-19-23-11-479', 'type':'sm'},
        {'name':'Super Fancy Model', 'endpoint':'', 'type':'ai21'}],
    'summary_model':'sumsum',
    'region':region,
    'role':role,
    'datastore':
        {'bucket':bucket, 'prefix':prefix},
}
json.dump(application_metadata, open('application_metadata_doc.json', 'w'))

### Write the Streamlit app

In [3]:
%%writefile app_doc.py
import os
import time
import ai21
import json
import boto3
import pandas as pd
import streamlit as st

from PIL import Image
from io import BytesIO
from collections import deque
from datetime import datetime
from pdf2image import convert_from_bytes
st.set_page_config(layout="wide")

APP_MD    = json.load(open('application_metadata_doc.json', 'r'))
MODELS    = {d['name']: d['endpoint'] for d in APP_MD['models']}
MODEL_SUM = APP_MD['summary_model']
REGION    = APP_MD['region']
BUCKET    = APP_MD['datastore']['bucket']
PREFIX    = APP_MD['datastore']['prefix']

S3            = boto3.client('s3', region_name=REGION)
TEXTRACT      = boto3.client('textract', region_name=REGION)
SAGEMAKER     = boto3.client('sagemaker-runtime', region_name=REGION)
CHAT_FILENAME = 'chat.csv'


def query_endpoint(endpoint_name, payload):
    if 'huggingface' in endpoint_name:
        response = SAGEMAKER.invoke_endpoint(
            EndpointName=endpoint_name,
            ContentType='application/json',
            Body=json.dumps(payload).encode('utf-8')
        )
        output_answer = json.loads(response['Body'].read().decode('utf-8'))["generated_texts"][0]
    else:
        response = ai21.Completion.execute(
            sm_endpoint=endpoint_name,
            prompt=payload['text_inputs'],
            maxTokens=payload['max_length'],
            temperature=payload['temperature'],
            stopSequences=['##'],
            numResults=1
        )
        output_answer = response['completions'][0]['data']['text']
    return str(output_answer)


def extract_text(bucket, filepath):
    response = TEXTRACT.start_document_text_detection(DocumentLocation={'S3Object': {'Bucket':bucket, 'Name':filepath}})
    text = TEXTRACT.get_document_text_detection(JobId=response['JobId'])
    i = 0
    while text['JobStatus'] != 'SUCCEEDED':
        time.sleep(5)
        i += 1
        text = TEXTRACT.get_document_text_detection(JobId=response['JobId'])
        if i >= 10:
            text = ''
            break
    text = '\n'.join([t['Text'] for t in text['Blocks'] if t['BlockType']=='LINE'])
    return text


def load_document(file_bytes):
    try:
        images = convert_from_bytes(file_bytes)
        image_page_1 = images[0].convert('RGB')
        st.image(image_page_1)
    except:
        st.write('Cannot display image. Ensure that you have poppler-utils installed.')
    
    with open('doc.pdf', 'wb') as fp:
        fp.write(file_bytes)
    with open('doc.pdf', 'rb') as fp:
        S3.upload_fileobj(fp, BUCKET, PREFIX+'/doc.pdf')
    time.sleep(2)
    text = extract_text(BUCKET, PREFIX+'/doc.pdf')
    return text


def summarize_context(context):
    try:
        response = ai21.Summarize.execute(
            source=context,
            sourceType="TEXT",
            sm_endpoint=MODEL_SUM
        )
        return response.summary
    except:
        return 'No summarization endpoint connected'


def action_doc(params):
    st.title('Ask Questions of your Document')
    col1, col2 = st.columns(2)
    with col1:
        file = st.file_uploader('Upload a PDF file', type=['pdf'])
        if file is not None:
            context = load_document(file.read())
    with col2:
        if file is not None:
            st.write('**Summary:**')
            st.write(summarize_context(context))
        input_question = st.text_input('**Please ask a question of a loaded document:**', '')
        if st.button('Send Question') and len(input_question) > 3:
            payload = {
                "text_inputs": context + '##\n' + input_question,
                "max_length": params['max_len'],
                "max_time": 50,
                "num_return_sequences": 1,
                "top_k": 50,
                "temperature":params['temp'],
                "top_p": params['top_p'],
                "do_sample": True,
            }
            output_answer = query_endpoint(params['endpoint'], payload)
            st.text_area('Response:', output_answer)


def app_sidebar():
    with st.sidebar:
        st.write('## How to use:')
        description = """Welcome to our LLM tool extraction and query answering application. With this app, you can aske general question, 
        ask questions of a specific document, or intelligently search an internal document corpus. By selection the action you would like to perform,
         you can ask general questions, or questions of your document. Additionally, you can select the model you use, to perform real-world tests to determine model strengths and weakneses."""
        st.write(description)
        st.write('---')
        st.write('### User Preference')
        action_name = st.selectbox('Choose Activity', options=['Document Query'])
        model_name = st.selectbox('Select Model', options=MODELS.keys())
        max_len = st.slider('Max Length', min_value=50, max_value=500, value=150, step=10)
        top_p = st.slider('Top p', min_value=0., max_value=1., value=1., step=.01)
        temp = st.slider('Temperature', min_value=0.01, max_value=1., value=1., step=.01)
        st.write('---')
        st.write('## FAQ')
        st.write(f'**1. Where is the model stored?** \n\nThe current model is: `{model_name}` and is running within your account.')
        st.write(f'**2. Where is my data stored?**\n\nAny data you upload is stored into your S3 bucket: `{BUCKET}/{PREFIX}/`. Currently the queries you make to the endpoint are not stored, but you can enable this by capturing data from your endpoint.')
        st.write('---')
        params = {'action_name':action_name,'endpoint':MODELS[model_name], 'max_len':max_len, 'top_p':top_p, 'temp':temp, 'model_name':model_name}
        return params


def main():
    params = app_sidebar()
    endpoint=params['endpoint']
    if params['action_name'] == 'Document Query':
        action_doc(params)
    else:
        raise ValueError('Invalid action name.')


if __name__ == '__main__':
    main()


Overwriting app_doc.py


# Start App

### Run Streamlit
To run the application:
1. Select File > New > Terminal
2. In the terminal, use the command: `streamlit run app_doc.py --server.runOnSave true`
   1. Note: ensure you have installed all required packages
3. If this is successful, you will be able to interact with the app by using the web address below
4. An important thing to note is that when you run the above command, you should see an output similar to below.
5. The port thats  displayed is the same port that MUST be used after the `proxy` folder below.
`
You can now view your Streamlit app in your browser.

  Network URL: http://###.###.###.###:8501\
  External URL: http://###.###.###.###:8501



#### Display Link to Application

In [16]:
print(f'http://{domain_id}.studio.{region}.sagemaker.aws/jupyter/default/proxy/8501/')

http://d-qxdwe39zkab0.studio.us-east-1.sagemaker.aws/jupyter/default/proxy/8501/
