### Deploy meta-textgeneration-llama-2-7b-f

In [None]:
from sagemaker.jumpstart.model import JumpStartModel

In [None]:
# Role value handed to JumpStartModel(role=...) further down. The empty string
# is a placeholder: presumably it must be replaced with the ARN of a SageMaker
# execution role before the notebook is run — confirm against the SDK docs.
role = ""

In [None]:
# JumpStart identifier of the model to deploy.
model_id = "meta-textgeneration-llama-2-7b-f"
# "*" is passed through as the version (presumably "latest available" —
# confirm against the JumpStart documentation).
model_version = "*"

In [None]:
# Build the JumpStart model handle from the model id, version and role
# defined in the cells above.
model = JumpStartModel(
    model_id=model_id,
    model_version=model_version,
    role=role
)

In [None]:
# Deploy the model with its default settings; the returned predictor is what
# the later cells call `.predict(...)` on to send inference requests.
predictor = model.deploy()

### Simple Question

In [None]:
def print_dialog(payload, response):
    dialog = payload["inputs"][0]
    for msg in dialog:
        print(f"{msg['role'].capitalize()}: {msg['content']}\n")
    print(f"> {response[0]['generation']['role'].capitalize()}: {response[0]['generation']['content']}")
    print("\n==================================\n")

In [None]:
%%time
# Single-turn request: "inputs" is a list holding one dialog, and that dialog
# is a list of role/content messages; "parameters" carries the generation
# settings for this call.
payload = {
    "inputs": [[
        {"role": "user", "content": "what is the recipe of mayonnaise?"},
    ]],
    "parameters": {"max_new_tokens": 512, "top_p": 0.9, "temperature": 0.6}
}
# 'accept_eula=true' is sent as a custom attribute with the request
# (presumably required for the endpoint to accept the Llama 2 license — confirm).
response = predictor.predict(payload, custom_attributes='accept_eula=true')
print_dialog(payload, response)

### Chat UI with Gradio

In [None]:
!pip install gradio

In [None]:
import gradio as gr 

In [None]:
# Generation settings passed as "parameters" with each chat request to the LLM.
parameters = {
    "max_new_tokens": 256,
    "top_p": 0.9,
    "temperature": 0.6,
}

In [None]:
# Minimal chat UI: a chatbot pane, a text box, and Submit / Clear buttons.
with gr.Blocks() as demo:
    gr.Markdown("## Chat with Amazon SageMaker")
    with gr.Column():
        chatbot = gr.Chatbot()
        with gr.Row():
            with gr.Column():
                message = gr.Textbox(label="Chat Message Box", placeholder="Chat Message Box", show_label=False)
            with gr.Column():
                with gr.Row():
                    submit = gr.Button("Submit")
                    clear = gr.Button("Clear")

    def respond(message, chat_history):
        """Send the conversation so far plus the new message to the endpoint.

        Args:
            message: Text the user just typed into the message box.
            chat_history: Current chatbot value, treated as a sequence of
                ``(user_text, assistant_text)`` pairs; ``None`` is treated as
                an empty history.

        Returns:
            A tuple ``("", updated_history)``: the empty string clears the
            text box and the history gains the new ``(message, reply)`` pair.
        """
        # Work on a copy so the value handed in by Gradio is never mutated,
        # and so a None history (e.g. after "Clear") is handled.
        history = list(chat_history or [])

        # Nothing to ask: do not call the endpoint for a blank message.
        if not message or not message.strip():
            return "", history

        # Convert the chat history into one dialog of role/content messages so
        # the model sees earlier turns, not just the latest message.
        # Only complete pairs are replayed, which keeps user/assistant turns
        # strictly alternating.
        # NOTE(review): the whole history is sent on every call; very long
        # conversations may exceed the model's context window — confirm limits.
        dialog = []
        for user_text, assistant_text in history:
            if user_text and assistant_text:
                dialog.append({"role": "user", "content": user_text})
                dialog.append({"role": "assistant", "content": assistant_text})
        dialog.append({"role": "user", "content": message})

        # Send the request to the endpoint ("inputs" is a list of dialogs).
        llm_response = predictor.predict(
            {"inputs": [dialog], "parameters": parameters},
            custom_attributes='accept_eula=true',
        )
        # The reply text lives under the first result's "generation" entry.
        parsed_response = llm_response[0]['generation']['content']

        history.append((message, parsed_response))
        return "", history

    # Submit sends the message; Clear resets the chatbot pane.
    submit.click(respond, [message, chatbot], [message, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

# NOTE(review): share=True asks Gradio for a shareable link; presumably anyone
# with that URL can query the endpoint — confirm this exposure is intended.
demo.launch(share=True)