# Basic Prompt Engineering

## Step 1. Prepare Large Language Model (LLM) and Embedding Model 
---

In [None]:
%load_ext autoreload
%autoreload 2
import sys
# Make the sibling helper directories importable; later cells import modules
# from them (e.g. `ssm`, `common`, `lib_en` — presumably located there).
sys.path.extend(['../utils', '../templates'])

In [None]:
import time
import sagemaker, boto3, json
import glob
import os
import pandas as pd
import requests
import json
from sagemaker.session import Session
from sagemaker.model import Model
from sagemaker import image_uris, model_uris, script_uris, hyperparameters
from sagemaker.predictor import Predictor
from sagemaker.utils import name_from_base
from typing import Any, Dict, List, Optional
from ssm import parameter_store
from termcolor import colored
from common import get_apigateway_url

# SageMaker session plus the caller's identity ARN and the active boto3 region.
sagemaker_session = Session()
aws_role = sagemaker_session.get_caller_identity_arn()
aws_region = boto3.Session().region_name

# `get_apigateway_url` is a project helper (imported from `common`); it hands
# back the REST API id and the base URL used to build every endpoint below.
RESTAPI_ID, URL = get_apigateway_url()
print(f"RESTAPI_ID =  {RESTAPI_ID!s}")
print(f"API GATEWAY URL =  {URL!s}")

In [None]:
# Which hosted LLM the rest of the notebook talks to. Must be a key of LLM_INFO.
# NOTE(review): "KULLM-12-8B" has an endpoint below but no entry in PARAMS, so
# selecting it fails later at PARAMS[MODEL_NAME].
MODEL_NAME = "FALCON-40B"
#MODEL_NAME = "LLAMA2-7B"

# Strip any trailing slash once so that every route is joined with exactly one
# "/". The LLM routes used to be built as f"{URL}llm/..." and the embedding
# route as f"{URL}/emb/...", so whichever way URL was terminated, one of the
# two came out malformed (missing slash or a double slash).
API_BASE_URL = URL.rstrip("/")

# Model name -> invocation URL. Trailing comments record the hosting instances.
LLM_INFO = {
    "LLAMA2-7B": f"{API_BASE_URL}/llm/llama2_7b",      # g5.12xlarge * 4ea
    "FALCON-40B": f"{API_BASE_URL}/llm/falcon_40b",    # g5.48xlarge * 8ea
    # NOTE(review): route is spelled "kkulm" while the key says "KULLM" —
    # confirm against the deployed API before changing either.
    "KULLM-12-8B": f"{API_BASE_URL}/llm/kkulm_12_8b",  # g5.24xlarge * 4ea
}

LLM_URL = LLM_INFO[MODEL_NAME]
EMB_URL = f"{API_BASE_URL}/emb/gptj_6b"                # g5.4xlarge * 4ea

# Plain JSON in, JSON out for every request sent to the gateway.
HEADERS = {
    'Content-Type': 'application/json',
    'Accept': 'application/json',
}

# Key under which the generated text is read from each response item; the
# Falcon endpoint is the only one handled with "generated_text".
LLM_RESPONSE_KEY = "generated_text" if 'falcon_40b' in LLM_URL else "generation"

print (f'MODEL_NAME: {MODEL_NAME}\nLLM_URL: {LLM_URL}')

In [None]:
# Generation parameters per model, keyed by the same names used for MODEL_NAME.
# Only the two models below have an entry.
PARAMS = {
    "LLAMA2-7B": dict(
        max_new_tokens=128,
        top_p=0.9,
        temperature=0.1,
        return_full_text=False,
    ),
    "FALCON-40B": dict(
        max_new_tokens=128,
        max_length=256,
        top_p=0.95,
        do_sample=True,
        temperature=0.2,
        return_full_text=False,
        include_prompt_in_result=False,
    ),
}

<br>

## Step 2. Ask the LLM a question without RAG
---

### Simple prompt engineering

In [None]:
from lib_en import Llama2ContentHandlerAmazonAPIGateway, FalconContentHandlerAmazonAPIGateway
from langchain.llms import AmazonAPIGateway

# LangChain wrapper around the same endpoint the raw `requests` cells call.
llm = AmazonAPIGateway(api_url=LLM_URL, headers=HEADERS)

# Content-handler class to install for each model name that needs one.
content_handler_by_model = {
    "FALCON-40B": FalconContentHandlerAmazonAPIGateway,
    "LLAMA2-7B": Llama2ContentHandlerAmazonAPIGateway,
    "LLAMA2-13B": Llama2ContentHandlerAmazonAPIGateway,
}
handler_cls = content_handler_by_model.get(MODEL_NAME)
if handler_cls is not None:
    # Any other model name keeps the handler the wrapper was created with.
    llm.content_handler = handler_cls()

# Generation parameters for the selected model; `params` is reused by later cells.
params = PARAMS[MODEL_NAME]
llm.model_kwargs = params

In [None]:
%%time

# Minimal open-ended completion sent straight to the LLM endpoint (no LangChain).
payload = {
    "inputs": "Generative AI is",
    "parameters": params
}
# The timeout keeps the cell from hanging indefinitely on an unreachable endpoint.
response = requests.post(url=LLM_URL, headers=HEADERS, json=payload, timeout=60)
# Surface HTTP failures as such, rather than as a KeyError/TypeError raised
# while indexing an error body below.
response.raise_for_status()
# The body is read as a list whose first item carries the generated text.
print(response.json()[0][LLM_RESPONSE_KEY])

In [None]:
%%time

# Prime the model with the opening of an email so that it continues the text.
# This cell uses its own sampling parameters instead of the shared `params`.
payload = {
    "inputs": """A brief email message of Amazon SageMaker's main features

Hi everyone,

We are announcing""",
    "parameters": {"max_new_tokens": 64, "top_p": 0.9, "temperature": 0.6, "return_full_text": False}
}
# The timeout keeps the cell from hanging indefinitely on an unreachable endpoint.
response = requests.post(url=LLM_URL, headers=HEADERS, json=payload, timeout=60)
# Surface HTTP failures as such, rather than as a KeyError/TypeError raised
# while indexing an error body below.
response.raise_for_status()
# The body is read as a list whose first item carries the generated text.
print(response.json()[0][LLM_RESPONSE_KEY])

### More complex prompts: Play the role of an AWS Solutions Architect (SA)


In [None]:
# Role-play prompt with three slots: {requirements}, {scale} and {features}.
# The wording (including its spelling) is left untouched because it is the
# exact text sent to the model.
architect_prompt_template = """
Play the role of a solution architect experienced with AWS. You are analysing customer requirements to create
well-architected solution architectures that you present to the customer. You are detailled, kind and
focussed. Given the following context

Context:
#System Requirements:
{requirements}
#Scale:
{scale}
#Features:
{features}

Describe an architecture on AWS in technical detail with sentences.
"""

# Values for the first example workload, one per template slot.
architect_kwargs = {
    "requirements": "A website for computer advertising",
    "scale": "Must handle 10k requests per second in peak. Must be globally available. Must be reponsive and fast",
    "features": "Landing page describing our product. About page describing the company.",
}
prompt = architect_prompt_template.format(**architect_kwargs)

In [None]:
# Send the filled-in architect prompt straight to the endpoint with the shared
# generation parameters.
payload = {
    'inputs': prompt,
    'parameters': params
}
# Echo the prompt in green so it is easy to tell apart from the model's answer.
print(colored(prompt, 'green'))
# The timeout keeps the cell from hanging indefinitely on an unreachable endpoint.
response = requests.post(url=LLM_URL, headers=HEADERS, json=payload, timeout=60)
# Surface HTTP failures as such, rather than as a KeyError/TypeError raised
# while indexing an error body below.
response.raise_for_status()
# The body is read as a list whose first item carries the generated text.
print(response.json()[0][LLM_RESPONSE_KEY])

### Applying LangChain

In [None]:
# Same architect prompt as before, this time sent through the LangChain wrapper.
# The generation parameters are (re)assigned first so the cell does not rely on
# an earlier assignment still being in effect.
llm.model_kwargs = params
architect_completion = llm(prompt)
print(architect_completion)

In [None]:
from langchain.prompts import PromptTemplate

# Expose the format string's slots as an explicit parameter interface.
# Note: from here on `prompt` is a PromptTemplate, no longer the plain string
# built in the earlier cell.
prompt = PromptTemplate(
    template=architect_prompt_template,
    input_variables=["requirements", "scale", "features"],
)

# Fill the template for a second, different workload.
final_prompt = prompt.format(
    features="Mobile website, desktop version, javascript",
    scale="Average of 500 requests per minute, scale events up to 3000 requests per second",
    requirements="External facing web application written in Javascript, global deployment",
)

In [None]:
# Show the rendered prompt in green, then the model's completion for it.
print(colored(final_prompt, color='green'))
final_completion = llm(final_prompt)
print(final_completion)

In [None]:
# Two-slot template: how many topics to list, and about what subject.
topic_recommender_prompt = "List {number} topics to write on blog posts about {topic}"

recommend_topic_prompt = PromptTemplate(
    template=topic_recommender_prompt,
    input_variables=['topic', 'number'],
)

# Render the template, echo it in green, then print the model's completion.
final_prompt = recommend_topic_prompt.format(number=5, topic="Machine Learning")
print(colored(final_prompt, color='green'))
topic_completion = llm(final_prompt)
print(topic_completion)

In [None]:
from langchain.output_parsers import CommaSeparatedListOutputParser
# The parser supplies the formatting instructions that get appended to the prompt.
parser = CommaSeparatedListOutputParser()
list_format_instructions = parser.get_format_instructions()

# Same topic prompt as before, plus one extra slot on its own line for those instructions.
parsed_recommender_prompt = topic_recommender_prompt + "\n{format_instructions}"

# `format_instructions` is bound up front, so callers only supply topic and number.
parsed_recommender_template = PromptTemplate(
    template=parsed_recommender_prompt,
    input_variables=['topic', 'number'],
    partial_variables={"format_instructions": list_format_instructions},
)

In [None]:
# Render the parser-augmented template; the format instructions are already bound.
gen_prompt = parsed_recommender_template.format(number=5, topic='Generative AI')

In [None]:
# Echo the rendered prompt in green, then print the completion.
# `output` is the text returned by `llm`; it is not parsed in this cell.
print(colored(gen_prompt, color='green'))
output = llm(gen_prompt)
print(output)