In [None]:
!pip install weaviate-client langchain==0.0.245

In [None]:
# python libraries
import ast
import boto3
from datetime import datetime
import json
import os
import sagemaker
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
import weaviate

# langchain libraries
import langchain
from langchain import PromptTemplate, LLMChain
from langchain.llms.sagemaker_endpoint import  SagemakerEndpoint, LLMContentHandler
from langchain.retrievers.weaviate_hybrid_search import WeaviateHybridSearchRetriever

from langsmith import Client

# LangSmith client, constructed with no explicit arguments.
# NOTE(review): LANGCHAIN_API_KEY / LANGCHAIN_ENDPOINT are only exported in a later
# cell, so this client is created before those variables are set — confirm it ends
# up with the intended credentials.
ls_client = Client()
# Low-level boto3 client for the SageMaker API.
sm_client = boto3.client('sagemaker')

In [None]:
# Region of the default boto3 session; passed to the LangChain SageMaker LLM later on.
region = boto3.Session().region_name
sagemaker_session = sagemaker.Session()
# ARN of the calling identity; used as the `role` of the Hugging Face model at deploy time.
role = sagemaker_session.get_caller_identity_arn()

<mark>Define the load balancer for the Weaviate instance</mark>

In [None]:
# Host name of the load balancer in front of Weaviate — fill this in before continuing.
# Host only: the "http://" scheme is prepended when the Weaviate client is created.
elb_endpoint = ''

In [None]:
# Fail fast with a clear message instead of building a client for the bare URL "http://".
if not elb_endpoint:
    raise ValueError("elb_endpoint is empty; set it to the host name of the Weaviate load balancer.")

# Weaviate client used by the retriever (and typically by the generated queries).
wv_client = weaviate.Client(url=f"http://{elb_endpoint}")

<mark>Optional but recommended: provide your LangSmith API key</mark>

In [None]:
# Read the key from the environment so the secret is never saved inside the notebook.
# An empty string means "no key provided"; assign a value here only in a throwaway session.
langsmith_api_key = os.environ.get("LANGCHAIN_API_KEY", "")

In [None]:
# Traces are grouped in one project per calendar day.
today = datetime.now().strftime("%Y%m%d")
os.environ["LANGCHAIN_PROJECT"] = f"English Query - {today}"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"

# The API key is optional, so tracing is only switched on when a key was supplied;
# otherwise every LLM call would try to upload traces without credentials.
if langsmith_api_key:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_API_KEY"] = langsmith_api_key
else:
    os.environ["LANGCHAIN_TRACING_V2"] = "false"

<h1>Deploy SageMaker Endpoint</h1>

In [None]:
# Hub Model configuration. https://huggingface.co/models
# This dict is handed to the serving container through `env=` below.
hub = {
    'HF_MODEL_ID':'openchat/opencoderplus',
    'SM_NUM_GPUS': json.dumps(8)  # json.dumps(8) == "8": the value is sent as a string
}

# create Hugging Face Model Class
# The container image is resolved explicitly via get_huggingface_llm_image_uri.
huggingface_model = HuggingFaceModel(
    image_uri=get_huggingface_llm_image_uri("huggingface",version="0.8.2"),
    env=hub,
    role=role, 
    transformers_version="4.30.1",
)

# deploy model to SageMaker Inference
# NOTE: this provisions a real endpoint on one ml.g5.48xlarge instance; run the
# cleanup cell at the end of the notebook when finished.
# wait=True — presumably blocks until the endpoint is ready; confirm against the SDK docs.
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type='ml.g5.48xlarge',
    container_startup_health_check_timeout=300,
    wait=True
)

# Endpoint name is kept for the LangChain wrapper and for the cleanup cell.
llm_opencoder_endpoint_name = predictor.endpoint_name

<h1>Define Langchain LLM</h1>

In [None]:
# opencoder
# Default generation settings sent with every request to the endpoint.
parameters = dict(
    do_sample=True,
    top_p=0.7,
    temperature=0.1,
    top_k=5,
    return_full_text=False,
    max_new_tokens=500,
    repetition_penalty=1.03,
    stop=["<|end_of_turn|>"],
)
  
class ContentHandler(LLMContentHandler):
    """Serialise prompts for, and parse responses from, the text-generation endpoint.

    Both the request and the response body are JSON (see ``content_type`` and
    ``accepts``).
    """

    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs=None) -> bytes:
        """Build the UTF-8 encoded JSON request body for ``prompt``.

        The module-level ``parameters`` act as defaults and entries of
        ``model_kwargs`` override them. All generation settings are sent under
        the ``"parameters"`` key; nothing is spread over the top level of the
        payload.

        Args:
            prompt: Text placed under ``"inputs"``.
            model_kwargs: Optional per-call generation settings.

        Returns:
            The JSON document encoded as UTF-8 bytes.
        """
        # `None` instead of `{}` as the default avoids a shared mutable default argument.
        generation_params = {**parameters, **(model_kwargs or {})}
        payload = {"inputs": prompt, "parameters": generation_params}
        return json.dumps(payload).encode("utf-8")

    def transform_output(self, output) -> str:
        """Extract the generated text from the endpoint response.

        Args:
            output: Readable stream holding UTF-8 encoded JSON (``.read()`` is
                called on it). The JSON is indexed as a list whose first item
                has a ``"generated_text"`` entry.

        Returns:
            The generated text up to, but excluding, the first
            ``<|end_of_turn|>`` marker.
        """
        response_json = json.loads(output.read().decode("utf-8"))
        generated_text = response_json[0]["generated_text"]
        # Drop the end-of-turn marker and anything the model produced after it.
        return generated_text.split('<|end_of_turn|>')[0]


# Handler instance that converts prompts/responses for the LLM wrapper below.
content_handler = ContentHandler()


# LangChain LLM backed by the SageMaker endpoint whose name was captured at deploy time.
# NOTE(review): model_kwargs is presumably forwarded to ContentHandler.transform_input
# on each call — confirm against the LangChain SagemakerEndpoint documentation.
sm_opencoder_llm = SagemakerEndpoint(
    endpoint_name=llm_opencoder_endpoint_name,
    region_name=region,
    model_kwargs=parameters,
    content_handler=content_handler,
)

In [None]:
# Smoke test: send a single prompt through the LLM wrapper and display the reply.
sm_opencoder_llm("What day comes after Tuesday?")

<h1>Define LLM Chain</h1>

In [None]:
# Few-shot prompt: three (question, query) example pairs followed by the user's
# question. Each example answer is wrapped in a ```python fence, which is the same
# marker query_weaviate later uses to cut the generated code out of the model output.
template = """You are an assistant who is an expert at translating a question provided by the User into a Weaviate query that can be executed using the Python client.

Use the examples below as a guide.  Return ONLY the results of the Weaviate query and do NOT reformat the results. 

---
Example #1
User: {question_1}
Assistant: 
```python
{query_1}
```
---

---
Example #2
User: {question_2}
Assistant: 
```python
{query_2}
```
---

---
Example #3
User: {question_3}
Assistant: 
```python
{query_3}
```
---

Begin!

User: {user_question}
Assistant:
"""

# Every placeholder used in `template` must be listed in input_variables.
prompt_template = PromptTemplate(
    input_variables=["user_question",'question_1','query_1','question_2','query_2','question_3','query_3'],
    template=template,
)

In [None]:
# Chain that combines the few-shot prompt template with the SageMaker-hosted LLM.
llm_chain = LLMChain(llm=sm_opencoder_llm, prompt=prompt_template, verbose=False)

In [None]:
# define a retriever for the question bank
# k=3 matches query_weaviate, which reads exactly three results. query_weaviate takes
# the stored query from `page_content` and the original question from
# `metadata['question']` (presumably populated from text_key / attributes — confirm).
wv_hybrid_retriever = WeaviateHybridSearchRetriever(client=wv_client, index_name="Query", text_key="query", attributes=['question'], k=3)

In [None]:
# Sanity check: show the example documents retrieved for a sample question.
wv_hybrid_retriever.get_relevant_documents(query="Search for devices that do not have a stylus")

In [None]:
# define a search function
def query_weaviate(question: str) -> tuple:
    '''
    Translate an English request into a Weaviate query, execute the generated
    code, and return both the outcome and the code.

    Args:
        question: English request. Double quotes are removed before the text
            is used for retrieval and prompting.

    Returns:
        A ``(response, query)`` tuple. ``response`` is whatever the generated
        code assigned to the name ``answer``; ``query`` is the generated
        Python source exactly as it was cut out of the model output.

    Raises:
        IndexError: fewer than three examples were retrieved, or the model
            output contains no python code fence.
        KeyError: the generated code did not assign ``answer``.
    '''
    n_examples = 3  # the prompt template has exactly three example slots
    fence_open = '```python'

    # collect similar queries
    question = question.replace('"', '')
    results = wv_hybrid_retriever.get_relevant_documents(query=question)
    if len(results) < n_examples:
        raise IndexError(
            f"expected at least {n_examples} example queries from the retriever, got {len(results)}"
        )

    # question_1/query_1 ... question_3/query_3 fill the example slots of the prompt
    chain_inputs = {"user_question": question}
    for position, doc in enumerate(results[:n_examples], start=1):
        chain_inputs[f"question_{position}"] = doc.metadata['question']
        chain_inputs[f"query_{position}"] = doc.page_content

    # run the chain
    result = llm_chain.run(**chain_inputs)

    if fence_open not in result:
        raise IndexError(f"model output contains no {fence_open} block: {result!r}")
    query = result.split(fence_open)[1].split('```')[0]

    # SECURITY: this executes model-generated code with access to the notebook's
    # globals. Only run it in a trusted, sandboxed environment.
    # A single namespace (a copy of the globals) is used instead of separate
    # globals/locals dicts, so functions defined by the generated code can see
    # its own top-level variables; assignments still do not leak into the notebook.
    namespace = dict(globals())
    namespace.pop('answer', None)  # never return a stale `answer` from an earlier run
    exec(query, namespace)
    if 'answer' not in namespace:
        raise KeyError("generated query did not assign a variable named 'answer'")
    response = namespace['answer']

    return response, query

In [None]:
#question = """Search for devices that discuss durability with a limit of 1.  Return the "model_names" and "key_features" fields."""

#question = """Search for devices that have a stylus with a limit of 2.  Return the "model_names", "key_features", and "stylus" fields"""
#question = """Search for devices that do not have a stylus with a limit of 1.  Return the "model_names", and "document_summary", and "stylus" fields"""
#question = """Search for devices that are durable with a limit of 2.  Return the "model_names", "key_features", and "stylus" fields"""
#question = """Search for devices that have a front facing camera with a limit of 2.  Return the "model_names", "key_features", and "stylus" fields"""

#question = "how many manuals are there with a stylus?"
#question = "how many manuals are there without a stylus?"
#question = "how many manuals are there?"
#question = "what is the total number of manuals that have a stylus?"
#question = "what is the total number of manuals that do not have a stylus?"


#question = """Search for devices that have fast charging with a limit of 3.  Return the "model_names" and "key_features" fields."""
#question = """Search for devices that have fast charging with a limit of 3.  Return the "document_summary" field."""
#question = """Search for devices that have fast charging with a limit of 2.  Return the "model_names", "key_features" and "document_summary" fields."""
#question = """Search for devices that are durable with a limit of 1.  Return the "model_names" and "key_features" fields."""
#question = """Search for devices that have a stylus with a limit of 2.  Return the "model_names", "key_features", and "stylus" fields"""

# Active question for the run cell; swap in one of the commented-out examples in this cell to try others.
question = 'Search for devices that are fast charging with a limit of 1.  Return the "model_names", "key_features", and "document_summary" fields'
#question = 'Search for devices that are fast charging with a limit of 2.  Return "model_names" and "key_features"'

#question = """Search for devices that have a stylus with a limit of 1.  Return the "model_names", and "document_summary", and "stylus" fields"""

In [None]:
# IMPORTANT! keep in mind there are only 5 manuals uploaded so our result set will be limited
langchain.debug=False # toggle for detailed logs
# query_weaviate returns the executed result together with the generated query code.
response, query = query_weaviate(question)

In [None]:
# Result obtained by executing the generated query.
response

In [None]:
# Generated query code that produced the response above.
print(query)

<h2>Cleanup</h2>

In [None]:
# Look up the endpoint configuration and its model(s) before deleting anything, so
# that all three resource types are removed and nothing is left behind in the account.
endpoint_desc = sm_client.describe_endpoint(EndpointName=llm_opencoder_endpoint_name)
endpoint_config_name = endpoint_desc["EndpointConfigName"]
endpoint_config_desc = sm_client.describe_endpoint_config(EndpointConfigName=endpoint_config_name)
model_names = [variant["ModelName"] for variant in endpoint_config_desc["ProductionVariants"]]

# The endpoint is the billable resource, so it goes first.
sm_client.delete_endpoint(
    EndpointName=llm_opencoder_endpoint_name
)
sm_client.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
for model_name in model_names:
    sm_client.delete_model(ModelName=model_name)