In [None]:
# Install the package in the current directory into the kernel's environment
# in editable mode (-e); -q keeps pip's output quiet.
%pip install -q -e .

In [None]:
# Enable the autoreload extension so edits to imported modules are picked up
# live without restarting the kernel.
%load_ext autoreload
%autoreload 2

## LISA-SDK
In this notebook, we provide a tutorial on how to use the LISA SDK. LISA is an enabling service to easily deploy generative AI applications in AWS customer environments. LISA is an open-source infrastructure-as-code offering that is accessible via an API or simple user interface and provides scalable access to generative large language models and embedding language models. In order for the SDK to work properly, you will need access to a deployed version of LISA and the REST API URL for LISA-Serve.

In [None]:
from lisapy import LisaLlm
from lisapy.authentication import get_cognito_token
# Placeholder connection settings -- replace each value with the details of
# your own deployment before running.
url = "LISA-API-URL"
username = "your cognito username"
client_id = "Cognito client ID"

# Authenticate, then pull the ID token out of the nested response.
auth_response = get_cognito_token(client_id=client_id, username=username)
token = auth_response["AuthenticationResult"]["IdToken"]


### Connect to LISA

In [None]:
# The ID token is sent as a bearer token on every request.
auth_headers = {"Authorization": f"Bearer {token}"}

lisa = LisaLlm(
    url=url,
    headers=auth_headers,
    timeout=60,
    # note only for dev deployments with self-signed certificates
    verify=False,
)

We can get started by listing the models that the API serves.

In [None]:
# Fetch the models known to the API; `response` is indexed in the next cell.
response = lisa.list_models()
# Render the result with the notebook's rich display.
display(response)

If we want to know the details of a particular model, such as the parameters it accepts, we can use the `describe_model` call.

In [None]:
# Describe the first model returned by the listing above.
first_model = response[0]
lisa.describe_model(
    model_name=first_model.model_name,
    provider=first_model.provider,
)

Different deployments of LISA may support different models. Let's see what models we have to work with.

In [None]:
# Query both model categories first ...
textgen_models = lisa.list_textgen_models()
embedding_models = lisa.list_embedding_models()

# ... then show them in the same order: text generation, then embeddings.
display(textgen_models)
display(embedding_models)

### Generation
Now let's ask LISA a question!

In [None]:
# Look up the text-generation model; `model` is reused by the following cells.
model = lisa.describe_model(
    provider="ecs.textgen.tgi",
    model_name="mistral-7b-instruct",
)

# Send a single prompt and print the response.
question = "What is Deep Learning?"
response = lisa.generate(question, model=model)
print(response)

Now let's customize the model kwargs

In [None]:
# Adjust the generation settings in place on the model fetched earlier:
# a small token budget and no streaming.
generation_settings = model.model_kwargs
generation_settings.max_new_tokens = 10
generation_settings.streaming = False

response = lisa.generate("What is Deep Learning?", model=model)
print(response)

### Streaming
Now let's try streaming!

In [None]:
import sys
# Raise the token budget and turn streaming on, both in the generation
# kwargs and on the model object itself.
model.model_kwargs.max_new_tokens = 512
model.model_kwargs.streaming = True
model.streaming = True

# Write each token as soon as it arrives: no newline between tokens, and an
# explicit flush so output appears incrementally.
token_stream = lisa.generate_stream(
    prompt='\n\nUser:What is Deep Learning\n\nAssistant:', model=model
)
for chunk in token_stream:
    print(chunk.token, end="", flush=True)

### Embedding
LISA also serves embedding endpoints. Let's take those for a test drive.

In [None]:
import numpy as np
# Switch `model` to the embedding model and embed three short sentences.
model = lisa.describe_model(
    provider="ecs.embedding.tei",
    model_name="bge-large-en-v1.5",
)
messages = [
    "Deep learning is awesome",
    "Deep learning is vaporware",
    "Baseball is fun",
]
embeddings = lisa.embed(messages, model=model)

# Dot product of the first sentence's embedding with each of the other two.
# NOTE(review): a dot product only equals cosine similarity for unit-length
# vectors -- confirm the endpoint returns normalized embeddings.
score_first_second = np.dot(embeddings[0], embeddings[1])
score_first_third = np.dot(embeddings[0], embeddings[2])

print(f"""
The similarity between:

      {messages[0]}

  and

      {messages[1]}

is {score_first_second}

The similarity between

      {messages[0]}

  and

      {messages[2]}

is {score_first_third}
      """)


## End to end LangChain example based on [this example](https://python.langchain.com/docs/expression_language/cookbook/retrieval)

In [None]:
from langchain_community.vectorstores import FAISS # may require pip install faiss-gpu
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# from langchain_community.vectorstores import OpenSearchVectorSearch

# This cell repeats its own connection setup, so it also imports LisaLlm
# itself; without that import, running the cell on its own in a fresh kernel
# fails with a NameError at the LisaLlm(...) call below.
from lisapy import LisaLlm
from lisapy.langchain import LisaTextgen
from lisapy.langchain import LisaEmbeddings
from lisapy.authentication import get_cognito_token

# Placeholder connection settings -- replace each value with the details of
# your own deployment before running.
url = "LISA-API-URL"
username = "your cognito username"
client_id = "Cognito client ID"

# Authenticate and extract the ID token from the nested response.
token = get_cognito_token(client_id=client_id, username=username)[
    "AuthenticationResult"
]["IdToken"]

lisa = LisaLlm(
    url=url,
    verify=False,  # note only for dev deployments with self-signed certificates
    timeout=60,
    headers={"Authorization": f"Bearer {token}"},
)

# LangChain-compatible wrappers around the LISA client: one for embeddings,
# one for text generation.
embedding = LisaEmbeddings(
    client=lisa,
    provider="ecs.embedding.tei",
    model_name="bge-large-en-v1.5",
)
llm = LisaTextgen(
    client=lisa,
    provider="ecs.textgen.tgi",
    model_name="mistral-7b-instruct",
)

# Build a FAISS store from a single toy document, embedded with the LISA
# embedding wrapper.
documents = ["harrison worked at kensho"]
vectorstore = FAISS.from_texts(documents, embedding=embedding)

# could also leverage opensearch for vector store
# vector_search = OpenSearchVectorSearch(
#     <OpenSearch URI>,
#     "embeddings",
#     embedding
# )

# Expose the store as a retriever for use in the chain.
retriever = vectorstore.as_retriever()

# Prompt with two placeholders: the retrieved context and the user's question.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)

# The input question feeds the retriever (to fill "context") and is also
# passed through unchanged as "question".
chain_inputs = {"context": retriever, "question": RunnablePassthrough()}

# Pipe the inputs through the prompt, the LISA LLM, and a string parser.
chain = chain_inputs | prompt | llm | StrOutputParser()

chain.invoke("where did harrison work?")