# WorkSpan ChatBot Demo - OpenAI, LangChain and Astra Vector Search

WorkSpan is an Ecosystem Management platform. It is used to efficiently manage co-sell opportunities with partners, scaling the co-sell pipeline, revenue, and wins.

The WorkSpan team wants to integrate GenAI features, such as a chatbot, into their applications by leveraging Astra Vector Search and LLMs.

# Imports

In [None]:
!pip install cassio ragstack-ai openai tiktoken gradio

In [2]:
import json
import os
import sys
from getpass import getpass

import cassio

from langchain.vectorstores import Cassandra
from langchain.schema import Document
from langchain.prompts import PromptTemplate
from langchain.document_loaders import TextLoader
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings

# Keys & Environment Variables

In [3]:
# Astra DB connection settings.
# Each value is read from the environment when it is set, so credentials do
# not have to be saved inside the notebook. When a value is missing the user
# is prompted for it; the token prompt uses getpass so the input is not echoed.
ASTRA_DB_ID = os.environ.get("ASTRA_DB_ID") or input("Astra DB database ID: ")
ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN") or getpass(
    "Astra DB application token: "
)
# Keyspace used by this demo; "workspan" remains the default.
ASTRA_DB_KEYSPACE = os.environ.get("ASTRA_DB_KEYSPACE", "workspan")

In [None]:
# Initialise cassio with the Astra DB credentials defined above.
# An empty keyspace string is passed on as None.
cassio.init(
    database_id=ASTRA_DB_ID,
    token=ASTRA_DB_APPLICATION_TOKEN,
    keyspace=ASTRA_DB_KEYSPACE or None,
)

# Set up OpenAI Objects and Vector Store

In [5]:
# Prompt for the OpenAI key only when the environment does not already hold
# one. Assigning a placeholder unconditionally would overwrite a valid key
# that was exported before the notebook started.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# LLM used for answering and the embedding model used for indexing/searching.
llm_openai = OpenAI(temperature=0)
openai_embeddings = OpenAIEmbeddings()

# Vector store backed by the "customer_opportunities_openai" table.
vector_store = Cassandra(
    embedding=openai_embeddings,
    table_name="customer_opportunities_openai",
    session=None,  # <-- meaning: use the global defaults from cassio.init()
    keyspace=None,  # <-- meaning: use the global defaults from cassio.init()
)


# Populate Vector Store

In [None]:
def _build_opportunity_document(opportunity_id, next_step, cadence):
    """Return the Document stored for one opportunity.

    The page content is the metadata dict, the next-step text and the cadence
    text joined with " : ", so the metadata values are part of the embedded
    text as well as being attached as Document metadata.
    """
    # Key order is significant: the dict's repr is embedded in page_content.
    metadata = {
        "customer_id": "CUS100",
        "partner_id": "AWS",
        "opportunity_id": opportunity_id,
        "customer_name": "Teradyne, Inc.",
    }
    page_content = "{} : {} : {}".format(metadata, next_step, cadence)
    return Document(page_content=page_content, metadata=metadata)


# Sample opportunities as (opportunity_id, next_step, cadence).
# The text blocks are kept verbatim, including leading/trailing newlines.
opportunity_records = [
    # record #1
    (
        "WS-7202838a",
        """
Action Items:
From Michael, confirmed deprioritize. From Anjaney, account executive interest to schedule meeting - Anjaney to schedule call with Nirav/Amy on R&D.
""",
        """
Next Step:
08/16/2023 : Review partner information updates and update opportunity details. 8/17(LR) - connecting with Partner to offer co-sell support

Next Step History:
null;08/16/2023 : Review partner information updates and update opportunity details.;08/16/2023 : Review partner information updates and update opportunity details. 8/17(LR) - connecting with Partner to offer co-sell support
""",
    ),
    # record #2
    (
        "WS-8a038b8a",
        """
Action Items:
From Autumn, send recording of last call and our discussed inputs from demo 8/28. Ramesh will provide to Caroline by early next week (of 9/11).
""",
        """
REVIEW TECH & Economic Proposal
""",
    ),
    # record #3
    (
        "WS-8a3b0348",
        """
Action Items:
Joint sync set for 9/7. Enablement session to follow + in person account mapping. Caroline / Michael to begin coordinating. EAI presence
""",
        """
07/05/2023: Contact Federico Gandolfo,federico.hernan.gandolfo@abc.com,+54.911.3204.4871 to discuss Deal support
""",
    ),
    # record #4
    (
        "WS-8a7128a3",
        """
Action Items:
From Caroline, user community engaged to respond to questions. @Dataiku - How can we get initial data from user community/pull together PoV for client? Action (Asan/Ken (sp?)): In-person outreach to Deloitte users and follow-up to 5 responses received.
""",
        """
null;06/20/2023: Contact Federico Gandolfo,federico.hernan.gandolfo@abc.com,+54.911.3204.4871 to discuss Deal support;07/05/2023: Contact Federico Gandolfo,federico.hernan.gandolfo@abc.com,+54.911.3204.4871 to discuss Deal support
""",
    ),
    # record #5
    (
        "WS-8a7128a4",
        """
Propsal did not go thru. No budget Left. Negative.
""",
        """
No further follow up required.
""",
    ),
]

input_documents = []
for opportunity_id, next_step, cadence in opportunity_records:
    input_document = _build_opportunity_document(opportunity_id, next_step, cadence)
    input_documents.append(input_document)
    print(input_document)

print(f"Adding {len(input_documents)} documents ... ", end="")
vector_store.add_documents(documents=input_documents, batch_size=50)
print("Done.")

page_content="{'customer_id': 'CUS100', 'partner_id': 'AWS', 'opportunity_id': 'WS-7202838a', 'customer_name': 'Teradyne, Inc.'} : \nAction Items:\nFrom Michael, confirmed deprioritize. From Anjaney, account executive interest to schedule meeting - Anjaney to schedule call with Nirav/Amy on R&D.\n : \nNext Step:\n08/16/2023 : Review partner information updates and update opportunity details. 8/17(LR) - connecting with Partner to offer co-sell support\n\nNext Step History:\nnull;08/16/2023 : Review partner information updates and update opportunity details.;08/16/2023 : Review partner information updates and update opportunity details. 8/17(LR) - connecting with Partner to offer co-sell support\n" metadata={'customer_id': 'CUS100', 'partner_id': 'AWS', 'opportunity_id': 'WS-7202838a', 'customer_name': 'Teradyne, Inc.'}
page_content="{'customer_id': 'CUS100', 'partner_id': 'AWS', 'opportunity_id': 'WS-8a038b8a', 'customer_name': 'Teradyne, Inc.'} : \nAction Items:\nFrom Autumn, send reco

# Set up prompt template

In [6]:
# Prompt used to answer a question from retrieved context.
# {context} is filled with the retrieved document text and {question} with
# the user's query. The context is wrapped in a properly closed
# <context>...</context> pair so its end is unambiguous.
prompt_template_str = """Human: Use the following pieces of context to provide a concise answer to the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

<context>
{context}
</context>

Question: {question}

A:"""

prompt = PromptTemplate.from_template(prompt_template_str)

# Answer Questions - OpenAI

In [7]:
from langchain.chains import ConversationalRetrievalChain

# NOTE: the three settings below are not referenced by any code visible in
# this notebook; they are kept so that existing references keep working.
model_id = "gpt-4"

req_accept = "application/json"
req_content_type = "application/json"

# This, created from the vector store, will fetch the top relevant documents given a text query
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

# Built once and reused: no memory object is configured and the chat history
# is passed explicitly on every call, so one instance can serve all questions.
# combine_docs_chain_kwargs hands the notebook's prompt template to the
# answering step; without it the template is never used.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm_openai,
    retriever=retriever,
    combine_docs_chain_kwargs={"prompt": prompt},
)


def answer_question_openai(question: str, verbose: bool = False) -> str:
    """Answer ``question`` from the documents held in the vector store.

    Args:
        question: The user's question in natural language.
        verbose: When true, print the question and the text of the documents
            the retriever returns for it.

    Returns:
        The value of the ``"answer"`` key of the chain's result.
    """
    if verbose:
        print(f"\n[answer_question] Question: {question}")
        # Diagnostic only: the chain performs its own retrieval, so this
        # lookup is skipped entirely when verbose output is not requested.
        context_docs = retriever.get_relevant_documents(question)
        context = "\n".join(doc.page_content for doc in context_docs)
        print("\n[answer_question] Context:")
        print(context)

    # Each call is independent, hence the empty chat history.
    result = qa_chain({"question": question, "chat_history": []})
    return result["answer"]


# Implement metadata filter

A metadata filter can be applied to limit the scope of queries to a specific customer.

In [6]:
# Open the opportunities table through cassIO's metadata-aware vector table.
# NOTE(review): 1536 is presumably the embedding size of the model that
# populated this table; confirm before changing the embedding model.
from cassio.table import MetadataVectorCassandraTable

v_table = MetadataVectorCassandraTable(
    table="customer_opportunities_openai",
    vector_dimension=1536,
)

In [62]:
# Imported under an alias so that the name `OpenAI`, which this file already
# uses for the LangChain LLM wrapper (from langchain.llms import OpenAI), is
# not rebound to the OpenAI SDK client class.
from openai import OpenAI as OpenAIClient

client = OpenAIClient(
    api_key=os.environ["OPENAI_API_KEY"],  # this is also the default, it can be omitted
)

embedding_model_name = "text-embedding-ada-002"


def search_with_customer_filter(
    question: str, verbose: bool = False, customer_id: str = "CUS100"
) -> list:
    """Return the stored documents closest to ``question`` for one customer.

    Args:
        question: Text to embed and search for.
        verbose: When true, print the matches before returning them.
        customer_id: Value the ``customer_id`` metadata field must match.
            Defaults to ``"CUS100"``, the customer used by the sample data.

    Returns:
        A list of ``(body_blob, customer_id)`` tuples, one per match, for at
        most five matches.
    """
    # Embed the question with the OpenAI embeddings endpoint.
    query_vector = client.embeddings.create(
        input=question,
        model=embedding_model_name,
    ).data[0].embedding

    # Retrieval of the most relevant stored documents from the vector store,
    # restricted to rows whose metadata matches the requested customer.
    results = v_table.ann_search(
        query_vector,
        n=5,
        metadata={"customer_id": customer_id},
    )

    context_docs = [
        (result["body_blob"], result["metadata"]["customer_id"])
        for result in results
    ]

    if verbose:
        print(context_docs)

    return context_docs

# Test code snippet

In [10]:
# Swap in one of the commented-out calls to exercise a different path.
#answer = search_with_customer_filter("What are the next steps?")
answer = answer_question_openai("What are the next steps?")
#answer = answer_question_openai("What are the opportunities with identified wins?", verbose=True)
# Separator line first, then the answer on its own line.
print("=" * 60, answer, sep="\n")

 The next steps are to review partner information updates and update opportunity details on August 16th, and to contact Federico Gandolfo on July 5th to discuss deal support.


# Gradio ChatBot UI

In [None]:
import gradio as gr


def predict(message, history):
    """Chat callback: answer ``message``; ``history`` is accepted but unused."""
    return answer_question_openai(message)


# Build the chat UI around the callback and start it.
gr.ChatInterface(predict).launch()

# LangServe deployment

Because of `RuntimeError: asyncio.run() cannot be called from a running event loop`, the following portion needs to be run locally (from localhost).

Follow these instructions to run it locally. [LangServe setup](https://github.com/langchain-ai/langserve-launch-example)

# chain.py

In [12]:
from langchain.chains import ConversationalRetrievalChain

# NOTE: the three settings below are not referenced by any code visible in
# this notebook; they are kept so that existing references keep working.
model_id = "gpt-4"

req_accept = "application/json"
req_content_type = "application/json"

# This, created from the vector store, will fetch the top relevant documents given a text query
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

# The chain object that server.py exposes. Nothing is retrieved or formatted
# at load time: the chain does its own retrieval per request, so a sample
# query here would only add an unused network round trip.
# combine_docs_chain_kwargs hands the notebook's prompt template to the
# answering step; without it the template is never used.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm_openai,
    retriever=retriever,
    combine_docs_chain_kwargs={"prompt": prompt},
)


# server.py

In [None]:
!pip install langserve fastapi sse_starlette

In [None]:
#!/usr/bin/env python
"""A server for the chain above.

Creates a FastAPI application and registers the ``chain`` object on it with
LangServe's ``add_routes``.
"""

from fastapi import FastAPI
from langserve import add_routes


app = FastAPI(title="Retrieval App")

# `chain` is neither defined nor imported in this cell; it must already be in
# scope (it is assigned in the chain.py section of this file).
add_routes(app, chain)

# Start the server only when this module is executed as the main program.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when actually serving.
    import uvicorn

    uvicorn.run(app, host="localhost", port=8000)