In [11]:
import os

import qdrant_client
from langchain import hub
from langchain.chains import ConversationChain
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.memory import ConversationSummaryMemory
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.schema import HumanMessage, SystemMessage
from langchain_anthropic import ChatAnthropic
from langchain_community.vectorstores import Qdrant
from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer
from langchain_openai import ChatOpenAI

In [3]:
# Shared reversible anonymizer used by anonymize(). Only the entity types
# listed here are analyzed, and the default Faker operators are switched off.
anonymizer = PresidioReversibleAnonymizer(
    analyzed_fields=[
        "LOCATION",
        "PHONE_NUMBER",
        "US_SSN",
        "IBAN_CODE",
        "CREDIT_CARD",
        "CRYPTO",
        "IP_ADDRESS",
        "MEDICAL_LICENSE",
        "URL",
        "US_BANK_NUMBER",
        "US_DRIVER_LICENSE",
        "US_ITIN",
        "US_PASSPORT",
    ],
    add_default_faker_operators=False,
)

def anonymize(input_text):
    """Return ``input_text`` after running it through the shared ``anonymizer``.

    Args:
        input_text: Text to anonymize. Falsy values (``None``, ``""``) are
            returned unchanged without calling the anonymizer.

    Returns:
        Whatever ``anonymizer.anonymize`` returns for non-empty text, or
        ``input_text`` itself when it is falsy.
    """
    if not input_text:
        # Nothing to scan; hand the value back so "" stays "" instead of
        # silently turning into None.
        return input_text
    # Anonymize the argument itself (the previous body referenced an
    # undefined name and raised NameError on every non-empty input).
    return anonymizer.anonymize(input_text)

In [None]:
def get_rag_llm(model_name="openai"):
    """Return the chat model selected by ``model_name``.

    The comparison is case-insensitive. ``"anthropic"`` yields a
    ``ChatAnthropic`` using 'claude-2.1'; every other value yields a
    ``ChatOpenAI`` using 'gpt-3.5-turbo-0613'.
    """
    wants_anthropic = model_name.lower() == "anthropic"
    if not wants_anthropic:
        return ChatOpenAI(model='gpt-3.5-turbo-0613')
    return ChatAnthropic(model='claude-2.1')

In [9]:
# Qdrant connection settings; the API key comes from the QDRANT_KEY
# environment variable (None when it is not set).
qdrant_key = os.getenv("QDRANT_KEY")
qdrant_url = "https://59f8f159-fb60-44e8-bfc4-9f35c77ca8d4.us-east4-0.gcp.cloud.qdrant.io:6333"

# Embedding model: BGE small (English) on CPU with normalized embeddings.
# SENTENCE_TRANSFORMERS_HOME is set before the model is constructed --
# presumably this is where the model files get cached; TODO confirm.
os.environ['SENTENCE_TRANSFORMERS_HOME'] = '/mnt/code/model_cache/'
embedding_model_name = "BAAI/bge-small-en"
model_kwargs = dict(device='cpu')
encode_kwargs = dict(normalize_embeddings=True)
embeddings = HuggingFaceBgeEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Client for the hosted Qdrant instance
client = qdrant_client.QdrantClient(qdrant_url, api_key=qdrant_key)

# Vector store over the "rakuten" collection, queried by get_relevant_docs()
doc_store = Qdrant(client=client, collection_name="rakuten", embeddings=embeddings)

In [12]:
# Chat model (temperature 0) shared by the chain and its summary memory
rag_llm = ChatOpenAI(model='gpt-3.5-turbo-0613', temperature=0)

# Conversation chain whose memory is a summary produced by the same model
conversation = ConversationChain(
    llm=rag_llm,
    verbose=False,
    memory=ConversationSummaryMemory(llm=rag_llm),
)

# Chat transcript, seeded with the assistant's greeting
messages = [dict(role="assistant", content="How can I help you today?")]

In [13]:
# Retrieval and prompt configuration

# Score cut-off for vector-search hits: get_relevant_docs() keeps only the
# results whose score is strictly greater than this value.
SIMILARITY_THRESHOLD = 0.83

# System-prompt template. The {url_links} and {context} placeholders are
# filled in by build_system_prompt(); the same text is pushed to the prompt
# hub further down.
# NOTE(review): the text contains a stray '"' after "helpful" and the
# spelling "refered"; both are left as-is here because editing them changes
# the prompt that is sent to the model.
template = """ You are a virtual assistant for Rakuten and your task is to answer questions related to Rakuten which includes general information about Rakuten.

                Respond in the style of a polite helpful assistant and do not allude that you have looked up the context.

                Do not hallucinate. If you don't find an answer, you can point user to the official website here: https://www.rakuten.com/help . 

                In your response, include the following url links at the end of your response {url_links} and any other relevant URL links that you refered.

                Also, at the end of your response, ask if your response was helpful". 

                Here is some relevant context: {context}"""


def get_relevant_docs(user_input):
    """Look up ``user_input`` in ``doc_store`` and keep the confident hits.

    Only results whose score is strictly greater than
    ``SIMILARITY_THRESHOLD`` are kept.

    Returns:
        A ``(urls, contexts)`` pair of lists: the ``metadata['source']`` and
        the ``page_content`` of each kept document, in search-result order.
    """
    scored_hits = doc_store.similarity_search_with_score(user_input)

    urls = []
    contexts = []
    for document, score in scored_hits:
        if score > SIMILARITY_THRESHOLD:
            urls.append(document.metadata['source'])
            contexts.append(document.page_content)

    return urls, contexts
 
 
def build_system_prompt(user_input):
    """Build the system prompt for ``user_input`` from the retrieved documents.

    Calls ``get_relevant_docs`` and substitutes the results into the
    module-level ``template``.

    Args:
        user_input: The question used for the document lookup.

    Returns:
        The formatted prompt string. ``{url_links}`` receives the distinct
        source URLs, one per line; ``{context}`` receives the document texts
        separated by blank lines.
    """
    urls, contexts = get_relevant_docs(user_input)

    # Formatting a list directly into the template inserts its Python repr
    # (brackets, quotes, escaped newlines), so render plain text instead.
    # dict.fromkeys drops repeated URLs while keeping first-seen order.
    url_text = "\n".join(str(url) for url in dict.fromkeys(urls))
    context_text = "\n\n".join(str(context) for context in contexts)

    prompt_template = PromptTemplate(
        input_variables=["url_links", "context"],
        template=template
    )
    return prompt_template.format(url_links=url_text, context=context_text)
 
# Query the Open AI Model
def queryAIModel(user_input):
    """Send ``user_input`` to the conversation chain and return its reply.

    A system message built by ``build_system_prompt`` and a human message
    holding ``user_input`` are passed together, as a list, as the ``input``
    of ``conversation.predict``.
    """
    system_message = SystemMessage(content=build_system_prompt(user_input))
    human_message = HumanMessage(content=user_input)

    # Local list only; the notebook-level `messages` transcript is untouched.
    return conversation.predict(input=[system_message, human_message])

In [None]:
# Read a question, record it in the transcript as typed, then send the
# anonymized version to the model. The bare `result` shows the reply.
user_question = input("Please provide your question here :")
messages.append(dict(role="user", content=user_question))
result = queryAIModel(anonymize(user_question))
result

In [None]:
# The prompt looks good, so publish it to the prompt hub as a non-public repo.
# LANGCHAIN_HUB_API_KEY must be set as an environment variable beforehand.
hub.push(
    "subirmansukhani/rakuten-qa-rag",
    ChatPromptTemplate.from_template(template),
    new_repo_is_public=False,
)

In [None]:
# Lets take a look at the prompt hub

import json

from IPython.display import Javascript, display

# Define the URL you want to open
url = 'https://smith.langchain.com/hub/my-prompts?organizationId=6ac11f6f-c332-4bac-b45b-28a8a96410b4'

# JavaScript that opens the URL in a new tab.
# - json.dumps() emits the URL as a properly quoted/escaped JS string literal.
# - The hub page lives on a different origin than the notebook, so the script
#   must not read newWindow.document (the browser rejects that access).
# - window.open() returns null when the popup is blocked; in that case a
#   clickable link is placed in the cell's output area instead.
js_code = f'''
var hubUrl = {json.dumps(url)};
var newWindow = window.open(hubUrl, "_blank");
if (!newWindow) {{
    var link = document.createElement("a");
    link.href = hubUrl;
    link.target = "_blank";
    link.textContent = hubUrl;
    element.append(link);
}}
'''

# Run the script in the cell's output area
display(Javascript(js_code))