In [None]:
! pip install qdrant-client

In [146]:
import os

from langchain_community.chat_models import ChatOpenAI
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.chains import ConversationChain
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.memory import ConversationSummaryMemory
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage, SystemMessage
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Qdrant

In [None]:
# Read the help-centre export. The "url" column is declared as the source
# column; later cells read it back from each document's metadata['source'].
loader = CSVLoader(
    file_path="clean_help.csv",
    source_column="url",
)

# Materialise the documents once so the raw rows can be inspected.
data = loader.load()

In [76]:
# Splitter settings
chunk_size = 5000
chunk_overlap = 0
strip_whitespace = True

splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
    strip_whitespace=strip_whitespace,
)

# Remove the leading line (the documentation path) from every loaded row
# *before* splitting. Doing it per chunk after the split also cut the first
# line out of continuation chunks, which do not start with a path line and
# therefore lost real article text.
documents = loader.load()
for document in documents:
    # find() returns -1 when there is no newline, so the slice keeps the
    # whole text of single-line rows.
    path_idx = document.page_content.find("\n")
    document.page_content = document.page_content[path_idx + 1:]

# split_documents carries each row's metadata over to its chunks.
article_text = splitter.split_documents(documents)

# Later cells read the chunk list as article_texts[0]; keep that wrapper shape.
article_texts = [article_text]

In [77]:
# Spot-check a single chunk (index 20 of the chunk list) to judge the quality
# of the cleaned text; the value is displayed as the cell output.
article_texts[0][20].page_content

'text: How Do We Make Money? Our business is based on a simple idea: When you follow our links to visit a store, that store pays us a commission on whatever you buy during your visit. We then share that commission with you, our members, as Cash Back. In other words, retailers pay to partner with us because we send shoppers to their websites or brick-and-mortar stores. They may also pay us to help grow their brand. That means we might feature them on our website, in emails, etc. Because of these partnerships, members like you benefit by earning Cash Back and getting great offers. To sum it up, we help retailers succeed and, at the same time, help our members save money. What does it mean when Rakuten states that a cash back percentage "was" a certain amount? Stores pay us a commission for sending our members to their websites, and we share it with you as cash back. When a store joins Rakuten, we set a cash back percentage for that store (a "base cash back percentage"). We work with our 

In [87]:
# Load the embedding model
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}
embedding_model_name = "BAAI/bge-small-en"

# Use the default model cache directory only when the environment has not
# already chosen one, so the notebook also runs where this path is absent.
os.environ.setdefault('SENTENCE_TRANSFORMERS_HOME', '/mnt/code/model_cache/')

# Pass the named constant instead of repeating the model id literal.
embeddings = HuggingFaceBgeEmbeddings(model_name=embedding_model_name,
                                      model_kwargs=model_kwargs,
                                      encode_kwargs=encode_kwargs
                                     )

In [90]:
# Qdrant connection settings come from the environment so that no secret is
# stored in the notebook.
# NOTE(review): an API key used to be hard-coded in this cell; treat that key
# as exposed and rotate it.
qdrant_url = os.environ.get(
    "QDRANT_URL",
    "https://59f8f159-fb60-44e8-bfc4-9f35c77ca8d4.us-east4-0.gcp.cloud.qdrant.io:6333",
)
qdrant_api_key = os.environ.get("QDRANT_API_KEY")
if not qdrant_api_key:
    raise RuntimeError(
        "Set the QDRANT_API_KEY environment variable before running this cell."
    )

# Build the vector store from the chunk list in the "rakuten" collection.
doc_store = Qdrant.from_documents(article_texts[0],
                                  embedding=embeddings,
                                  url=qdrant_url,
                                  api_key=qdrant_api_key,
                                  collection_name="rakuten")

In [147]:
# Transcript of the chat, seeded with the assistant's greeting.
messages = [{"role": "assistant", "content": "How can I help you today?"}]

# Chat model (temperature 0) used both to answer and to summarise history.
rag_llm = ChatOpenAI(model='gpt-3.5-turbo-0613', temperature=0)

# Conversation chain whose memory is a running summary produced by the same model.
conversation = ConversationChain(
    llm=rag_llm,
    memory=ConversationSummaryMemory(llm=rag_llm),
    verbose=False,
)

In [148]:
# Retrieve supporting passages from the vector DB

SIMILARITY_THRESHOLD = 0.83


def get_relevant_docs(user_input):
    """Look up passages related to ``user_input`` in ``doc_store``.

    Only hits whose score is strictly greater than ``SIMILARITY_THRESHOLD``
    are kept (assumes a higher score means a closer match -- confirm for the
    collection's distance metric).

    Returns:
        A ``(urls, contexts)`` pair of parallel lists: the ``source`` metadata
        and the page content of each retained hit, in search order.
    """
    urls, contexts = [], []
    for document, score in doc_store.similarity_search_with_score(user_input):
        if score > SIMILARITY_THRESHOLD:
            urls.append(document.metadata['source'])
            contexts.append(document.page_content)
    return urls, contexts
 
 
def build_system_prompt(user_input):
    """Build the system prompt for one user question.

    Retrieves the URLs and passages related to ``user_input`` and places them
    in the assistant instructions as plain text. URLs are de-duplicated (order
    preserved) and comma separated; passages are separated by blank lines.
    When nothing relevant was retrieved, the help-centre URL is used as the
    link and the context slot says so, instead of showing an empty list.

    Args:
        user_input: The question typed by the user.

    Returns:
        The fully formatted system prompt string.
    """
    urls, contexts = get_relevant_docs(user_input)

    # Several chunks can come from the same article; list each link once.
    unique_urls = list(dict.fromkeys(urls))
    url_links = ", ".join(unique_urls) if unique_urls else "https://www.rakuten.com/help"
    context = "\n\n".join(contexts) if contexts else "No relevant context was found."

    # Create prompt
    template = """ You are a virtual assistant for Rakuten and your task is to answer questions related to Rakuten which includes general information about Rakuten.
 
                    Do not hallucinate. If you don't find an answer, you can point user to the official website here: https://www.rakuten.com/help . 
 
                    In your response, include the following url links at the end of your response {url_links} and any other relevant URL links that you referred.
 
                    Also, at the end of your response, ask if your response was helpful. 
 
                    Here is some relevant context: {context}"""

    prompt_template = PromptTemplate(
        input_variables=["url_links", "context"],
        template=template
    )
    return prompt_template.format(url_links=url_links, context=context)
 
# Send one question to the OpenAI-backed conversation chain
def queryAIModel(user_input):
    """Answer ``user_input`` through the ``conversation`` chain.

    A system message holding the retrieval-augmented prompt and a human
    message holding the raw question are passed together, as a list, in the
    chain's ``input`` argument.

    Returns:
        Whatever ``conversation.predict`` returns for that input.
    """
    # Named chat_turn so it does not shadow the notebook-level `messages` list.
    chat_turn = [
        SystemMessage(content=build_system_prompt(user_input)),
        HumanMessage(content=user_input),
    ]
    return conversation.predict(input=chat_turn)

In [149]:
# Prompt for a question, record it in the transcript, and show the answer
user_question = input("Please provide your question here :")
messages.append(dict(role="user", content=user_question))

result = queryAIModel(user_question)
result

Please provide your question here : how much money can i load on a gift card


'The Rakuten Gift Card Shop allows you to purchase third-party gift cards, gift codes, or other stored-value products. There are no specific details provided about the maximum amount of money you can load on a gift card. However, it is important to note that all gift cards featured on the Rakuten Gift Card Shop are subject to availability, and the company reserves the right to impose quantity limits on any order. Additionally, prices and rewards are subject to change without notice. For more information about the Rakuten Gift Card Shop and its terms of sale, you can visit the official Rakuten help page here: [https://www.rakuten.com/help/article/rakuten-gift-card-shop-terms-of-sale-360052462534]. I hope this information is helpful!'