In [None]:
# Import necessary libraries
import getpass
import os

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.embeddings import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_community.chat_models import ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [6]:
# PDF sources to ingest, relative to the notebook's working directory
file_paths = [
    "data/Benefits Details 2024.11.07.pdf",
    "data/Benefits_Details_2024.11.05.pdf",
]

# Accumulate everything each loader returns into one flat list
docs = []
for file_path in file_paths:
    docs += PyPDFLoader(file_path).load()

print(len(docs))  # How many loaded items ended up in `docs`

16


In [8]:
# Chunking parameters: maximum chunk size and how much consecutive chunks
# share, in the splitter's length units (characters with its default
# length function).
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Break the loaded documents into overlapping chunks for embedding
splits = text_splitter.split_documents(docs)

In [None]:
# Set up OpenAI API and embeddings

def initialize_llm_and_embeddings(api_key=None, model="gpt-4o"):
    """
    Initializes the language model and embeddings.

    Args:
        api_key: OpenAI API key. When a non-empty value is given it is exported
            as the ``OPENAI_API_KEY`` environment variable. When omitted (or
            empty), the value already present in that variable is used, so the
            secret never has to appear in the notebook.
        model: Name of the chat model handed to ``ChatOpenAI``.

    Returns:
        A ``(llm, embeddings)`` tuple: the ``ChatOpenAI`` instance and the
        ``OpenAIEmbeddings`` instance.

    Raises:
        ValueError: If no key is passed and ``OPENAI_API_KEY`` is not set.
    """
    if api_key:
        os.environ["OPENAI_API_KEY"] = api_key
    elif not os.environ.get("OPENAI_API_KEY"):
        # Fail early with an actionable message instead of letting the
        # client constructors fail later with a less obvious error.
        raise ValueError(
            "No OpenAI API key available: pass `api_key` or set the "
            "OPENAI_API_KEY environment variable."
        )

    llm = ChatOpenAI(
        model=model,
        temperature=0,  # favour repeatable answers
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
    embeddings = OpenAIEmbeddings()
    return llm, embeddings

# Example usage: take the key from the environment and prompt for it only when
# it is missing, so the secret is never stored in the notebook or its outputs.
llm, embeddings = initialize_llm_and_embeddings(
    os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")
)

In [16]:
# Folder on disk where Chroma keeps the collection (adjust as needed)
persist_directory = "db"  # Set your directory here

# Embedding client used to vectorize each chunk
embeddings = OpenAIEmbeddings()

# Embed every chunk in `splits` and store the result under `persist_directory`
vectorstore = Chroma.from_documents(
    persist_directory=persist_directory,
    embedding=embeddings,
    documents=splits,
)

In [12]:
# Fresh embedding client for the reopened store
embeddings = OpenAIEmbeddings()

# Open the collection stored under `persist_directory`.
# Note: the constructor takes `embedding_function`, whereas
# `Chroma.from_documents` takes `embedding`.
vectorstore = Chroma(
    embedding_function=embeddings,
    persist_directory=persist_directory,
)

# Wrap the store in a retriever using its default search settings
retriever = vectorstore.as_retriever()

In [19]:
def get_answer(input, retriever, llm):
    """Answer a question using a retrieval chain built from the given parts.

    Args:
        input: The question text; sent to the chain under the ``"input"`` key.
        retriever: Retriever handed to ``create_retrieval_chain``.
        llm: Chat model handed to ``create_stuff_documents_chain``.

    Returns:
        The value stored under ``'answer'`` in the chain's result.
    """
    # System instructions; `{context}` is a template placeholder that is left
    # for the documents chain to fill.
    instructions = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
        "\n\n"
        "{context}"
    )
    messages = [("system", instructions), ("human", "{input}")]
    chat_prompt = ChatPromptTemplate.from_messages(messages)

    # Documents chain first, then the retrieval chain wrapped around it
    rag_chain = create_retrieval_chain(
        retriever,
        create_stuff_documents_chain(llm, chat_prompt),
    )

    return rag_chain.invoke({"input": input})['answer']

In [20]:
# Question sent verbatim as the chain input. The braces around the card names
# are literal text here: the string is passed as a value, not used as a template.
question = """
As a knowledgeable financial advisor specializing in credit card discounts and rewards,
your task is to analyze and compare the specific points, rewards, and benefits offered by the
following credit cards: {Hilton Honors American Express Aspire Card, Blue Cash Preferred Card from American Express},
for purchases at YouTube Premium. Provide clear, specific information on the points each card offers
for purchases at this store. Specify whether the points or benefits differ for in-store versus online purchases,
and avoid referencing unrelated benefits. Highlight which card is optimal for use.
based on the points or cashback it offers, and provide concise comparisons where applicable.
Limit your response to 500 characters for clarity and conciseness.
"""

# Run the retrieval chain and show the answer as the cell's output
answer = get_answer(question, retriever, llm)
answer

'The Hilton Honors American Express Aspire Card offers 3x points on purchases like YouTube Premium, as it falls under the "everything else" category. The Blue Cash Preferred Card from American Express does not specifically list YouTube Premium, but it generally offers 1% cash back on such purchases. There is no differentiation between in-store and online purchases for these cards. The Hilton Honors Aspire Card is optimal for YouTube Premium due to its higher points rate.'