### Learning Retrieval of RAG

In [1]:
import os

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

In [2]:
load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY")

if not openai_api_key:
    raise ValueError("Please set the OPENAI_API_KEY environment variable.")

model_name = "gpt-4o"
temperature = 0.0
llm = ChatOpenAI(
    model=model_name,
    temperature=temperature,
    openai_api_key=openai_api_key
)

In [3]:
loader = TextLoader("./work.txt")
documents= loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

In [4]:
print(len(texts), "chunks created")

4 chunks created


In [5]:
database = FAISS.from_documents(texts, embeddings)
retriever = database.as_retriever(search_kwargs={"k": 3})


def query_database(query):
    """
    Query the database with a given query string and performing Similarity Search.
    """
    results = retriever.get_relevant_documents(query)

    if not results:
        return "No relevant documents found."

    return results

In [6]:
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000022AADA336D0>, search_kwargs={'k': 3})

In [7]:
relevant_documents = query_database("What is the main topic of the document?")

  results = retriever.get_relevant_documents(query)


In [8]:
for doc in relevant_documents:
    print(f"Document: {doc.page_content[:200]}...")  # Print the first 200 characters of each document
    print(f"Metadata: {doc.metadata}\n")  # Print metadata if available
    print("-" * 80)  # Separator for clarity

Document: It seemed curious that the same task could be painful to one person and pleasant to another, but I didn't realize at the time what this imbalance implied, because I wasn't looking for it. I didn't rea...
Metadata: {'source': './work.txt'}

--------------------------------------------------------------------------------
Document: "Didn't it get boring when you got to be about 15?" I asked.

"No," he said, "by then I was interested in maths."

In another conversation he told me that what he really liked was solving problems. To...
Metadata: {'source': './work.txt'}

--------------------------------------------------------------------------------
Document: What Doesn't Seem Like Work?

January 2015

My father is a mathematician. For most of my childhood he worked for Westinghouse, modelling nuclear reactors.

He was one of those lucky people who know ea...
Metadata: {'source': './work.txt'}

--------------------------------------------------------------------------------


In [9]:
relevant_documents = query_database(
    "What types of things did the author want to build?")

print("\n\n".join(
    [
        f"Document: {doc.page_content[:200]}...\nMetadata: {doc.metadata}"
        for doc in relevant_documents
    ]
))

Document: What Doesn't Seem Like Work?

January 2015

My father is a mathematician. For most of my childhood he worked for Westinghouse, modelling nuclear reactors.

He was one of those lucky people who know ea...
Metadata: {'source': './work.txt'}

Document: "Didn't it get boring when you got to be about 15?" I asked.

"No," he said, "by then I was interested in maths."

In another conversation he told me that what he really liked was solving problems. To...
Metadata: {'source': './work.txt'}

Document: It seemed curious that the same task could be painful to one person and pleasant to another, but I didn't realize at the time what this imbalance implied, because I wasn't looking for it. I didn't rea...
Metadata: {'source': './work.txt'}
