### LangChain - Retrievers

In [None]:
import os

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

In [None]:
# Run python-dotenv's loader before reading the key from the environment.
load_dotenv()

# Fail fast on a missing or empty key rather than at the first API call.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("Please set the OPENAI_API_KEY environment variable.")

# Chat model configuration shared by the rest of the notebook.
model_name = "gpt-4o"
temperature = 0.0
llm = ChatOpenAI(model=model_name, temperature=temperature, openai_api_key=openai_api_key)

In [None]:
# Read the source text file into LangChain documents.
loader = TextLoader("../work.txt")
documents = loader.load()

# Split the documents into overlapping chunks (size 1000, overlap 200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(documents)

# Embedding client, authenticated with the key read from the environment.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

In [None]:
# Report how many chunks the splitter produced.
print(f"{len(texts)} chunks created")

In [None]:
# Embed the chunks and build a FAISS index over them.
database = FAISS.from_documents(texts, embeddings)

# Retriever view over the index; "k": 3 asks for three results per query.
retriever = database.as_retriever(
    search_kwargs={"k": 3},
)


def query_database(query: str) -> list:
    """
    Run a similarity search for ``query`` through the module-level retriever.

    Args:
        query: The search text to look up.

    Returns:
        The list of documents the retriever returned. The list is empty when
        nothing matched, so callers can always iterate the result and read
        ``page_content`` / ``metadata`` without checking for a message string.
    """
    # ``invoke`` replaces the deprecated ``get_relevant_documents`` method.
    results = retriever.invoke(query)

    # Normalise "no hits" (empty or None) to an empty list so the return type
    # is always a list of documents.
    return results or []

In [None]:
# Run a sample question through the similarity search.
relevant_documents = query_database(
    "What is the main topic of the document?"
)

In [None]:
if isinstance(relevant_documents, str):
    # A string result is a status message, not a list of documents; iterating
    # it would yield characters and fail on ``.page_content``.
    print(relevant_documents)
elif not relevant_documents:
    print("No relevant documents found.")
else:
    for doc in relevant_documents:
        # Show a 200-character preview of each document, then its metadata.
        print(f"Document: {doc.page_content[:200]}...")
        print(f"Metadata: {doc.metadata}\n")