In [None]:
%pip install langchain
%pip install langchain-openai

In [None]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.document_loaders.wikipedia import WikipediaLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from dotenv import load_dotenv
import os

# Read the credentials and endpoint from a local .env file so nothing is hardcoded.
load_dotenv()
api_key = os.environ.get("API_KEY")
base_url = os.environ.get("OPENAI_ENDPOINT")

# Model settings forwarded to the chat model below.
model_name = "gpt-4o-mini"
temp = 0.0

# Chat model shared by the manual generation step and the LCEL chain.
llm = ChatOpenAI(model=model_name, temperature=temp, api_key=api_key, base_url=base_url)

**Loader**

In [None]:
# Query Wikipedia for one article, with doc_content_chars_max raised to 40,000.
loader = WikipediaLoader(query="Anthony_Hopkins", load_max_docs=1, doc_content_chars_max=40000)
docs = loader.load()

In [None]:
# Number of documents the loader returned.
len(docs)

In [None]:
# Length of the first document's page_content.
len(docs[0].page_content)

In [None]:
# Preview only the start of the article: the loader allows up to 40,000
# characters, and printing all of it would flood the cell output.
print(docs[0].page_content[:1000])

**Splitter**

In [None]:
# Split the loaded article into overlapping chunks (chunk_size=300, chunk_overlap=50).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=300)
all_splits = text_splitter.split_documents(docs)


In [None]:
# Report how many chunks the splitter produced.
print(f"Split Wikipedia page into {len(all_splits)} sub-documents.")

In [None]:
# Inspect the first chunk produced by the splitter.
all_splits[0]

**Embeddings**

In [None]:
# Pass the same credentials and endpoint the chat model uses. Without them the
# embeddings client falls back to its own environment settings (e.g.
# OPENAI_API_KEY), which may be unset because this notebook reads
# API_KEY / OPENAI_ENDPOINT instead.
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-large",
    api_key=api_key,
    base_url=base_url,
)

**Vector Store**

In [None]:
# In-memory vector index that embeds documents with the model configured above.
vector_store = InMemoryVectorStore(embedding=embeddings)

In [None]:
# Index every chunk, then show the first ten ids returned by the store.
document_ids = vector_store.add_documents(all_splits)
document_ids[:10]

In [None]:
# Retriever over the vector store, configured with k=3 results per query.
retriever = vector_store.as_retriever(search_kwargs=dict(k=3))

**Prompt**

In [None]:
# Two-message prompt: a fixed system role plus a human message whose
# {question} and {context} placeholders are filled in at invocation time.
template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are an assistant for question-answering tasks."),
        (
            "human",
            "Use the following pieces of retrieved context to answer the question. "
            "If you don't know the answer, just say that you don't know. "
            "Use three sentences maximum and keep the answer concise. "
            "\n# Question: \n-> {question} "
            "\n# Context: \n-> {context} "
            "\n# Answer: ",
        ),
    ]
)

In [None]:
# Render the prompt with placeholder values to inspect the resulting messages.
template.invoke(
    {"context": "##CONTEXT##", "question": "##QUESTION##"}
).to_messages()

In [None]:
def format_docs(docs):
    """Join the page_content of each document into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The contents separated by a blank line followed by "-> "; an empty
        string when ``docs`` is empty.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n-> ".join(contents)

**Generation**

In [None]:
# Manual RAG pass: retrieve chunks for the question, render the prompt,
# then send the resulting messages to the chat model.
question = "When The Silence of the Lambs was released?"
context = format_docs(retriever.invoke(question))
messages = template.invoke({"context": context, "question": question}).to_messages()
answer = llm.invoke(messages)

In [None]:
# Show the second prompt message (index 1): the human message carrying the
# question and the retrieved context.
print(messages[1].content)

In [None]:
# Display the object returned by llm.invoke for the manual run.
answer

**LCEL**

In [None]:
# The same pipeline expressed as an LCEL chain: the input string is sent to the
# retriever (then formatted) as `context` and passed through unchanged as
# `question`; the resulting mapping feeds the prompt and then the chat model.
rag_chain = (
    RunnableParallel(
        {
            "context": retriever | format_docs,
            "question": RunnablePassthrough(),
        }
    )
    | template
    | llm
)

In [None]:
# Run the full chain; the input string serves both as the retrieval query and
# as the {question} value in the prompt.
rag_chain.invoke("When he was born?")