In [None]:
# Install the packages this notebook needs into the kernel's environment (-q keeps pip's output quiet).
# NOTE(review): no versions are pinned, so a fresh install may resolve to different releases over time — consider pinning.
%pip install -q langchain langchain-community langchain-openai langchain-chroma python-dotenv pypdf bs4

In [None]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if one is found) into the process environment.
# NOTE(review): presumably this supplies OPENAI_API_KEY for the OpenAI clients created in later cells — confirm the .env contents.
load_dotenv()

In [None]:
# Model identifiers used by the rest of the notebook.
EMBD_MODEL = "text-embedding-3-small"  # passed to OpenAIEmbeddings
LLM_MODEL = "gpt-4o-mini"  # passed to ChatOpenAI

#### Indexing

##### 1. Load Documents

In [None]:
from langchain_community.document_loaders import WebBaseLoader
# from langchain_community.document_loaders.pdf import PyPDFLoader

# Path of a local PDF that can be indexed instead of the web page; only the
# commented-out PyPDFLoader line below refers to it.
document_path = "resources/00 SCRUMstudy-SBOK-Guide-3rd-edition-Spanish.pdf"

# Active source: a blog post fetched over HTTP. Swap in the PyPDFLoader line
# (and its import above) to index the PDF instead.
# loader = PyPDFLoader(file_path=document_path)
loader = WebBaseLoader(web_path="https://lilianweng.github.io/posts/2023-06-23-agent/")

# `document` is whatever loader.load() returns; the splitting cell passes it
# to split_documents().
document = loader.load()

##### 2. Split Documents

In [None]:
# Import the splitter from its own package. `langchain.text_splitter` is only a
# legacy re-export of this module, and because the install cell does not pin
# versions, a fresh environment can resolve to a `langchain` release that no
# longer ships that alias.
from langchain_text_splitters import RecursiveCharacterTextSplitter


# Split the loaded document(s) into smaller Document chunks (List[Document]).
# chunk_size is the maximum chunk length, measured in characters by default.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2048)
docs = text_splitter.split_documents(document)

# Show how many chunks were produced.
len(docs)

##### 3. Embeddings and Store

In [None]:
# Embeddings
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_chroma import Chroma

# Embedding client for the model chosen in the configuration cell.
emdb = OpenAIEmbeddings(model=EMBD_MODEL)

# Give every chunk a stable, position-based id. Without explicit ids each call
# generates fresh ones, so re-running this cell in the same kernel appends the
# same chunks again to the in-memory collection and retrieval starts returning
# duplicates. With stable ids the write is an upsert and a re-run overwrites
# the stored chunks instead.
# NOTE(review): if a re-run yields fewer chunks than an earlier run, the
# surplus ids from the earlier run stay in the collection until the kernel is
# restarted.
chunk_ids = [f"chunk-{i}" for i in range(len(docs))]

# Embed the chunks and index them in a Chroma vector store (no persist
# directory is given, so nothing is written to disk here).
vectorstore = Chroma.from_documents(documents=docs, embedding=emdb, ids=chunk_ids)

#### Retrieve and Generate

##### Retrieve

In [None]:
# Query used for the retrieval demo; the generation cell reuses this variable.
question = "What are the approaches to Task Decomposition?"

# Wrap the vector store in a retriever (default settings) and fetch the chunks
# it returns for the question.
retriever = vectorstore.as_retriever()
retrieved_docs = retriever.invoke(question)

# Display the retrieved documents for inspection.
retrieved_docs

##### Generate answer

In [None]:
from langchain_openai.chat_models import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


def format_docs(documents):
    """Join the text of the retrieved documents into a single context string.

    Args:
        documents: Iterable of objects exposing a ``page_content`` string
            (the Documents returned by the retriever).

    Returns:
        The ``page_content`` values separated by blank lines; an empty string
        when ``documents`` is empty.
    """
    return "\n\n".join(doc.page_content for doc in documents)


# System instructions plus the retrieved context. The question itself is sent
# only once, as the human message below, rather than being repeated inside the
# system text.
qa_system_prompt = """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Context: {context} 
"""


# from_messages infers the input variables ("context", "input") from the
# templates, so they do not need to be listed by hand.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        ("human", "{input}"),
    ]
)

llm = ChatOpenAI(model=LLM_MODEL)

# The chain input is the question string. It is fed to the retriever, whose
# Documents are flattened to plain text by format_docs (otherwise the prompt
# would receive the Python repr of a list of Document objects), and it is also
# passed through unchanged as "input".
qa_chain = (
    {
        "context": retriever | format_docs,
        "input": RunnablePassthrough(),
    }
    | qa_prompt
    | llm
    | StrOutputParser()
)

# Alternative question in Spanish, to try a non-English query:
# question = 'En que consiste el enfoque Task Decomposition?'

response = qa_chain.invoke(question)

In [None]:
# Show the answer string produced by the QA chain as this cell's output.
response