# Load env vars

In [None]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment;
# QDRANT_API_KEY is read from the environment further down.
load_dotenv()

# Create LLM

In [None]:
from langchain_openai import ChatOpenAI

# Chat model shared by every chain in this notebook (rephrasing, QA, summaries).
# temperature=0 is passed to keep sampling randomness to a minimum.
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
)

# Ingestion pipeline

In [None]:
from langchain_openai import OpenAIEmbeddings

# Embedding model handed to every QdrantVectorStore in this notebook.
# NOTE(review): the Qdrant collections are created with size=1536, so the model
# chosen here must emit 1536-dimensional vectors — confirm if the model changes.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")


In [None]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

In [None]:
import os

# Qdrant endpoint: taken from QDRANT_URL when that variable is set, otherwise the
# original cluster address is used, so existing setups keep working unchanged.
url = os.getenv(
    "QDRANT_URL",
    "https://e7f4684c-fd33-4db0-b1d3-268870ecb84d.europe-west3-0.gcp.cloud.qdrant.io:6333",
)
# API key is never hard-coded; it comes from the environment (None when unset).
api_key = os.getenv("QDRANT_API_KEY")

In [None]:
# Client for the Qdrant cluster configured above.
client = QdrantClient(
    url=url,
    api_key=api_key,
    https=True,
    timeout=300,
)

# Create the page-level collection only when it is missing, so this cell can be
# re-run without failing on an already existing "book-content" collection.
if not client.collection_exists(collection_name="book-content"):
    client.create_collection(
        collection_name="book-content",
        vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
    )

In [None]:
# LangChain wrapper over the "book-content" collection; it is given the
# embeddings object defined above.
vector_store = QdrantVectorStore(
    client=client,
    collection_name="book-content",
    embedding=embeddings,
)

## Load Document

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Load the book PDF from the local data/ folder.
# NOTE(review): presumably load() yields one Document per PDF page — confirm.
loader = PyPDFLoader("data/fortaleza-digital.pdf")
pages = loader.load()

In [None]:
# Drop pages whose extracted text is empty; an empty string is falsy.
filtered_documents = [doc for doc in pages if doc.page_content]

In [None]:
import uuid

# One random UUID string per document that survived the filter.
ids = [str(uuid.uuid4()) for _ in filtered_documents]

## Ingest Documents

In [None]:
import uuid
from langchain_core.documents import Document

In [None]:
# Only the first 100 pages are indexed. The ids are sliced to the same length so
# every document is paired with exactly one id (previously the full id list was
# passed alongside the truncated document list).
documents_to_index = filtered_documents[:100]
vector_store.add_documents(
    documents=documents_to_index,
    ids=ids[: len(documents_to_index)],
)

## How to add the vector store to a chain

In [None]:
# System prompt for the rephrasing step (written in Spanish): it frames the model
# as a fiction-book specialist and asks it to rewrite the user's request, fixing
# grammar and improving wording. It contains no template placeholders.
SYSTEM_PROMPT = """
<PERSONA>
Eres un especialista resolviendo dudas sobre libros de ficción
</PERSONA>

<TASK>
Tu tarea es refrasear la solicitud del usuario para genera una solicitud refraseada.

- Puedes corregir los errores gramaticales
- Puedes mejorar la semántica y orden léxico de la palabras para un mejor entendimiento
</TASK>
"""

# User turn: just the raw request, injected through the {user_request} placeholder.
USER_PROMPT = """{user_request}"""


In [None]:
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate

# Prompt for the rephrasing step. The system part is passed as a ready-made
# SystemMessage (its text has no placeholders); the user part is a template
# that receives {user_request} when the prompt is invoked.
rephrase_prompt = ChatPromptTemplate([
    SystemMessage(content=SYSTEM_PROMPT),
    ("user", USER_PROMPT)
])

In [None]:
# Render the prompt on its own (no LLM call) to inspect the resulting messages.
rephrase_prompt.invoke({"user_request": "quien es susan fletcher?"})

In [None]:
# First version of the chain: fill the rephrase prompt and send it to the model.
chain = rephrase_prompt | llm

In [None]:
# Try the prompt -> LLM chain with a sample question.
chain.invoke({"user_request": "quien es susan fletcher"})

In [None]:
from langchain_core.runnables import RunnableLambda

# Second version of the chain: rephrase the question, pull the text out of the
# model's reply and use it as the query for the vector store (top 4 hits).
chain = (
    rephrase_prompt
    | llm
    | RunnableLambda(lambda message: message.content)
    | vector_store.as_retriever(search_kwargs={"k": 4})
)

In [None]:
# Try the rephrase -> retrieve chain with a sample question.
chain.invoke({"user_request": "quien es susan fletcher"})

In [None]:
def combine_documents(documents: list[Document]) -> str:
    """Concatenate the text of *documents* into a single string.

    Args:
        documents: Documents whose ``page_content`` is joined, in order.

    Returns:
        The page contents separated by a blank line; an empty string when
        *documents* is empty.
    """
    page_texts = (doc.page_content for doc in documents)
    return "\n\n".join(page_texts)


In [None]:
# System prompt for the answering step (written in Spanish): answer the user's
# question using only the text injected into the {context} placeholder.
# NOTE(review): this rebinds the SYSTEM_PROMPT / USER_PROMPT names that the
# rephrasing cell also assigns; re-run that cell before rebuilding rephrase_prompt.
SYSTEM_PROMPT = """
<PERSONA>
Eres un especialista resolviendo dudas sobre libros de ficción
</PERSONA>

<TAREA>
Tu tarea es responder la pregunta del usuario.
</TAREA>

<RESTRICCIONES>
- Solo responde la pregunta del usuario tomando como contexto lo provisto en <CONTEXTO>.
</RESTRICCIONES>

<CONTEXTO>
{context}
</CONTEXTO>
"""

# User turn: shows the model both the original and the rephrased question.
USER_PROMPT = """
user question: {user_request}
rephrased user question: {rephrased_request}
"""

from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate

# QA prompt. The instructions (including the retrieved {context}) are sent with
# the "system" role so the model reads them as instructions rather than as one of
# its own earlier replies, which is what the "ai" role denotes.
# The ("system", template) tuple form keeps the text a template, so {context} is
# filled in when the prompt is invoked.
qa_prompt = ChatPromptTemplate([
    ("system", SYSTEM_PROMPT),
    ("user", USER_PROMPT)
])


In [None]:
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter

# Step 1: keep the raw question and add an LLM-rewritten version of it.
rephrase_step = {
    "user_request": itemgetter("user_request"),
    "rephrased_request": rephrase_prompt | llm | RunnableLambda(lambda message: message.content),
}

# Step 2: carry both questions forward and add the retrieved context, built from
# the 10 closest documents to the rephrased question.
context_retriever = vector_store.as_retriever(search_kwargs={"k": 10})
retrieval_step = {
    "user_request": itemgetter("user_request"),
    "rephrased_request": itemgetter("rephrased_request"),
    "context": itemgetter("rephrased_request") | context_retriever | RunnableLambda(combine_documents),
}

# Full pipeline: rephrase -> retrieve -> QA prompt -> LLM -> plain answer text.
simple_chatbot = (
    rephrase_step
    | RunnablePassthrough()
    | retrieval_step
    | qa_prompt
    | llm
    | RunnableLambda(lambda message: message.content)
)


In [None]:
# Ask the chatbot a sample question; the chain ends in a step that returns the
# model reply's text content.
simple_chatbot.invoke({"user_request": "¿quién es susan fletcher?"})

## Summarize pages

In [None]:
# Merge every 5 consecutive pages into one Document (the last group may be
# shorter); page texts inside a group are separated by a blank line.
grouped_documents = [
    Document(
        page_content="\n\n".join(
            page.page_content for page in filtered_documents[start : start + 5]
        )
    )
    for start in range(0, len(filtered_documents), 5)
]


In [None]:
# System prompt for the summarisation step (written in Spanish): asks the model
# to summarise the content of book pages, keeping relevant details.
# NOTE(review): this rebinds the SYSTEM_PROMPT / USER_PROMPT names assigned by
# earlier cells; re-run those cells before rebuilding their prompts.
SYSTEM_PROMPT = """
<PERSONA>
Eres un especialista tomando extractos de paginas y haciendo resumenes
</PERSONA>

<TAREA>
Tu tarea es generar un resumen de contenido de paginas de un libro de literatura
</TAREA>

<CONSIDERACIONES>
- Genera un resumen tomando considerando detalles relevantes
</CONSIDERACIONES>
"""

# User turn: the text to summarise, injected through the {content} placeholder.
USER_PROMPT = """
contenido: {content}
"""

from langchain_core.prompts import ChatPromptTemplate

# Summarisation prompt. The instructions are sent with the "system" role so the
# model reads them as instructions rather than as one of its own earlier replies,
# which is what the "ai" role denotes. The user turn carries the {content} to
# summarise.
summarize_prompt = ChatPromptTemplate([
    ("system", SYSTEM_PROMPT),
    ("user", USER_PROMPT)
])


In [None]:
# Summarisation pipeline: take a Document, expose its text as "content", fill
# the summary prompt, call the model and wrap the reply text in a new Document.
summarized_chain = (
    {"content": RunnableLambda(lambda doc: doc.page_content)}
    | summarize_prompt
    | llm
    | RunnableLambda(lambda message: Document(page_content=message.content))
)

In [None]:
# Summarise only the first two page groups; the slice limits how many inputs
# are sent through the chain.
summarized_docs = summarized_chain.batch(inputs=grouped_documents[:2])

In [None]:
# Display the generated summaries as the cell output.
summarized_docs

In [None]:
# Create the summaries collection only when it is missing, so this cell can be
# re-run without failing on an already existing "book-summarized" collection.
if not client.collection_exists(collection_name="book-summarized"):
    client.create_collection(
        collection_name="book-summarized",
        vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
    )

In [None]:
# NOTE(review): this rebinds `vector_store`, which earlier pointed at the
# "book-content" collection; code run after this cell that uses `vector_store`
# talks to "book-summarized" instead.
vector_store = QdrantVectorStore(
    client=client,
    collection_name="book-summarized",
    embedding=embeddings,
)

In [None]:
import uuid

# Store each summary under a freshly generated UUID string.
ids = [str(uuid.uuid4()) for _ in summarized_docs]
vector_store.add_documents(documents=summarized_docs, ids=ids)

# How can we add the summarized DB to create a better chatbot?

In [None]:
# Dedicated handle on the "book-summarized" collection, under a name that does
# not depend on what `vector_store` currently points at.
summarized_vector_store = QdrantVectorStore(
    client=client,
    collection_name="book-summarized",
    embedding=embeddings,
)

In [None]:
# Chatbot backed by the summaries collection: rephrase the question, retrieve the
# 10 closest summaries, then answer with the QA prompt.
# The retrieved text is published under the key "context" because that is the
# placeholder the QA system prompt declares; under any other key (it used to be
# "context_1") the prompt cannot be filled and invoking the chain fails.
# The retriever is taken from `summarized_vector_store` so the chain does not
# depend on what `vector_store` happens to be bound to.
simple_chatbot = (
    {
        "user_request": itemgetter("user_request"),
        "rephrased_request": rephrase_prompt | llm | RunnableLambda(lambda x: x.content)
    }
    | RunnablePassthrough()
    | {
        "user_request": itemgetter("user_request"),
        "rephrased_request": itemgetter("rephrased_request"),
        "context": itemgetter("rephrased_request") | summarized_vector_store.as_retriever(search_kwargs={"k": 10}) | RunnableLambda(combine_documents)
    }
    | qa_prompt
    | llm
    | RunnableLambda(lambda x: x.content)
)