In [None]:
import re
from llama_index.core import Document
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.schema import TransformComponent
from llama_index.llms.openai import OpenAI
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Settings
from llama_index.agent.openai import OpenAIAgent
from llama_index.core.objects import ObjectIndex
from typing import Sequence
from llama_index.core.tools import BaseTool, FunctionTool
import os
import yaml
import json

### Configuración de Llama Index

Modelos y embeddings

In [25]:
# Global LlamaIndex defaults: every index, query engine and chat engine created
# below picks up this LLM and this embedding model from `Settings`.
Settings.llm = OpenAI(
    model="gpt-4o-mini",
    temperature=0.7,
    max_tokens=1024,
)
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-3-large",
    embed_batch_size=10,
)

### Cargar documentos del RAG

In [None]:
# Read every supported file under ../data into a list of Document objects.
book = SimpleDirectoryReader("../data").load_data()

# A bare `len(book)` in the middle of a cell is evaluated but never displayed,
# so report the count explicitly.
print(f"Documents loaded: {len(book)}")

# Preview one document. The index is clamped so that a corpus with fewer than
# 11 documents does not raise IndexError; an empty corpus prints nothing.
if book:
    print(book[min(10, len(book) - 1)])

### Pipeline de ingesta

* Crea chunks de un máximo de 512 tokens con 40 tokens de solape entre ellos
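* Los embeddings asociados a los chunks extraídos se crean después, al construir el índice (este pipeline no incluye un modelo de embeddings entre sus transformaciones)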
* El resultado es la construcción de nodos
* Se limpian los nodos
* IngestionPipeline tiene la restricción de que solo trabaja con clases que heredan de TransformComponent
* Referencia : https://docs.llamaindex.ai/en/stable/module_guides/loading/ingestion_pipeline/transformations/#custom-transformations

In [None]:
class TextCleaner(TransformComponent):
    """
    Transformation component that reduces node text to ASCII letters, digits and spaces.

    For each node, every run of whitespace (newlines, tabs, repeated spaces) is
    first collapsed into a single space, and then every character other than
    ``0-9``, ``A-Z``, ``a-z`` and the space is removed.

    Whitespace is normalised *before* stripping because deleting a line break
    outright would fuse the last word of one line with the first word of the
    next, producing tokens that never appeared in the source text.
    """

    def __call__(self, nodes, **kwargs):
        """
        Clean the ``text`` of every node in place.

        Args:
            nodes (list): Nodes whose ``text`` attribute will be rewritten.
            **kwargs: Accepted for compatibility with the pipeline call
                convention; not used here.

        Returns:
            list: The same ``nodes`` list, with each node's text cleaned.
        """
        for node in nodes:
            # Turn newlines/tabs/multiple spaces into one space so that word
            # boundaries survive the character filter below.
            spaced = re.sub(r"\s+", " ", node.text)
            # Drop everything that is not an ASCII letter, digit or space.
            node.text = re.sub(r"[^0-9A-Za-z ]", "", spaced)
        return nodes
# Ingestion pipeline: split each document into chunks of at most 512 tokens
# with a 40-token overlap, then run the custom text cleaner on every chunk.
pipeline = IngestionPipeline(
    transformations=[SentenceSplitter(chunk_size=512, chunk_overlap=40), TextCleaner()]
)

# Run the loaded documents through the pipeline; the result is a list of nodes.
nodes = pipeline.run(documents=book)

# Last expression of the cell: shows how many nodes were produced.
len(nodes)

### Se crea el índice de embeddings en memoria

* Cómo se construye el índice es un mecanismo interno de la librería
* Lo importante es que luego este índice se usa para encontrar los chunks más relevantes a la consulta del usuario por similitud de coseno
* https://www.reddit.com/r/LocalLLaMA/comments/1bvo5l4/the_more_i_use_llamaindex_the_less_i_like_it/
* referencia: https://docs.llamaindex.ai/en/stable/understanding/loading/loading/

In [None]:
# Build the in-memory vector index from the cleaned nodes, showing a progress
# bar while it is constructed.
vector_index = VectorStoreIndex(nodes=nodes, show_progress=True)

### Validación del RAG usando métodos que vienen en la librería

In [None]:
# One-shot (stateless) question against the index, using the default query engine.
query_engine = vector_index.as_query_engine()
response = query_engine.query("What is the messy middle and how does it differ from traditional models of the consumer decision-making process?")
print(response)

### Creación del Chat

* Se usa como referencia esto: https://docs.llamaindex.ai/en/stable/examples/chat_engine/chat_engine_openai/

In [40]:
# Conversational interface over the same index, created with chat_mode="openai";
# verbose=True is passed so the engine reports what it does on each turn.
chat_engine = vector_index.as_chat_engine(
    chat_mode="openai",
    verbose=True,
)

In [None]:
# First chat turn: a plain greeting sent through the chat engine.
# Rebinds the notebook-level name `response`.
response = chat_engine.chat("Hi")
print(response)

In [None]:
# Second chat turn: the same question asked earlier through the query engine,
# now sent through the chat engine.
response = chat_engine.chat("What is the messy middle and how does it differ from traditional models of the consumer decision-making process?")
print(response)

In [None]:
# Third chat turn: a follow-up that refers to "it" without restating the
# content, sent through the same chat engine as the earlier turns.
response = chat_engine.chat("thanks, and now can you translate it to spanish?")
print(response)