<a href="https://colab.research.google.com/github/a81Biz/ChatBot/blob/master/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install llama-index

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [1]:
import os
import shutil
from llama_index import download_loader, GPTVectorStoreIndex, ServiceContext, SimpleDirectoryReader
from pathlib import Path
from llama_index import GPTListIndex, LLMPredictor
from langchain import OpenAI
from llama_index.indices.composability import ComposableGraph
from langchain.agents import Tool
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.agents import initialize_agent

from llama_index.langchain_helpers.agents import LlamaToolkit, create_llama_chat_agent, IndexToolConfig

# define a decompose transform
from llama_index.indices.query.query_transform.base import DecomposeQueryTransform
from llama_index.query_engine.transform_query_engine import TransformQueryEngine


In [2]:
# Ruta de la carpeta de origen
ruta_docs = './docs'

# Ruta de la carpeta de destino
ruta_chatDocs = './chatDocs'

# Verificar si la carpeta de destino existe, y si no, crearla
if not os.path.exists(ruta_chatDocs):
    os.makedirs(ruta_chatDocs)

# Lista para almacenar los nombres y extensiones de los documentos
nombres_documentos = []

# Recorrer los archivos de la carpeta de origen
for archivo in os.listdir(ruta_docs):
    # Obtener la ruta completa del archivo de origen
    ruta_origen = os.path.join(ruta_docs, archivo)

    # Verificar si es un archivo (no directorio)
    if os.path.isfile(ruta_origen):
        # Obtener el nuevo nombre del archivo en minúsculas y sin espacios
        nuevo_nombre = archivo.lower().replace(' ', '')

        # Obtener la ruta completa del archivo de destino
        ruta_destino = os.path.join(ruta_chatDocs, nuevo_nombre)

        # Copiar el archivo a la carpeta de destino
        shutil.copyfile(ruta_origen, ruta_destino)

        # Obtener el nombre y extensión del archivo y agregarlo a la lista
        nombre, extension = os.path.splitext(nuevo_nombre)
        nombres_documentos.append((nombre, extension))

In [3]:
doc_set = {}
all_docs = []
for nombre, extension  in nombres_documentos:
    each_docs = SimpleDirectoryReader(f'{ruta_chatDocs}/{nombre}{extension}').load_data()
    # insert year metadata into each year
    for d in each_docs:
        d.extra_info = {"doc": nombre}
    doc_set[nombre] = each_docs
    all_docs.extend(each_docs)

In [4]:
#key de API OpenAi
os.environ["OPENAI_API_KEY"] = 'xxx'

In [5]:
# initialize simple vector indices + global vector index
# NOTE: don't run this cell if the indices are already loaded! 
cur_index = {}
index_set = {}
service_context = ServiceContext.from_defaults(chunk_size_limit=512)
for nombre, extension in nombres_documentos:
    cur_index = GPTVectorStoreIndex.from_documents(doc_set[nombre])
    index_set[nombre] = cur_index


In [6]:
# Load indices from disk
for nombre in nombres_documentos:
    index_set[nombre] = cur_index

In [7]:

# set summary text for each doc
index_summaries = [f"Indices de {nombre}" for nombre in nombres_documentos]


In [8]:
# set number of output tokens
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, max_tokens=512))
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

In [9]:
# define a list index over the vector indices
# allows us to synthesize information across each index
graph = ComposableGraph.from_indices(
    GPTListIndex, 
    [index_set[n] for n, e in nombres_documentos], 
    index_summaries=index_summaries,
    service_context=service_context
)

In [10]:
decompose_transform = DecomposeQueryTransform(
    llm_predictor, verbose=True
)


In [11]:
# define custom query engines
custom_query_engines = {}
for index in index_set.values():
    query_engine = index.as_query_engine()
    query_engine = TransformQueryEngine(
        query_engine,
        query_transform=decompose_transform,
        transform_extra_info={'index_summary': index.index_struct.summary},
    )
    custom_query_engines[index.index_id] = query_engine
custom_query_engines[graph.root_id] = graph.root_index.as_query_engine(
    response_mode='tree_summarize',
    verbose=True,
)

In [12]:
# construct query engine
graph_query_engine = graph.as_query_engine(custom_query_engines=custom_query_engines)

In [13]:
# index configs
index_configs = []
for nombre, ext in nombres_documentos:
    query_engine = index_set[nombre].as_query_engine( 
        similarity_top_k=3, 
        )
    tool_config = IndexToolConfig(
        query_engine=query_engine, 
        name=f"Vector Index {nombre}",
        description=f"useful for when you want to answer queries about the {nombre} ",
        tool_kwargs={"return_direct": True, "return_sources": True},
    )
    index_configs.append(tool_config)

In [14]:
# graph config
graph_config = IndexToolConfig(
    query_engine=graph_query_engine,
    name=f"Graph Index",
    description="useful for when you want to answer queries that require analyzing multiple  documents.",
    tool_kwargs={"return_direct": True, "return_sources": True},
    return_sources=True
)

toolkit = LlamaToolkit(
    index_configs=index_configs,
    graph_configs=[graph_config]
)

memory = ConversationBufferMemory(memory_key="chat_history")
llm=OpenAI(temperature=0)
agent_chain = create_llama_chat_agent(
    toolkit,
    llm,
    memory=memory,
    verbose=True
)

In [20]:
while True:
    text_input = input("User: ")
    response = agent_chain.run(input=text_input)
    print(f'Agent: {response}')

User: What are the main needs and challenges of the support team in non-production environments?


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
AI: The main needs and challenges of the support team in non-production environments include ensuring that the environment is stable and secure, providing timely and accurate support to users, and ensuring that the environment is up-to-date with the latest software and security patches. Additionally, the support team must be able to quickly identify and resolve any issues that arise in the environment.[0m

[1m> Finished chain.[0m
Agent: The main needs and challenges of the support team in non-production environments include ensuring that the environment is stable and secure, providing timely and accurate support to users, and ensuring that the environment is up-to-date with the latest software and security patches. Additionally, the support team must be able to quickly identify and resolve any issues that arise in the environm

KeyboardInterrupt: ignored