In [2]:
import os

import nest_asyncio
import openai

# Take the OpenAI key from the environment rather than hardcoding it;
# os.environ.get yields None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Apply the nest_asyncio patch before any of the later cells run.
nest_asyncio.apply()


In [3]:
from pathlib import Path

from llama_index.readers.file import UnstructuredReader

loader = UnstructuredReader()

# Per-year document lists, keyed by year. Nothing fills this mapping while the
# per-year loop below stays commented out.
doc_set = {}

# NOTE(review): later cells iterate over `years` and index `doc_set[year]`.
# Neither is populated here, so those cells cannot run from a fresh kernel
# until this per-year loading is restored or they are adapted to `all_docs`.
#
# years = [2022, 2021, 2020, 2019]
# for year in years:
#     year_docs = loader.load_data(
#         file=Path(f"./data/Clean.Code.A.Handbook.of.Agile.Software.Craftsmanship"), split_documents=False
#     )
#     # insert year metadata into each year
#     for d in year_docs:
#         d.metadata = {"year": year}
#     doc_set[year] = year_docs
#     all_docs.extend(year_docs)

# Bind the loaded documents to `all_docs`. The previous spelling `all.docs`
# tried to set an attribute on the builtin `all` and raised AttributeError.
# NOTE(review): "./data/" looks like a directory rather than a single file;
# confirm that UnstructuredReader.load_data accepts that for `file`.
all_docs = loader.load_data(
    file=Path("./data/"), split_documents=False
)

In [4]:
# Build one vector index per year and persist each one to its own folder
from llama_index.core import Settings, StorageContext, VectorStoreIndex

Settings.chunk_size = 512

index_set = {}
for year in years:
    # Each year gets its own default storage context so it can be persisted
    # separately under ./storage/<year>.
    storage_context = StorageContext.from_defaults()
    index_set[year] = VectorStoreIndex.from_documents(
        doc_set[year], storage_context=storage_context
    )
    storage_context.persist(persist_dir=f"./storage/{year}")

In [5]:
# Rebuild index_set from the per-year folders persisted under ./storage
from llama_index.core import load_index_from_storage

index_set = {}
for year in years:
    # Point a storage context at this year's persisted folder, then load
    # the index stored there.
    storage_context = StorageContext.from_defaults(persist_dir=f"./storage/{year}")
    index_set[year] = load_index_from_storage(storage_context)

In [6]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata

# One query-engine tool per year, each backed by that year's index.
individual_query_engine_tools = []
for year in years:
    year_query_engine = index_set[year].as_query_engine()
    year_tool_metadata = ToolMetadata(
        name=f"vector_index_{year}",
        description=f"useful for when you want to answer queries about the {year} SEC 10-K for Uber",
    )
    individual_query_engine_tools.append(
        QueryEngineTool(
            query_engine=year_query_engine,
            metadata=year_tool_metadata,
        )
    )

In [7]:
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.llms.openai import OpenAI

# Query engine built on top of the per-year tools, driven by gpt-3.5-turbo.
sub_question_llm = OpenAI(model="gpt-3.5-turbo")
query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=individual_query_engine_tools,
    llm=sub_question_llm,
)

In [8]:
# Expose the sub-question engine as a tool in its own right, so it can sit
# next to the per-year tools.
sub_question_tool_metadata = ToolMetadata(
    name="sub_question_query_engine",
    description="useful for when you want to answer queries that require analyzing multiple SEC 10-K documents for Uber",
)
query_engine_tool = QueryEngineTool(
    query_engine=query_engine,
    metadata=sub_question_tool_metadata,
)

In [9]:
# Full tool list: every per-year tool followed by the sub-question tool.
tools = [*individual_query_engine_tools, query_engine_tool]
tools

[<llama_index.core.tools.query_engine.QueryEngineTool at 0x19291cdc7f0>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x19291d7d3a0>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x19291cfb610>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x19291d534f0>,
 <llama_index.core.tools.query_engine.QueryEngineTool at 0x1929562cbe0>]

In [12]:
import pickle

# Serialize the tools list to tools.pkl in the working directory.
# NOTE(review): this only works if every object reachable from `tools` is
# picklable - confirm. Only ever unpickle this file from a trusted source.
with open('tools.pkl', 'wb') as tools_file:
    pickle.dump(tools, tools_file)




In [10]:
from llama_index.agent.openai import OpenAIAgent

# Build the agent over the combined tool list, with verbose output enabled.
agent = OpenAIAgent.from_tools(
    tools,
    verbose=True,
)

In [1]:
# Ask the agent the cross-year question and print its reply as text.
# Requires `agent` from the agent-construction cell to exist in the kernel.
response = agent.chat(
    "Compare/contrast the risk factors described in the Uber 10-K across years. Give answer in bullet points."
)
print(response)

NameError: name 'agent' is not defined