In [1]:
from llama_index.llms.ollama import Ollama

# Local Ollama model used for every LLM call in this notebook (tool query
# engines, sub-question engine and the ReAct agent).
# A generous request timeout is set explicitly: with the client's default
# timeout the agent run recorded at the end of this notebook aborted with
# "ReadTimeout: timed out" while waiting for the model to respond.
llm = Ollama(model="deepseek-r1:8b", request_timeout=300.0)

In [2]:
from llama_index.readers.file import UnstructuredReader
from pathlib import Path

subjects = ["Biology", "Chemistry", "Physics", "Mathematics", "Computer_science"]

loader = UnstructuredReader()
doc_set = {}   # subject name -> documents loaded from that subject's PDF
all_docs = []  # every loaded document, across all subjects
for subject in subjects:
    pdf_path = Path("./data") / f"{subject}.pdf"
    subject_docs = loader.load_data(file=pdf_path, split_documents=False)
    # Tag each document with its subject; this replaces the document's
    # metadata dict with a single "subject" entry.
    for doc in subject_docs:
        doc.metadata = {"subject": subject}
    doc_set[subject] = subject_docs
    all_docs += subject_docs

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from llama_index.core import VectorStoreIndex, StorageContext, Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Global chunk size applied when documents are split for indexing.
Settings.chunk_size = 512

# Open-source embedding model shared by every per-subject index.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Build one vector index per subject and persist each one under
# ./storage/<subject>.
index_set = {}
for subject in subjects:
    persist_dir = f"./storage/{subject}"
    storage_context = StorageContext.from_defaults()
    cur_index = VectorStoreIndex.from_documents(
        doc_set[subject], storage_context=storage_context, embed_model=embed_model
    )
    index_set[subject] = cur_index
    storage_context.persist(persist_dir=persist_dir)


In [4]:
# Load indices from disk
from llama_index.core import load_index_from_storage

# Rebuild the subject -> index mapping from the stores persisted under
# ./storage/<subject>, reusing the same embedding model.
index_set = {}
for subject in subjects:
    persist_dir = f"./storage/{subject}"
    storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
    cur_index = load_index_from_storage(storage_context, embed_model=embed_model)
    index_set[subject] = cur_index

In [5]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata

# One query-engine tool per subject, each backed by that subject's vector index
# and answering with the shared LLM.
individual_query_engine_tools = []
for subject in subjects:
    subject_engine = index_set[subject].as_query_engine(llm=llm)
    subject_metadata = ToolMetadata(
        name=f"vector_index_{subject}",
        description=f"useful for when you want to answer queries about the {subject} subject",
    )
    individual_query_engine_tools.append(
        QueryEngineTool(query_engine=subject_engine, metadata=subject_metadata)
    )

In [6]:
from llama_index.core.query_engine import SubQuestionQueryEngine

# Sub-question engine built over all per-subject tools, using the shared LLM.
sub_question_kwargs = {
    "query_engine_tools": individual_query_engine_tools,
    "llm": llm,
}
query_engine = SubQuestionQueryEngine.from_defaults(**sub_question_kwargs)

In [7]:
# Expose the sub-question engine as a tool of its own, for queries that span
# more than one subject.
sub_question_metadata = ToolMetadata(
    name="sub_question_query_engine",
    description="useful for when you want to answer queries that require analyzing multiple subjects",
)
query_engine_tool = QueryEngineTool(
    query_engine=query_engine, metadata=sub_question_metadata
)

In [8]:
# Full toolbox for the agent: every per-subject tool plus the cross-subject one.
tools = [*individual_query_engine_tools, query_engine_tool]

In [9]:
# Custom instructions asking the model to answer in explicit
# Thought / Action / Action Input steps and to finish with a single "Answer:" line.
# NOTE(review): the text contains no placeholders for tool names or tool
# descriptions, so it cannot serve as a complete agent system prompt by
# itself — confirm how it is meant to be combined with the agent's own prompt.
custom_prompt = """
You are an intelligent agent that answers queries by following a strict chain-of-thought format.
For every step, please output your reasoning and your tool calls using exactly the following structure:

Thought: <Your internal reasoning for this step.>
Action: <The tool name you want to use (or output "None" if no tool is used).>
Action Input: <The JSON input to pass to the tool (or "{}" if no tool is used).>

When you are ready to provide your final answer, output only:
Answer: <Your final answer here.>

Do not output any extra text or commentary outside of this structure.
"""

In [10]:
from llama_index.core.agent import ReActAgent

# Build the ReAct agent over all tools.
# The custom instructions are passed via `context`, which ReActAgent injects
# into its built-in ReAct system header (the header still supplies the tool
# names and descriptions). The previous `prompt_template=` keyword is not a
# named parameter of `from_tools`; it was absorbed by **kwargs and had no
# effect on the prompt.
# NOTE(review): the custom text tells the model to emit "Action: None" when no
# tool is needed — verify that this plays well with the agent's output parser.
agent = ReActAgent.from_tools(tools, llm=llm, verbose=True, context=custom_prompt)

In [12]:
# Minimal interactive chat loop around the agent; type "exit" to leave.
while True:
    try:
        text_input = input("User: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: stop cleanly
        # instead of surfacing a traceback.
        break
    if text_input == "exit":
        break
    if not text_input:
        # Nothing was typed; don't spend an agent/LLM round-trip on it.
        continue
    response = agent.chat(text_input)
    print(f"Agent: {response}")

> Running step 9c4a256c-d907-4717-808b-3b8a8e68314e. Step input: hello
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: sub_question_query_engine
Action Input: {'input': 'Hello! How can I assist you today?'}
[0m[1;3;34mObservation: Error: timed out
[0m> Running step 713fc522-2d53-4983-b08e-b767641542dd. Step input: None


ReadTimeout: timed out