In [None]:
# Load modules
from dotenv import load_dotenv
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from IPython.display import Markdown, display
import chromadb
import os

In [2]:
# Load environment variables (such as the API key) from a local .env file so
# that no credentials have to be hard-coded in the notebook.
# The `True` shown as cell output is the value returned by load_dotenv().
load_dotenv()

True

### Create Vectorstore Database

This code snippet takes a whole folder of text files and creates a vectorstore from it. Using a machine-learning model, the texts are converted into numerical representations, so-called embeddings. An embedding is a vector in a multi-dimensional vector space. Texts that are related to each other in meaning or context are mapped to vectors that lie close to each other in this space. Finally, the vectors are persisted in a vectorstore.

In [3]:
# path of data folder
data_folder = os.path.join("data")


# initializing ChromaDB (persisted on disk in the "chroma_db" folder)
db = chromadb.PersistentClient(path="chroma_db")
chroma_collection = db.get_or_create_collection("quickstart")


# define embedding function
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")


# load documents
documents = SimpleDirectoryReader(data_folder).load_data()


# create and store vectors
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# The collection is persistent and get_or_create_collection() reuses it, so
# re-running this cell unconditionally would embed the documents again and add
# them to the same collection a second time. Only build the index when the
# collection is still empty; otherwise attach to the vectors already stored.
# To rebuild after the data folder changed, delete the "chroma_db" folder
# (or the "quickstart" collection) and run this cell again.
if chroma_collection.count() == 0:
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        embed_model=embed_model,
        show_progress=True,
    )
else:
    index = VectorStoreIndex.from_vector_store(
        vector_store,
        embed_model=embed_model,
    )

Parsing nodes:   0%|          | 0/14 [00:00<?, ?it/s]

Generating embeddings:   0%|          | 0/14 [00:00<?, ?it/s]

### Load Vectorstore Database and query it 

Here, the persisted vectorstore is loaded from disk and an index is created on top of the stored embeddings. The index can then be queried.

In [4]:
# Re-open the persisted Chroma database and fetch the collection stored in it
CHROMA_PATH = "chroma_db"
COLLECTION_NAME = "quickstart"

db2 = chromadb.PersistentClient(path=CHROMA_PATH)
chroma_collection = db2.get_or_create_collection(COLLECTION_NAME)

# Build an index on top of the stored vectors, reusing the embedding model
# object defined when the vectorstore was created
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)

# Ask a question against the persisted index and render the answer in bold
question = "What is the document regarding combined transport in Austria about?"
query_engine = index.as_query_engine()
response = query_engine.query(question)
display(Markdown(f"<b>{response}</b>"))

<b>The document provides information on support measures and regulations related to combined transport in Austria, including exemptions from the night driving ban for noisy heavy goods vehicles with a maximum permissible gross weight of more than 7.5 tons. It also lists specific routes that are particularly relevant for combined transport in Austria.</b>

### Tool note_engine for creating notes 

This is the setup for the note engine, which serves the agent as a tool for saving notes based on its results.

In [None]:
from llama_index.core.tools import FunctionTool
import os
from datetime import datetime
import textwrap


# file path where notes.txt will be saved (inside the "data" folder)
# NOTE: "data" is also the folder that the vectorstore cell reads its documents
# from, so saved notes become part of the indexed documents on the next rebuild.
note_file = os.path.join("data", "notes.txt")


# function to append a note to the notes.txt file
def save_note(note):
    """Append a timestamped, line-wrapped note to ``note_file``.

    Args:
        note: Text of the note. It is re-wrapped to lines of at most 80
            characters before the timestamp prefix is added.

    Returns:
        The string "note saved" once the note has been written.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")    # prefix every note with a time stamp

    wrapped_note = textwrap.fill(note, width=80)                # break long notes into lines instead of one single line

    # make sure the target folder exists so that opening the file cannot fail
    # on a fresh checkout
    note_dir = os.path.dirname(note_file)
    if note_dir:
        os.makedirs(note_dir, exist_ok=True)

    # Append mode creates the file when it is missing, so no separate
    # "create" step (and no dangling file handle) is needed. UTF-8 is set
    # explicitly so that non-ASCII characters such as "–" are stored correctly
    # regardless of the platform's default encoding.
    with open(note_file, "a", encoding="utf-8") as f:
        f.write(f"[{timestamp}] {wrapped_note}\n")

    return "note saved"


# Wrap save_note as a tool the agent can call under the name "note_saver"
note_engine = FunctionTool.from_defaults(
    fn=save_note,
    name="note_saver",
    description="this tool saves a text based note to a file for the user",
)

### Agent querying Database with tools

This code combines loading the vectorstore for querying with initializing and prompting the agent.

In [None]:
from dotenv import load_dotenv
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
import chromadb
import streamlit as st
from note_engine import note_engine


# Pull environment variables (API key) in from the .env file
load_dotenv()


# Embedding model used for the index
EMBED_MODEL_NAME = "BAAI/bge-base-en-v1.5"
embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)

# Open the persisted vectorstore from disk (which was created in vectorstore.py)
db2 = chromadb.PersistentClient(path="chroma_db")
chroma_collection = db2.get_or_create_collection("quickstart")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)


# Query engine over the persisted index (vectorstore), exposed to the agent
# as a tool named "document_search"
query_engine = index.as_query_engine()

search_tool_metadata = ToolMetadata(
    name="document_search",
    description="Search for relevant information in the vectorstore",
)
vectorstore_engine = QueryEngineTool(
    query_engine=query_engine,
    metadata=search_tool_metadata,
)

# Toolbox for the agent: note saving and document search
tools = [note_engine, vectorstore_engine]

# Role description handed to the agent as its context
context = """Purpose: The primary role of this agent is to assist users by providing accurate 
            information about transport concerns based on the data contained in vectorstore. 
            If he doesn't know he should not make up anything only answer "I DON'T KNOW" """


# Build the agent from the LLM, the toolbox and the context
llm = OpenAI(model="gpt-4o-mini")  # model can be adjusted
agent = ReActAgent.from_tools(
    tools,
    llm=llm,
    verbose=True,
    context=context,
)


# Interactive loop: keep answering prompts until the user enters "q"
while True:
    prompt = input("Enter a prompt (q to quit): ")
    if prompt == "q":
        break
    result = agent.query(prompt)
    print(result)

> Running step d6009bd8-4626-49df-8fa8-66f9a6a3a0a7. Step input: which routes can you recommend me for a combined transport from vienna to italy?
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: document_search
Action Input: {'input': 'combined transport routes from Vienna to Italy'}
[0m[1;3;34mObservation: Wien Freudenau Hafen CCT – border crossing Nickelsdorf (Hungary)
[0m> Running step 5c4de7a2-efcf-4849-b07f-46b2b571f16d. Step input: None
[1;3;38;5;200mThought: I have found some information regarding a route from Vienna to Italy involving combined transport. However, I need to gather more details to provide a comprehensive answer.
Action: document_search
Action Input: {'input': 'combined transport options from Vienna to Italy'}
[0m[1;3;34mObservation: The combined transport options from Vienna to Italy include the routes from Wien Freudenau Hafen CCT to border crossings in Nickelsdorf, Klingenba

Content of the resulting notes text file:





[2024-11-23 22:40:58] Routes from Vienna to Italy: 1. Wien Freudenau Hafen CCT – border crossing
Nickelsdorf (Hungary) 2. Wien Freudenau Hafen CCT – border crossing Klingenbach
(Hungary) 3. Wien Freudenau Hafen CCT – border crossing Drasenhofen (Czechia)
