In [1]:
from dotenv import find_dotenv, load_dotenv
load_dotenv(find_dotenv())
import os
import openai
# Read the OpenAI key from the environment populated by load_dotenv().
# Prefer the standard OPENAI_API_KEY variable; fall back to the legacy
# (misspelled) OPENA_AI_KEY name so existing .env files keep working.
openai.api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENA_AI_KEY")
from pathlib import Path
import pandas as pd

from llama_index.core.query_engine import PandasQueryEngine
from prompt import new_prompt, instruction_str, context

from RAG.agent.src.prompt import context
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from note_engine import note_engine
from RAG.agent.src.load_index import canada_engine

In [2]:
# The CSV lives in the ``agent_data`` directory next to the current working
# directory's parent; load it and preview the first rows.
agent_data_dir = Path.cwd().parent / 'agent_data'
filepath = agent_data_dir / 'WorldPopulation2023.csv'
population = pd.read_csv(filepath)
population.head()

Unnamed: 0,Rank,Country,Population2023,YearlyChange,NetChange,Density(P/Km²),Land Area(Km²),Migrants(net),Fert.Rate,MedianAge,UrbanPop%,WorldShare
0,36,Afghanistan,42239854,2.70 %,1111083,65,652860,-65846,4.4,17.0,26 %,0.53 %
1,138,Albania,2832439,-0.35 %,-9882,103,27400,-8000,1.4,38.0,67 %,0.04 %
2,34,Algeria,45606480,1.57 %,703255,19,2381740,-9999,2.8,28.0,75 %,0.57 %
3,212,American Samoa,43914,-0.81 %,-359,220,200,-790,2.2,29.0,N.A.,0.00 %
4,202,Andorra,80088,0.33 %,264,170,470,200,1.1,43.0,85 %,0.00 %


In [3]:
# Question used to smoke-test the pandas query engine.
query_string = "What is the population of the Canada in 2023?"

# Wrap the DataFrame in a query engine (verbose=True echoes the generated
# pandas instructions and their output) and install the custom pandas prompt.
population_query_engine = PandasQueryEngine(df=population, verbose=True)
population_query_engine.update_prompts({"pandas_prompt": new_prompt})

# Bare last expression so the notebook displays the Response object.
population_query_engine.query(query_string)

> Pandas Instructions:
```
df[df['Country'] == 'Canada']['Population2023'].values[0]
```
> Pandas Output: 38781291


Response(response='38781291', source_nodes=[], metadata={'pandas_instruction_str': "df[df['Country'] == 'Canada']['Population2023'].values[0]", 'raw_pandas_output': '38781291'})

In [4]:
# Tools the ReAct agent may call. Each query engine is wrapped with a name and
# a description; the description is the text the LLM reads when deciding which
# tool to use, so it should state plainly what the tool covers.
tools = [
    note_engine,
    QueryEngineTool(
        query_engine=population_query_engine,
        metadata=ToolMetadata(
            name="population_data",
            # Fixed wording: "information at" -> "information about".
            description="this gives information about the world population and demographics",
        ),
    ),
    QueryEngineTool(
        query_engine=canada_engine,
        metadata=ToolMetadata(
            name="canada_data",
            description="this gives detailed information about canada the country",
        ),
    ),
]

# Use the maintained "gpt-3.5-turbo" alias rather than the dated "-0613"
# snapshot, which OpenAI has retired and no longer serves.
llm = OpenAI(model="gpt-3.5-turbo")

# verbose=True prints the agent's Thought / Action / Observation trace.
agent = ReActAgent.from_tools(tools, llm=llm, verbose=True, context=context)

In [5]:
# Minimal interactive loop: forward each prompt to the agent and print the
# answer until the user enters "q".
while True:
    try:
        prompt = input("Enter a prompt (q to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the kernel was interrupted: stop cleanly
        # instead of surfacing a traceback.
        break
    if prompt == "q":
        break
    if not prompt:
        # Blank input: ask again rather than sending an empty query to the LLM.
        continue
    result = agent.query(prompt)
    print(result)

[1;3;38;5;200mThought: I can use the "canada_data" tool to get the information about the capital of Canada.
Action: canada_data
Action Input: {'input': 'capital'}
[0m[1;3;34mObservation: Ottawa
[0m[1;3;38;5;200mThought: I can answer without using any more tools.
Answer: The capital of Canada is Ottawa.
[0mThe capital of Canada is Ottawa.
[1;3;38;5;200mThought: I can use the "canada_data" tool to find the number of languages spoken in Canada.
Action: canada_data
Action Input: {'input': 'languages'}
[0m[1;3;34mObservation: Approximately 98% of Canadians can speak either or both English and French. English is spoken by 57% of Canadians, French by 21%, and 16% can speak both languages. Additionally, a variety of non-official languages are spoken by Canadians, with Mandarin, Punjabi, Cantonese, Spanish, Arabic, Tagalog, Italian, German, and Tamil being some of the most common non-official first languages listed in the 2021 census.
[0m[1;3;38;5;200mThought: I have obtained the inf

In [8]:
# Query the Canada engine directly, without going through the agent; the bare
# expression makes the notebook display the full Response object.
canada_engine.query(
    "What is the population of Canada in 2023?"
)

Response(response="It is estimated that Canada's population surpassed 40,000,000 in 2023.", source_nodes=[NodeWithScore(node=TextNode(id_='1e3969b9-cf72-4315-85cd-59e0bd9f078b', embedding=None, metadata={'page_label': '15', 'file_name': '/Users/hamidadesokan/Dropbox/2_Skill_Development/DLML/genai_applications/embeddings/RAG/agent/agent_data/Canada.pdf'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='4daca020-6492-4b39-88c8-355006225013', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '15', 'file_name': '/Users/hamidadesokan/Dropbox/2_Skill_Development/DLML/genai_applications/embeddings/RAG/agent/agent_data/Canada.pdf'}, hash='aa26ea19234884268796584283178c0f114f40ca24d3c03f16ca3686d9551216'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='2f1f499f-f943-4d38-8c67-9120dfa68ac4', node_type=<ObjectType.TEXT: '1'>, metadata={'page_label': '14', 'file_name': '/Users/hamidadeso

In [10]:
# --- Disabled cell: inline draft of an index-loading helper -----------------
# Everything below is commented out and does not run. The notebook imports
# `canada_engine` from RAG.agent.src.load_index instead; presumably that module
# now holds this logic (verify before deleting this cell).
#
# NOTE(review): in the draft below, the Chroma client and collection are
# created BEFORE the `os.path.exists(chromapath)` check. If constructing
# `chromadb.PersistentClient(path=...)` creates the directory, the
# "building index" branch would never be taken -- confirm before re-enabling.
# from llama_index.embeddings.openai import OpenAIEmbedding
# from llama_index.readers.file import PDFReader
# from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage
# import chromadb
# from llama_index.vector_stores.chroma import ChromaVectorStore
# import os
# from pathlib import Path
# 
# 
# def get_index(data, index_name, embed_model, chromapath):
#     index = None
#     db = chromadb.PersistentClient(path=chromapath)
#     chroma_collection = db.get_or_create_collection(index_name)
#     vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
#     if not os.path.exists(chromapath):
#         print("building index", index_name)
#         storage_context = StorageContext.from_defaults(vector_store=vector_store)
#         index = VectorStoreIndex.from_documents(data, storage_context=storage_context, embed_model=embed_model,
#                                                 show_progress=True)
#     else:
#         index = VectorStoreIndex.from_vector_store(vector_store,
#                                                    embed_model=embed_model, )
# 
#     return index

In [12]:

# --- Disabled cell: inline construction of `canada_engine` ------------------
# Commented out and not executed; the live `canada_engine` used elsewhere in
# this notebook comes from the import of RAG.agent.src.load_index.
# The draft reads Canada.pdf from the `agent_data` directory, builds or loads
# a Chroma-backed index via a `get_index` helper, and exposes it as a query
# engine.
# embedding_model_name = "text-embedding-3-large"
# embed_model = OpenAIEmbedding(model=embedding_model_name)
# 
# canada_filepath = Path.joinpath(Path.cwd().parent, 'agent_data', 'Canada.pdf')
# chromapath = str(
#     Path.joinpath(Path.cwd().parent, 'agent_data', 'agent_chroma_db'))
# canada_pdf = PDFReader().load_data(file=canada_filepath)
# canada_index = get_index(canada_pdf, "canada", embed_model,
#                          chromapath)
# canada_engine = canada_index.as_query_engine()

In [14]:
# Sanity check: ask the Canada engine a simple question and print only the
# response text (print() renders the Response via its string form).
capital_response = canada_engine.query("What is the captital of Canada")
print(capital_response)

Ottawa
