In [1]:
import pandas as pd
from datetime import datetime
import json
from IPython.display import display
from IPython.display import Markdown

import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from utils import read_config, OracleAgent

from llama_index.core.schema import Document
from llama_index.core import VectorStoreIndex, StorageContext, Settings
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.gemini import Gemini
from llama_index.core.vector_stores.types import VectorStoreQuery

import chromadb

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location of the local JSON settings file handed to read_config.
info_path = '.env/info.json'
# read_config is imported from the project's utils module; presumably it returns
# the parsed settings as a dict (the disabled code below indexes it by key) - confirm.
info = read_config(info_path)

# Disabled: Oracle agent built from the 'DW_conn_info' settings entry.
# oracle_agent = OracleAgent(info['DW_conn_info'])

# Disabled: Gemini setup that uses the 'gkey' settings entry as the API key.
# NOTE(review): generation_config and safety_settings are not defined in the
# cells visible here - define them before re-enabling the GenerativeModel line.
# API_KEY= info['gkey']
# genai.configure(api_key=API_KEY)
# llm_model = genai.GenerativeModel('gemini-pro', generation_config=generation_config, safety_settings=safety_settings)
# model = Gemini(model="models/gemini-pro")

In [3]:
# Hugging Face identifier of the sentence-embedding model to load.
# NOTE(review): presumably this must be the same model that produced the vectors
# stored in the Chroma collection - confirm against the indexing notebook.
model_name = "sentence-transformers/distiluse-base-multilingual-cased-v1"
# Embedding wrapper; later cells pass it to the index and use it to embed the query text.
embed_model = HuggingFaceEmbedding(model_name=model_name)

In [4]:
# Load the persisted vector database.
# Opens the on-disk Chroma store under ./chroma_db and wraps its "DashboardQA"
# collection so it can be queried through LlamaIndex.
db2 = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db2.get_or_create_collection("DashboardQA")
# get_or_create_collection creates a brand-new, empty collection when the name
# or the storage path is wrong, so a bad setup would otherwise surface only as
# empty query results further down. Fail here with a clear message instead.
assert chroma_collection.count() > 0, (
    "Chroma collection 'DashboardQA' is empty - check that ./chroma_db "
    "has been populated before running the query cells."
)
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
# Index view over the existing vectors; embed_model is the embedding model
# created in an earlier cell.
index = VectorStoreIndex.from_vector_store(
    vector_store,
    embed_model=embed_model,
)


In [5]:
# Question to look up, kept verbatim in Chinese. Roughly: "Which report should I
# open to see pre-tax net profit, income statement and balance sheet figures?"
query = "我要看稅前淨利、損益表、資產負債表的數據要看哪張表"

# Embed the question with the embedding model created in an earlier cell.
embedding_vector = embed_model.get_query_embedding(query)

# Despite its name, query_vector is a VectorStoreQuery request object that
# carries the embedding and asks for the single closest match.
query_vector = VectorStoreQuery(
    query_embedding=embedding_vector,
    similarity_top_k=1,
)

In [6]:
# Run the similarity search directly against the Chroma-backed vector store
# (this does not go through `index`); the captured output of the next cell
# shows the return value is a VectorStoreQueryResult.
result = vector_store.query(query_vector)

In [7]:
# Print the full repr of the query result (matched nodes with their metadata and relationships).
# NOTE(review): this repr is very long; showing only each node's metadata would be easier to read.
print(result)

VectorStoreQueryResult(nodes=[TextNode(id_='b5a8d032-bc64-4c94-9f41-4e62dd13c495', embedding=None, metadata={'dashboard_name': '投控經營分析', 'report_name': '華紙-營運績效分析月報', 'page_name': '事業部層', 'pic_name': '事業部層-完整畫面.jpg'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='98006c58-116a-4098-882f-32455b18743f', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'dashboard_name': '投控經營分析', 'report_name': '華紙-營運績效分析月報', 'page_name': '事業部層', 'pic_name': '事業部層-完整畫面.jpg'}, hash='47c8e3eb15f78729226dc2ccafc0b148330a5d8dfb54e8e4d43edaf72be445eb'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='a973e1a5-5a48-46a8-ad59-d5a7f312e10d', node_type=<ObjectType.TEXT: '1'>, metadata={'dashboard_name': '投控經營分析', 'report_name': '華紙-營運績效分析月報', 'page_name': '事業部層', 'pic_name': '事業部層-完整畫面.jpg'}, hash='0c6bc856079b805dfead70575981179818b369bfd151ce8051069b970ab18937'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_