## Lab 5: Create or restore the vector DB for Q&A

In [1]:
import os

In [2]:
# Enable the Radeon 780M under ROCm by overriding the GFX version.
# This is set here, before torch is imported in the next cell.
os.environ.update(HSA_OVERRIDE_GFX_VERSION='11.0.0')

# Echo the value back to confirm the override is in place.
print(os.getenv('HSA_OVERRIDE_GFX_VERSION'))

In [3]:
import torch

In [4]:
# Report which PyTorch build is installed in this kernel.
torch_build = torch.__version__
print(torch_build)

2.3.0+rocm6.0


In [5]:
# Select the compute device: the GPU when torch reports one, otherwise the CPU.
# The GPU is queried through the torch.cuda API; device index 0 is used for the report.
gpu_available = torch.cuda.is_available()
device = torch.device("cuda" if gpu_available else "cpu")

if not gpu_available:
    print('Using CPU')
else:
    print('Using GPU:', torch.cuda.get_device_name(0))
    print('GPU properties:', torch.cuda.get_device_properties(0))

Using GPU: AMD Radeon Graphics
GPU properties: _CudaDeviceProperties(name='AMD Radeon Graphics', major=11, minor=0, gcnArchName='gfx1100', total_memory=16384MB, multi_processor_count=6)


In [6]:
# NOTE: This is ONLY necessary in a Jupyter notebook.
# Details: Jupyter runs an event loop behind the scenes.
#          Starting another event loop to make async queries therefore
#          results in nested event loops, which is normally not allowed.
#          We use nest_asyncio to allow it for convenience.

import nest_asyncio

nest_asyncio.apply()

In [7]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core.node_parser import SentenceSplitter

from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama

# Models served by the local Ollama instance.
# Download the embedding model before running this lab with:
#   ollama pull nomic-embed-text
EMBED_MODEL_NAME = "nomic-embed-text"
LLM_MODEL_NAME = "qwen:7b"
LLM_REQUEST_TIMEOUT = 200.0

# Register the embedding model and the LLM as the global LlamaIndex defaults.
Settings.embed_model = OllamaEmbedding(model_name=EMBED_MODEL_NAME)
Settings.llm = Ollama(model=LLM_MODEL_NAME, request_timeout=LLM_REQUEST_TIMEOUT)

  _torch_pytree._register_pytree_node(


In [8]:
import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# Create the Chroma-backed vector index on first use, or restore it from disk.
#
# The client, collection, vector store and storage context are needed on both
# paths, so they are set up once instead of being duplicated in each branch.
#
# Whether the index must be built is decided by asking the collection how many
# entries it holds, not by checking that the directory exists. The directory
# can be left behind by a run that failed before any vectors were written
# (for example when the PDF is missing), and a directory-only check would then
# restore an empty index on every later run.
# NOTE(review): a collection that was only partially filled by an interrupted
# build is still treated as complete; delete the directory to force a rebuild.
CHROMA_DB_PATH = "./chroma_db/CUM_zh_db"
CHROMA_COLLECTION_NAME = "CUM_zh_db"
MANUAL_PDF_PATH = "../data/QinUM.pdf"

# Initialize the persistent client and get (or create) the collection.
db = chromadb.PersistentClient(path=CHROMA_DB_PATH)
chroma_collection = db.get_or_create_collection(CHROMA_COLLECTION_NAME)

# Assign Chroma as the vector store of the storage context.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

if chroma_collection.count() == 0:
    # Nothing stored yet: load the manual and build the index from it.
    documents = SimpleDirectoryReader(input_files=[MANUAL_PDF_PATH]).load_data()
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        transformations=[SentenceSplitter(chunk_size=2048, chunk_overlap=100)],
    )
else:
    # Vectors are already persisted: load the index from the stored vectors.
    index = VectorStoreIndex.from_vector_store(
        vector_store, storage_context=storage_context
    )

In [9]:
# Build the query engine on top of the index, with streaming responses enabled
# so that answers can be printed as they are generated.
query_engine = index.as_query_engine(
    similarity_top_k=3,
    response_mode="compact",
    streaming=True,
)

In [10]:
# Updating Prompt for Car User Manual Q&A
from llama_index.core import PromptTemplate

# --- Text QA prompt -------------------------------------------------------
# Chinese-language prompt that casts the model as a BYD Qin product expert,
# asks it to answer from the user manual, cite manual page numbers, and say so
# when a question is outside the manual. Lines are joined with "\n"; the final
# piece deliberately has no trailing newline.
text_qa_prompt_str = "\n".join(
    [
        "你是比亚迪秦汽车的产品专家，请根据用户问题基于产品使用手册进行给出回答和提示。",
        "---------------------",
        "{context_str}",
        "---------------------",
        "请基于该型号汽车的产品用户手册内容进行回答。",
        "回答问题时需要给出相关信息在产品用户手册中的页码。",
        "如果问题超出用户手册之外，请明确告知用户该问题超出手册范围。",
        "回答内容需准确且精炼。",
        "Query: {query_str}",
        "Answer: ",
    ]
)
text_qa_prompt = PromptTemplate(text_qa_prompt_str)
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": text_qa_prompt}
)

# --- Refine prompt --------------------------------------------------------
# Prompt used to refine an existing answer with additional context. It also
# pins the reply to the 'who are you' question.
refine_prompt_str = "\n".join(
    [
        "The original query is as follows: {query_str}.",
        "We have provided an existing answer: {existing_answer}.",
        "We have the opportunity to refine the existing answer (only if needed) with some more context below.",
        "-------------",
        "{context_msg}",
        "-------------",
        "Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.",
        "if the question is 'who are you' , just say I am Car User Manual Copilot.",
        "Answers need to be precise and concise.",
        "Refined Answer: ",
    ]
)
refine_prompt = PromptTemplate(refine_prompt_str)
query_engine.update_prompts(
    {"response_synthesizer:refine_template": refine_prompt}
)

# To list the prompt keys that can be overridden, uncomment:
# print(list(query_engine.get_prompts().keys()))

In [11]:
# Query test 0: ask how to switch the DM system working mode and request the
# relevant manual page numbers; the answer is streamed to the cell output.
question = "如何切换DM系统工作模式并请给出手册的相关页码？"
response = query_engine.query(question)
response.print_response_stream()

可以通过以下步骤在比亚迪秦汽车上切换DM系统的工作模式：

1. **警告**：首先确保车辆安全，如果发生事故，请遵循手册中的应急处理程序。

2. **踩制动踏板**：为了降低高压电泄露的风险，你需要踩下刹车踏板。

3. **操作电子驻车开关**：接着按下电子驻车开关（通常位于刹车上），这将帮助你停止混合动力系统的运行。

4. **使用“P”键**：在面板上找到“P”键（有时候是“SET”或“CONFIG”键），按下它以完成DM系统的工作模式切换。

手册中关于这些操作的具体页面可能会因版本和手册更新而有所不同。建议您参照最新的用户手册进行操作。

In [12]:
# Query test 1: ask for the manual page numbers that introduce the instrument
# cluster functions; the answer is streamed to the cell output.
question = "给出介绍仪表盘功能的用户手册页码"
response = query_engine.query(question)
response.print_response_stream()

，您提供的信息中没有直接指出介绍仪表盘功能的用户手册页码。通常这类功能介绍会位于车身内部控制系统或者驾驶信息部分的内容页面。

根据产品用户手册的一般结构，这部分内容可能会在以下手册页码附近找到：
- 361（这可能是一个综合信息章节的页码，包含了多个系统的介绍）；
- 如果仪表盘控制是单独一部分，那页码可能会直接标为“4-1充/放电说明”或者类似的页面标识。

如果手册中没有明确的页码，那么您可以通过搜索关键词“仪表盘功能”或类似描述来找到相关页面。

In [13]:
# Query test 2: ask whether the ventilation and heating functions can be
# switched on at the same time; the answer is streamed to the cell output.
question = "通风功能与加热功能是否可以同时开启?"
response = query_engine.query(question)
response.print_response_stream()

，您咨询的产品通风和加热功能开启情况在提供的产品用户手册中并未明确提及。通常情况下，这类功能可能会有单独的开关或者是在特定模式下同时启用。

建议您查阅产品的详细使用说明书或直接联系比亚迪汽车授权服务店以获取最准确的信息。

## To-Do: Optimize the RAG pipeline

Refer to Basic Strategies: https://docs.llamaindex.ai/en/v0.10.19/optimizing/basic_strategies/basic_strategies.html