## Lab 5: Create or restore the vector DB for Q&A

In [1]:
import os

In [2]:
# Enable the 780M with ROCm by overriding the reported GFX version.
# This is set before torch is imported later in the notebook.
GFX_OVERRIDE_VAR = 'HSA_OVERRIDE_GFX_VERSION'
os.environ[GFX_OVERRIDE_VAR] = '11.0.0'

# Echo the value back to confirm it is set for this kernel process.
print(os.environ[GFX_OVERRIDE_VAR])

In [3]:
import os
# Restrict HIP/ROCm to the first GPU (device index 0).
# The variable name must be spelled HIP_VISIBLE_DEVICES; the previous
# spelling ("VISIABLE") is not a name the runtime reads, so it had no effect.
# Set before torch is imported so the runtime sees it at initialization.
os.environ['HIP_VISIBLE_DEVICES'] = "0"

In [4]:
import torch

In [5]:
# Show which PyTorch build is installed; the recorded run reports a ROCm build.
torch_version = torch.__version__
print(torch_version)

2.4.0.dev20240401+rocm6.0


In [6]:
# Select the compute device and report what was found.
# The GPU is queried through the torch.cuda API; in the recorded run this
# reports the AMD card, so the same API is used on the ROCm build.
gpu_available = torch.cuda.is_available()
device = torch.device("cuda" if gpu_available else "cpu")

if gpu_available:
    print('Using GPU:', torch.cuda.get_device_name(0))
    print('GPU properties:', torch.cuda.get_device_properties(0))
else:
    print('Using CPU')

Using GPU: AMD Radeon PRO W7900
GPU properties: _CudaDeviceProperties(name='AMD Radeon PRO W7900', major=11, minor=0, gcnArchName='gfx1100', total_memory=46064MB, multi_processor_count=48)


In [7]:
# NOTE: This is ONLY necessary inside a Jupyter notebook.
# Details: Jupyter already runs an event loop behind the scenes.
#          Starting another event loop to make async queries would nest
#          one loop inside the other, which is normally not allowed.
#          nest_asyncio is applied here to permit that nesting for convenience.

import nest_asyncio

nest_asyncio.apply()

In [8]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core.node_parser import SentenceSplitter

from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama

# Models served by the local Ollama instance.
# Download them before running this lab, e.g. "ollama pull mxbai-embed-large".
# ("nomic-embed-text" is an alternative embedding model that was used earlier.)
# NOTE(review): an index persisted with a different embedding model presumably
# has to be rebuilt after switching EMBED_MODEL_NAME - confirm.
EMBED_MODEL_NAME = "mxbai-embed-large"
LLM_MODEL_NAME = "qwen2:7b"
LLM_REQUEST_TIMEOUT = 200.0  # passed as request_timeout; presumably seconds

# Embedding model used by llama_index
Settings.embed_model = OllamaEmbedding(model_name=EMBED_MODEL_NAME)

# LLM used by llama_index to generate answers
Settings.llm = Ollama(model=LLM_MODEL_NAME, request_timeout=LLM_REQUEST_TIMEOUT)

In [9]:
import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# Location of the persisted Chroma database, its collection name, and the
# source document that is indexed when the collection is still empty.
CHROMA_DB_PATH = "./chroma_db/CUM_zh_db"
CHROMA_COLLECTION_NAME = "CUM_zh_db"
MANUAL_PDF_PATH = "../data/QinUM.pdf"

# Shared setup for both the "create" and the "restore" case:
# open the persistent client, get (or create) the collection, and wrap it
# as the vector store of a llama_index storage context.
db = chromadb.PersistentClient(path=CHROMA_DB_PATH)
chroma_collection = db.get_or_create_collection(CHROMA_COLLECTION_NAME)
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Decide by the number of stored entries rather than by whether the directory
# exists: the directory is created as soon as the client is opened, so a run
# that failed before any embedding was stored would otherwise leave an empty
# collection that every later run loads as if it were a finished index.
if chroma_collection.count() == 0:
    # Nothing stored yet: load the manual and build the index,
    # writing the embeddings into the Chroma collection.
    documents = SimpleDirectoryReader(input_files=[MANUAL_PDF_PATH]).load_data()
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        transformations=[SentenceSplitter(chunk_size=2048, chunk_overlap=100)],
    )
else:
    # Vectors are already stored: restore the index directly from them.
    index = VectorStoreIndex.from_vector_store(
        vector_store, storage_context=storage_context
    )

In [10]:
# Create the query engine on top of the index: streamed responses,
# "compact" response mode, and similarity_top_k set to 3.
query_engine = index.as_query_engine(
    streaming=True,
    response_mode="compact",
    similarity_top_k=3,
)

In [11]:
# Updating Prompt for Car User Manual Q&A
from llama_index.core import PromptTemplate

# Question-answering prompt. The Chinese text tells the model to act as a
# BYD Qin product expert, answer from the user manual, cite the manual page
# numbers, say so explicitly when a question is outside the manual, and keep
# answers accurate and concise.
text_qa_prompt_str = (
    "你是比亚迪秦汽车的产品专家，请根据用户问题基于产品使用手册进行给出回答和提示。\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "请基于该型号汽车的产品用户手册内容进行回答。\n"
    "回答问题时需要给出相关信息在产品用户手册中的页码。\n"
    "如果问题超出用户手册之外，请明确告知用户该问题超出手册范围。\n"
    "回答内容需准确且精炼。\n"
    "Query: {query_str}\n"
    "Answer: "
)
text_qa_prompt = PromptTemplate(text_qa_prompt_str)
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": text_qa_prompt}
)

# Refine prompt: used to revise an existing answer with additional context.
refine_prompt_str = (
    "The original query is as follows: {query_str}.\n"
    "We have provided an existing answer: {existing_answer}.\n"
    "We have the opportunity to refine the existing answer (only if needed) with some more context below.\n"
    "-------------\n"
    "{context_msg}\n"
    "-------------\n"
    "Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.\n"
    "if the question is 'who are you' , just say I am Car User Manual Copilot.\n"
    "Answers need to be precise and concise.\n"
    "Refined Answer: "
)
refine_prompt = PromptTemplate(refine_prompt_str)
query_engine.update_prompts(
    {"response_synthesizer:refine_template": refine_prompt}
)

# Tip: list(query_engine.get_prompts().keys()) shows the available prompt keys.

In [12]:
# Query Test 0: "How do I switch the DM system working mode? Please also
# give the relevant manual page number."
question = "如何切换DM系统工作模式并请给出手册的相关页码？"
response = query_engine.query(question)
# Print the answer as it is streamed back.
response.print_response_stream()

DM系统工作模式切换通常在车辆的中控台上或仪表盘上找到相关控制按钮。具体操作步骤可能因车型年份和配置的不同而有所差异，但一般流程如下：

1. 查找“驾驶模式”或“动力源选择”按钮。
2. 根据指示（通常是通过显示屏或物理按键），切换至所需的DM工作模式。

关于具体的操作说明和示意图，请查阅车辆的用户手册第X页。请注意，不同车型的具体步骤可能会有细微差别，务必参照您车辆的手册进行操作。

超出手册范围的问题：

如果您需要了解如何在EV模式下充电，并且询问与手册内容不完全相关或过于具体的技术问题（例如特定设备兼容性、医学设备安全等），建议联系比亚迪汽车的客户服务热线或者授权服务中心获取专业解答。

In [13]:
import ChatTTS
import torchaudio
from IPython.display import Audio

In [14]:
# Create the ChatTTS engine used below to turn answers into speech.
chat = ChatTTS.Chat()
# Load the engine with compile=False; set it to True for better performance.
# As the last expression of the cell, the return value of load() is displayed.
chat.load(compile=False)

use default LlamaModel for importing TELlamaModel error: No module named 'transformer_engine'


True

In [15]:
# Convert the answer to plain text, then synthesize speech from it.
answer_text = str(response)
wavs = chat.infer(answer_text)

found invalid characters: {'）', '：', '（', '\n', '“', '2', '1', '”'}
text:   0%|                                                                                                                                               | 0/384(max) [00:00, ?it/s]We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
  attn_output = torch.nn.functional.scaled_dot_product_attention(
text:  79%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▋                             | 302/384(max) [00:03, 85.42it/s]
code:  69%|████████████████████████████████████████████████████████████████████████████████████████████                                          | 1407/2048(max) [00:14, 100.41it/s]


In [16]:
# Play the first synthesized waveform inline, starting automatically.
# NOTE(review): rate=24_000 presumably matches the ChatTTS output sample rate - confirm.
Audio(data=wavs[0], rate=24_000, autoplay=True)

In [17]:
# Question: "Give the user manual page number that introduces the
# instrument cluster functions."
question = "给出介绍仪表盘功能的用户手册页码"
response = query_engine.query(question)
# Print the answer as it is streamed back.
response.print_response_stream()

对于介绍仪表盘功能的部分，相关页面位于产品用户手册的第62页。

In [18]:
# Synthesize speech for the latest answer and play the first waveform.
answer_text = str(response)
wavs = chat.infer(answer_text)
Audio(data=wavs[0], rate=24_000, autoplay=True)

found invalid characters: {'2', '6'}
text:   9%|████████████▉                                                                                                                             | 36/384(max) [00:00, 99.26it/s]
code:  13%|█████████████████▊                                                                                                                     | 270/2048(max) [00:02, 107.65it/s]


In [19]:
# Question: "Can the ventilation and heating functions be turned on at
# the same time?"
question = "通风功能与加热功能是否可以同时开启?"
response = query_engine.query(question)
# Print the answer as it is streamed back.
response.print_response_stream()

这个问题超出了产品用户手册中关于比亚迪秦汽车的描述。通常情况下，车辆的通风和加热系统可以独立操作，但具体到比亚迪秦汽车，是否能同时开启通风和加热功能可能会受到特定设计限制或能效考虑的影响。建议查阅车辆的具体规格说明或联系厂家获取精确信息。

参考页码：无

请查阅车辆说明书的特定章节或与比亚迪客户服务联系以获得确切答案。

In [20]:
# Synthesize speech for the latest answer and play the first waveform.
answer_text = str(response)
wavs = chat.infer(answer_text)
Audio(data=wavs[0], rate=24_000, autoplay=True)

found invalid characters: {'：', '\n'}
text:  44%|███████████████████████████████████████████████████████████▌                                                                            | 168/384(max) [00:01, 100.90it/s]
code:  69%|███████████████████████████████████████████████████████████████████████████████████████████▉                                          | 1406/2048(max) [00:13, 101.57it/s]


## To-Do: Optimize the RAG pipeline

Refer to Basic Strategies: https://docs.llamaindex.ai/en/v0.10.19/optimizing/basic_strategies/basic_strategies.html