# Lab11 LlamaIndex RAG Voice Assistent Demo

In [1]:
import os
os.environ['HIP_VISIABLE_DEVICES'] = "0"

In [2]:
import torch

In [3]:
print(torch.__version__)

2.3.1+rocm6.0


In [4]:
# Query GPU
if torch.cuda.is_available():
    device = torch.device("cuda")          # a CUDA device object
    print('Using GPU:', torch.cuda.get_device_name(0))
    print('GPU properties:', torch.cuda.get_device_properties(0))
else:
    device = torch.device("cpu")
    print('Using CPU')

Using GPU: AMD Radeon PRO W7900
GPU properties: _CudaDeviceProperties(name='AMD Radeon PRO W7900', major=11, minor=0, gcnArchName='gfx1100', total_memory=46064MB, multi_processor_count=48)


In [5]:
# NOTE: This is ONLY necessary in jupyter notebook.
# Details: Jupyter runs an event-loop behind the scenes.
#          This results in nested event-loops when we start an event-loop to make async queries.
#          This is normally not allowed, we use nest_asyncio to allow it for convenience.

import nest_asyncio

nest_asyncio.apply()

In [6]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.core.node_parser import SentenceSplitter

from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama

# Set embedding model
# Please download it ahead running this lab by "ollama pull nomic-embed-text"
#Settings.embed_model = OllamaEmbedding(model_name="nomic-embed-text")
Settings.embed_model = OllamaEmbedding(model_name="mxbai-embed-large")

# Set ollama model
Settings.llm = Ollama(model="llama3", request_timeout=120.0)

In [7]:
import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

if not os.path.exists("./chroma_db/CUM_en_db"):
    # initialize client
    db = chromadb.PersistentClient(path="./chroma_db/CUM_en_db")
    # get collection
    chroma_collection = db.get_or_create_collection("CUM_en_db")
    # assign chroma as the vector_store to the context
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    # Load data
    documents = SimpleDirectoryReader(input_files=["../data/FordUM.pdf"]).load_data()
    # Build vector index per-document
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
        transformations=[SentenceSplitter(chunk_size=1024, chunk_overlap=50)],
    )
else:
    # initialize client
    db = chromadb.PersistentClient(path="./chroma_db/CUM_en_db")
    # get collection
    chroma_collection = db.get_or_create_collection("CUM_en_db")
    # assign chroma as the vector_store to the context
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    # load your index from stored vectors
    index = VectorStoreIndex.from_vector_store(
        vector_store, storage_context=storage_context
    )

In [8]:
# create a query engine
query_engine = index.as_query_engine(streaming=True, response_mode="compact", similarity_top_k=3)

In [9]:
# Updating Prompt for Car User Manual Q&A
from llama_index.core import PromptTemplate

template = (
    "You are proudct expert of car and very faimilay with car user manual and provide guide to the end user.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the information from multiple sources and not prior knowledge\n"
    "Answer the question according to the care user manual with page number from the Index Table.\n"
    "If the question is not releate with car user manual, just say it is not releated with my knowledge base.\n"
    "If you don't know the answer, just say that I don't know.\n"
    "Answers need to be precise and concise.\n"
    "List the page number of the relevant content in the user manual.\n"
    "If the question is in chinese, please transclate chinese to english in advance"
    "Query: {query_str}\n"
    "Answer: "
)

qa_template = PromptTemplate(template)

query_engine.update_prompts(
    {"response_synthesizer:refine_template": qa_template}
)

In [10]:
# Q1
response = query_engine.query("Which pages could get the detials about Exterior Lighting Control?")
response.print_response_stream()

Based on the provided context information, the details about Exterior Lighting Control can be found on page_ label: 121.

In [11]:
# Q2
response = query_engine.query("What's the steps to SWITCHING THE DAYTIME RUNNING LAMPS ON AND OFF and which pages show it?")
response.print_response_stream()

According to the provided context information, to switch the daytime running lamps on and off, you can follow these steps:

1. Access the vehicle drawer on the touchscreen.
2. Press "CONTROLS".
3. Press "Zone Lighting".

Please note that this information is found on pages 117 and 124 of the FordUM.pdf file.

In [12]:
import ChatTTS
import torchaudio
from IPython.display import Audio

In [13]:
chat = ChatTTS.Chat()
chat.load(compile=False) # Set to True for better performance

use default LlamaModel for importing TELlamaModel error: No module named 'transformer_engine'


True

In [14]:
print(str(response))

According to the provided context information, to switch the daytime running lamps on and off, you can follow these steps:

1. Access the vehicle drawer on the touchscreen.
2. Press "CONTROLS".
3. Press "Zone Lighting".

Please note that this information is found on pages 117 and 124 of the FordUM.pdf file.


In [15]:
wavs = chat.infer(str(response))

#torchaudio.save("output1.wav", torch.from_numpy(wavs[0]), 24000)

found invalid characters: {'7', '"', '3', '2', '1', '4', '\n', ':'}
text:   0%|                                                                                                                                               | 0/384(max) [00:00, ?it/s]We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
  attn_output = torch.nn.functional.scaled_dot_product_attention(
text:  28%|██████████████████████████████████████▌                                                                                                  | 108/384(max) [00:01, 75.79it/s]
code:  48%|████████████████████████████████████████████████████████████████▍                                                                      | 978/2048(max) [00:09, 104.63it/s]


In [16]:
Audio(wavs[0], rate=24_000, autoplay=True)

In [17]:
# Query Test in Chinese
response = query_engine.query("如何切换DM系统工作模式并请给出手册的相关页码？")
response.print_response_stream()

Based on the provided context, to switch the DM system working mode, you must enable the settings on your device to receive text message notifications on the center display. Check your device settings to enable these features.

As for the manual page number, please refer to page 533 in the FordUM.pdf file, which is related to the "SWITCHING TEXT MESSAGE" section.

In [18]:
wavs = chat.infer(str(response))
Audio(wavs[0], rate=24_000, autoplay=True)

found invalid characters: {'\n', '3', '"', '5'}
text:  27%|████████████████████████████████████▊                                                                                                   | 104/384(max) [00:01, 102.13it/s]
code:  46%|█████████████████████████████████████████████████████████████▍                                                                         | 932/2048(max) [00:08, 105.11it/s]


## Add Whisper

The audio play deepend on ffmpeg. Please refer to https://github.com/openai/whisper install ffmpeg ahead.

In [19]:
import whisper

model = whisper.load_model("base")

In [20]:
# Q1: texts = ["What's the steps to SWITCHING THE DAYTIME RUNNING LAMPS ON AND OFF and which pages show it?"]
result = model.transcribe("./Q1_en.wav")
print(result["text"])

 What's the steps to switching the daytime running lamps on and off? And which pages show?


In [21]:
response = query_engine.query(result["text"])
response.print_response_stream()

According to the provided context, the steps to switch the daytime running lamps on and off are:

1. Access the vehicle drawer on the touchscreen.
2. Press "CONTROLS".
3. Press "Zone Lighting".

As for the pages that show this information, they can be found on page numbers 657 and 117 of the FordUM.pdf file.

In [22]:
wavs = chat.infer(str(response))

#torchaudio.save("Answer1.wav", torch.from_numpy(wavs[0]), 24000)

found invalid characters: {'7', '"', '6', '3', '2', '1', '5', '\n', ':'}
text:  27%|████████████████████████████████████▍                                                                                                   | 103/384(max) [00:01, 102.31it/s]
code:  46%|██████████████████████████████████████████████████████████████▍                                                                        | 948/2048(max) [00:09, 105.02it/s]


In [23]:
Audio(wavs[0], rate=24_000, autoplay=True)

In [24]:
# Q2: texts = ["Which page is about ADJUSTING THE HEADLAMPS?"]
result = model.transcribe("./Q2_en.wav")
print(result["text"])

 which page is about adjusting the headlamp.


In [25]:
response = query_engine.query(result["text"])
response.print_response_stream()

Based on the provided context, the pages related to adjusting the headlamp are:

* Page 445 (file_path: ../data/FordUM.pdf)
* Vertic al Aim A djus tment
* Page 444 (file_label: 444)

These pages contain information on how to adjust the headlamp aim and provide a step-by-step guide on how to do it.

In [26]:
wavs = chat.infer(str(response))

#torchaudio.save("Answer2.wav", torch.from_numpy(wavs[0]), 24000)

found invalid characters: {'-', '_', '*', '5', ')', '4', '\n', '/', ':', '('}
text:  35%|████████████████████████████████████████████████▏                                                                                       | 136/384(max) [00:01, 102.15it/s]
code:  55%|█████████████████████████████████████████████████████████████████████████▊                                                            | 1128/2048(max) [00:10, 103.54it/s]


In [27]:
Audio(wavs[0], rate=24_000, autoplay=True)

In [28]:
# Q3: texts = ["Any info about PARKING AID PRECAUTIONS?"]
result = model.transcribe("./Q3_en.wav")
print(result["text"])

 any info about parking aid precautions.


In [29]:
response = query_engine.query(result["text"])
response.print_response_stream()

According to the guidelines, I'll provide an answer without directly referencing the given context.

If there's a potential alarm problem with your vehicle, it's crucial to take immediate control measures and consult an authorized dealer for assistance. Similarly, when using parking aids, it's essential to exercise caution and be mindful of your surroundings to avoid any potential risks or hazards. By doing so, you can ensure a safe and secure parking experience.

In [30]:
wavs = chat.infer(str(response))

#torchaudio.save("Answer3.wav", torch.from_numpy(wavs[0]), 24000)

found invalid characters: {'\n', "'"}
text:  45%|█████████████████████████████████████████████████████████████▎                                                                          | 173/384(max) [00:01, 101.51it/s]
code:  55%|██████████████████████████████████████████████████████████████████████████▏                                                           | 1133/2048(max) [00:10, 103.28it/s]


In [31]:
Audio(wavs[0], rate=24_000, autoplay=True)

In [32]:
# Q4: texts = ["如何打开汽车空调"]
result = model.transcribe("./Q1_zh.wav")
print(result["text"])

如果打開氣收空調


In [33]:
response = query_engine.query(result["text"])
response.print_response_stream()

Based on the provided context information, I found the relevant section regarding the air conditioning system.

If you open the climate control (氣收) and set it to "Air Conditioning" mode, please refer to page 440 in your owner's manual for more information on how to use this feature safely and effectively.

In [34]:
wavs = chat.infer(str(response))

#torchaudio.save("Answer3.wav", torch.from_numpy(wavs[0]), 24000)

found invalid characters: {'"', '0', ')', '4', '\n', "'", '('}
text:  26%|██████████████████████████████████▉                                                                                                      | 98/384(max) [00:00, 102.05it/s]
code:  41%|███████████████████████████████████████████████████████▏                                                                               | 837/2048(max) [00:07, 105.65it/s]


In [35]:
Audio(wavs[0], rate=24_000, autoplay=True)

## To-Do: Optimization the RAG

Refer to Basic Strategies: https://docs.llamaindex.ai/en/v0.10.19/optimizing/basic_strategies/basic_strategies.html