In [1]:
# -*- coding: utf-8

In [2]:
# %pip install nvidia-ml-py3
import pynvml

# 初始化 NVML 库
pynvml.nvmlInit()

# 获取 GPU 数量
num_gpus = pynvml.nvmlDeviceGetCount()
print("GPU 数量：", num_gpus)

# 遍历每个 GPU，获取其资源信息
for i in range(num_gpus):
    handle = pynvml.nvmlDeviceGetHandleByIndex(i)
    gpu_name = pynvml.nvmlDeviceGetName(handle)
    gpu_memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
    print(f"GPU {i}: {gpu_name.decode()}，内存使用情况：{gpu_memory_info.used / 1024 / 1024} MB / {gpu_memory_info.total / 1024 / 1024} MB")

# 关闭 NVML 库
pynvml.nvmlShutdown()

GPU 数量： 2
GPU 0: Tesla V100-PCIE-16GB，内存使用情况：710.625 MB / 16384.0 MB
GPU 1: Tesla V100-PCIE-16GB，内存使用情况：710.625 MB / 16384.0 MB


In [3]:
import torch
# LLamaIndex 使用 PyTorch 进行向量计算
# 清理GPU 资源
torch.cuda.empty_cache()

In [4]:
# 初始化 NVML 库
pynvml.nvmlInit()

# 获取 GPU 数量
num_gpus = pynvml.nvmlDeviceGetCount()
print("GPU 数量：", num_gpus)

# 遍历每个 GPU，获取其资源信息
for i in range(num_gpus):
    handle = pynvml.nvmlDeviceGetHandleByIndex(i)
    gpu_name = pynvml.nvmlDeviceGetName(handle)
    gpu_memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
    print(f"GPU {i}: {gpu_name.decode()}，内存使用情况：{gpu_memory_info.used / 1024 / 1024} MB / {gpu_memory_info.total / 1024 / 1024} MB")

# 关闭 NVML 库
pynvml.nvmlShutdown()

GPU 数量： 2
GPU 0: Tesla V100-PCIE-16GB，内存使用情况：710.625 MB / 16384.0 MB
GPU 1: Tesla V100-PCIE-16GB，内存使用情况：710.625 MB / 16384.0 MB


In [5]:


# %pip install llama-index-llms-ollama
# !pip install llama-index
# %pip install llama-index-embeddings-ollama
# %pip install docx2txt
# %pip install llama-index-embeddings-huggingface
# %pip install llama-index-embeddings-instructor

from llama_index.core import Settings

# load the ollama
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core.node_parser import SentenceSplitter

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from BCEmbedding.tools.llama_index import BCERerank
from llama_index.core.node_parser import SentenceSplitter

from llama_index.core import VectorStoreIndex

# model set
llama2_7b = "llama2"
llama2_13b = "llama2:13b"
llama3_8b = "llama3"
llama3_70b = "llama3:70b"

# connect with the ollama server, and talk with llama2 

llm_llama = Ollama(model=llama3_8b, request_timeout=600, temperature=0.1, device='cuda') #base_url = 'http://localhost:11434',
# llm_llama2 = Ollama(model="llama2:13b", request_timeout=600, temperature=0.1) #base_url = 'http://localhost:11434',
# embedding_model = OllamaEmbedding(model_name="llama2",ollama_additional_kwargs={"mirostat": 0}) #base_url="http://localhost:11434"

embed_args = {'model_name': 'maidalun1020/bce-embedding-base_v1', 'max_length': 512, 'embed_batch_size': 256, 'device': 'cuda'}
embedding_model = HuggingFaceEmbedding(**embed_args)

reranker_args = {'model': 'maidalun1020/bce-reranker-base_v1', 'top_n': 5, 'device': 'cuda'}
reranker_model = BCERerank(**reranker_args)

Settings.llm = llm_llama
Settings.embed_model = embedding_model
Settings.node_parser = SentenceSplitter(chunk_size=500, chunk_overlap=20)
# Settings.num_output = 512
# Settings.context_window = 1024

  _torch_pytree._register_pytree_node(
04/19/2024 17:10:20 - [INFO] -sentence_transformers.SentenceTransformer->>>    Load pretrained SentenceTransformer: maidalun1020/bce-embedding-base_v1
  _torch_pytree._register_pytree_node(
04/19/2024 17:10:29 - [INFO] -sentence_transformers.SentenceTransformer->>>    2 prompts are loaded, with the keys: ['query', 'text']
04/19/2024 17:10:33 - [INFO] -BCEmbedding.models.RerankerModel->>>    Loading from `maidalun1020/bce-reranker-base_v1`.
04/19/2024 17:10:34 - [INFO] -BCEmbedding.models.RerankerModel->>>    Execute device: cuda;	 gpu num: 2;	 use fp16: False


In [6]:
# create a vector storage
# %pip install llama-index-vector-stores-chroma
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# initialize client, setting path to save data
chroma_client = chromadb.PersistentClient()

04/19/2024 17:10:35 - [INFO] -chromadb.telemetry.product.posthog->>>    Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


In [7]:
from IPython.display import Markdown, display

# define prompt viewing function
def display_prompt_dict(prompts_dict):
    for k, p in prompts_dict.items():
        text_md = f"**Prompt Key**: {k}<br>" f"**Text:** <br>"
        display(Markdown(text_md))
        print(p.get_template())
        display(Markdown("<br><br>"))

In [8]:

# %pip install llama-index-readers-web

from llama_index.readers.web import SimpleWebPageReader


documents = SimpleWebPageReader(html_to_text=True).load_data(
    [
     "https://amd.github.io/ama-sdk/v1.1.2/index.html",
     "https://amd.github.io/ama-sdk/v1.1.2/getting_started_on_prem.html",
     "https://amd.github.io/ama-sdk/v1.1.2/virtualization.html",
     "https://amd.github.io/ama-sdk/v1.1.2/examples/ffmpeg/tutorials.html",
     "https://amd.github.io/ama-sdk/v1.1.2/examples/ffmpeg/quality_analysis.html",
     "https://amd.github.io/ama-sdk/v1.1.2/examples/ffmpeg/filters.html",
     "https://amd.github.io/ama-sdk/v1.1.2/examples/gstreamer/tutorials.html",
     "https://amd.github.io/ama-sdk/v1.1.2/examples/gstreamer/filters.html",
     "https://amd.github.io/ama-sdk/v1.1.2/examples/gstreamer/xcompositor.html",
     "https://amd.github.io/ama-sdk/v1.1.2/examples/gstreamer/xabrladder.html",
     "https://amd.github.io/ama-sdk/v1.1.2/examples/xma/xma_apps.html",
     "https://amd.github.io/ama-sdk/v1.1.2/specs_and_features.html",
     "https://amd.github.io/ama-sdk/v1.1.2/package_feed.html",
     "https://amd.github.io/ama-sdk/v1.1.2/using_ffmpeg.html",
     "https://amd.github.io/ama-sdk/v1.1.2/using_gstreamer.html",
     "https://amd.github.io/ama-sdk/v1.1.2/unified_logging.html",
     "https://amd.github.io/ama-sdk/v1.1.2/tuning_video_quality.html",
     "https://amd.github.io/ama-sdk/v1.1.2/tuning_pipeline_latency.html",
     "https://amd.github.io/ama-sdk/v1.1.2/managing_compute_resources.html",
     "https://amd.github.io/ama-sdk/v1.1.2/c_apis.html",
     "https://amd.github.io/ama-sdk/v1.1.2/card_management.html",
     "https://amd.github.io/ama-sdk/v1.1.2/encoder_comp_matrix.html",
     "https://ffmpeg.org/ffmpeg.html",
     "https://ffmpeg.org/ffmpeg-resampler.html",
     "https://ffmpeg.org/ffmpeg-devices.html",
     "https://ffmpeg.org/ffmpeg-all.html",
     "https://trac.ffmpeg.org/wiki/Encode/H.264",
     "https://trac.ffmpeg.org/wiki/Encode/H.265",
     "https://trac.ffmpeg.org/wiki/Encode/AV1",
     "https://trac.ffmpeg.org/wiki/Scaling",
     "https://trac.ffmpeg.org/wiki/Null",
     "https://trac.ffmpeg.org/wiki/FilteringGuide",
     ]
     
)

collection_name = "ma35_rag_base_beg"
collection = chroma_client.list_collections()
if collection_name in collection:
    chroma_client.delete_collection(collection_name)
    chroma_client.clear_system_cache()
chroma_collection = chroma_client.get_or_create_collection(name=collection_name)
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(docstore=documents, vector_store=vector_store)

# 这个nodes 有什么用处
from llama_index.core.node_parser import SimpleNodeParser 
# Initialize the parser 
parser = SimpleNodeParser.from_defaults(chunk_size=500, chunk_overlap=20) 
# Parse documents into nodes 
nodes = parser.get_nodes_from_documents(documents)
# print(nodes[0])
len(nodes)

# %pip install ipywidgets
# index = VectorStoreIndex.from_documents(documents,storage_context=storage_context,show_progress=True)
index = VectorStoreIndex(nodes,embed_model=embedding_model,storage_context=storage_context,show_progress=True)

# # documents

Generating embeddings:   0%|          | 0/1248 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [9]:
# from bs4 import BeautifulSoup

# # 读取 HTML 文件
# with open("local_html/FFMPEG command line arguments - VideoDC - Xilinx Enterprise Wiki.html", "r", encoding="utf-8") as file:
#     html_content = file.read()

# # 使用 Beautiful Soup 解析 HTML
# soup = BeautifulSoup(html_content, "html.parser")

# # 提取文档内容
# document_text = soup.get_text()

# # 打印文档内容
# print(document_text)


In [10]:
# load your index from stored vectors

collection_name = "ma35_rag_base"
collection = chroma_client.list_collections()
chroma_collection = chroma_client.get_or_create_collection(name=collection_name)
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex.from_vector_store(
    vector_store, embed_model=embedding_model,storage_context=storage_context
)

In [11]:
# retriever = index.as_retriever()
# relevant_docs = retriever.retrieve("what is the max transcode rate for 1080p30 stream")
# relevant_docs

# """
# response_mode

#     REFINE = "refine"
#     COMPACT = "compact"
#     SIMPLE_SUMMARIZE = "simple_summarize"
#     TREE_SUMMARIZE = "tree_summarize"
#     GENERATION = "generation"
#     NO_TEXT = "no_text"

# """

# ResponseMode为tree_summarize时，LLM会对每一段文本进行最大长度的分割，并进行连续的读取和询问。这种模式的优点是可以保证对文本的完整理解和回答，但如果没有正确处理分割段落的情况，可能会导致错误的生成结果
# ResponseMode为generation时，生成的回答不依赖于文档的内容，只基于提供的问题进行生成。这种模式适用于纯粹的问题回
# ResponseMode为no_text时，生成的回答中不包含任何内容，仅作为占位符使用
# ResponseMode为simple_summarize时，LLM会截取每段文本的相关句子（通常是第一句），并进行提炼生成回答。这种模式适用于对结果要求不高的场景。
# ResponseMode为refine时，如果只有一个文本块（text_chunk），则会正常生成回答。但如果存在多个文本块，则会以类似轮询的方式迭代生成回答。这种模式可以对多个文本块进行迭代式的回答生成，逐步完善回答内容。
# ResponseMode为compact时，生成的回答会将多个文本块（text_chunk）压缩到设定的最大长度，并生成一次回答。然后，根据后续内容对以往的答案进行改进和完善（即进行多次迭代）


In [12]:
from llama_index.core import PromptTemplate
query_engine = index.as_query_engine(response_mode='simple_summarize')

# template = (
#     "You are video transcode expert and very faimilay with ffmpge.\n"
#     "Context information from multiple sources is below.\n"
#     "---------------------\n"
#     "{context_str}\n"
#     "---------------------\n"
#     "Given the information from multiple sources and not prior knowledge\n"
#     "please read the above context information carefully. and anwer the question.\n"
#     "if the question is not releate with video process, just say it is not releated with my knowledge base.\n"
#     "if you don't know the answer, just say that I don't know.\n"
#     "Answers need to be precise and concise.\n"
#     "Query: {query_str}\n"
#     "Answer: "
# )
# qa_template = PromptTemplate(template)


# query_engine.update_prompts(
#     {"response_synthesizer:text_qa_template": qa_template}
# )


# template = (
#     "The original query is as follows: {query_str}.\n"
#     "We have provided an existing answer: {existing_answer}.\n"
#     "We have the opportunity to refine the existing answer (only if needed) with some more context below.\n"
#     "-------------\n"
#     "{context_msg}\n"
#     "-------------\n"
#     "Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.\n"
#     "if the question is 'who are you' , just say I am a video expert.\n"
#     "Answers need to be precise and concise.\n"
#     "Refined Answer: "
# )


# qa_template = PromptTemplate(template)

# query_engine.update_prompts(
#     {"response_synthesizer:refine_template": qa_template}
# )

In [13]:

# Here no database input, so no answer
questions = [
   """explain following ffmpeg command\n
   
   "ffmpeg -hwaccel ama -f rawvideo -s 1920x1080 -framerate 24 -i cut1_1080p.nv12 -vf 'hwupload' -c:v av1_ama -b:v 5M -f mp4 -y 1.av1_1080p_1.mp4" """,

   # """
   # explain following ffmpeg command\n

   # ffmpeg -y -hwaccel ama \
   #    -c:v h264_ama  -out_fmt nv12 -i <INPUT>  \
   #    -filter_complex "scaler_ama=outputs=4:out_res=(1920x1080|full|nv12)(1280x720|full|nv12)(720x480|full|nv12)(360x240|full|nv12) [a][b][c][d]; \
   #                   [a]hwdownload,format=nv12[a1];[b]hwdownload,format=nv12[b1];[c]hwdownload,format=nv12[c1];[d]hwdownload,format=nv12[d1]" \
   #    -map '[a1]' -f rawvideo -pix_fmt nv12 -y /tmp/scale_1080p.yuv \
   #    -map '[b1]' -f rawvideo -pix_fmt nv12 -y /tmp/scale_720p.yuv  \
   #    -map '[c1]' -f rawvideo -pix_fmt nv12 -y /tmp/scale_480p.yuv \
   #    -map '[d1]' -f rawvideo -pix_fmt nv12 -y /tmp/scale_240p.yuv
   # """
]

counter = 0
for question in questions:
   counter = counter + 1
   query_response = query_engine.query(question)
   print(f"Question{counter}: {question}")
   print(f"Answer:{query_response.response}")
   print(" ")

   
   print(f"source_nodes length:{len(query_response.source_nodes)}")
   for i, result in enumerate(query_response.source_nodes, start=1):
      print(result)

      # print(f"Result {i}: Document ID {result['id']}, Title '{result['title']}', Similarity: {result.score}")
      print(f"Result {i}\n Similarity: {result.score}\n content '{result.get_content()}")

   # print()
   # # print(response.get_formatted_sources(length=10))
   # print()

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question1: explain following ffmpeg command

   
   "ffmpeg -hwaccel ama -f rawvideo -s 1920x1080 -framerate 24 -i cut1_1080p.nv12 -vf 'hwupload' -c:v av1_ama -b:v 5M -f mp4 -y 1.av1_1080p_1.mp4" 
Answer:Empty Response
 
source_nodes length:0


In [14]:
collection_name = "ma35_rag_base_beg"
collection = chroma_client.list_collections()
chroma_collection = chroma_client.get_or_create_collection(name=collection_name)
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex.from_vector_store(
    vector_store, embed_model=embedding_model,storage_context=storage_context
)

In [15]:
from llama_index.core import PromptTemplate
query_engine_beg = index.as_query_engine(response_mode='refine',similarity_top_k=50, temperature=0.6,node_postprocessors=[reranker_model])

template = (
    "You are video transcode expert and very faimilay with ffmpge.\n"
    "Context information from multiple sources is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the information from multiple sources and not prior knowledge\n"
    "please read the above context information carefully. and anwer the question.\n"
    "if the question is not releate with video process, just say it is not releated with my knowledge base.\n"
    "if you don't know the answer, just say that I don't know.\n"
    "Answers need to be precise and concise.\n"
    "you must answer every question in chinese\n"
    "Query: {query_str}\n"
    "Answer: "
)
qa_template = PromptTemplate(template)


query_engine_beg.update_prompts(
    {"response_synthesizer:text_qa_template": qa_template}
)


template = (
    "The original query is as follows: {query_str}.\n"
    "We have provided an existing answer: {existing_answer}.\n"
    "We have the opportunity to refine the existing answer (only if needed) with some more context below.\n"
    "-------------\n"
    "{context_msg}\n"
    "-------------\n"
    "Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.\n"
    "if the question is 'who are you' , just say I am a video expert.\n"
    "Answers need to be precise and concise.\n"
    "Refined Answer: "
)


qa_template = PromptTemplate(template)

query_engine_beg.update_prompts(
    {"response_synthesizer:refine_template": qa_template}
)

In [16]:
questions = [
   """explain following ffmpeg command 'ffmpeg -hwaccel ama -hwaccel_device /dev/ama_transcoder0 -c:v h264_ama -i infile.mp4 -c:v hevc_ama -b:v 1000K -r 60 -f mp4 -y transcoded.mp4' """,
   """详细解释一下下面的命令行 'gst-launch-1.0 filesrc location=<INPUT> ! parsebin ! h264parse ! ama_h264dec ! capsfilter 'caps=video/x-raw(memory:AMAMemory),format=NV12' !  ama_download ! filesink location=/tmp/h264.nv12'""",
   """如何使用linux 命令行去检查MA35D设备的系统状态？给我一个例子 """

   # """
   # explain following ffmpeg command\n

   # ffmpeg -y -hwaccel ama \
   #    -c:v h264_ama  -out_fmt nv12 -i <INPUT>  \
   #    -filter_complex "scaler_ama=outputs=4:out_res=(1920x1080|full|nv12)(1280x720|full|nv12)(720x480|full|nv12)(360x240|full|nv12) [a][b][c][d]; \
   #                   [a]hwdownload,format=nv12[a1];[b]hwdownload,format=nv12[b1];[c]hwdownload,format=nv12[c1];[d]hwdownload,format=nv12[d1]" \
   #    -map '[a1]' -f rawvideo -pix_fmt nv12 -y /tmp/scale_1080p.yuv \
   #    -map '[b1]' -f rawvideo -pix_fmt nv12 -y /tmp/scale_720p.yuv  \
   #    -map '[c1]' -f rawvideo -pix_fmt nv12 -y /tmp/scale_480p.yuv \
   #    -map '[d1]' -f rawvideo -pix_fmt nv12 -y /tmp/scale_240p.yuv
   # """
]

counter = 0
for question in questions:
   counter = counter + 1
   query_response = query_engine_beg.query(question)
   print(f"Question{counter}: {question}")
   print(f"Answer:{query_response.response}")

   # print("")
   # print(f"source_nodes length:{len(query_response.source_nodes)}")
   # for i, result in enumerate(query_response.source_nodes, start=1):
   #    print(result)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (546 > 512). Running this sequence through the model will result in indexing errors
You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
04/19/2024 17:12:18 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:12:23 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:12:28 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:12:35 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:12:41 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


Question1: explain following ffmpeg command 'ffmpeg -hwaccel ama -hwaccel_device /dev/ama_transcoder0 -c:v h264_ama -i infile.mp4 -c:v hevc_ama -b:v 1000K -r 60 -f mp4 -y transcoded.mp4' 
Answer:Based on the new context, I refine the original answer as follows:

The FFmpeg command `ffmpeg -hwaccel ama -hwaccel_device /dev/ama_transcoder0 -c:v h264_ama -i infile.mp4 -c:v hevc_ama -b:v 1M -r 60 -f mp4 -y transcoded.mp4` is used to transcode the input video `infile.mp4` into H.265 (HEVC) format, using AMA hardware acceleration, and save the result in `transcoded.mp4`. The command also sets the video bitrate to 1,000 Kbps (1 Mbps), frame rate to 60 fps, and output file format to MP4.

The main parameters explained are:

* `-hwaccel ama`: Enables AMA hardware acceleration.
* `-hwaccel_device /dev/ama_transcoder0`: Specifies the AMA hardware accelerator device as `/dev/ama_transcoder0`.
* `-c:v h264_ama`: Encodes the input video into H.264 (AVC) format using AMA hardware acceleration.
* `-i 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

04/19/2024 17:12:45 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:12:49 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:12:54 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:12:59 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:13:04 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


Question2: 详细解释一下下面的命令行 'gst-launch-1.0 filesrc location=<INPUT> ! parsebin ! h264parse ! ama_h264dec ! capsfilter 'caps=video/x-raw(memory:AMAMemory),format=NV12' !  ama_download ! filesink location=/tmp/h264.nv12'
Answer:The refined answer:

The command line `gst-launch-1.0 filesrc location=<INPUT> ! parsebin ! h264parse ! ama_h264dec ! capsfilter 'caps=video/x-raw(memory:AMAMemory),format=NV12' !  ama_download ! filesink location=/tmp/h264.nv12` is used to decode H.264 streams and save the decoded images as NV12 format in `/tmp/h264.nv12`. The pipeline consists of:
1. `filesrc`: reads input files
2. `parsebin`: unpacks containers to elementary streams
3. `h264parse`: parses H.264 streams
4. `ama_h264dec`: decodes H.264 streams and outputs NV12 format images
5. `capsfilter`: specifies the output capabilities, requiring video/x-raw(memory:AMAMemory) format with NV12 format
6. `ama_download`: downloads decoded images
7. `filesink`: saves downloaded images to `/tmp/h264.nv12` files

Thi

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

04/19/2024 17:13:06 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:13:08 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:13:12 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:13:16 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:13:21 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


Question3: 如何使用linux 命令行去检查MA35D设备的系统状态？给我一个例子 
Answer:To check the system status of an MA35D device using Linux command-line, you can use the `mautil` commands:

1. Run `mautil examine` to get the status of the system and devices.
2. Use `cat /sys/class/misc/ama_transcoder{x}/bus_id` to look up the PCIe bus ID of a device (where x is a number between 0 to total number of devices minus 1).
3. Check the firmware version number using `cat /sys/class/misc/ama_transcoder0/version_information`.

Additionally, you can use the following commands:

* Run `mautil -d [<DBDF> | all] examine` to get detailed information about the system and device status.
* Use `mautil -d [<DBDF> | all] validate` to validate the basic shell acceleration functionality.

To check the current loading of all devices in your system, you can use the following command:

```xrmadm /opt/amd/ama/ma35/scripts/list_cmd.json```

This will generate a report in JSON format containing the load information for all compute unit (CU

In [17]:
prompts_dict = query_engine.get_prompts()
display_prompt_dict(prompts_dict)

**Prompt Key**: response_synthesizer:text_qa_template<br>**Text:** <br>

Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer: 


<br><br>

In [18]:
from llama_index.core import PromptTemplate

template = (
    "You are a Video ffmpeg & gstreamer technolodge expert.\n"
    "please answer the question in chinese"
    "Context information from multiple sources is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the information from multiple sources and not prior knowledge, please read the sources carefully.\n"
    "if the question is not releate with the RDMA, just say it is not releated with my knowledge base.\n"
    "if you don't know the answer, just say that I don't know.\n"
    "if the question is 'who are you' , just say I am a FPGA and RDMA expert.\n"
    "Answers need to be precise and concise.\n"
    "Query: {query_str}\n"
    "Answer: "
)
qa_template = PromptTemplate(template)

In [19]:
# response_mode 有几种模式可选，refine， compact, tree_summarize 等，每一种都有对应的promopt template
query_engine_tree_summarize = index.as_query_engine(response_mode='simple_summarize', streaming=True,similary_threshold=0.1, similarity_top_k=30)
# query_engine.update_prompts(qa_template)  
query_engine_tree_summarize.update_prompts(
    {"response_synthesizer:summary_template": qa_template}
)

from IPython.display import Markdown, display

# define prompt viewing function
def display_prompt_dict(prompts_dict):
    for k, p in prompts_dict.items():
        text_md = f"**Prompt Key**: {k}<br>" f"**Text:** <br>"
        display(Markdown(text_md))
        print(p.get_template())
        display(Markdown("<br><br>"))

prompts_dict = query_engine_tree_summarize.get_prompts()
display_prompt_dict(prompts_dict)

**Prompt Key**: response_synthesizer:text_qa_template<br>**Text:** <br>

Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {query_str}
Answer: 


<br><br>

In [20]:
questions = [
   "一个ma35D AV1 codec 能处理1080p的数据流最大到多少fps "
]

counter = 0
for question in questions:
   counter = counter + 1
   response = query_engine_tree_summarize.query(question)
   print(f"Question{counter}: {question}")
   response.print_response_stream()
   print()
   # print(response.get_formatted_sources(length=10))
   print()

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question1: 一个ma35D AV1 codec 能处理1080p的数据流最大到多少fps 


04/19/2024 17:13:24 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


Based on the provided context information, I can help you with your question.

According to the text, the MA35D accelerator card has Video Codec Unit (VCU) cores that support AOM AV1: AOMedia Video 1 - Main, High up to Level 5.3.

The text also mentions that the VCU cores support resolutions from 128x128 to 3840x2160 portrait and landscape.

However, it does not explicitly mention the maximum frame rate (fps) for a specific resolution like 1080p.

To answer your question, I would need more information about the MA35D AV1 codec's capabilities or specifications. Unfortunately, this context does not provide that information.

If you have any additional context or specifications about the MA35D AV1 codec, I'd be happy to help you with your query!



In [21]:
from llama_index.core import PromptTemplate
query_engine_refine = index.as_query_engine(response_mode='refine', similarity_top_k=100)

template = (
    "You are video transcode expert and very faimilay with ffmpge.\n"
    "Context information from multiple sources is below.\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "Given the information from multiple sources and not prior knowledge\n"
    "please read the above context information carefully. and anwer the question.\n"
    "if the question is not releate with video process, just say it is not releated with my knowledge base.\n"
    "if you don't know the answer, just say that I don't know.\n"
    "Answers need to be precise and concise.\n"
    "Query: {query_str}\n"
    "Answer: "
)
qa_template = PromptTemplate(template)


query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": qa_template}
)


template = (
    "The original query is as follows: {query_str}.\n"
    "We have provided an existing answer: {existing_answer}.\n"
    "We have the opportunity to refine the existing answer (only if needed) with some more context below.\n"
    "-------------\n"
    "{context_msg}\n"
    "-------------\n"
    "Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.\n"
    "if the question is 'who are you' , just say I am a video expert.\n"
    "Answers need to be precise and concise.\n"
    "answer the question in chinese\n"
    "Refined Answer: "
)


qa_template = PromptTemplate(template)

query_engine.update_prompts(
    {"response_synthesizer:refine_template": qa_template}
)

In [22]:
questions = [
   """Using ffmpeg Decoder a clip that is already encoded in H.264, and will decode the file into a RAW format and save it to disk.""",
   """Using ffmpeg encode a RAW 1080p60 clip in YUV420 format. Pass the clip to the MA35D encoder to produce an AV1 encoded MP4 output with a target bitrate of 5Mbps and saves it to disk. please do not use -re option""",
   """ Using ffmpeg do the Bit Conversion, To encode YUV 4:2:2 10 bit pixel format to YUV 4:2:0 8 bit format' """,
   """ Using ffmpeg decodes an existing H.264 file and then scales it into 1080p/720p/480p/240p four resolutions and save the RAW outputs to disk under""",
   """ Using ffmpeg one cmd line, decodes an existing H.264 file and then using scaler_ama scales it into 1080p/720p/480p/240p four resolutions and save the RAW outputs to disk under""",
   # """ffmpeg命令使用ma35d硬件转码, 用一条命令行使用split方式，将一个h264 4k60的文件同时转码成两个hevc和av1格式的文件,写出具体的命令行例子"""
]

counter = 0
for question in questions:
   counter = counter + 1
   query_engine = index.as_query_engine()
   query_response = query_engine.query(question)
   print(f"Question{counter}: {question}")
   print(f"Answer:{query_response.response}")


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

04/19/2024 17:13:30 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


Question1: Using ffmpeg Decoder a clip that is already encoded in H.264, and will decode the file into a RAW format and save it to disk.
Answer:To use FFmpeg decoder to decode a clip that is already encoded in H.264 and save it to disk in a RAW format, you can use the following command:

ffmpeg -i input.mp4 -c:v rawvideo -f rawvideo output.raw

This will read the input file (input.mp4), decode the H.264 video stream using FFmpeg's built-in decoder, and write the decoded frames to a RAW file (output.raw) in a format that can be read by other applications.

Note: The `-c:v` option specifies the codec for the output video stream, and `rawvideo` is the codec for a raw, uncompressed video stream. The `-f` option specifies the format of the output file, which in this case is `rawvideo`.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

04/19/2024 17:13:32 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


Question2: Using ffmpeg encode a RAW 1080p60 clip in YUV420 format. Pass the clip to the MA35D encoder to produce an AV1 encoded MP4 output with a target bitrate of 5Mbps and saves it to disk. please do not use -re option
Answer:Here is the answer:

ffmpeg -hwaccel ama -i <INPUT> -vf "format=yuv420p, hwupload" -c:v av1_ama -b:v 5M -f mp4 -y output.mp4


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

04/19/2024 17:13:33 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


Question3:  Using ffmpeg do the Bit Conversion, To encode YUV 4:2:2 10 bit pixel format to YUV 4:2:0 8 bit format' 
Answer:To encode YUV 4:2:2 10-bit pixel format to YUV 4:2:0 8-bit, you can use the following command:

ffmpeg -hwaccel ama -i <INPUT> -vf "format=yuv420p, hwupload" -c:v h264_ama -b:v 1M <OUTPUT>

This command uses the `format` filter to convert the input YUV 4:2:2 10-bit pixel format to YUV 4:2:0 8-bit. The rest of the flags are used to specify the output video codec and bitrate.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

04/19/2024 17:13:36 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


Question4:  Using ffmpeg decodes an existing H.264 file and then scales it into 1080p/720p/480p/240p four resolutions and save the RAW outputs to disk under
Answer:To scale the decoded video into different resolutions (10, 720, 480, and 240) using ffmpeg, you can use the scaler filter. Here's an example command:

ffmpeg -i input.h264 -filter_complex "scale=w=1080:h=720[720p];scale=w=640:h=480[480p];scale=w=384:h=240[240p]" -vsync 0 -c:v rawvideo -f rawvideo output_10.80p.raw output_720p.raw output_480p.raw output_240p.raw

This command will decode the input H.264 file, scale it to four different resolutions (1080x720, 640x480, and 384x240), and save each resolution as a separate RAW video file.

Note that you can adjust the scaling parameters (w and h) to achieve the desired resolutions. Also, make sure to specify the correct codec and format for your output files.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

04/19/2024 17:13:39 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


Question5:  Using ffmpeg one cmd line, decodes an existing H.264 file and then using scaler_ama scales it into 1080p/720p/480p/240p four resolutions and save the RAW outputs to disk under
Answer:Here is a command that uses FFmpeg's `overlay` filter to decode an existing H.264 file, scale it into 1080p, 720p, 480p, and 240p four resolutions, and save the raw outputs to disk:

ffmpeg -i <INPUT> -filter_complex "scale=10:80,format=yuv420p[00];scale=7:20,format=yuv420p[11];scale=4:80,format=yuv420p[22];scale=2:40,format=yuv420p[33];[00][11]hstack[top];[22][33]hstack[bot]; [top][bot] vstack" -c:v raw_yuv -f raw <OUT DIR>/output_%04d.raw

This command will generate four files: output_1080.raw, output_720.raw, output_480.raw, and output_240.raw.


In [23]:
questions = [
   """explain following ffmpeg command ffmpeg -hwaccel ama -f rawvideo -s 1920x1080 -framerate 24 -i cut1_1080p.nv12 -vf "hwupload" -c:v av1_ama -b:v 5M -f mp4 -y 1.av1_1080p_1.mp4'""",
]

counter = 0
for question in questions:
   counter = counter + 1
   response = query_engine_refine.query(question)
   print(f"Question{counter}: {question}")
   print(f"Answer:{response.response}")
   print()
   # print(response.get_formatted_sources(length=10))
   print()


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

04/19/2024 17:13:42 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:13:45 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:13:48 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:13:52 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:13:55 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:13:58 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:14:01 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:14:04 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
04/19/2024 17:14:06 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 

Question1: explain following ffmpeg command ffmpeg -hwaccel ama -f rawvideo -s 1920x1080 -framerate 24 -i cut1_1080p.nv12 -vf "hwupload" -c:v av1_ama -b:v 5M -f mp4 -y 1.av1_1080p_1.mp4'
Answer:Here's a rewritten answer using the new context:

This FFmpeg command demonstrates how to process video streams with AMA hardware acceleration. The command starts by specifying the input format as `rawvideo`, resolution as `19x20x10x80`, and frame rate as `24`. The input file is named `cut1_10p.nv12`. The `-vf "hwupload"` option instructs FFmpeg to upload the input video data to the hardware accelerator for processing. The codec used for encoding is AV1 with hardware acceleration (`-c:av av1_ama`), and the target bitrate is set to 5 megabits per second (`-b:v 5M`).




In [24]:
questions = [
   
   "how to enable ultra low latency for M35D Encoding?, give me a detailed ffmpge cmd example"
]

counter = 0
for question in questions:
   counter = counter + 1
   response = query_engine_tree_summarize.query(question)
   print(f"Question{counter}: {question}")
   response.print_response_stream()
   print()
   # print(response.get_formatted_sources(length=10))
   print()

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Question1: how to enable ultra low latency for M35D Encoding?, give me a detailed ffmpge cmd example


04/19/2024 17:14:17 - [INFO] -httpx->>>    HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


Based on the provided context information, it appears that you are looking for instructions on how to enable ultra-low latency for M35D encoding using FFmpeg.

According to the context information, the command to enable ultra-low latency is:

`ma35_transcoder_app -streams 1 -frames 2,000 -c:v av1_ama -b:v 10M -latency_mode 2 -o h264_av1_transcode.av1`

This command uses the `ma35_transcoder_app` application to transcode an input file using the H.264 to AV1 format. The `-streams 1` option specifies that only one stream should be processed, and the `-frames 2,000` option sets the maximum number of frames to process.

The `-c:v av1_ama` option specifies the video codec as AV1 with AMA (Advanced Media Acceleration) support. The `-b:v 10M` option sets the bitrate for the video stream to 10 megabits per second.

The `-latency_mode 2` option enables ultra-low latency mode, which is intended for real-time encoding applications that require low latency and high throughput.

Finally, the `-o h26