In [1]:
import pandas as pd
import numpy as np
import gradio as gr
import matplotlib.pyplot as plt
import getpass
import base64
import faiss
import os
from openai import OpenAI
from operator import itemgetter
from langchain.vectorstores import FAISS
from IPython.display import Audio, display
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain.text_splitter import CharacterTextSplitter
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
from langchain.schema.runnable import RunnableLambda
from langchain.schema.runnable.passthrough import RunnableAssign
from langchain_core.runnables import RunnableBranch
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

In [2]:
import getpass
import os

def get_nv_api_key():
    if os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
        print("Valid NVIDIA_API_KEY already in environment. Delete to reset")
    else:
        nvapi_key = getpass.getpass("NVAPI Key (starts with nvapi-): ")
        assert nvapi_key.startswith("nvapi-"), f"{nvapi_key[:5]}... is not a valid key"
        os.environ["NVIDIA_API_KEY"] = nvapi_key
    return nvapi_key

if __name__ == "__main__":
    
    nvapi_key = get_nv_api_key()

NVAPI Key (starts with nvapi-):  ········


In [3]:
if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    os.environ["NVIDIA_API_KEY"] = nvapi_key
### initialize ai-embed-qa-4 model
embedder = NVIDIAEmbeddings(model="NV-Embed-QA")
# Here we read in the text data and prepare them into vectorstore
ps = os.listdir("./zh_data/")
data = []
sources = []
for p in ps:
    content = ""
    if p.endswith('.txt'):
        path2file="./zh_data/"+p
        with open(path2file,encoding="utf-8") as f:
            lines=f.readlines()
            for line in lines:
                content += line
            if len(content)>=1:
                data.append(content)
                sources.append(path2file)

documents=[d for d in data if d != '\n']

# Here we create a vector store from the documents and save it to disk.
text_splitter = CharacterTextSplitter(chunk_size=500, separator=" ")
docs = []
metadatas = []

for i, d in enumerate(documents):
    splits = text_splitter.split_text(d)     
    #print(len(splits))
    docs.extend(splits)     
    metadatas.extend([{"source": sources[i]}] * len(splits))

store = FAISS.from_texts(docs, embedder , metadatas=metadatas)
store.save_local('./zh_data/nv_embedding')

In [12]:
retriever = store.as_retriever()
nvapi_key = nvapi_key
llm = ChatNVIDIA(model="meta/llama-3.1-405b-instruct", nvidia_api_key=nvapi_key, max_tokens=1024)
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer solely based on the following context:\n<Documents>\n{context}\n</Documents>",
        ),
        ("user", "{question}"),
    ]
)

chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

text = chain.invoke("授信额度最高是人民币多少元？请给我完整的条款依据")

In [14]:
import os
import re
import uuid
import shutil
from pydub import AudioSegment
from openai import OpenAI
from IPython.display import clear_output
from IPython.display import Audio

if not os.path.exists("./audio"):
    os.mkdir("./audio")

Language = "Chinese" 
voice_name = "zh-CN-XiaoxiaoNeural"
speed = 1

if not os.path.exists("./audio"):
    os.mkdir("./audio")


#@title Edge TTS
def calculate_rate_string(input_value):
    rate = (input_value - 1) * 100
    sign = '+' if input_value >= 1 else '-'
    return f"{sign}{abs(int(rate))}"

def make_chunks(input_text, language):
    language="Chinese"
    if language == "Chinese":
      temp_list = input_text.strip().split(".")
      filtered_list = [element.strip() + '.' for element in temp_list[:-1] if element.strip() and element.strip() != "'" and element.strip() != '"']
      if temp_list[-1].strip():
          filtered_list.append(temp_list[-1].strip())
      return filtered_list

def tts_file_name(text):
    if text.endswith("."):
        text = text[:-1]
    text = text.lower()
    text = text.strip()
    text = text.replace(" ","_")
    truncated_text = text[:25] if len(text) > 25 else text if len(text) > 0 else "empty"
    random_string = uuid.uuid4().hex[:8].upper()
    file_name = f"./audio/{truncated_text}_{random_string}.mp3"
    return file_name

def merge_audio_files(audio_paths, output_path):
    # Initialize an empty AudioSegment
    merged_audio = AudioSegment.silent(duration=0)

    # Iterate through each audio file path
    for audio_path in audio_paths:
        # Load the audio file using Pydub
        audio = AudioSegment.from_file(audio_path)

        # Append the current audio file to the merged_audio
        merged_audio += audio

    # Export the merged audio to the specified output path
    merged_audio.export(output_path, format="mp3")

def edge_free_tts(chunks_list,speed,voice_name,save_path):
  # print(chunks_list)
  if len(chunks_list)>1:
    chunk_audio_list=[]
    if os.path.exists("./audio/edge_tts_voice"):
      shutil.rmtree("./audio/edge_tts_voice")
    os.mkdir("./audio/edge_tts_voice")
    k=1
    for i in chunks_list:
      print(i)
      edge_command=f'edge-tts  --rate={calculate_rate_string(speed)}% --voice {voice_name} --text "{i}" --write-media ./content/edge_tts_voice/{k}.mp3'
      print(edge_command)
      var1=os.system(edge_command)
      if var1==0:
        pass
      else:
        print(f"Failed: {i}")
      chunk_audio_list.append(f"./content/edge_tts_voice/{k}.mp3")
      k+=1
    # print(chunk_audio_list)
    merge_audio_files(chunk_audio_list, save_path)
  else:
    edge_command=f'edge-tts  --rate={calculate_rate_string(speed)}% --voice {voice_name} --text "{chunks_list[0]}" --write-media {save_path}'
    print(edge_command)
    var2=os.system(edge_command)
    if var2==0:
      pass
    else:
      print(f"Failed: {chunks_list[0]}")
  return save_path

def random_audio_name_generate():
  random_uuid = uuid.uuid4()
  audio_extension = ".mp3"
  random_audio_name = str(random_uuid)[:8] + audio_extension
  return random_audio_name

def talk(input_text):
  global translate_text_flag,Language,speed,voice_name
  if len(input_text)>=600:
    long_sentence = True
  else:
    long_sentence = False

  if long_sentence==True and translate_text_flag==True:
    chunks_list=make_chunks(input_text,Language)
  elif long_sentence==True and translate_text_flag==False:
    chunks_list=make_chunks(input_text,"Chinese")
  else:
    chunks_list=[input_text]
  save_path="./audio/"+random_audio_name_generate()
  edge_save_path=edge_free_tts(chunks_list,speed,voice_name,save_path)
  return edge_save_path

def convert_to_text(audio_path):
    import whisper
    select_model ="base" # ['tiny', 'base']
    whisper_model = whisper.load_model(select_model)
    result = whisper_model.transcribe(audio_path,word_timestamps=True,fp16=False,language='Chinese')
    with open('scan.txt', 'w') as file:
        file.write(str(result))
    return result["text"]


if __name__ == "__main__":

    Language = "Chinese" # @param ['English']
    voice_name ="zh-CN-XiaoxiaoNeural"# @param["en-US-AriaNeural",'zh-CN-XiaoxiaoNeural','zh-CN-XiaoyiNeural']
    speed = 1  # @param {type: "number"}
    edge_save_path=talk(text)
    Audio(edge_save_path, autoplay=True)

edge-tts  --rate=+0% --voice zh-CN-XiaoxiaoNeural --text "第十八条 消费金融公司向借款人发放消费贷款，应当审慎评估借款人的还款能力，对借款人贷款授信额度最高不得超过人民币20万元。" --write-media ./audio/e3311c73.mp3


In [15]:
def rag_model(question):
    # Load the vectorestore back.
    embedder = NVIDIAEmbeddings(model="NV-Embed-QA")
    store = FAISS.load_local("./zh_data/nv_embedding", embedder,allow_dangerous_deserialization=True)
    retriever = store.as_retriever()
    llm = ChatNVIDIA(model="meta/llama-3.1-405b-instruct", nvidia_api_key=nvapi_key, max_tokens=1024)
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "Answer solely based on the following context:\n<Documents>\n{context}\n</Documents>",
            ),
            ("user", "{question}"),
        ]
    )

    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    return chain.invoke(question)

def run_text_prompt(message, chat_history):
    bot_message = rag_model(message)
    edge_save_path=talk(bot_message)
    display(Audio(edge_save_path, autoplay=True))

    chat_history.append((message, bot_message))
    return edge_save_path, chat_history


def run_audio_prompt(audio, chat_history):
    if audio is None:
        return None, chat_history
    message_transcription = convert_to_text(audio)
    edge_save_path, chat_history = run_text_prompt(message_transcription, chat_history)
    return edge_save_path, chat_history

#@title Run gradio app
if __name__ == "__main__":
    # massage = "授信额度最高是人民币多少元？请给我完整的条款依据"
    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(label="Chat with me")
        msg = gr.Textbox(label="Ask anything")
        msg.submit(run_text_prompt, [msg, chatbot], [msg, chatbot])
        with gr.Row():
            audio = gr.Audio(sources="microphone", type="filepath")
            send_audio_button = gr.Button("Send Audio", interactive=True)
            send_audio_button.click(run_audio_prompt, [audio, chatbot], [audio, chatbot])

    demo.launch(share=True,debug=True)
    

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://ea1d32edf2a44d5b54.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


edge-tts  --rate=+0% --voice zh-CN-XiaoxiaoNeural --text "第十八条 消费金融公司向借款人发放消费贷款，应当审慎评估借款人的还款能力，对借款人贷款授信额度最高不得超过人民币20万元。" --write-media ./audio/1c5c1ab1.mp3


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://ea1d32edf2a44d5b54.gradio.live


In [19]:
global img_path
img_path = './outputs/customer_distribution.png'

user_input = "Add a column for the percentage of customers, draw this table as stacked pie chart in python, and save the image in path: "+img_path
print(user_input)

Add a column for the percentage of customers, draw this table as stacked pie chart in python, and save the image in path: ./outputs/customer_distribution.png


In [None]:
# 将 langchain 运行状态下的表保存到全局变量中
def save_table_to_global(x):
    global table
    if 'TABLE' in x.content:
        table = x.content.split('TABLE', 1)[1].split('END_TABLE')[0]
    return x

# helper function 用于Debug
def print_and_return(x):
    print(x)
    return x

# 对打模型生成的代码进行处理, 将注释或解释性文字去除掉, 留下pyhon代码
def extract_python_code(text):
    pattern = r'```python\s*(.*?)\s*```'
    matches = re.findall(pattern, text, re.DOTALL)
    return [match.strip() for match in matches]

# 执行由大模型生成的代码
def execute_and_return(x):
    code = extract_python_code(x.content)[0]
    try:
        result = exec(str(code))
        #print("exec result: "+result)
    except ExceptionType:
        print("The code is not executable, don't give up, try again!")
    return x

# 将图片编码成base64格式, 以方便输入给大模型
def image2b64(image_file):
    with open(image_file, "rb") as f:
        image_b64 = base64.b64encode(f.read()).decode()
        return image_b64

def chart_agent(image_b64, user_input, table):
    # Chart reading Runnable
    chart_reading = ChatNVIDIA(model="ai-phi-3-vision-128k-instruct")
    chart_reading_prompt = ChatPromptTemplate.from_template(
        'Generate underlying data table of the figure below, : <img src="data:image/png;base64,{image_b64}" />'
    )
    chart_chain = chart_reading_prompt | chart_reading

    # Instruct LLM Runnable
    # instruct_chat = ChatNVIDIA(model="nv-mistralai/mistral-nemo-12b-instruct")
    # instruct_chat = ChatNVIDIA(model="meta/llama-3.1-8b-instruct")
    #instruct_chat = ChatNVIDIA(model="ai-llama3-70b")
    instruct_chat = ChatNVIDIA(model="meta/llama-3.1-405b-instruct")

    instruct_prompt = ChatPromptTemplate.from_template(
        "Do NOT repeat my requirements already stated. Based on this table {table}, {input}" \
        "If has table string, start with 'TABLE', end with 'END_TABLE'." \
        "If has code, start with '```python' and end with '```'." \
        "Do NOT include table inside code, and vice versa."
    )
    instruct_chain = instruct_prompt | instruct_chat

    # 根据“表格”决定是否读取图表
    chart_reading_branch = RunnableBranch(
        (lambda x: x.get('table') is None, RunnableAssign({'table': chart_chain })),
        (lambda x: x.get('table') is not None, lambda x: x),
        lambda x: x
    )
    # 根据需求更新table
    update_table = RunnableBranch(
        (lambda x: 'TABLE' in x.content, save_table_to_global),
        lambda x: x
    )
    # 执行绘制图表的代码
    execute_code = RunnableBranch(
        (lambda x: '```python' in x.content, execute_and_return),
        lambda x: x
    )

    chain = (
        chart_reading_branch
        #| RunnableLambda(print_and_return)
        | instruct_chain
        #| RunnableLambda(print_and_return)
        | update_table
        | execute_code
    )

    return chain.invoke({"image_b64": image_b64, "input": user_input, "table": table}).content

def execute_and_return_gr(x):
    code = extract_python_code(x.content)[0]
    try:
        result = exec(str(code))
        #print("exec result: "+result)
    except ExceptionType:
        print("The code is not executable, don't give up, try again!")
    return img_path

def chart_agent_gr(image_b64, user_input, table):

    image_b64 = image2b64(image_b64)
    # Chart reading Runnable
    chart_reading = ChatNVIDIA(model="microsoft/phi-3-vision-128k-instruct")
    chart_reading_prompt = ChatPromptTemplate.from_template(
        'Generate underlying data table of the figure below, : <img src="data:image/png;base64,{image_b64}" />'
    )
    chart_chain = chart_reading_prompt | chart_reading

    # Instruct LLM Runnable
    # instruct_chat = ChatNVIDIA(model="nv-mistralai/mistral-nemo-12b-instruct")
    # instruct_chat = ChatNVIDIA(model="meta/llama-3.1-8b-instruct")
    #instruct_chat = ChatNVIDIA(model="ai-llama3-70b")
    instruct_chat = ChatNVIDIA(model="meta/llama-3.1-405b-instruct")

    instruct_prompt = ChatPromptTemplate.from_template(
        "Do NOT repeat my requirements already stated. Based on this table {table}, {input}" \
        "If has table string, start with 'TABLE', end with 'END_TABLE'." \
        "If has code, start with '```python' and end with '```'." \
        "Do NOT include table inside code, and vice versa."
    )
    instruct_chain = instruct_prompt | instruct_chat

    # 根据“表格”决定是否读取图表
    chart_reading_branch = RunnableBranch(
        (lambda x: x.get('table') is None, RunnableAssign({'table': chart_chain })),
        (lambda x: x.get('table') is not None, lambda x: x),
        lambda x: x
    )
    
    # 根据需求更新table
    update_table = RunnableBranch(
        (lambda x: 'TABLE' in x.content, save_table_to_global),
        lambda x: x
    )

    execute_code = RunnableBranch(
        (lambda x: '```python' in x.content, execute_and_return_gr),
        lambda x: x
    )
    
    # 执行绘制图表的代码
    chain = (
        chart_reading_branch
        | RunnableLambda(print_and_return)
        | instruct_chain
        | RunnableLambda(print_and_return)
        | update_table
        | execute_code
    )

    return chain.invoke({"image_b64": image_b64, "input": user_input, "table": table})

#@title Run gradio app
if __name__ == "__main__":
    # inputs = "Add a column for the percentage of customers, draw this table as stacked pie chart in python, and save the image in path: ./outpts/customer_distribution.png"
    multi_modal_chart_agent = gr.Interface(fn=chart_agent_gr,
                    inputs=[gr.Image(label="Upload image", type="filepath"), 'text'],
                    outputs=['image'],
                    title="Multi Modal chat agent",
                    description="Multi Modal chat agent",
                    allow_flagging="never")

    multi_modal_chart_agent.launch(debug=True, share=False, show_api=False, server_port=5002, server_name="0.0.0.0")



Running on local URL:  http://0.0.0.0:5002

To create a public link, set `share=True` in `launch()`.




{'image_b64': 'iVBORw0KGgoAAAANSUhEUgAAA9YAAAK9CAIAAABOz2PIAABGhElEQVR4nO3dPWgk1/4/6HMXB7ubbLDswl8eTXtN4z+byzsDbQwyi+BGYnAgJIxwMkqEk0E/uA3CgRF08BuU+CqRkkEYCQXGVnShAwu8bphhxMY7QzO4R+OGDXdh47tBv1W/VXVL6qOXep7EHnV1dfW3T1V/+tSpU3978+ZNAAAAYvlvbnoDAAAgXz5q/eezzz672e0AAIB77+3bt0EvOAAARCaCAwBAVCI4AABEJYIDAEBUIjgAAEQlggMAQFQiOAAARCWCAwBAVCI4AABEJYIDAEBUIjgAAEQlggMAQFQiOAAARCWCAwBAVCI4AABEJYIDAEBUIjgAAEQlggMAQFQiOAAARCWCAwBAVCI4AABEJYLnQa3ydHFxcXFx8WmldvtXe80ujjububi4+LRyfHG5tdQqnVUc3+I3G9W9qMn1NA/iuRuHnenci10phJB8J4u9j+eaPrLRVZppe7iPjY3bRATPgdrv1Xrr/+rV36/vQDKj1V6rWmVxfb+zmSGEevW3Py4Xst6/66ziz6tv1j1x52tyfc2DWO7CYWd6d35X6uq+k/DufXtfuraPbFSVZtoe7mdj4xb56KY34F67uKj98dOL36r1zpd8sVj89Ktvv/miND8fcSt6B8U7sNrrdHH8onrT28CtpXncQXfgsEO/mX5kd3flEPSCz1Ct8nR9fXu/l79DCPV6vbq/vf5D73R37fjptZyBTFnP/BffLhVb/1vc+LJ0G1Ybyfs/u7Vf2jk8Ozs7Ozv8fnXsr5/r+iy4G6ZsHjdCmxxwBw479Ev5yK7evGfaHma65RD0gs/KxfHT7cQJ7hTdGHDFM5Ap65kvlQ/OyrdqtVEk+jCWviy1olXq6Yfr+iy4C6ZuHjdCmxxw6w87DEr5yK7evGfaHma65SCCz0jtp/1u/i5u7Hy/2vqG7wxM+fThrfumB