In [7]:
import os
import openai
import sys
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import MarkdownHeaderTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import TextLoader
from langchain.chat_models import ChatOpenAI

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

openai.api_key  = os.environ['OPENAI_API_KEY']

In [8]:
llm_name = "gpt-3.5-turbo-0301"

In [9]:
from langchain.document_loaders import TextLoader

loader = TextLoader("../KnowledgeDocument(pan_card_services).txt")
corpus = loader.load()
txt = ' '.join([d.page_content for d in corpus])

In [10]:
# Since the knowledge base is formatted like a markdown, using a markdown header splitter to get splits on headers and header information in metadata.
from langchain.text_splitter import MarkdownHeaderTextSplitter

split_on = [
    ("#", "Header 1"),
    ("##", "Header 2"),
    ("###", "Header 3"),
]

markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=split_on)
md_header_splits = markdown_splitter.split_text(txt)

In [11]:
embeddings = OpenAIEmbeddings()
db = DocArrayInMemorySearch.from_documents(md_header_splits, embeddings)
llm = ChatOpenAI(model_name=llm_name, temperature=0)

In [12]:
# Build prompt
from langchain.prompts import PromptTemplate
template = """You are a helpful assistant, use the following context to answer the question at the end. If you don't know the answer, just say "sorry, I can't answer this, the answer to this question does not appear in my knowledge base", don't try to make up an answer.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=template)

# Run chain
from langchain.chains import RetrievalQA
question = "Are PAN cards necessary?"
qa_chain = RetrievalQA.from_chain_type(llm,
                                       retriever=db.as_retriever(),
                                       return_source_documents=True,
                                       chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})


result = qa_chain({"query": question})
result["result"]

'PAN cards are not necessary for all individuals, but they are mandatory for NRIs with a source of income in India to file their taxes or if they want to invest in stocks or mutual funds in India. PAN cards are also required for financial transactions such as opening a bank account, investing in stocks, purchasing or selling property, and investing in India. Additionally, a PAN card is necessary to file income tax returns and invest in mutual funds in India.'

In [27]:
import whisper
import gradio as gr
from langchain.chains import LLMChain

model = whisper.load_model("small")

def transcribe(audio):

    #time.sleep(3)
    # load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # detect the spoken language
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")
    lang = max(probs, key=probs.get)
    # decode the audio
    options = whisper.DecodingOptions(fp16 = False)
    aud_to_text = whisper.decode(model, mel, options).text
    if lang == "en":
      translated_aud_to_text = aud_to_text
      t_result = qa_chain({"query": translated_aud_to_text})["result"]
      return aud_to_text, "No translation is needed as the language is English", t_result, t_result, lang
    else:
      # translate from detected lang to en
      prompt = PromptTemplate(
      input_variables=["source", "target", "text"],
      template="Translate from language {source} to {target}: {text}?",
      )
      chain = LLMChain(llm=llm, prompt=prompt)
      translated_aud_to_text = chain.run({
          'source': lang,
          'target': "en",
          'text': aud_to_text
          })

      t_result = qa_chain({"query": translated_aud_to_text})["result"]
      translated_answer = chain.run({
          'source': "en",
          'target': lang,
          'text': t_result
          })
      return aud_to_text, translated_aud_to_text, translated_answer, t_result, lang

aud_to_text = gr.Textbox(label="Transcribed Question")
translated_aud_to_text = gr.Textbox(label="Question translated to English")
translated_answer = gr.Textbox(label="Chatbot's answer in orginal langauge")
t_result = gr.Textbox(label="Chatbot's answer in English")
lang = gr.Textbox(label="Detected language")

gr.Interface(
    title = 'Multi-lingual Voice based RAG chatbot',
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath")
    ],
    outputs=[
        aud_to_text, translated_aud_to_text, translated_answer, t_result, lang
    ],
    live=True).launch(share = True)

  gr.inputs.Audio(source="microphone", type="filepath")
  gr.inputs.Audio(source="microphone", type="filepath")


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://7e82793b0e229a4eb0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


