In [None]:
pip install langchain-experimental psutil gradio speechrecognition

# Import statements

In [39]:
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ChatMessageHistory
from gradio.data_classes import FileData
import time
import gradio as gr
import psutil
import speech_recognition as sr

# Connect to a locally running LLM using Ollama

In [40]:
# This assumes that ollama is running on localhost at port 11434 and has "mistral:instruct" downloaded.
#
# Two logical cores are left free for the rest of the system, but at least one thread is
# always requested: psutil.cpu_count() can return None when the count is undetermined, and
# on machines with two or fewer cores the plain subtraction would be zero or negative.
llm = Ollama(
    model="mistral:instruct",
    num_thread=max(1, (psutil.cpu_count() or 1) - 2),
    keep_alive=-1,
    num_ctx=4098,  # NOTE(review): possibly intended to be 4096 — confirm before changing
)
# llm.invoke("Hi")  # quick smoke test

# Create chain for conversational generation

In [41]:
# The system message fixes the assistant's persona; the "messages" placeholder is filled
# with the conversation turns supplied at invocation time.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant named Atom. "
            "Answer all questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Pipe the rendered prompt straight into the LLM.
chain = prompt | llm

# Create conversational memory for LLMs

In [42]:
# Module-level conversation history: slow_reply appends each user prompt and model reply
# to it, clears it when a chat starts with an empty history, and passes its messages to
# the chain on every request.
ephemeral_chat_history = ChatMessageHistory()

# Extract contents of text files passed in by user in chat

In [43]:
def get_files_content(files):
    """Read the uploaded text files and concatenate their contents.

    Args:
        files: Iterable of uploaded files. Each entry is either a mapping with a
            ``"path"`` key or a plain path (``str`` / ``os.PathLike``). ``None``
            or an empty list means "nothing uploaded".

    Returns:
        A ``(status, content)`` tuple:
        * ``(True, "")`` when no files were supplied;
        * ``(True, text)`` when at least one file was read, with the individual
          file contents joined by a blank line;
        * ``(False, "")`` when files were supplied but every one of them was
          empty or unreadable.
    """
    import os  # local import: only needed to recognise path-like entries

    if not files:
        return True, ""

    contents = []
    for entry in files:
        # Accept both {"path": ...} mappings and bare path strings.
        path = entry if isinstance(entry, (str, os.PathLike)) else entry["path"]
        try:
            # open() is inside the try so that a missing or unopenable file is
            # skipped with a warning instead of aborting the whole request.
            with open(path, 'r') as handle:
                text = handle.read()
        except (OSError, UnicodeError):
            print(f"Warning : Unable to read the contents of the file '{path}'. Skipping it..")
            continue
        if text == "":
            print(f"Warning : The '{path}' file is empty. Skipping it..")
            continue
        contents.append(text)

    if not contents:
        return False, ""
    return True, "\n\n".join(contents)

# Convert speech to text 

In [44]:
def get_audio_content(audio):
    """Transcribe a recorded audio file to text.

    Args:
        audio: Path of the audio file to transcribe, or ``None`` when no audio
            was recorded.

    Returns:
        A ``(status, text)`` tuple:
        * ``(True, "")`` when ``audio`` is ``None``;
        * ``(True, transcript)`` on success, with the transcript wrapped in
          blank lines so it can be appended to other context;
        * ``(False, "")`` when the file could not be read or transcribed.
    """
    if audio is None:
        return True, ""

    try:
        recognizer = sr.Recognizer()
        # Load the whole recording, closing the file before transcription starts.
        with sr.AudioFile(audio) as source:
            recording = recognizer.record(source)
        transcript = recognizer.recognize_google(recording)
    except Exception as exc:
        # Best effort: report the failure through the status flag, but leave a
        # trace of the cause instead of swallowing it silently.
        print(f"Warning : Unable to transcribe the audio '{audio}' ({exc}). Skipping it..")
        return False, ""
    return True, "\n\n" + transcript + "\n\n"

# Combine content from text files and speech to create context

In [45]:
def get_context(files, audio):
    """Build the LLM context from the uploaded files and the recorded audio.

    Returns a ``(status, text)`` tuple. When both sources succeed, ``status`` is
    ``True`` and ``text`` is the file content followed by the audio transcript.
    Otherwise ``status`` is ``False`` and ``text`` holds one error line per
    failing source.
    """
    files_ok, files_text = get_files_content(files)
    audio_ok, audio_text = get_audio_content(audio)

    if files_ok and audio_ok:
        return True, files_text + audio_text

    # At least one source failed: collect the matching error messages.
    problems = []
    if not files_ok:
        problems.append("Error : Failed to read contents of all uploaded files or it was empty. Please try again and upload readable docs.\n")
    if not audio_ok:
        problems.append("Error : Failed to convert audio to text or the audio was empty. Try again.\n")
    return False, "".join(problems)

# Generate prompt to feed LLM

In [46]:
def generate_prompt(context, question):
    """Build the text sent to the LLM from the context and the user's question.

    A value counts as "blank" when it is ``None``, empty, or whitespace only;
    this applies to both ``context`` and ``question``.

    Args:
        context: Text extracted from uploaded files and/or audio.
        question: Text typed by the user.

    Returns:
        * ``""`` when both inputs are blank;
        * ``question`` unchanged when only the context is blank;
        * a summarisation prompt when only the question is blank;
        * a document question-answering prompt when both are present.
    """
    has_context = bool(context and context.strip())
    has_question = bool(question and question.strip())

    if not has_context and not has_question:
        return ""

    if not has_context:
        return question

    if not has_question:
        return f"""
        Can you provide a comprehensive summary of the given text? 
        The summary should cover all the key points and main ideas presented in the original text, 
        while also condensing the information into a concise and easy-to-understand format. 
        Please ensure that the summary includes relevant details and examples that support the main ideas,
        while avoiding any unnecessary information or repetition. 
        The length of the summary should be appropriate for the length and complexity of the original text, 
        providing a clear and accurate overview without omitting any important information:
        "{context}"
        CONCISE SUMMARY:
        """

    return f"""
        Analyze and examine the following document and answer the given question.
        Base your answer on the information provided in the document. 
        If you cannot answer the question based on the document provided,
        then answer it based on your knowledge but specify that you are using external information.
        
        Document:
        "{context}"

        Question:
        "{question}"

        ANSWER:
        """

# Driver method

In [47]:
def _type_out(text, speaker="Atom", delay=0.1):
    """Yield ever-longer prefixes of ``text``, each headed by ``speaker``, pausing ``delay`` seconds before each one."""
    for end in range(1, len(text) + 1):
        time.sleep(delay)
        yield speaker + "\n" + text[:end]


def slow_reply(message, history, audio):
    """Chat callback: answer the user's message, streaming the reply character by character.

    Args:
        message: Mapping with a ``"text"`` entry (the typed question) and a
            ``"files"`` entry (the uploaded files).
        history: Previous turns of the chat as supplied by the UI; an empty
            history marks the start of a new conversation.
        audio: Recorded audio to transcribe, or ``None``.

    Yields:
        Progressively longer strings, each starting with ``"Atom\\n"``, containing
        either an error message or the model's reply.

    Raises:
        Whatever ``chain.invoke`` raises; the pending user message is removed
        from the shared history first so a retry starts from a clean state.
    """
    context_status, context = get_context(message["files"], audio)
    if not context_status:
        # On failure, `context` carries the error text to show to the user.
        yield from _type_out(context)
        return

    llm_input = generate_prompt(context, message["text"])
    if llm_input == "":
        yield from _type_out("Error : Empty input sent!")
        return

    # A fresh chat in the UI must not inherit turns from the previous one.
    if len(history) == 0:
        ephemeral_chat_history.clear()

    ephemeral_chat_history.add_user_message(llm_input)
    try:
        reply = chain.invoke({"messages": ephemeral_chat_history.messages})
    except Exception:
        # Roll back the unanswered prompt so the history does not end up with
        # two consecutive user messages on the next attempt.
        if ephemeral_chat_history.messages:
            ephemeral_chat_history.messages.pop()
        raise
    ephemeral_chat_history.add_ai_message(reply)

    yield from _type_out(reply)

# Create chatbot UI using Gradio

In [48]:
# Microphone-only audio input; the recording is passed to the chat callback as a file path.
input_audio = gr.Audio(
    label="Audio",
    type="filepath",
    sources=["microphone"],
    waveform_options=gr.WaveformOptions(
        show_controls=False,
        skip_length=2,
        waveform_progress_color="#0066B4",
        waveform_color="#01C6FF",
    ),
)

# Multimodal chat window wired to slow_reply, with the audio recorder as an extra input.
# The interface is launched immediately when the cell runs.
gr.ChatInterface(
    slow_reply,
    title="MultiInputChatbot",
    description="Your all-in-one multilingual assistant for text, audio, and file-based conversations.",
    multimodal=True,
    additional_inputs=input_audio,
    undo_btn=None,
).launch()

Running on local URL:  http://127.0.0.1:7871

To create a public link, set `share=True` in `launch()`.


