In [4]:
%%capture
!pip -q install openai
!pip -q install langchain-openai
!pip -q install langchain-core
!pip -q install langchain-community
!pip -q install sentence-transformers
!pip -q install langchain-huggingface
!pip -q install langchain_experimental
!pip install gradio
!pip install transformers
!pip install easyocr

In [5]:
%%capture
import os
import pickle
import numpy as np
import pandas as pd
from getpass import getpass
import easyocr

import openai
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
import gradio as gr
from transformers import pipeline
import google.colab.userdata as googlecolab

In [6]:
api_key = googlecolab.get('OA_API')
os.environ['OPENAI_API_KEY'] = api_key
openai.api_key = os.getenv('OPENAI_API_KEY')

In [7]:
llm_gpt4o = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
llm_gpt35 = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
llm_llama = ChatOpenAI(model_name="Llama-3.1-Nemotron-70B-Instruct-HF", temperature=0)

In [None]:

def get_conversation_history():
    history = []
    for message in memory.chat_memory.messages:
      history.append(f"{message.type}: {message.content}\n")
    return "".join(set(history))

def InvokeLLM(llm_choice, prompt):
    prompt_template = PromptTemplate.from_template(
        "The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.\n\nCurrent conversation:\n{history}\n\nHuman: {input}")

    if llm_choice == "gpt-4o-mini":
        llm = llm_gpt4o
    elif llm_choice == "gpt-3.5-turbo":
        llm = llm_gpt35
    elif llm_choice == "Llama-3.1-Nemotron-70B-Instruct-HF":
        llm = llm_llama
    else:
        return "Invalid LLM choice"

    conversation_chain = ConversationChain(
        llm=llm,
        prompt=prompt_template,
        memory=memory,
        verbose=False
    )
    response = conversation_chain.predict(input=prompt)
    memory.save_context({"input": prompt}, {"output": response})
    return response

def combined_response(llm_choice, prompt, transcription, extraction):
    if transcription != "":
      transcript_text = transcription
    else:
      transcript_text = extraction

    combined_prompt = f"{prompt} Transcription: {transcript_text}"
    llm_response = InvokeLLM(llm_choice, combined_prompt)
    return llm_response

def clear_memory():
  memory.clear()
  return " "

def transcribe(audio, speech_model):
    transcriber = pipeline("automatic-speech-recognition", model=speech_model)
    try:
      sr, y = audio
      if y.ndim > 1:
          y = y.mean(axis=1)
      y = y.astype(np.float32)
      y /= np.max(np.abs(y))
    except Exception as e:
      err = "Error during transcription:  {0}".format(e)
      print(err)
      return "Transcription Error"
    return transcriber({"sampling_rate": sr, "raw": y})["text"]

def ocr_read_image(image_path):
    reader = easyocr.Reader(['en'])
    result = reader.readtext(image_path)
    text = ""
    for detection in result:
        text += detection[1] + " "
    return text

In [8]:
# ------ Gradio development --------
with gr.Blocks() as demo:
    with gr.Row():
        llm_speech = gr.Radio(["whisper-1", "openai/whisper-base.en"], label="Choose Speeh-Text LLM: ", value="openai/whisper-base.en")
        audio_input = gr.Audio(sources="microphone")
        btn_transcribe = gr.Button("Generate Transcription")
        transcription = gr.Textbox(label="Transcription")
    with gr.Row():
        image_input = gr.Image(type="filepath")
        btn_preview = gr.Button("Preview") # Preview button
        preview_output = gr.Image(label="Image Preview") # Output for the preview
        btn_extract_image = gr.Button("Extract Text from Image")
        extraction = gr.Textbox(label="Extracted Text")
    with gr.Row():
        llm_choice = gr.Radio(["gpt-4o-mini", "gpt-3.5-turbo", "Llama-3.1-Nemotron-70B-Instruct-HF"], label="Choose Q&A LLM: ", value="gpt-3.5-turbo")
        prompt = gr.Textbox(label="Enter your prompt: ")
    with gr.Row():
        response = gr.Textbox(label="Response generated")
    memory = ConversationBufferMemory(return_messages=True)
    btn_submit = gr.Button("Generate Response")
    btn_history = gr.Button("Show Conversation")
    btn_clear = gr.Button("Clear Memory")

    btn_transcribe.click(transcribe, inputs=[audio_input, llm_speech], outputs=transcription)
    btn_preview.click(lambda x: x, inputs=image_input, outputs=preview_output) # Preview functionality
    btn_extract_image.click(ocr_read_image, inputs=[image_input], outputs=extraction)
    btn_submit.click(combined_response, inputs=[llm_choice, prompt, transcription, extraction], outputs=response)
    btn_clear.click(clear_memory, outputs=response)
    btn_history.click(get_conversation_history, outputs=response)

demo.launch()

  memory = ConversationBufferMemory(return_messages=True)


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://eb1567f6ea835c9d45.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
# ---------------- Testing purpose -----------------
InvokeLLM("gpt-3.5-turbo", "what is the capital of Singapore?")
InvokeLLM("gpt-3.5-turbo", "where is India?")
get_conversation_history()

'ai: AI: The capital of Singapore is Singapore City. It is a bustling metropolis known for its modern architecture, diverse culture, and delicious food. The city-state is located at the southern tip of the Malay Peninsula in Southeast Asia. It has a population of around 5.7 million people and is a major financial hub in the region. Is there anything else you would like to know about Singapore?\nhuman: where is India?\nhuman: what is the capital of Singapore?\nai: AI: India is a country located in South Asia. It is known for its rich history, diverse culture, and vibrant traditions. India shares its borders with several countries including Pakistan, China, Nepal, Bhutan, Bangladesh, and Myanmar. The country is also bordered by the Indian Ocean to the south. With a population of over 1.3 billion people, India is the second most populous country in the world. Is there anything else you would like to know about India?\n'