https://www.gradio.app/guides/real-time-speech-recognition


In [None]:

### Run this cell first, then clear the session and run every other cell in the notebook. If creating the chat object still raises an error, restart the session, DON'T run this cell again, and re-run everything else.

!pip install -q accelerate

In [None]:
!pip install gradio



In [None]:
## LLM imports
import transformers
!pip install -q  bitsandbytes
import torch

In [None]:
# Access huggingface
!huggingface-cli login --token "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"

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
# Audio transcription import
from transformers import pipeline
import gradio as gr
import numpy as np

In [None]:
######Run

class Chat:
  """Reverse-dictionary chatbot built on a Llama-3 instruct text-generation pipeline.

  The user describes a word; the model is prompted to answer with a bracketed,
  comma-separated list of candidate words, which is then parsed into a Python list.
  """

  def __init__(self):
    """Load the text-generation pipeline and collect the stop-token ids."""
    self.model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

    model_kwargs = {"torch_dtype": torch.bfloat16}
    if torch.cuda.is_available():
      # Passing `load_in_4bit` directly is deprecated; the supported way to
      # request 4-bit loading is a BitsAndBytesConfig in `quantization_config`.
      # Quantization is only requested when a GPU is present, as before.
      model_kwargs["quantization_config"] = transformers.BitsAndBytesConfig(load_in_4bit=True)

    self.pipeline = transformers.pipeline(
      "text-generation",
      model=self.model_id,
      model_kwargs=model_kwargs,
      device_map="auto",
    )
    # Generation stops on either the tokenizer's EOS token or Llama-3's
    # end-of-turn marker.
    self.terminators = [
      self.pipeline.tokenizer.eos_token_id,
      self.pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]

  def add_template(self, prompt):
    """Wrap the user's description in the chat messages sent to the model.

    Args:
      prompt: The user's description of the word they are looking for.

    Returns:
      A list of two chat messages: the system instruction, then the user prompt.
    """
    # The requested count and the template now agree (the original asked for 15
    # words but showed a template ending in "wordd 25").
    instruction = (
      'You are a reverse dictionary chatbot and the user is going to give you a '
      'description of the word they are trying to find. You should give 15 '
      'possible words they are trying to describe. Your output should be in the '
      'form of a list in this template : [word1, word2, ..., word15]. Do not add '
      'any further comments or notes. Do not repeat any of the words'
    )

    return [
      {"role": "system", "content": instruction},
      {"role": "user", "content": prompt},
    ]

  def clean_generated(self, gen):
    """Parse the model's bracketed, comma-separated answer into a list of words.

    Square brackets are removed, the text is split on commas, surrounding
    whitespace is trimmed from every entry and empty entries are dropped.

    Args:
      gen: Raw text generated by the model, e.g. "[word1, word2, word3]".

    Returns:
      A list of non-empty, whitespace-trimmed strings (empty if `gen` is empty).
    """
    if not gen:
      return []
    without_brackets = gen.replace('[', '').replace(']', '')
    candidates = (piece.strip() for piece in without_brackets.split(','))
    return [word for word in candidates if word]

  def get_suggestions(self, prompt):
    """Ask the model for candidate words matching `prompt`.

    Args:
      prompt: The user's description of the word they are looking for.

    Returns:
      The list of suggested words parsed from the model's reply.
    """
    messages = self.add_template(prompt)
    outputs = self.pipeline(
      messages,
      max_new_tokens=256,
      eos_token_id=self.terminators,
      do_sample=True,
      temperature=0.4,
      top_p=0.9,
    )

    # The pipeline returns the whole conversation; the model's reply is the
    # last message (index 2 for the two-message template above).
    reply = outputs[0]["generated_text"][-1]['content']
    suggestions = self.clean_generated(reply)  # parse once, reuse for log and return
    print(suggestions)
    return suggestions



In [None]:
# Shared Chat instance used by predict(); constructing it builds the
# text-generation pipeline for Chat.model_id (see Chat.__init__).
chat=Chat()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Speech-to-text pipeline (openai/whisper-base.en checkpoint) called by transcribe().
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

In [None]:
import random

import pandas as pd

# Example word descriptions offered by the "Example of prompt" button;
# they are read from the 'Descriptions' column of the CSV file.
df = pd.read_csv('ToT_examples_Sheet1.csv')
descriptions = list(df['Descriptions'])

In [None]:

def transcribe(audio):
    """Transcribe a recording with the module-level `transcriber` pipeline.

    Args:
        audio: A `(sampling_rate, samples)` tuple holding a NumPy sample array,
            or None when nothing has been recorded or uploaded.

    Returns:
        The transcribed text, or "" when there is no audio to transcribe.
    """
    # Submitting with no recording hands the callback None; unpacking it
    # would raise a TypeError.
    if audio is None:
        return ""

    sr, y = audio
    y = y.astype(np.float32)

    # Down-mix multi-channel audio (samples x channels) to mono.
    if y.ndim > 1:
        y = y.mean(axis=1)

    if y.size == 0:
        return ""

    # Peak-normalise, skipping all-zero (silent) input so the division cannot
    # produce NaN samples.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]

def predict(txt):
  """Return the word suggestions the global `chat` produces for `txt`."""
  return chat.get_suggestions(txt)

def clear_audio():
    """Return the reset values for an audio component and a textbox: (None, "")."""
    cleared_audio = None
    cleared_text = ""
    return cleared_audio, cleared_text

def change_tab(id):
    """Return a `gr.Tabs` update that selects the tab whose id is `id`."""
    tab_update = gr.Tabs(selected=id)
    return tab_update

def get_next_suggestions(state):
    """Return the next page of up to five suggestions and the advanced state.

    Args:
        state: A `(current_index, suggestions)` pair.

    Returns:
        A pair `(text, new_state)`: `text` holds the words of the current page,
        each followed by a newline, and `new_state` is
        `(current_index + 5, suggestions)`.
    """
    start, all_words = state
    stop = start + 5

    page_text = "".join(word + "\n" for word in all_words[start:stop])

    return page_text, (stop, all_words)


# UI theme passed to gr.Blocks below: the built-in Soft theme with
# emerald/lime hues and large text, spacing and corner radius.
theme = gr.themes.Soft(
  primary_hue="emerald",
  secondary_hue="lime",
  text_size="lg",
  spacing_size="lg",
  radius_size="lg",
).set(
  # Override two theme variables on top of the Soft defaults.
  background_fill_primary='*neutral_100',
  shadow_drop='*shadow_drop_lg',
)

def update_predictions(txt):
    """Generate suggestions for `txt` and return their first page.

    Returns:
        Whatever `get_next_suggestions` returns for a state starting at index 0:
        the first page of words as text, plus the advanced paging state.
    """
    fresh_state = (0, predict(txt))
    return get_next_suggestions(fresh_state)

def handle_example_prompt():
    """Pick one entry of `descriptions` at random and return it."""
    return random.choice(descriptions)

def clear_text():
    """Return the empty string used to blank a textbox."""
    blank = ""
    return blank

def reset_example_state():
    """Return None, the cleared value for the stored example prompt."""
    cleared = None
    return cleared

# Two-tab UI: tab 0 records/transcribes a description, tab 1 pages through the
# words suggested for it. All components are declared first and the events are
# wired afterwards, so each button triggers exactly one expensive call
# (previously "Submit" ran the transcriber twice and "Generate Words" ran the
# LLM twice, with one result going to a hidden, later-shadowed textbox).
with gr.Blocks(theme=theme) as demo:

  with gr.Tabs() as tabs:
    # Most recently chosen example description (None once cleared).
    example_state = gr.State(None)

    with gr.TabItem("Transcription", id=0):
      example_prompt_button = gr.Button("Example of prompt")
      audio_input = gr.Audio(type="numpy")
      clear_button = gr.Button("Clear")
      submit_button = gr.Button("Submit")
      text_output = gr.Textbox(label="Transcription", max_lines=6)
      generate_button = gr.Button("Generate Words")

    with gr.TabItem("Prediction", id=1):
      text_output2 = gr.Textbox(label="Transcription", max_lines=6)
      predict_words_output = gr.Textbox(label="Words Suggestion", max_lines=6)
      next_button = gr.Button("Next 5 words")
      restart_button = gr.Button("Go to previous page")

      # [index of the next word to show, all suggested words]
      suggestion_state = gr.State([0, []])

  def transcribe_for_both_tabs(audio):
    """Transcribe once and return the text for both transcription boxes."""
    text = transcribe(audio)
    return text, text

  def handle_example_prompt_and_store():
    """Pick a random example for both transcription boxes and the stored state."""
    example_text = random.choice(descriptions)
    return example_text, example_text, example_text

  def generate_and_mirror(txt):
    """Generate suggestions once; also mirror the (possibly edited) text to tab 1."""
    first_page, paging_state = update_predictions(txt)
    return txt, first_page, paging_state

  def clear_everything():
    """Reset audio, both transcriptions, suggestions, example and paging state."""
    return None, "", "", "", None, [0, []]

  submit_button.click(
    transcribe_for_both_tabs,
    inputs=audio_input,
    outputs=[text_output, text_output2],
  )

  # A single handler fills every target; the earlier second handler read
  # example_state while the first one was still writing it.
  example_prompt_button.click(
    handle_example_prompt_and_store,
    inputs=None,
    outputs=[text_output, text_output2, example_state],
  )

  generate_button.click(
    generate_and_mirror,
    inputs=text_output,
    outputs=[text_output2, predict_words_output, suggestion_state],
  )
  # Separate listener so the tab switches without waiting for the LLM.
  generate_button.click(lambda: change_tab(1), inputs=None, outputs=tabs)

  clear_button.click(
    clear_everything,
    inputs=None,
    outputs=[
      audio_input,
      text_output,
      text_output2,
      predict_words_output,
      example_state,
      suggestion_state,
    ],
  )

  next_button.click(
    fn=get_next_suggestions,
    inputs=suggestion_state,
    outputs=[predict_words_output, suggestion_state],
  )
  restart_button.click(lambda: change_tab(0), inputs=None, outputs=tabs)


demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://9432666fc690717271.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


