In [None]:
"""
Pipeline for Uzbek Speech-to-Text and Named Entity Recognition
"""

In [1]:
!pip install datasets transformers gradio peft torch typing
!pip install git+https://github.com/huggingface/transformers
!pip install git+https://github.com/huggingface/peft.git

Collecting git+https://github.com/huggingface/transformers
  Cloning https://github.com/huggingface/transformers to /tmp/pip-req-build-qs5airzt
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers /tmp/pip-req-build-qs5airzt
  Resolved https://github.com/huggingface/transformers to commit 85eb3392318fc91a97692f23e1ce69b916567185
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting git+https://github.com/huggingface/peft.git
  Cloning https://github.com/huggingface/peft.git to /tmp/pip-req-build-f_23b7pm
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/peft.git /tmp/pip-req-build-f_23b7pm
  Resolved https://github.com/huggingface/peft.git to commit ae55fdcc5c4830e0f9fb6e56f16555bafca392de
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel 

In [12]:
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification, AutoProcessor, AutoModelForPreTraining
from datasets import load_dataset, Audio
import gradio as gr
from peft import PeftConfig, PeftModel
from typing import List, Dict

In [13]:
# STT Model and related configs
STT_MODEL_ID = "oyqiz/uzbek_stt"
STT_LANGUAGE = "Uzbek"
STT_TASK = "transcribe"

# NER model path and labels
NER_MODEL_ID = "ibodullo2205/uzbek-ner"
LABEL_LIST = ['CARDINAL', 'DATE', 'EMAIL', 'EVENT', 'FAC', 'FACILITY', 'GPE', 'JCH-2022', 'JOURNAL', 'LANGUAGE', 'LAW', 'LOC', 'MISC', 'MONEY', 'NORP', 'ORDINAL', 'ORG', 'PER', 'PERCENT', 'PERIOD', 'PERSON', 'PHONE', 'PRODUCT', 'QUANTITY', 'RASUM', 'SOCIAL_MEDIA', 'TIME', 'WEBSITE', 'WORK_OF_ART']
label2id = {label: idx for idx, label in enumerate(LABEL_LIST)}
id2label = {idx: label for label, idx in label2id.items()}

# Device configuration
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

In [14]:
def load_stt_model():
    """Loads the fine-tuned STT model and its components."""
    # Load model directly
    pipe = pipeline("automatic-speech-recognition", model="oyqiz/uzbek_stt")
    processor = AutoProcessor.from_pretrained("oyqiz/uzbek_stt")
    model = AutoModelForPreTraining.from_pretrained("oyqiz/uzbek_stt")

    return pipe

In [16]:
def load_ner_model():
    """Loads the fine-tuned NER model and tokenizer."""
    tokenizer = AutoTokenizer.from_pretrained(NER_MODEL_ID)
    model = AutoModelForTokenClassification.from_pretrained(
        NER_MODEL_ID,
        ignore_mismatched_sizes=True # Adding this line to ignore mismatched sizes.
    ).to(DEVICE)

    ner_pipeline = pipeline("token-classification", model=model, tokenizer=tokenizer, device=DEVICE, aggregation_strategy="simple")

    return ner_pipeline

In [7]:
def process_audio(audio_file):
    """Processes audio input, transcribes it, and extracts named entities."""
    try:
        # 1. STT
        transcription = stt_pipeline(audio_file)["text"]
        if not transcription:
             return "Could not transcribe audio", [] # Return if transcription fails

        # 2. NER
        ner_results = ner_pipeline(transcription)

        # Format NER output for better readability
        formatted_entities = []
        for entity in ner_results:
              formatted_entities.append((entity['entity_group'], entity['word']))

        return transcription, formatted_entities
    except Exception as e:
        print(f"Error in pipeline: {e}")
        return "Error processing audio", []

In [18]:
def build_gradio_interface():
      iface = gr.Interface(
            fn=process_audio,
            # Use sources=["microphone"] instead of source="microphone"
            inputs=gr.Audio(sources=["microphone"], type="filepath"),
            outputs=[gr.Textbox(label="Transcription"), gr.HighlightedText(label="Named Entities")],
            title="Uzbek Speech-to-Text + Named Entity Recognition Pipeline",
            description="This demo uses fine-tuned Whisper for Uzbek Speech-to-Text and a fine-tuned BERT model for Named Entity Recognition. Record or upload an audio file to see the results."
      )
      return iface

In [19]:
if __name__ == "__main__":
    # Load models
    print("Loading STT model...")
    stt_pipeline = load_stt_model()
    print("Loading NER model...")
    ner_pipeline = load_ner_model()
    print("Models loaded.")

    # Build and launch Gradio interface
    iface = build_gradio_interface()
    iface.launch()

Loading STT model...


Device set to use cuda:0
Some weights of the model checkpoint at oyqiz/uzbek_stt were not used when initializing Wav2Vec2ForPreTraining: ['lm_head.bias', 'lm_head.weight']
- This IS expected if you are initializing Wav2Vec2ForPreTraining from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2ForPreTraining from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Wav2Vec2ForPreTraining were not initialized from the model checkpoint at oyqiz/uzbek_stt and are newly initialized: ['project_hid.bias', 'project_hid.weight', 'project_q.bias', 'project_q.weight', 'quantizer.codevectors', 'quantizer.weight_proj.bias', 'quantizer.weight_proj.weight']
You should probably TRAIN this model on a down-st

Loading NER model...


Device set to use cuda


Models loaded.
Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://7cb141f160c5bebf9d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
