In [None]:
%pip install --upgrade pip
%pip install ipywidgets
%pip install --upgrade datasets[audio] transformers accelerate evaluate jiwer tensorboard gradio
%pip install --upgrade ipywidgets
%pip install notebook
%pip install jupyterlab
%pip install widgetsnbextension

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): presumably only required if the model repo is private/gated
# or if something is pushed to the Hub later — confirm.
from huggingface_hub import notebook_login

notebook_login()

In [None]:
from transformers import AutoTokenizer

# Hub repository id of the Arabic Whisper checkpoint used throughout this notebook.
model_name = "humbleakh/whisper-small-arabic"
# Load the tokenizer published in that repository.
# NOTE(review): neither `tokenizer` nor `model_name` is referenced by the later
# cells visible here (they repeat the repository id as a literal) — confirm
# this cell is still needed.
tokenizer = AutoTokenizer.from_pretrained(model_name)


In [None]:
from transformers import WhisperForConditionalGeneration, WhisperProcessor

# Fine-tuned checkpoint: the model weights come from the Hub repository.
model = WhisperForConditionalGeneration.from_pretrained("humbleakh/whisper-small-arabic")

# The processor (which wraps the tokenizer configured below) is taken from the
# base model rather than from the fine-tuned repository.
processor = WhisperProcessor.from_pretrained("openai/whisper-small")

# Set the tokenizer's prefix tokens for Arabic transcription.
processor.tokenizer.set_prefix_tokens(task="transcribe", language="arabic")

In [None]:
from transformers import WhisperForConditionalGeneration, WhisperProcessor, pipeline
import gradio as gr
import torch

# Speech-recognition pipeline backed by the fine-tuned checkpoint. It is built
# once at cell level and reused by every transcription request.
pipe = pipeline(
    "automatic-speech-recognition",
    model="humbleakh/whisper-small-arabic",
    chunk_length_s=30,       # long audio is processed in 30-second chunks
    return_timestamps=True,  # enable timestamp tokens
    # Device index 0 when CUDA is available, otherwise -1.
    device=(0 if torch.cuda.is_available() else -1),
)

def transcribe(audio):
    """Transcribe an audio file as Arabic speech and return the text.

    Args:
        audio: The audio input forwarded to the module-level ``pipe``. The
            Gradio interface in this notebook supplies a file path
            (``type="filepath"``). ``None`` or a blank string means nothing
            was uploaded and is reported without calling the pipeline.

    Returns:
        The ``"text"`` entry of the pipeline result when the result is a
        dict, otherwise the result unchanged. If no audio was provided or
        the pipeline fails, a human-readable message string is returned
        instead; this function does not raise for pipeline errors.
    """
    # Guard against an empty submission so the user gets a clear message
    # instead of a low-level error from inside the pipeline.
    if audio is None or (isinstance(audio, str) and not audio.strip()):
        return "No audio provided. Please upload an audio file."

    try:
        result = pipe(
            audio,
            batch_size=1,
            generate_kwargs={
                "language": "arabic",
                "task": "transcribe",
                "return_timestamps": True,
            },
        )
        # Keep only the transcript when the result is a dict; anything else
        # is passed through as-is.
        return result["text"] if isinstance(result, dict) else result
    except Exception as e:
        # Deliberate best-effort: show the failure in the output box rather
        # than letting the exception escape the demo callback.
        return f"Error during transcription: {str(e)}"

# Input widget: upload-only audio, handed to the callback as a file path.
audio_input = gr.Audio(
    sources=["upload"],
    type="filepath",
    label="Upload Audio File (supports long files)",
)

# Output widget: the transcript, with a copy button.
transcript_box = gr.Textbox(show_copy_button=True, label="Transcription")

# Wire the widgets to the transcription callback.
iface = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs=transcript_box,
    title="Whisper Small Arabic",
    description="Upload an Arabic audio file for transcription. Supports long audio files (WER: 44.17%)",
)

# Start the demo.
iface.launch()