## Steps Involved in the setup

1. Install librosa with `conda install`. Afterwards there might be an error reporting that `soundfile` could not be found; if so, reinstall it with the next two steps.
2. `pip uninstall soundfile`
3. `pip install soundfile`

In [1]:
from huggingface_hub import hf_hub_download
from huggingface_hub import list_repo_files

In [2]:
# Hugging Face Hub repository to mirror locally (swap in any other model ID).
model_id = "distil-whisper/distil-large-v3"

# Folder on disk that receives the downloaded repository files.
local_dir = "C:/Users/wengz/Desktop/model_weights/distil-whisper/distil-large-v3"

# Enumerate every file the repository contains ...
files = list_repo_files(repo_id=model_id)

# ... then fetch them one at a time, reporting the path each download returned.
for file in files:
    file_path = hf_hub_download(
        repo_id=model_id,
        filename=file,
        local_dir=local_dir,
    )
    print(f"Downloaded {file} to {file_path}")

In [7]:
import os

# Directory that contains the ffmpeg binary; replace with your own location.
FFMPEG_BIN_DIR = "C:/ffmpeg/bin"

# Append the directory to PATH only when it is not already listed, so that
# re-running this cell does not keep growing PATH with duplicate entries.
# `.get` also covers an environment in which PATH is not set at all.
# NOTE(review): presumably this has to run before the audio-loading cells so
# that ffmpeg-based decoding can find the binary - confirm the intended order.
_current_path = os.environ.get("PATH", "")
if FFMPEG_BIN_DIR not in _current_path.split(os.pathsep):
    if _current_path:
        os.environ["PATH"] = _current_path + os.pathsep + FFMPEG_BIN_DIR
    else:
        os.environ["PATH"] = FFMPEG_BIN_DIR

In [3]:
import torchaudio

# Show the audio I/O backends torchaudio reports for this environment.
available_backends = torchaudio.list_audio_backends()
print(available_backends)

['soundfile']


In [4]:
import librosa

# Recording that will be transcribed.
audio_file = "C:/Users/wengz/Desktop/Council Meeting - July 26, 2022.mp3"

# Decode the file at a 16 kHz sample rate. The second value returned by
# librosa.load is not needed here and is discarded.
audio_input, _ = librosa.load(
    audio_file,
    sr=16000,
)

In [5]:
import audioread

# List every decoding backend audioread reports as available in this
# environment (this shows what is available, not which one a load will use).
print("Available audioread backends:")
backends = audioread.available_backends()
for backend in backends:
    print(backend)

Available audioread backends:
<class 'audioread.rawread.RawAudioFile'>
<class 'audioread.ffdec.FFmpegAudioFile'>


# Decode a recording with torchaudio.
# NOTE(review): this rebinds `audio_file` to a different recording than the
# librosa cell, and the visible pipeline call consumes `audio_input`, not
# `waveform` - confirm whether this cell is still needed.
audio_file = "C:/Users/wengz/Desktop/2024-09-26 Planning Meeting.mp3"
waveform, sample_rate = torchaudio.load(audio_file)

# Convert from the file's native sample rate to the 16 kHz Whisper requires.
resampler = torchaudio.transforms.Resample(
    orig_freq=sample_rate,
    new_freq=16000,
)
waveform = resampler(waveform)

In [6]:
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from datasets import load_dataset  # not used in this cell; kept in case other cells rely on it

# Run on the first CUDA GPU when one is present, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Pick the precision together with the device: half precision on the GPU,
# full precision on the CPU. Forcing float16 unconditionally would pair it
# with the CPU fallback above, where half-precision inference is poorly
# supported.
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Local directory the model files were downloaded to. This rebinds `model_id`
# from the Hub repository ID used by the download cell to a filesystem path.
model_id = "C:/Users/wengz/Desktop/model_weights/distil-whisper/distil-large-v3"

# Load the speech-to-text model from the local safetensors weights and move
# it to the selected device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)

# The processor supplies both the tokenizer and the feature extractor that
# the pipeline needs.
processor = AutoProcessor.from_pretrained(model_id)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    return_timestamps=True,
    device=device,
)

# Transcribe the audio loaded by the librosa cell (`audio_input`) and print
# the full transcript text.
result = pipe(audio_input)
print(result["text"])


You have passed task=transcribe, but also have set `forced_decoder_ids` to [[1, None], [2, 50360]] which creates a conflict. `forced_decoder_ids` will be ignored in favor of task=transcribe.
  attn_output = torch.nn.functional.scaled_dot_product_attention(


 Being that it is 1 p.m. I will call the council meeting to order. Before we move forward with the approval of the, well, maybe I'll do the approval of the agenda first. Can I have a mover and seconder for the approval of the agenda please? Councillor Rolke, Councillor Fuel. Be resolved that the agenda for July 26th, council meeting be approved. Thank you. Is there any additions from council? I have SHH appointment. And I also have, with ward appropriations, we have two resolutions for that, just so council knows. Is there anything else? All those in favor? Carried were zero. So before we move on to the minutes, I do have a statement to read on behalf of Councilor Williams. So before we begin the meeting, I would like to take a moment to speak about Councilor Williams, due to both his personal health and extenuating circumstances over the last eight months, effective immediately, Council Williams has resigned from his position. On behalf of Council Administration and the public, I want

In [7]:
# Break the transcript into pieces at every ". " (period followed by a space).
# NOTE(review): this is a plain string split - the separator is removed from
# each piece, "?" and "!" do not end a piece, and abbreviations such as
# "p.m." are cut in two. Confirm that is acceptable for downstream use.
transcript_text = result["text"]
list_result = transcript_text.split(". ")

In [8]:
# Display the list of transcript pieces as this cell's output.
list_result

[' Being that it is 1 p.m',
 'I will call the council meeting to order',
 "Before we move forward with the approval of the, well, maybe I'll do the approval of the agenda first",
 'Can I have a mover and seconder for the approval of the agenda please? Councillor Rolke, Councillor Fuel',
 'Be resolved that the agenda for July 26th, council meeting be approved',
 'Thank you',
 'Is there any additions from council? I have SHH appointment',
 'And I also have, with ward appropriations, we have two resolutions for that, just so council knows',
 'Is there anything else? All those in favor? Carried were zero',
 'So before we move on to the minutes, I do have a statement to read on behalf of Councilor Williams',
 'So before we begin the meeting, I would like to take a moment to speak about Councilor Williams, due to both his personal health and extenuating circumstances over the last eight months, effective immediately, Council Williams has resigned from his position',
 'On behalf of Council Ad