In [None]:
'''Using the preprocessed files'''

In [None]:
import os
import pandas as pd
import tempfile
from pydub import AudioSegment
from tqdm.auto import tqdm
import nemo.collections.asr as nemo_asr

In [4]:
# Read the sampled dataset manifest and report how many rows it contains.
all_datasets_df = pd.read_csv(
    'data/final_120_sampled_medical_datasets.csv'
)
print(f'Loaded all datasets merged: {len(all_datasets_df)}')

Loaded all datasets merged: 120


In [None]:
def load_model(model_name="nvidia/parakeet-tdt-0.6b-v2"):
    """Load a pretrained NeMo ASR model and return it.

    Args:
        model_name: Identifier handed to ``ASRModel.from_pretrained``.
            Defaults to the Parakeet checkpoint this notebook evaluates, so
            calling ``load_model()`` with no arguments behaves as before.

    Returns:
        Whatever ``nemo_asr.models.ASRModel.from_pretrained`` returns for
        ``model_name``.
    """
    # Local name deliberately differs from the notebook-level ``nvidia_model``
    # global so the function body does not shadow it.
    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name)
    print("Loaded NVIDIA Parakeet")
    return asr_model

# Load the model once at notebook level; run_parakeet_asr() reads this global
# when it transcribes, so this cell must run before any transcription cell.
nvidia_model = load_model()


In [None]:
def run_parakeet_asr(audio_path, model=None):
    """Transcribe a single audio file with a NeMo ASR model.

    The audio is decoded with pydub, converted to 16 kHz mono when it is not
    already in that format, written to a temporary WAV file, and that file's
    path is passed to the model's ``transcribe`` method. The temporary file
    is removed before returning.

    Args:
        audio_path: Path of the audio file to transcribe (anything
            ``AudioSegment.from_file`` accepts).
        model: Object exposing ``transcribe(list_of_paths)`` whose first
            result has a ``.text`` attribute. When omitted, the notebook-level
            ``nvidia_model`` global is used.

    Returns:
        The transcription text, or the string ``"ERROR: <message>"`` if any
        step raised. Failures are returned instead of raised so that one bad
        file does not abort a batch run; callers that score the output should
        filter rows starting with ``"ERROR: "``.
    """
    temp_path = None
    try:
        audio = AudioSegment.from_file(audio_path)

        # Normalize to 16 kHz mono if it is not already in that format.
        if audio.frame_rate != 16000:
            audio = audio.set_frame_rate(16000)
        if audio.channels != 1:
            audio = audio.set_channels(1)

        # Reserve a unique .wav path and release our own handle right away,
        # so pydub is the only writer when it opens the same path below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as handle:
            temp_path = handle.name

        # export() returns the still-open output file; close it explicitly so
        # the data is flushed and the file can be deleted afterwards.
        audio.export(temp_path, format="wav").close()

        # Resolve the model inside the try block so a missing global is
        # reported through the same "ERROR: ..." channel as other failures.
        asr_model = nvidia_model if model is None else model
        return asr_model.transcribe([temp_path])[0].text

    except Exception as e:
        return f"ERROR: {e}"

    finally:
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Cleanup is best-effort: a leftover temp file must not
                # replace the transcription (or error string) being returned.
                pass


In [None]:
# Create the output directory *before* the slow transcription pass, so the run
# cannot finish and then lose its results to a failed save.
output_csv = "results/nvidia_parakeet_asr_results.csv"
os.makedirs(os.path.dirname(output_csv), exist_ok=True)

# Transcribe every clip in row order. run_parakeet_asr returns an
# "ERROR: ..." string for files it could not process, so the new column
# always has one entry per row.
audio_paths = all_datasets_df["audio_file"].tolist()
all_datasets_df["Nvidia-Parakeet-ASR"] = [
    run_parakeet_asr(p) for p in tqdm(audio_paths, desc="Parakeet ASR")
]

all_datasets_df.to_csv(output_csv, index=False)
print("Saved:", output_csv)