In [1]:
import pandas as pd

In [2]:
# Metadata table listing the audio clips to process. Later cells read its
# "filename" column and write the transcriptions back to this same path.
CSV_PATH = "./hotword-detection/cv-valid-dev.csv"
df = pd.read_csv(filepath_or_buffer=CSV_PATH)

In [3]:
# Load the CTC model and its processor from the same local checkpoint
# directory, so the two cannot drift apart.
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

MODEL_DIR = "./asr_train/wav2vec2-large-960h-cv"
processor = Wav2Vec2Processor.from_pretrained(MODEL_DIR)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_DIR)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import os
import torch
import torchaudio
# Directory holding the audio clips; each CSV "filename" value is joined onto it.
AUDIO_DIR = "./data/common_voice/cv-valid-dev"

In [5]:
# Transcribe every clip listed in the CSV and store the text in a new
# "generated_text" column, then write the table back to CSV_PATH.

# Sample rate every clip is converted to before it is handed to the processor.
TARGET_SAMPLE_RATE = 16000

# One Resample transform per distinct source rate, built on first use, so the
# resampling kernel is not recomputed for every file.
resamplers = {}
generated = []

print(f"Processing {len(df)} files...")

# Only the "filename" column is needed, so iterate over it directly.
for filename in df["filename"]:
    audio_path = os.path.join(AUDIO_DIR, filename)

    # torchaudio.load returns a (channels, samples) tensor and the native rate.
    waveform, sample_rate = torchaudio.load(audio_path)

    if sample_rate != TARGET_SAMPLE_RATE:
        if sample_rate not in resamplers:
            resamplers[sample_rate] = torchaudio.transforms.Resample(
                orig_freq=sample_rate, new_freq=TARGET_SAMPLE_RATE
            )
        waveform = resamplers[sample_rate](waveform)
        sample_rate = TARGET_SAMPLE_RATE

    # Down-mix to mono by averaging the channel axis. For a single-channel
    # clip this yields exactly the same samples as dropping the channel axis;
    # for a multi-channel clip it avoids passing a 2-D array to the processor,
    # which would be interpreted as a batch of separate utterances.
    mono = waveform.mean(dim=0)

    # get inputs
    inputs = processor(mono.numpy(), sampling_rate=sample_rate, return_tensors="pt", padding=True)

    # perform inference (no gradients needed) and greedy-decode the CTC output
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    generated.append(processor.decode(predicted_ids[0]))

# Add to DataFrame and save (this overwrites the CSV that was loaded earlier)
df["generated_text"] = generated
df.to_csv(CSV_PATH, index=False)

print(f"\nDone. Transcriptions saved to: {CSV_PATH}")

Processing 4076 files...

Done. Transcriptions saved to: ./hotword-detection/cv-valid-dev.csv


In [6]:
# Phrases to flag in the transcriptions (matched case-insensitively as substrings).
hotwords = [
    "be careful",
    "destroy",
    "stranger",
]

In [7]:
# Label a row 1 if its transcription contains any hotword (case-insensitive
# substring match), else 0.
# Missing transcriptions are treated as empty text: an empty transcription
# comes back as NaN when the CSV is re-read, and NaN has no .lower().
transcripts_lower = df["generated_text"].fillna("").astype(str).str.lower()
df["label"] = transcripts_lower.apply(
    lambda text: 1 if any(hotword in text for hotword in hotwords) else 0
)

In [8]:
# Keep only the filenames of rows flagged as containing a hotword,
# renumbered from zero.
is_detected = df["label"] == 1
df_detected = df.loc[is_detected, ["filename"]].reset_index(drop=True)

In [9]:
# Write the detected filenames to a plain text file, one per line,
# with no header row and no index column.
df_detected.to_csv(
    "./hotword-detection/detected.txt",
    sep=" ",
    index=False,
    header=False,
)