In [None]:
# ─────────────── Part 1: Parameters ───────────────
from pathlib import Path
from datetime import datetime

# Session configuration: how many audio files to process, where they live,
# and where the results of this run are written.

# Maximum number of audio files to process.
# None = all files
num_files = 2000

# A negative limit has no sensible meaning; used as a slice bound it would
# silently drop files from the end of a list instead of limiting the count.
if num_files is not None and num_files < 0:
    raise ValueError(f"num_files must be None or >= 0, got {num_files}")

# Adjust this directory to wherever you saved your audio files!
notebook_dir = Path.home() / "stanic_audio"

# Fail fast on a wrong path: mkdir(parents=True) below would otherwise create
# the missing input directory silently, and any later search for audio files
# in it would simply come up empty.
if not notebook_dir.is_dir():
    raise FileNotFoundError(
        f"Audio directory not found: {notebook_dir} - please adjust notebook_dir."
    )

# Output directory for this session, named after the start time (DDMMYY_HHMM).
# exist_ok=True lets a second run within the same minute reuse the directory.
timestamp = datetime.now().strftime("%d%m%y_%H%M")
output_dir = notebook_dir / f"results_{timestamp}"
output_dir.mkdir(parents=True, exist_ok=True)

print(f"Verarbeite bis zu {num_files if num_files is not None else 'alle'} Dateien aus {notebook_dir}")
print(f"Ergebnisse landen in {output_dir}")



In [None]:
# ─────────────── Part 2: Loop & Transcription ───────────────
import json
import torch
from speechbrain.inference.ASR import WhisperASR

# Transcribes the selected WAV files with the SpeechBrain Whisper model and
# writes one JSON file per recording into output_dir. A file that fails is
# reported and skipped, so one bad recording does not abort the whole batch.

# 1. Choose the device: GPU when CUDA is available, otherwise CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Verwende Device: {device}")

# 2. Load model
asr_model = WhisperASR.from_hparams(
    source="speechbrain/whisper_rescuespeech",
    savedir="pretrained_models/whisper_rescuespeech",
    overrides={
        "decoder.bos_index": None,
        "decoder.eos_index": None,
    },
    overrides_must_match=False,
    run_opts={"device": device},
)

# 3. List all .wav files recursively in the notebook directory.
#    Sorting makes the selection deterministic when num_files limits it.
wav_files = sorted(notebook_dir.rglob("*.wav"))
if num_files is not None:
    wav_files = wav_files[:num_files]

print(f"Found WAVs: {len(wav_files)}")

# Constant part of every output filename (loop-invariant, so defined once)
model_tag = "speechbrain_whisper_rescuespeech"

# Files that could not be processed, as (path, error) pairs
failed_files = []

# 4. Iterate through all files and transcribe
for idx, wav_path in enumerate(wav_files, 1):
    print(f"[{idx}/{len(wav_files)}] Transcribe: {wav_path.name}")

    try:
        # Transcription
        segments = asr_model.transcribe_file(str(wav_path))

        # JSON structure in the desired format
        json_output = []
        for seg in segments:
            # Guard against a segment without transcript (words is None);
            # calling .split() on None would raise AttributeError.
            text = seg.words or ""
            json_output.append(
                {
                    "start": seg.start,
                    "end": seg.end,
                    "text": text,
                    "words": text.split(),
                }
            )

        # Filename and timestamp
        basename = wav_path.stem
        stamp = datetime.now().strftime("%d%m%y_%H%M%S")
        out_path = output_dir / f"{basename}_{model_tag}_{stamp}.json"

        # The search is recursive, so two recordings in different subfolders
        # can share a stem; if both finish within the same second they would
        # map to the same name. Add a counter instead of overwriting.
        duplicate = 1
        while out_path.exists():
            out_path = output_dir / f"{basename}_{model_tag}_{stamp}_{duplicate}.json"
            duplicate += 1

        # Save in output_dir
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(json_output, f, ensure_ascii=False, indent=4)
    except Exception as err:
        # Per-file boundary of a long batch job: record the problem and go on
        # with the next file. KeyboardInterrupt is not an Exception subclass,
        # so interrupting the run still works.
        failed_files.append((wav_path, err))
        print(f" ✗ Failed: {wav_path.name} ({type(err).__name__}: {err})")
        continue

    print(f" → Saved: {out_path.name}")

if failed_files:
    print(f"⚠️ Finished with {len(failed_files)} failed file(s):")
    for failed_path, failure in failed_files:
        print(f"   {failed_path}: {type(failure).__name__}: {failure}")
else:
    print("✅ Done!")
