In [1]:
import os
import torch
import time
import librosa
from transformers import pipeline

# --- Configuration -----------------------------------------------------------
INPUT_FOLDER = "input"            # folder scanned for audio files (non-recursive)
OUTPUT_FOLDER = "audio_output"    # one .txt transcript per input file is written here
SUPPORTED_EXTENSIONS = {".mp3", ".wav", ".m4a", ".flac", ".ogg", ".aac"}
SAMPLE_RATE = 16000               # librosa resamples every file to this rate before transcription

os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# --- Model -------------------------------------------------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Dispositivo: {device}")

pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-medium",
    return_timestamps=True,
    device=device,
    generate_kwargs={"task": "transcribe", "language": "it"}
)

# --- Discover input files ----------------------------------------------------
# A missing input folder is reported instead of raising FileNotFoundError.
# Entries are sorted because os.listdir() returns them in arbitrary order, and
# only regular files are kept (a directory named "x.mp3" is skipped).
if os.path.isdir(INPUT_FOLDER):
    audio_files = sorted(
        name for name in os.listdir(INPUT_FOLDER)
        if os.path.splitext(name)[1].lower() in SUPPORTED_EXTENSIONS
        and os.path.isfile(os.path.join(INPUT_FOLDER, name))
    )
else:
    print(f"Cartella di input '{INPUT_FOLDER}' non trovata.")
    audio_files = []

if not audio_files:
    print(f"Nessun file audio trovato in '{INPUT_FOLDER}'.")
else:
    print(f"Trovati {len(audio_files)} file audio da trascrivere.\n")

# Count how many inputs share each stem (case-insensitively) so that e.g.
# "talk.mp3" and "talk.wav" do not both write to "talk.txt" and overwrite
# each other's transcript.
stem_counts = {}
for name in audio_files:
    stem_key = os.path.splitext(name)[0].casefold()
    stem_counts[stem_key] = stem_counts.get(stem_key, 0) + 1

# --- Transcribe --------------------------------------------------------------
failed_files = []  # names of the files whose transcription raised an error

for i, filename in enumerate(audio_files, 1):
    audio_path = os.path.join(INPUT_FOLDER, filename)
    stem = os.path.splitext(filename)[0]
    if stem_counts[stem.casefold()] > 1:
        # Ambiguous stem: keep the original extension in the output name.
        output_filename = filename + ".txt"
    else:
        output_filename = stem + ".txt"
    output_path = os.path.join(OUTPUT_FOLDER, output_filename)

    print(f"[{i}/{len(audio_files)}] Trascrizione di: {filename}")

    try:
        audio, sr = librosa.load(audio_path, sr=SAMPLE_RATE)

        # Only the model inference is timed; decoding/resampling is excluded.
        start_time = time.time()
        result = pipe(audio)
        duration = time.time() - start_time

        with open(output_path, "w", encoding="utf-8") as out_file:
            out_file.write(result["text"])

        print(f"  ✓ Completato in {duration:.2f}s → {output_path}\n")

    except Exception as e:
        # Best-effort batch: one bad file must not abort the remaining ones,
        # but the failure is recorded so it shows up in the final summary.
        failed_files.append(filename)
        print(f"  ✗ Errore su {filename}: {e}\n")

if failed_files:
    print(f"File non trascritti ({len(failed_files)}): {', '.join(failed_files)}")

print("=== Trascrizione batch completata ===")

  from .autonotebook import tqdm as notebook_tqdm


Dispositivo: cuda


Loading weights: 100%|██████████| 947/947 [00:00<00:00, 1127.54it/s, Materializing param=model.encoder.layers.23.self_attn_layer_norm.weight]   
Passing `generation_config` together with generation-related arguments=({'return_timestamps'}) is deprecated and will be removed in future versions. Please pass either a `generation_config` object OR all generation parameters explicitly, but not both.


Trovati 1 file audio da trascrivere.

[1/1] Trascrizione di: Corvatta_Intervista.mp3


A custom logits processor of type <class 'transformers.generation.logits_process.SuppressTokensLogitsProcessor'> has been passed to `.generate()`, but it was also created in `.generate()`, given its parameterization. The custom <class 'transformers.generation.logits_process.SuppressTokensLogitsProcessor'> will take precedence. Please check the docstring of <class 'transformers.generation.logits_process.SuppressTokensLogitsProcessor'> to see related `.generate()` flags.
A custom logits processor of type <class 'transformers.generation.logits_process.SuppressTokensAtBeginLogitsProcessor'> has been passed to `.generate()`, but it was also created in `.generate()`, given its parameterization. The custom <class 'transformers.generation.logits_process.SuppressTokensAtBeginLogitsProcessor'> will take precedence. Please check the docstring of <class 'transformers.generation.logits_process.SuppressTokensAtBeginLogitsProcessor'> to see related `.generate()` flags.


  ✓ Completato in 198.43s → audio_output/Corvatta_Intervista.txt

=== Trascrizione batch completata ===
