In [1]:
import torch
import torchaudio
import soundfile as sf
import os
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

def transcribe_audio(file_path):
    # Charger le mod√®le et le processeur
    model_name = "facebook/wav2vec2-large-960h"
    processor = Wav2Vec2Processor.from_pretrained(model_name)
    model = Wav2Vec2ForCTC.from_pretrained(model_name)
    model.eval()
    
    # V√©rifier le format du fichier
    try:
        torchaudio.set_audio_backend("sox_io")  # Forcer sox_io si disponible
        waveform, sample_rate = torchaudio.load(file_path)
    except RuntimeError:
        print(f"‚ö†Ô∏è Torchaudio ne peut pas charger {file_path}. Tentative avec soundfile...")
        try:
            waveform, sample_rate = sf.read(file_path, dtype="float32")
            waveform = torch.tensor(waveform).unsqueeze(0)  # Ajouter une dimension batch
        except Exception as e:
            print(f"‚ùå Impossible de charger l'audio : {e}")
            return None
    
    # V√©rifier si l'audio est en 16kHz, sinon le r√©√©chantillonner
    if sample_rate != 16000:
        transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
        waveform = transform(waveform)
    
    # Convertir en tenseur et pr√©traiter
    input_values = processor(waveform.squeeze().numpy(), return_tensors="pt", sampling_rate=16000).input_values
    
    # Pr√©diction
    with torch.no_grad():
        logits = model(input_values).logits
    
    # D√©coder la sortie
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]
    
    return transcription

# Exemple d'utilisation
if __name__ == "__main__":
    directory = "./adversarial_dataset-B/Adversarial-Examples/Adversarial-Examples/down/go"  # Remplace par ton dossier
    
    for file_name in os.listdir(directory):
        file_path = os.path.join(directory, file_name)
        if file_name.endswith(".wav"):  # V√©rifier que c'est un fichier audio
            print(f"üìÇ Traitement du fichier : {file_name}")
            result = transcribe_audio(file_path)
            if result:
                print("Transcription:", result)
            else:
                print("‚ùå √âchec de la transcription pour", file_name)


  from .autonotebook import tqdm as notebook_tqdm


üìÇ Traitement du fichier : ae04cdbe_nohash_1.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  torchaudio.set_audio_backend("sox_io")  # Forcer sox_io si disponible


Transcription: GO
üìÇ Traitement du fichier : d91a159e_nohash_0.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: GO
üìÇ Traitement du fichier : e9901cf0_nohash_0.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: WHO
üìÇ Traitement du fichier : c24d96eb_nohash_0.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: GO
üìÇ Traitement du fichier : cb8f8307_nohash_0.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: GO
üìÇ Traitement du fichier : 6c968bd9_nohash_2.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: GO
üìÇ Traitement du fichier : 6c429c7b_nohash_0.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: THOUGH
üìÇ Traitement du fichier : 4a4e28f1_nohash_0.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: THOUGH
üìÇ Traitement du fichier : e1aa22e4_nohash_0.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: GO
üìÇ Traitement du fichier : f839238a_nohash_2.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: GO
üìÇ Traitement du fichier : 6071a214_nohash_0.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: NO
üìÇ Traitement du fichier : 004ae714_nohash_1.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: GO
üìÇ Traitement du fichier : 0132a06d_nohash_4.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: GO
üìÇ Traitement du fichier : f3cee168_nohash_1.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: BELL
üìÇ Traitement du fichier : f0ac2522_nohash_1.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: GOW
üìÇ Traitement du fichier : f804cbb3_nohash_1.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: O
üìÇ Traitement du fichier : 1c6e5447_nohash_1.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: GO
üìÇ Traitement du fichier : a0f93943_nohash_1.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: GO
üìÇ Traitement du fichier : f264e0df_nohash_1.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: GO
üìÇ Traitement du fichier : ec201020_nohash_0.wav


Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-large-960h and are newly initialized: ['wav2vec2.masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Transcription: GO
