In [1]:
import functools

import torch
import torchaudio
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
from transformers import (
    Wav2Vec2Config,
    Wav2Vec2ForCTC,
    Wav2Vec2CTCTokenizer,
    Wav2Vec2Processor,
    Wav2Vec2FeatureExtractor,
    TrainingArguments,
    Trainer,
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Sampling rate (Hz) that the feature extractor is configured with and that
# every clip is resampled to before inference.
_TARGET_SAMPLE_RATE = 16000


@functools.lru_cache(maxsize=1)
def _load_model_and_processor(modelpath, tokenizerpath):
    """Load the CTC model and build its processor, caching the most recent pair.

    Callers transcribe many clips with the same checkpoint, so the loaded
    objects are memoised on ``(modelpath, tokenizerpath)``. ``maxsize=1`` keeps
    only the latest checkpoint alive so switching checkpoints does not
    accumulate models in memory.

    Args:
        modelpath: Checkpoint directory passed to ``Wav2Vec2ForCTC.from_pretrained``.
        tokenizerpath: Directory passed to ``Wav2Vec2CTCTokenizer.from_pretrained``.

    Returns:
        A ``(model, processor)`` tuple; the model is already in eval mode.
    """
    model = Wav2Vec2ForCTC.from_pretrained(modelpath)
    model.eval()

    tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(tokenizerpath)

    # NOTE(review): the feature extractor is built from library defaults (only
    # the sampling rate is set) instead of being loaded from the checkpoint.
    # If training used different feature-extractor settings, inference inputs
    # will not match -- confirm against the training script.
    feature_extractor = Wav2Vec2FeatureExtractor(sampling_rate=_TARGET_SAMPLE_RATE)

    processor = Wav2Vec2Processor(feature_extractor=feature_extractor, tokenizer=tokenizer)
    return model, processor


def transcribe_audio(audio_path,modelpath,tokenizerpath):
    """Transcribe one audio file with a Wav2Vec2 CTC checkpoint.

    The clip is loaded with torchaudio, resampled to 16 kHz when its native
    rate differs, averaged across channels when it has more than one, and
    decoded by taking the argmax token at every output frame.

    Args:
        audio_path: Path of the audio file to transcribe.
        modelpath: Checkpoint directory of the ``Wav2Vec2ForCTC`` model.
        tokenizerpath: Directory of the ``Wav2Vec2CTCTokenizer`` files.

    Returns:
        The decoded transcription string. The same text is also printed,
        prefixed with ``"Transcription:"``.
    """
    # Loading is cached, so repeated calls with the same checkpoint reuse it.
    model, processor = _load_model_and_processor(modelpath, tokenizerpath)

    waveform, sample_rate = torchaudio.load(audio_path)

    # Resample to 16 kHz if the audio has a different sampling rate
    if sample_rate != _TARGET_SAMPLE_RATE:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sample_rate, new_freq=_TARGET_SAMPLE_RATE
        )
        waveform = resampler(waveform)

    # Convert to mono if the audio has multiple channels
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0)

    # Prepare the input for the model
    input_values = processor(
        waveform.squeeze(), sampling_rate=_TARGET_SAMPLE_RATE, return_tensors="pt"
    ).input_values

    # Perform inference without tracking gradients
    with torch.no_grad():
        logits = model(input_values).logits

    # Get the predicted token IDs
    predicted_ids = torch.argmax(logits, dim=-1)

    # Decode the token IDs into a transcription (a single-item batch)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    text = transcription[0]

    # Print the transcription
    print("Transcription:", text)
    return text


In [10]:
import os
import torchaudio

# Inputs for this run: clip directory, checkpoint under test, and its tokenizer.
audiopath = "/fhome/amlai08/ALEX/ca3/clips/"
modelpath = "/fhome/amlai08/ALEX/run_smaller/wav2vec2-catalan-scratch-small/checkpoint-1535"
tokenizerpath = "/fhome/amlai08/ALEX/run_smaller/wav2vec2-catalan-tokenizer"

# Only the first 15 directory entries are considered; entries that are not
# regular files are skipped but still count towards the 15.
for entry in os.listdir(audiopath)[:15]:
    candidate = os.path.join(audiopath, entry)
    if not os.path.isfile(candidate):
        continue
    print(candidate)
    transcribe_audio(candidate, modelpath, tokenizerpath)


/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39708363.mp3
Transcription: .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39647789.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39867149.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39855756.mp3
Transcription: .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40181732.mp3
Transcription: Q
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40077394.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39744178.mp3
Transcription: Q
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39970966.mp3
Transcription: .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39930596.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40173360.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40162979.mp3
Transcription: .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39860458.mp3
Transcription: .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39846015.mp3
Transcription: .
/fhome/amlai08/AL

In [11]:
import os
import torchaudio

# Inputs for this run: clip directory, checkpoint under test, and its tokenizer.
audiopath = "/fhome/amlai08/ALEX/ca3/clips/"
modelpath = "/fhome/amlai08/ALEX/run_smaller/wav2vec2-catalan-scratch-small2/checkpoint-4368"
tokenizerpath = "/fhome/amlai08/ALEX/run_smaller/wav2vec2-catalan-tokenizer"

# Only the first 15 directory entries are considered; entries that are not
# regular files are skipped but still count towards the 15.
for entry in os.listdir(audiopath)[:15]:
    candidate = os.path.join(audiopath, entry)
    if not os.path.isfile(candidate):
        continue
    print(candidate)
    transcribe_audio(candidate, modelpath, tokenizerpath)


/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39708363.mp3
Transcription: E .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39647789.mp3
Transcription: Q .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39867149.mp3
Transcription: Qs.
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39855756.mp3
Transcription: E .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40181732.mp3
Transcription: Q.
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40077394.mp3
Transcription: E .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39744178.mp3
Transcription: Q .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39970966.mp3
Transcription: E s .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39930596.mp3
Transcription: E.
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40173360.mp3
Transcription: E .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40162979.mp3
Transcription: Q .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39860458.mp3
Transcription: Es .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39846015.mp3
Tran

In [12]:
import os
import torchaudio

# Inputs for this run: clip directory, checkpoint under test, and its tokenizer.
audiopath = "/fhome/amlai08/ALEX/ca3/clips/"
modelpath = "/fhome/amlai08/ALEX/run_smaller/wav2vec2-catalan-scratch-small2/checkpoint-4504"
tokenizerpath = "/fhome/amlai08/ALEX/run_smaller/wav2vec2-catalan-tokenizer"

# Only the first 15 directory entries are considered; entries that are not
# regular files are skipped but still count towards the 15.
for entry in os.listdir(audiopath)[:15]:
    candidate = os.path.join(audiopath, entry)
    if not os.path.isfile(candidate):
        continue
    print(candidate)
    transcribe_audio(candidate, modelpath, tokenizerpath)


/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39708363.mp3
Transcription: E a a .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39647789.mp3
Transcription: Q .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39867149.mp3
Transcription: Q s.
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39855756.mp3
Transcription: Es .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40181732.mp3
Transcription: Q .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40077394.mp3
Transcription: E s.
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39744178.mp3
Transcription: Q .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39970966.mp3
Transcription: E s .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39930596.mp3
Transcription: E.
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40173360.mp3
Transcription: E s.
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40162979.mp3
Transcription: Q .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39860458.mp3
Transcription: Es .
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39846015

In [13]:
import os
import torchaudio

# Inputs for this run: clip directory, checkpoint under test, and its tokenizer.
audiopath = "/fhome/amlai08/ALEX/ca3/clips/"
modelpath = "/fhome/amlai08/ALEX/run/wav2vec2-catalan-scratch/checkpoint-7368"
tokenizerpath = "/fhome/amlai08/ALEX/run/wav2vec2-catalan-tokenizer"

# Only the first 15 directory entries are considered; entries that are not
# regular files are skipped but still count towards the 15.
for entry in os.listdir(audiopath)[:15]:
    candidate = os.path.join(audiopath, entry)
    if not os.path.isfile(candidate):
        continue
    print(candidate)
    transcribe_audio(candidate, modelpath, tokenizerpath)


/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39708363.mp3
Transcription: a
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39647789.mp3
Transcription: a
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39867149.mp3
Transcription: a
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39855756.mp3
Transcription: a
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40181732.mp3
Transcription: a
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40077394.mp3
Transcription: a
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39744178.mp3
Transcription: a
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39970966.mp3
Transcription: a
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39930596.mp3
Transcription: a
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40173360.mp3
Transcription: a
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40162979.mp3
Transcription: a
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39860458.mp3
Transcription: a
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39846015.mp3
Transcription: a
/fhome/amlai

In [5]:
import os
import torchaudio

# Inputs for this run: clip directory, checkpoint under test, and its tokenizer.
audiopath = "/fhome/amlai08/ALEX/ca3/clips/"
modelpath = "/fhome/amlai08/ALEX/runtraininference/wav2vec2-traininf/checkpoint-273"
tokenizerpath = "/fhome/amlai08/ALEX/runtraininference/wav2vec2-catalan-tokenizer"

# Only the first 15 directory entries are considered; entries that are not
# regular files are skipped but still count towards the 15.
for entry in os.listdir(audiopath)[:15]:
    candidate = os.path.join(audiopath, entry)
    if not os.path.isfile(candidate):
        continue
    print(candidate)
    transcribe_audio(candidate, modelpath, tokenizerpath)


/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39708363.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39647789.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39867149.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39855756.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40181732.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40077394.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39744178.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39970966.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39930596.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40173360.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40162979.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39860458.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39846015.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/c

In [8]:
import os
import torchaudio

# Inputs for this run: clip directory, checkpoint under test, and its tokenizer.
audiopath = "/fhome/amlai08/ALEX/ca3/clips/"
modelpath = "/fhome/amlai08/ALEX/runtraininference/wav2vec2-traininf2/checkpoint-1092"
tokenizerpath = "/fhome/amlai08/ALEX/runtraininference/wav2vec2-catalan-tokenizer"

# Only the first 15 directory entries are considered; entries that are not
# regular files are skipped but still count towards the 15.
for entry in os.listdir(audiopath)[:15]:
    candidate = os.path.join(audiopath, entry)
    if not os.path.isfile(candidate):
        continue
    print(candidate)
    transcribe_audio(candidate, modelpath, tokenizerpath)


/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39708363.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39647789.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39867149.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39855756.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40181732.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40077394.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39744178.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39970966.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39930596.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40173360.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_40162979.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39860458.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/clips/common_voice_ca_39846015.mp3
Transcription: 
/fhome/amlai08/ALEX/ca3/c