In [1]:
import torch
from transformers import AutoModelForCTC, Wav2Vec2Processor
import soundfile as sf
import numpy as np
import os
import pandas as pd

In [2]:
# Initialize the model and processor
MODEL_ID = "Cnam-LMSSC/wav2vec2-french-phonemizer"
model = AutoModelForCTC.from_pretrained(MODEL_ID)
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)

In [3]:

# Folder containing the audio files
audio_folder = 'converted_wav_files/'

# List all the .wav files in the folder
audio_files = [f for f in os.listdir(audio_folder) if f.endswith('.wav')]

# Initialize an empty list to store the results
transcriptions = []

# Process each file
for audio_file in audio_files:
    # Load the audio file
    audio_path = os.path.join(audio_folder, audio_file)
    audio, _ = sf.read(audio_path)
    
    # Preprocess the audio and prepare the inputs for the model
    inputs = processor(np.array(audio), sampling_rate=16_000., return_tensors="pt")
    
    # Get the model's predictions
    with torch.no_grad():
        logits = model(**inputs).logits

    # Decode the predictions to get the phonetic transcription
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]
    
    # Store the result (file name and transcription)
    transcriptions.append([audio_file, transcription])

In [4]:
# Convert the list of results into a DataFrame
df = pd.DataFrame(transcriptions, columns=['File Name', 'Phonetic Transcription'])

# Print the DataFrame or save it as needed
print(df)

# Save the DataFrame to a CSV file
df.to_csv('converted_wav_files/phonetic_transcriptions.csv', index=False)

                            File Name  \
0  readingTestFluencE_2_converted.wav   
1  readingTestFluencE_6_converted.wav   
2  readingTestFluencE_1_converted.wav   
3  readingTestFluencE_5_converted.wav   
4  readingTestFluencE_8_converted.wav   
5  readingTestFluencE_9_converted.wav   
6  readingTestFluencE_4_converted.wav   
7  readingTestFluencE_0_converted.wav   
8  readingTestFluencE_7_converted.wav   
9  readingTestFluencE_3_converted.wav   

                              Phonetic Transcription  
0  listwaʁ dœ̃ məsø pətitivi dɑ̃ levɛj de mɛzɔ̃sy...  
1  se istwaʁ də lydy dətitivi dɑ̃ yzjœvɛzɔ̃ sitye...  
2  filistwa dəlii pəti iʁy kii dɑsilɛ lizɔ̃ styi ...  
3  sɛ listwaʁ də məsøpətikib dɑ̃z yn vjɛj mɛzɔ̃ s...  
4  sez istwa də məsø pəti kivi dɑ̃z yn jɛ mɛzɔ̃ s...  
5  se listwaʁ də məsi pətiti dɔ̃ il leɔ̃ sity e o...  
6  sə listwaʁ də məsø pəti ki vi dɑ̃z yn vjɛ mɛzɔ...  
7  filistwa dəlii pəti iʁy kii dɑsilɛ lizɔ̃ styi ...  
8  sɛt istwa də lɔse pətiti f dɔ̃y fe mzɔ̃sitye o... 