In [1]:
import os
import ast
import torch
from transformers import AutoModelForCTC, Wav2Vec2Processor
import numpy as np
import pandas as pd
import soundfile as sf
from src.text_processing import *
from src.data_processing import *
from src.ui_tools import *
from src.audio_processing import *

# Add espeak's shared library directory to the dynamic-library search path.
# The directory is prepended to whatever DYLD_LIBRARY_PATH already contains
# instead of replacing it, and re-running the cell does not add it twice.
ESPEAK_LIB_DIR = '/opt/homebrew/lib'
_dyld_entries = [
    entry
    for entry in os.environ.get('DYLD_LIBRARY_PATH', '').split(os.pathsep)
    if entry
]
if ESPEAK_LIB_DIR not in _dyld_entries:
    _dyld_entries.insert(0, ESPEAK_LIB_DIR)
os.environ['DYLD_LIBRARY_PATH'] = os.pathsep.join(_dyld_entries)

In [2]:
# Read the cleaned test data from disk
data_path = 'data/df_test_cleaned.csv'
data_cleaned = pd.read_csv(data_path)

# Run every value of the two result columns through convert_str_to_dct_eval
# (presumably turns the stringified dicts back into dicts - confirm in src).
for results_column in ('testResults', 'evaluationResults'):
    data_cleaned[results_column] = data_cleaned[results_column].apply(convert_str_to_dct_eval)

In [3]:
# IDs of the 10 selected readingTestFluencE tests.
# They are compared against the 'id' column of data_cleaned to pick the
# rows analysed in the rest of the notebook.
tests_id = [
    '2BB671AA-2F6A-4346-8B76-F0C89C236390',
    '3B545E56-D802-4380-9993-21C11066B12E',
    '5C1C826F-E778-48C3-9170-6BF943175984',
    '046E4FEB-E284-48D5-922E-616DA7651F02',
    '75A80925-F8CF-463D-AFED-5CC399848CC2',
    '102DCD09-43EA-434D-A590-0FA5C7C7C1B3',
    '098522E8-2203-425E-85E5-5809D5B0B523',
    '79055215-1979-42D3-9B26-B9C6DD935D83',
    'ABD81BE7-7629-4816-8241-7ECBF32DFFFA',
    'DC79B554-B33E-4E01-83BC-3B97798C5F97'
]

In [4]:
# Take the parameters of the first selected test and pull out the text
# the child was asked to read.
# NOTE(review): only tests_id[0] is inspected - this assumes every selected
# test uses the same text; confirm.
first_test_parameters = data_cleaned.loc[data_cleaned['id'] == tests_id[0], 'testParameters']
test = first_test_parameters.iloc[0]

# The parameters are stored as the string form of a Python literal
test_dict = ast.literal_eval(test)
selected_text = test_dict['textSelected']['text']

save_phonetic_transcription_to_csv(
    selected_text,
    test_type='readingTestFluencE',
    folder='transcriptions',
    file_name='readingTestFluencE_transcriptions.csv',
)

Phonetic transcription saved to transcriptions/readingTestFluencE_transcriptions.csv


In [5]:
# Keep only the rows whose id is one of the selected tests.
# Series.isin does the membership test in one vectorised call instead of
# running a Python-level list lookup for every row.
readingTests = data_cleaned[data_cleaned['id'].isin(tests_id)]

In [6]:
# Extract, per selected test, the recording and its evaluation result
# ('wordsState'); a missing key yields None for that row.
recording_by_row = readingTests['testResults'].apply(
    lambda result: result['recording'] if 'recording' in result else None)

words_state_by_row = readingTests['evaluationResults'].apply(
    lambda result: result['wordsState'] if 'wordsState' in result else None)

# Filter both series with the SAME mask. Dropping missing values from each
# series independently would shift every later pairing as soon as one test
# lacks only its recording or only its evaluation, so position i of one list
# would no longer describe position i of the other.
has_both = recording_by_row.notna() & words_state_by_row.notna()
recordings = recording_by_row[has_both].tolist()
evaluation_results = words_state_by_row[has_both].tolist()

# Create the interactive audio player with evaluation results
create_audio_player_with_results(recordings, evaluation_results)

HBox(children=(Button(description='Previous', style=ButtonStyle()), IntText(value=0, description='Index:'), Bu…

Output()

Output()

In [7]:
# Identifier of the pretrained phonemizer checkpoint
MODEL_ID = "Cnam-LMSSC/wav2vec2-french-phonemizer"

# Load the processor and the CTC model from that same checkpoint
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = AutoModelForCTC.from_pretrained(MODEL_ID)

In [None]:
# Folder containing the audio files
audio_folder = 'converted_wav_files/'

# Sampling rate (Hz) that is declared to the processor for every recording
TARGET_SAMPLE_RATE = 16_000

# List all the .wav files in the folder. os.listdir() returns entries in
# arbitrary order, so sort them to keep the row order of the results stable
# from one run to the next.
audio_files = sorted(f for f in os.listdir(audio_folder) if f.endswith('.wav'))

# One [file name, phonetic transcription] entry per processed file
transcriptions = []

# Process each file
for audio_file in audio_files:
    # Load the audio file together with its real sampling rate
    audio_path = os.path.join(audio_folder, audio_file)
    audio, sample_rate = sf.read(audio_path)

    # Refuse files whose rate differs from the one declared to the processor:
    # labelling them as 16 kHz anyway would silently yield a wrong transcription.
    if sample_rate != TARGET_SAMPLE_RATE:
        raise ValueError(
            f"{audio_path} is sampled at {sample_rate} Hz, expected "
            f"{TARGET_SAMPLE_RATE} Hz; resample the file before transcribing it."
        )

    # sf.read returns a (frames, channels) array for multi-channel files, which
    # the processor would interpret as a batch; average the channels to mono.
    if audio.ndim > 1:
        audio = audio.mean(axis=1)

    # Preprocess the audio and prepare the inputs for the model
    inputs = processor(audio, sampling_rate=TARGET_SAMPLE_RATE, return_tensors="pt")

    # Get the model's predictions (no gradients needed for inference)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Greedy decoding: most likely token per frame, then decode to the
    # phonetic transcription of the single item in the batch
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]

    # Store the result (file name and transcription)
    transcriptions.append([audio_file, transcription])

In [9]:
# Convert the results to a DataFrame and save them to a CSV file
output_csv = 'transcriptions/readingTestFluencE_children.csv'
transcriptions_df = pd.DataFrame(transcriptions, columns=['File Name', 'Phonetic Transcription'])

# Make sure the destination folder exists so the transcriptions computed by
# the long-running loop are not lost to a missing-directory error on save.
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
transcriptions_df.to_csv(output_csv, index=False)