In [1]:
import io
from pydub import AudioSegment
from IPython.display import Audio, display
import ast
import re
from datetime import datetime, timezone
import ipywidgets as widgets
import pandas as pd
import numpy as np

In [2]:
from src.audio_processing import *
from src.data_processing import *
from src.ui_tools import *

In [3]:
# Load the cleaned data
data_cleaned = pd.read_csv('data/df_test_cleaned.csv')
data_cleaned.head()

Unnamed: 0,id,dateOfTest,testParameters,parametersType,testResults,testType,experimentResults,childId,evaluationResults,day,extracted_subtype
0,2AACA972-A244-4E75-A4AD-2E052CD5A4B1,2024-04-29 06:37:20.368536+00:00,"{'duration': 120.0, 'textSize': 25.0, 'nonWord...",experiment,{'recording': b'\x00\x00\x00\x1cftypM4A \x00\x...,readingTestNonWords,"{'experimentPhase': 'preTestPhase', 'experimen...",0ED01FB8-8212-4D4A-BE3E-D6B6E591EE4E,"{'wordsState': [{'Femme': 'Incorrect'}, {'Hier...",2024-04-29 00:00:00+00:00,Irréguliers_readingTestNonWords
1,6B905631-C4A5-4E92-97B3-B932FD8DD73F,2024-04-29 07:05:14.552932+00:00,"{'duration': 80.0, 'textSize': 20.0, 'nonWordS...",experiment,{'recording': b'\x00\x00\x00\x1cftypM4A \x00\x...,readingTestNonWords,"{'experimentPhase': 'postTestPhase', 'experime...",0ED01FB8-8212-4D4A-BE3E-D6B6E591EE4E,"{'wordsState': [{'Il': 'Correct'}, {'un': 'Cor...",2024-04-29 00:00:00+00:00,LUM_readingTestNonWords
2,79055215-1979-42D3-9B26-B9C6DD935D83,2024-04-29 06:35:43.691463+00:00,"{'duration': 80.0, 'textSelected': {'text': ""C...",experiment,{'recording': b'\x00\x00\x00\x1cftypM4A \x00\x...,readingTestFluencE,"{'experimentPhase': 'preTestPhase', 'experimen...",0ED01FB8-8212-4D4A-BE3E-D6B6E591EE4E,"{'wordsState': [{""C'est"": 'Incorrect'}, {""l'hi...",2024-04-29 00:00:00+00:00,None_readingTestFluencE
3,F639F673-A88D-49C7-AC75-5360FB939130,2024-04-29 06:51:14.092154+00:00,"{'fileName': 'bell_test_data', 'duration': 120.0}",experiment,{'selectedItems': [{'positionY': 0.06677796327...,testBell,"{'experimentPhase': 'preTestPhase', 'experimen...",0ED01FB8-8212-4D4A-BE3E-D6B6E591EE4E,,2024-04-29 00:00:00+00:00,None_testBell
4,BA62E2B5-EB3A-4B6A-9B5E-7A488B5E015F,2024-04-29 06:39:15.372241+00:00,"{'duration': 120.0, 'textSize': 25.0, 'nonWord...",experiment,{'recording': b'\x00\x00\x00\x1cftypM4A \x00\x...,readingTestNonWords,"{'experimentPhase': 'preTestPhase', 'experimen...",0ED01FB8-8212-4D4A-BE3E-D6B6E591EE4E,"{'wordsState': [{'Sande': 'Correct'}, {'Chon':...",2024-04-29 00:00:00+00:00,Pseudomots_readingTestNonWords


In [4]:
# Apply conversion functions to testResults and evaluationResults columns
data_cleaned['testResults'] = data_cleaned['testResults'].apply(lambda x: convert_str_to_dct_eval(x))
data_cleaned['evaluationResults'] = data_cleaned['evaluationResults'].apply(lambda x: convert_str_to_dct_eval(x))

In [5]:
# Filter out rows where 'testResults' does not contain 'recording'
data_cleaned = data_cleaned[data_cleaned['testResults'].apply(lambda x: isinstance(x, dict) and 'recording' in x)]

In [6]:
# Extract unique test types
test_types = data_cleaned['testType'].unique()

# Dictionary to store the new data frames
data_cleaned_tests = {}

# Iterate over test types and create data frames dynamically
for test_type in test_types:
    df_name = f"data_cleaned_{test_type}"
    data_cleaned_tests[df_name] = data_cleaned[data_cleaned['testType'] == test_type]
    
    # Print the number of rows
    print(f"Number of tests for {test_type}: {data_cleaned_tests[df_name].shape[0]}")

Number of tests for readingTestNonWords: 211
Number of tests for readingTestFluencE: 76
Number of tests for testPhoneme: 304
Number of tests for readingTestRAN: 96


In [7]:
# Extract recordings and their corresponding evaluation results (e.g., 'wordsState')
recordings = data_cleaned[data_cleaned['testType'] == 'readingTestNonWords']['testResults'].apply(
    lambda x: x['recording'] if 'recording' in x else None).dropna().tolist()

evaluation_results = data_cleaned[data_cleaned['testType'] == 'readingTestNonWords']['evaluationResults'].apply(
    lambda x: x['wordsState'] if 'wordsState' in x else None).dropna().tolist()

# Create the interactive audio player with evaluation results
create_audio_player_with_results(recordings, evaluation_results)

HBox(children=(Button(description='Previous', style=ButtonStyle()), IntText(value=0, description='Index:'), Bu…

Output()

Output()

In [8]:
# Extract recordings and their corresponding evaluation results (e.g., 'wordsState')
recordings = data_cleaned[data_cleaned['testType'] == 'readingTestFluencE']['testResults'].apply(
    lambda x: x['recording'] if 'recording' in x else None).dropna().tolist()

evaluation_results = data_cleaned[data_cleaned['testType'] == 'readingTestFluencE']['evaluationResults'].apply(
    lambda x: x['wordsState'] if 'wordsState' in x else None).dropna().tolist()

# Create the interactive audio player with evaluation results
create_audio_player_with_results(recordings, evaluation_results)

HBox(children=(Button(description='Previous', style=ButtonStyle()), IntText(value=0, description='Index:'), Bu…

Output()

Output()

In [9]:
# Extract recordings and their corresponding evaluation results (e.g., 'wordsState')
recordings = data_cleaned[data_cleaned['testType'] == 'testPhoneme']['testResults'].apply(
    lambda x: x['recording'] if 'recording' in x else None).dropna().tolist()

evaluation_results = data_cleaned[data_cleaned['testType'] == 'testPhoneme']['evaluationResults'].apply(
    lambda x: x['wordsState'] if 'wordsState' in x else None).dropna().tolist()

# Create the interactive audio player with evaluation results
create_audio_player_with_results(recordings, evaluation_results)

HBox(children=(Button(description='Previous', style=ButtonStyle()), IntText(value=0, description='Index:'), Bu…

Output()

Output()

In [10]:
# Extract recordings and their corresponding evaluation results (e.g., 'wordsState')
recordings = data_cleaned[data_cleaned['testType'] == 'readingTestRAN']['testResults'].apply(
    lambda x: x['recording'] if 'recording' in x else None).dropna().tolist()

evaluation_results = data_cleaned[data_cleaned['testType'] == 'readingTestRAN']['evaluationResults'].apply(
    lambda x: x['wordsState'] if 'wordsState' in x else None).dropna().tolist()

# Create the interactive audio player with evaluation results
create_audio_player_with_results(recordings, evaluation_results)

HBox(children=(Button(description='Previous', style=ButtonStyle()), IntText(value=0, description='Index:'), Bu…

Output()

Output()