In [None]:
import os
import pandas as pd
from src.test_evaluation import *
from src.readingTestFluencE_eval import *

In [2]:
# Read the children's reading-test results CSV into a DataFrame.
# The path is relative to the notebook's working directory.
data_path = 'sample_readingTestFluencE/readingTestFluencE_children.csv'
tests_df = pd.read_csv(filepath_or_buffer=data_path)

In [3]:
# Load the reference transcriptions and keep the 'Phonetic Transcription'
# value stored under index label 0 as the ground truth for this reading test.
df = pd.read_csv(filepath_or_buffer='transcriptions/readingTestFluencE_transcriptions.csv')
ground_truth = df.at[0, 'Phonetic Transcription']

In [4]:
# Preview the first five rows to sanity-check the loaded columns.
tests_df.head(n=5)

Unnamed: 0,File Name,Phonetic Transcription
0,readingTestFluencE_2BB671AA-2F6A-4346-8B76-F0C...,sɛ listwaʁ də məsjø pəti ki vi dɑ̃z yn vil mɛz...
1,readingTestFluencE_3B545E56-D802-4380-9993-21C...,sɛ listwaʁ də məsjø pəti ki vø ki vi dɑ̃z yn v...
2,readingTestFluencE_5C1C826F-E778-48C3-9170-6BF...,sɛ listwaʁ dœ̃ məsjø pəti ki vi dɑ̃ le vɛj de ...
3,readingTestFluencE_046E4FEB-E284-48D5-922E-616...,sɛ listwaʁ dœ̃ məsjø pəti ki vi dɑ̃z yn vjɛj m...
4,readingTestFluencE_75A80925-F8CF-463D-AFED-5CC...,sɛ listwaʁ də məsjø pəti ki vi dɑ̃z yn vjɛj mɛ...


In [5]:
# Score every child's phonetic transcription against the ground truth with each
# evaluation method returned by run_all_evaluations, printing one report per row.
row_separator = "=" * 30     # printed before each child's report
method_separator = "-" * 30  # printed before each method's result

for index, row in tests_df.iterrows():
    child_transcription = row['Phonetic Transcription']
    print(row_separator)
    print(f"Child transcription: {child_transcription}")

    results = run_all_evaluations(ground_truth, child_transcription)

    # Each entry is read through its 'method' and 'score' keys.
    for res in results:
        print(method_separator)
        print(f"Method: {res['method']}")
        print(f"Score: {res['score']}")


Child transcription: sɛ listwaʁ də məsjø pəti ki vi dɑ̃z yn vil mɛzɔ̃ syi o kɔʁ dyn vjø vilaʒ a mɛzɔ̃ ɛt ɑ̃tuʁ ɑ̃tuʁ dyn ʒaʁdɛ̃ avɛk yn baʁe il i a de kɔ̃kɔ̃bʁ de ʃu fʁɛze tu sɔʁt de də leɡym o fʁwa o fɔ̃ dy ʒaʁdɛ̃ lə pɔʁtwalɔ̃ ʁɛstɛ tuʒuʁ fɛʁme puʁ puʁ kə ʃjɛ̃ a pøse nə ʃɛʁ ləʃaʁ pa ʃjɛ̃ a
------------------------------
Method: Exact Word Match
Score: 15
------------------------------
Method: Levenshtein Word Match
Score: 16
------------------------------
Method: Chunked Alignment
Score: 12
------------------------------
Method: Sliding Window
Score: 9
Child transcription: sɛ listwaʁ də məsjø pəti ki vø ki vi dɑ̃z yn vjɛj mɛzɔ̃ sitye o kœʁ dœ̃ vjø vilaʒ la mɛzɔ̃ ɛt ɑ̃tuʁe dœ̃ ʒaʁdɛ̃ avɛk yn baʁjɛʁ il i a de kɔ̃kɔ̃bʁ de ʃu fʁize tu sɔʁt tut sɔʁt də leɡymz o fɔ̃ dy ʒaʁdɛ̃ lə pɔʁtiljɔ̃ ʁɛst tuʒuʁ fɛʁme puʁ kə ʃjɛ̃ a pisy nə seʃap pa ʃjɛ̃ a pys a py ɛme ɛm sə kuʃe pʁɛ də la pubɛl a lɔm
------------------------------
Method: Exact Word Match
Score: 7
------------------------------
Method: 

In [6]:
# Compare the scores obtained with several thresholds for a single transcription.
#
# Select the transcription explicitly instead of relying on a loop variable
# leaked by another cell: the last row of tests_df is the value a full
# top-to-bottom run leaves in `child_transcription`, so results are unchanged,
# but this cell no longer depends on which loop cell happened to run before it.
child_transcription = tests_df['Phonetic Transcription'].iloc[-1]
thresholds = [0.6, 0.7, 0.8, 0.9]

results = compare_methods_with_different_thresholds(ground_truth, child_transcription, thresholds)

# Print each returned entry (read through its 'threshold' and 'score' keys)
# so the thresholds can be compared side by side.
for result in results:
    print(f"Threshold: {result['threshold']} -> Score: {result['score']}")

Threshold: 0.6 -> Score: 7
Threshold: 0.7 -> Score: 2
Threshold: 0.8 -> Score: 2
Threshold: 0.9 -> Score: 2


In [7]:
# Compare the scores obtained with several window sizes and thresholds for a
# single transcription.
#
# Select the transcription explicitly instead of relying on a loop variable
# leaked by another cell: the last row of tests_df is the value a full
# top-to-bottom run leaves in `child_transcription`, so results are unchanged,
# but this cell no longer depends on which loop cell happened to run before it.
child_transcription = tests_df['Phonetic Transcription'].iloc[-1]
window_sizes = [3, 4, 5, 6, 7]
thresholds = [0.6, 0.7, 0.8, 0.9]

results = compare_methods_with_different_parameters(ground_truth, child_transcription, window_sizes, thresholds)

# Print each returned entry (read through its 'window_size', 'threshold' and
# 'score' keys) so the parameter settings can be compared side by side.
for result in results:
    print(f"Window Size: {result['window_size']} | Threshold: {result['threshold']} -> Score: {result['score']}")

Window Size: 3 | Threshold: 0.6 -> Score: 1
Window Size: 3 | Threshold: 0.7 -> Score: 1
Window Size: 3 | Threshold: 0.8 -> Score: 1
Window Size: 3 | Threshold: 0.9 -> Score: 1
Window Size: 4 | Threshold: 0.6 -> Score: 1
Window Size: 4 | Threshold: 0.7 -> Score: 1
Window Size: 4 | Threshold: 0.8 -> Score: 1
Window Size: 4 | Threshold: 0.9 -> Score: 1
Window Size: 5 | Threshold: 0.6 -> Score: 1
Window Size: 5 | Threshold: 0.7 -> Score: 1
Window Size: 5 | Threshold: 0.8 -> Score: 1
Window Size: 5 | Threshold: 0.9 -> Score: 1
Window Size: 6 | Threshold: 0.6 -> Score: 1
Window Size: 6 | Threshold: 0.7 -> Score: 1
Window Size: 6 | Threshold: 0.8 -> Score: 1
Window Size: 6 | Threshold: 0.9 -> Score: 1
Window Size: 7 | Threshold: 0.6 -> Score: 1
Window Size: 7 | Threshold: 0.7 -> Score: 1
Window Size: 7 | Threshold: 0.8 -> Score: 1
Window Size: 7 | Threshold: 0.9 -> Score: 1


In [33]:
# Evaluate every child's phonetic transcription against the ground truth with
# eval_2 and print the score followed by the per-word state it returns.
row_separator = "=" * 30  # printed before each child's report

for index, row in tests_df.iterrows():
    child_transcription = row['Phonetic Transcription']
    print(row_separator)
    print(f"Child transcription: {child_transcription}")

    # eval_2 yields a pair: the word-state value first, then the score.
    evaluation = eval_2(ground_truth, child_transcription)
    word_state, score = evaluation
    print(f"Score: {score}")
    print(word_state)

Child transcription: sɛ listwaʁ də məsjø pəti ki vi dɑ̃z yn vil mɛzɔ̃ syi o kɔʁ dyn vjø vilaʒ a mɛzɔ̃ ɛt ɑ̃tuʁ ɑ̃tuʁ dyn ʒaʁdɛ̃ avɛk yn baʁe il i a de kɔ̃kɔ̃bʁ de ʃu fʁɛze tu sɔʁt de də leɡym o fʁwa o fɔ̃ dy ʒaʁdɛ̃ lə pɔʁtwalɔ̃ ʁɛstɛ tuʒuʁ fɛʁme puʁ puʁ kə ʃjɛ̃ a pøse nə ʃɛʁ ləʃaʁ pa ʃjɛ̃ a
Score: 47
[('sɛ', 'Correct'), ('listwaʁ', 'Correct'), ('də', 'Correct'), ('məsjø', 'Correct'), ('pəti', 'Correct'), ('ki', 'Correct'), ('vi', 'Correct'), ('dɑ̃z', 'Correct'), ('yn', 'Correct'), ('vjɛj', 'Incorrect'), ('mɛzɔ̃', 'Correct'), ('sitye', 'Incorrect'), ('o', 'Correct'), ('kœʁ', 'Incorrect'), ('dœ̃', 'Incorrect'), ('vjø', 'Correct'), ('vilaʒ', 'Correct'), ('la', 'Incorrect'), ('mɛzɔ̃', 'Correct'), ('ɛt', 'Correct'), ('ɑ̃tuʁe', 'Incorrect'), ('dœ̃', 'Incorrect'), ('ʒaʁdɛ̃', 'Correct'), ('avɛk', 'Correct'), ('yn', 'Correct'), ('baʁjɛʁ', 'Incorrect'), ('il', 'Correct'), ('i', 'Correct'), ('a', 'Correct'), ('de', 'Correct'), ('kɔ̃kɔ̃bʁ', 'Correct'), ('de', 'Correct'), ('ʃu', 'Correct'), ('fʁize