In [10]:
import contextlib
import ipynb.fs
import os
import math
import pandas as pd
import praat_formants_python as pfp
import wave

from .defs.formant_vowel_prediction import vowel_prediction
from .defs.vowel_feedback_function import vowel_feedback
from .defs.extract_formant import extract_formant

from pathlib import Path
from sklearn.metrics import classification_report

In [32]:
# Evaluate the formant-based vowel predictor on every clip in the InterFra
# folder and print classification reports per speaker group and overall.
data_folder = Path('../../InterFra/extracted')

# Multi-character vowel codes found in file names -> single-character labels.
vowels_2 = {'oe': '9', 'OE': '2', 'aN': 'ã', 'EN': 'ẽ', 'ON': 'õ'}
# Single-character vowel labels, in the order they are searched for below.
vowels = ['a', 'e', 'E', 'i', 'o', 'O', 'u', 'y', '9', '2', 'ã', 'ẽ', 'õ']

# Ground-truth / predicted labels, split by native vs. non-native speaker ...
y_true_nat = []
y_pred_nat = []
y_true_nnat = []
y_pred_nnat = []

# ... and by female vs. male speaker.
y_true_f = []
y_pred_f = []
y_true_m = []
y_pred_m = []

# Loop-invariant helpers, built once instead of once per file.
strip_digits = str.maketrans('', '', '0123456789')
ref_data = pd.read_csv('vowel_formants.txt', header=0, sep=' ')

for file in os.listdir(data_folder):
    path = (data_folder / file).as_posix()

    # Objects returned by wave.open() are context managers, so no
    # contextlib.closing() wrapper is needed.
    with wave.open(path, 'r') as wav_file:
        frames = wav_file.getnframes()
        rate = wav_file.getframerate()
    duration = frames / float(rate)

    try:
        # The pitch floor handed to extract_formant is ceil(3 / duration).
        f1, f2, f3, _f4 = extract_formant(
            path,
            start_time=0,
            end_time=duration,
            f0min=math.ceil(3 / duration),
        )
    except ZeroDivisionError:
        # 3 / duration fails for a clip with zero frames; such clips are
        # skipped (as is any ZeroDivisionError raised by extract_formant).
        continue

    # Files whose name starts with 'C' are counted as native speakers.
    native = file.startswith('C')

    # Underscore-separated tokens of the stem, without the first token.
    # file2[0] is treated as the attempted-vowel token, file2[1] (if present)
    # as what was actually said.
    # NOTE(review): raises IndexError for a file name without '_' - confirm
    # that every file in the folder follows the naming scheme.
    file2 = file.split('.')[0].split('_')[1:]
    attempted = file2[0].translate(strip_digits)
    for code, label in vowels_2.items():
        attempted = attempted.replace(code, label)

    # Collapse the token to a single vowel label: take the first entry of
    # `vowels` that sits at index 1, or at index 0 of a token shorter than
    # three characters. If none qualifies the token is left as it is.
    for candidate in vowels:
        if candidate not in attempted:
            continue
        idx = attempted.index(candidate)
        if idx == 1 or (idx == 0 and len(attempted) < 3):
            attempted = candidate
            break

    # Ground truth defaults to the attempted vowel unless the file name
    # carries a second token.
    said = attempted
    if len(file2) > 1:
        said = file2[1]
        for code, label in vowels_2.items():
            said = said.replace(code, label)

    # vowel_prediction is defined elsewhere; hand it a copy so that sharing
    # one reference table across iterations cannot change its results.
    vowel, min_dist = vowel_prediction([f1, f2, f3], ref_data.copy())
    # Nasal labels are scored as the corresponding oral vowel labels.
    said = said.replace('õ', 'O').replace('ã', 'a').replace('ẽ', 'E')
    # Keep only the second character of the predicted label.
    # NOTE(review): presumably the label is wrapped in delimiters - confirm
    # against vowel_prediction.
    vowel = vowel[1:2]

    # Files with 'CAT' or 'ELS' anywhere in the name are counted as female.
    female = 'CAT' in file or 'ELS' in file

    if native:
        y_true_nat.append(said)
        y_pred_nat.append(vowel)
    else:
        y_true_nnat.append(said)
        y_pred_nnat.append(vowel)

    if female:
        y_true_f.append(said)
        y_pred_f.append(vowel)
    else:
        y_true_m.append(said)
        y_pred_m.append(vowel)

# Reports, in order: native, non-native.
print(classification_report(y_true_nat, y_pred_nat, digits=3, zero_division=0))
print(classification_report(y_true_nnat, y_pred_nnat, digits=3, zero_division=0))

# Reports, in order: female, male.
print(classification_report(y_true_f, y_pred_f, digits=3, zero_division=0))
print(classification_report(y_true_m, y_pred_m, digits=3, zero_division=0))

# Overall report (male and female lists together cover every scored clip).
print(classification_report(y_true_m + y_true_f, y_pred_m + y_pred_f, digits=3, zero_division=0))

              precision    recall  f1-score   support

           2      0.000     0.000     0.000         2
           9      0.111     0.500     0.182         2
           @      0.000     0.000     0.000         0
           A      0.000     0.000     0.000         0
           E      0.000     0.000     0.000        10
           O      0.000     0.000     0.000         3
           a      0.667     0.056     0.103        36
           e      0.286     0.143     0.190        14
           i      0.308     0.400     0.348        10
           o      0.000     0.000     0.000         3
           u      0.200     0.500     0.286         2
           y      0.333     1.000     0.500         1

    accuracy                          0.133        83
   macro avg      0.159     0.217     0.134        83
weighted avg      0.386     0.133     0.136        83

              precision    recall  f1-score   support

           2      0.091     0.400     0.148         5
           9      0.333 

In [37]:
# Evaluate the formant-based vowel predictor on every clip in the Classmate
# corpus folder and print classification reports per speaker group and overall.
data_folder = Path('../../Classmate corpus/extracted')

# Multi-character vowel codes found in file names -> single-character labels.
vowels_2 = {'oe': '9', 'OE': '2'}
# Single-character vowel labels, in the order they are searched for below.
vowels = ['a', 'e', 'E', 'i', 'o', 'O', 'u', 'y', '9', '2']

# Ground-truth / predicted labels, split by native vs. non-native speaker ...
y_true_nat = []
y_pred_nat = []
y_true_nnat = []
y_pred_nnat = []

# ... and by female vs. male speaker.
y_true_f = []
y_pred_f = []
y_true_m = []
y_pred_m = []

# Loop-invariant helpers, built once instead of once per file.
strip_digits = str.maketrans('', '', '0123456789')
ref_data = pd.read_csv('vowel_formants.txt', header=0, sep=' ')

for file in os.listdir(data_folder):
    path = (data_folder / file).as_posix()

    # Objects returned by wave.open() are context managers, so no
    # contextlib.closing() wrapper is needed.
    with wave.open(path, 'r') as wav_file:
        frames = wav_file.getnframes()
        rate = wav_file.getframerate()
    duration = frames / float(rate)

    try:
        # The pitch floor handed to extract_formant is ceil(3 / duration).
        f1, f2, f3, _f4 = extract_formant(
            path,
            start_time=0,
            end_time=duration,
            f0min=math.ceil(3 / duration),
        )
    except ZeroDivisionError:
        # 3 / duration fails for a clip with zero frames; such clips are
        # skipped (as is any ZeroDivisionError raised by extract_formant).
        continue

    # Files named after these speakers are counted as native.
    # str.startswith accepts a tuple of prefixes.
    native = file.startswith(('Jimmy', 'Kevin', 'Mathilde', 'Maxime', 'Omar'))

    # Underscore-separated tokens of the stem, without the first token.
    # file2[0] is treated as the attempted-vowel token, file2[1] (if present)
    # as what was actually said.
    # NOTE(review): raises IndexError for a file name without '_' - confirm
    # that every file in the folder follows the naming scheme.
    file2 = file.split('.')[0].split('_')[1:]
    attempted = file2[0].translate(strip_digits)
    for code, label in vowels_2.items():
        attempted = attempted.replace(code, label)

    # Collapse the token to a single vowel label: take the first entry of
    # `vowels` that sits at index 1, or at index 0 of a token shorter than
    # three characters. If none qualifies the token is left as it is.
    for candidate in vowels:
        if candidate not in attempted:
            continue
        idx = attempted.index(candidate)
        if idx == 1 or (idx == 0 and len(attempted) < 3):
            attempted = candidate
            break

    # Ground truth defaults to the attempted vowel unless the file name
    # carries a second token.
    said = attempted
    if len(file2) > 1:
        said = file2[1]
        for code, label in vowels_2.items():
            said = said.replace(code, label)

    # vowel_prediction is defined elsewhere; hand it a copy so that sharing
    # one reference table across iterations cannot change its results.
    vowel, min_dist = vowel_prediction([f1, f2, f3], ref_data.copy())
    # Nasal labels are scored as the corresponding oral vowel labels
    # (vowels_2 in this cell produces none, so this only affects labels that
    # already contain these characters).
    said = said.replace('õ', 'O').replace('ã', 'a').replace('ẽ', 'E')
    # Keep only the second character of the predicted label.
    # NOTE(review): presumably the label is wrapped in delimiters - confirm
    # against vowel_prediction.
    vowel = vowel[1:2]

    # Files named after these speakers are counted as female.
    female = file.startswith(('Dimitra', 'Mathilde'))

    if native:
        y_true_nat.append(said)
        y_pred_nat.append(vowel)
    else:
        y_true_nnat.append(said)
        y_pred_nnat.append(vowel)

    if female:
        y_true_f.append(said)
        y_pred_f.append(vowel)
    else:
        y_true_m.append(said)
        y_pred_m.append(vowel)

# Reports, in order: native, non-native.
print(classification_report(y_true_nat, y_pred_nat, digits=3, zero_division=0))
print(classification_report(y_true_nnat, y_pred_nnat, digits=3, zero_division=0))

# Reports, in order: female, male.
print(classification_report(y_true_f, y_pred_f, digits=3, zero_division=0))
print(classification_report(y_true_m, y_pred_m, digits=3, zero_division=0))

# Overall report (male and female lists together cover every scored clip).
print(classification_report(y_true_m + y_true_f, y_pred_m + y_pred_f, digits=3, zero_division=0))

              precision    recall  f1-score   support

           2      0.000     0.000     0.000         5
           9      0.286     0.200     0.235        10
           @      0.000     0.000     0.000         0
           A      0.000     0.000     0.000         0
           E      0.429     0.375     0.400         8
           O      0.400     0.800     0.533         5
           a      1.000     0.222     0.364         9
           e      0.500     0.429     0.462         7
           i      0.714     1.000     0.833         5
           o      0.667     0.400     0.500         5
           u      0.000     0.000     0.000         5
           y      0.400     0.400     0.400         5

    accuracy                          0.359        64
   macro avg      0.366     0.319     0.311        64
weighted avg      0.464     0.359     0.365        64

              precision    recall  f1-score   support

           2      0.000     0.000     0.000         2
           9      0.429 