## Run once

In [82]:
import contextlib
import ipynb.fs
import math
import numpy as np
import wave

from .defs.extract_formant import extract_formant
from joblib import load

# Load the trained artifacts once per kernel; both paths are relative to the current working directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code — only load model files from a trusted source.
clf = load('../models/rule_based.joblib') # The classifier
scaler = load('../models/scaler.joblib') # The scaler, transforms formants so that they have a mean of 0 and a variance of 1

## Run once per file (config)

In [98]:
# Path of the WAV recording to analyze (relative to the current working directory).
input_file = '../../allwavs/allvowl/corrected/extracted/trimmed/o__f__0.14000__0.22000__m_o__mot__hzc1_mono.wav'
# NOTE(review): target_vowel is not read by the inference cell in this section — presumably the expected label, kept for reference; confirm.
target_vowel = 'o' # Possible values: 'a', 'e', 'E', 'i', 'o', 'O', 'u', 'y', '2', '9'
# Compared against 'f' during inference to build a single boolean feature.
speaker_gender = 'm' # Possible values: 'f' or 'm'
# One-hot encoded during inference over 'l', 'm', 'p', 's', 't' and 't1' (six boolean features).
previous_phoneme = 'm' # Possible values: 'l', 'm', 'p', 's', 't' or 't1' (last one shouldn't be used)

## Run once per file (inference)

In [99]:
# Inference for a single recording: read the WAV duration, extract four formants,
# append the gender / previous-phoneme indicator features, rescale the formants,
# then print the classifier's probability for every vowel and the best guess.
#
# Reads:   input_file, speaker_gender, previous_phoneme (config cell); clf, scaler (setup cell).
# Defines: formants, features, pred, final_vowel, final_confidence.
# Raises:  ValueError if the file is too short to analyze or if the classifier's
#          output width does not match `valid`.

idx2key = ['2', '9', 'a', 'a~', 'e', 'E', 'i', 'O', 'o', 'o~', 'u', 'U~+', 'y'] # All possible vowels
# valid[i] is the idx2key index of the vowel reported in column i of predict_proba.
valid = [0, 1, 2, 4, 5, 6, 7, 8, 10, 12] # Vowels we consider here (depends on the classifier)

# Only the header values are needed, so the file is closed again before the analysis starts.
with wave.open(input_file, 'r') as f:
    frames = f.getnframes()
    rate = f.getframerate()
duration = frames / float(rate)

# Extract formants
try:
    formants = extract_formant(input_file, start_time=0, end_time=duration, f0min=math.ceil(3/duration + 0.000001), n_formants=4)
except ZeroDivisionError as err:
    # Stop here: continuing would either hit a NameError on `formants` (fresh kernel)
    # or silently reuse the formants of the previously analyzed file (reused kernel).
    raise ValueError('The file is too short to analyze!') from err

# Add additional features (gender, previous phoneme).
# Work on a copy so `formants` keeps the raw extractor output and the cell can be re-run safely.
features = list(formants)
features.append(speaker_gender == 'f')
features.extend(previous_phoneme == prev for prev in 'lmpst')
features.append(previous_phoneme == 't1')

# Rescale formants (only the first four entries; the boolean indicators are left untouched)
features[:4] = scaler.transform(np.array(features[:4]).reshape(1, -1))[0]

# Prediction with probabilities (one row, one column per vowel in `valid`)
pred = clf.predict_proba([features])
if len(pred[0]) != len(valid):
    raise ValueError(
        f'Classifier returned {len(pred[0])} probabilities but `valid` lists {len(valid)} vowels'
    )

best_idx = int(np.argmax(pred[0]))      # Column of the most probable vowel
final_confidence = pred[0][best_idx]    # Best score
final_vowel = idx2key[valid[best_idx]]  # Actual prediction

print('Vowel ', 'Confidence')
print('-'*25)
for key_idx, proba in zip(valid, pred[0]):
    vowel = idx2key[key_idx]
    # One '=' per percentage point gives a quick text bar chart
    print(f'{vowel:<6} {proba:.3f}', '='*int(proba*100))

print(f'Prediction: /{final_vowel}/, confidence: {final_confidence:.3f}')

Vowel  Confidence
-------------------------
2      0.002 
9      0.000 
a      0.000 
e      0.002 
E      0.000 
i      0.000 
O      0.002 
y      0.002 
Prediction: /o/, confidence: 0.644
