In [1]:
import os
import fnmatch
import re
import numpy as np
from collections import defaultdict
import model_utilities

In [2]:
############################### STATIC DATA #################################

# Phonemes approximated by our models.
# The database also contains other segmented phonetic items (e.g. <p:>, <usb>);
# this list is used to filter those out.
valid_phonemes = [
    "a", "a~", "e", "E", "I", "i", "O", "o", "U", "u",
    "Y", "y", "9", "2", "a:", "a~:", "e:", "E:", "i:", "o:",
    "u:", "y:", "2:", "OY", "aU", "aI", "@", "6", "z", "S",
    "Z", "C", "x", "N", "Q", "b", "d", "f", "g", "h",
    "j", "k", "l", "m", "n", "p", "r", "s", "t", "v",
]

# Subset of the items above that is treated as vowels.
vowel_list = [
    "Q", "a", "a~", "e", "E", "I", "i", "O", "o", "U",
    "u", "Y", "y", "9", "2", "@", "6", "a:", "a~:", "e:",
    "E:", "i:", "o:", "u:", "y:", "2:", "OY", "aU", "aI",
]

# Classification of phonemes used by model_wl. This grouping was done manually;
# a clustering method such as k-means would be more appropriate.
# Note: the class names are labels only - e.g. "long_vowels" also holds
# "m", "k" and "N", and "short_vowels" also holds "u:", "o:" and "f".
phon_class_dict = {
    "diphthong": ["aI", "aU", "OY"],
    "long_vowels": ["a:", "E:", "e:", "i:", "2:", "@", "m", "k", "N"],
    "short_vowels": ["a", "u:", "o:", "e", "O", "E", "C", "6", "U", "f", "y", "o", "S", "j", "y:"],
    "cons_allg": ["x", "h", "l", "n", "I", "9", "z", "s", "Y", "v", "t"],
    "short_cons": ["p", "b", "d", "g"],
    "others": ["Q", "r"],
}

In [4]:
# Create list of filepaths to explore. This one uses entire (training) data set, for exploration purposes.
#path_list_training = get_path_list('C:/Users/alexutza_a/Abschlussarbeit/DB_Verbmobil/Evaluation/Training')

# List of paths to the files in our test data, to later iterate on
#path_list_test = get_path_list('C:/Users/alexutza_a/Abschlussarbeit/DB_Verbmobil/Evaluation/Test')

In [5]:
################################## FUNCTIONS USING ->TRAINING DATA<- #########################################

def phon_wl_compressed_dict(phon_wordleng_dict):
    """Condense the per-phoneme, per-word-length value lists into summary statistics.

    For every phoneme and every word length, every third entry of the stored
    list (starting at index 2) is summarised by its median, its mean and the
    average of the two, each rounded to 3 decimals.
    NOTE(review): those entries are presumably the phoneme's proportion of the
    word duration - confirm against model_utilities.phon_wordleng_dict.

    @param phon_wordleng_dict: {phoneme: {word_length: [values, ...]}}
    Returns: a dict of defaultdict(list). Looks like:
        {"a": {1: [median_a1, mean_a1, (median_a1 + mean_a1) / 2], 2: [...], ...},
         "b": {1: [...], ...}, ...}
    """
    summary = {}

    for phon, values_by_length in phon_wordleng_dict.items():
        # defaultdict so that looking up an unseen word length yields []
        per_length = defaultdict(list)
        for w_leng, values in values_by_length.items():
            shares = values[2::3]
            med = np.median(shares)
            avg = np.mean(shares)
            per_length[w_leng] = [round(med, 3), round(avg, 3), round((med + avg) / 2, 3)]
        summary[phon] = per_length

    return summary

In [6]:
################################## FUNCTIONS USING ->TEST DATA<- #########################################

# Single-entry cache for the compressed training statistics. Building them
# reads every training file, and phoneme_steak is called once per test phoneme,
# so recomputing on every call is prohibitively slow.
# Keyed by the tuple of training paths; assumes the files do not change on disk
# while the kernel is running.
_PHON_WL_COMPRESSED_CACHE = {}


def _compressed_dict_for(path_list_training):
    """Return the compressed statistics for a training set, computing them only when the path list changes."""
    cache_key = tuple(path_list_training)
    if cache_key not in _PHON_WL_COMPRESSED_CACHE:
        # Keep only the most recent training set to bound memory use.
        _PHON_WL_COMPRESSED_CACHE.clear()
        _PHON_WL_COMPRESSED_CACHE[cache_key] = phon_wl_compressed_dict(
            model_utilities.phon_wordleng_dict(path_list_training))
    return _PHON_WL_COMPRESSED_CACHE[cache_key]


def phoneme_steak(composition_dict, word_dur, phon_count, path_list_training):
    """Apply the compressed training statistics to one word.

    @param composition_dict: phonemes of the current word, {phoneme: count}
    @param word_dur: duration of the word (kept for interface compatibility;
                     it cancels out of the equal-share fallback)
    @param phon_count: number of phonemes in the word, used for the fallback
    @param path_list_training: paths of the training files
    Returns: a dict with the share ("steak") statistics of each phoneme of the
    word. Looks like phon_wl_compressed_dict but is not nested anymore:
        {"a": [median, mean, (median + mean) / 2], "aU": [...], ...}
    and contains as keys only the keys of composition_dict.
    """
    p_wl_compressed_dict = _compressed_dict_for(path_list_training)

    # NOTE(review): the word length used for the lookup is the number of
    # *distinct* phonemes, which is smaller than phon_count when a phoneme
    # repeats - confirm this matches the keys built by phon_wordleng_dict.
    word_len = len(composition_dict)

    phoneme_steak_dict = {}
    for phoneme in composition_dict:
        # .get() on both levels: an unseen phoneme must not raise KeyError and
        # an unseen word length must not insert an empty entry into the cache.
        stats = p_wl_compressed_dict.get(phoneme, {}).get(word_len)
        if stats:
            # Copy so callers cannot mutate the cached statistics.
            phoneme_steak_dict[phoneme] = list(stats)
        else:
            # Unknown phoneme / word length: assume all phonemes of the word
            # are equally long (word_dur / (word_dur * phon_count) == 1 / phon_count).
            equal_share = 1.0 / phon_count
            phoneme_steak_dict[phoneme] = [equal_share, equal_share, equal_share]

    return phoneme_steak_dict

In [7]:
def build_composition_dict(datei, word_no):
    """Count how often each phoneme label occurs in one word of an annotation file.

    Only lines starting with "MAU" are considered. Of their whitespace-separated
    fields, the 4th is compared with word_no and the 5th is the label counted.

    @param datei: path of the annotation file
    @param word_no: (int) index of the word to analyse
    Returns: a defaultdict(int) of the word composition.
    Looks like: {"g": 1, "@": 1, "n": 1, "aU": 1}
    Raises: IndexError / ValueError for a MAU line with fewer than 5 fields or
    a non-integer word index.
    """
    composition_dict = defaultdict(int)

    # "with" guarantees the file is closed even if a malformed line raises.
    with open(datei) as work_file:
        for line in work_file:
            if not line.startswith("MAU"):
                continue
            fields = line.split()
            if int(fields[3]) == word_no:
                composition_dict[fields[4]] += 1

    return composition_dict

In [12]:
def pdur_prediction_value(datei, word_no, phoneme, model, path_list_training):
    """Predict the duration of a phoneme based on the composition of its word.

    @param datei: path of the annotation file containing the word
    @param word_no: (int) index of the word within the file
    @param phoneme: label of the phoneme to predict
    @param model: 0, 1, or 2 meaning (median, mean, or (median + mean)/2)
    @param path_list_training: paths of the training files
    Returns: (type: int) the predicted duration (samples) of the given phoneme
    Raises: KeyError if the phoneme does not occur in the given word.
    """
    composition_dict = build_composition_dict(datei, word_no)
    # The third value returned by word_statistics is not needed here.
    word_dur, phoneme_count, _ = model_utilities.word_statistics(datei, word_no)

    # Fail early and clearly; composition_dict is a defaultdict, so indexing a
    # missing phoneme would silently yield 0 and end in a division by zero.
    if phoneme not in composition_dict:
        raise KeyError("phoneme %r does not occur in word %r of %r" % (phoneme, word_no, datei))

    steak = phoneme_steak(composition_dict, word_dur, phoneme_count, path_list_training)[phoneme][model]

    # Scale the share by the word duration and divide by the number of
    # occurrences of the phoneme in the word.
    return int(round(word_dur * steak / composition_dict[phoneme]))

In [13]:
########################################### MODEL EVALUATION ####################################################

def create_modelWLIP_lists(model, path_list_test, path_list_training):
    """Create the lists of predictions and actual values used to evaluate the model.

    The model type uses individual phonemes and their median, mean, or both.
    Every "MAU" line of every test file whose label (5th field) is in
    valid_phonemes contributes one prediction and one actual value (3rd field).

    @param model: 0, 1, or 2 meaning (median, mean, or (median + mean)/2)
    @param path_list_test: paths of the files to predict
    @param path_list_training: paths of the training files
    Returns: 2 lists of integers (predictions, actuals) of equal length
    """
    predictions_list, actuals_list = [], []

    for datei in path_list_test:
        # "with" guarantees the file is closed even if a prediction raises.
        with open(datei) as work_file:
            for line in work_file:
                if not line.startswith("MAU"):
                    continue
                fields = line.split()
                phoneme = fields[4]
                # Restrain model to relevant phonemes
                if phoneme not in valid_phonemes:
                    continue
                # pdur_prediction_value is defined in this notebook; no
                # model_wl_ip module is imported here.
                predicted_pdur = pdur_prediction_value(datei, int(fields[3]), phoneme, model, path_list_training)
                predictions_list.append(predicted_pdur)
                actuals_list.append(int(fields[2]))

    return predictions_list, actuals_list

In [None]:
import model_utilities
import performance_measures
import create_eval_lists

def ten_fold(dataset_path, num_folds=10):
    """Run a k-fold cross-validation (10 folds by default) of the model.

    The file list is split into num_folds disjoint, equally sized test sets;
    for each fold the remaining files (including any remainder that does not
    fit into a full fold) form the training set. RMSE, MAE and the correlation
    coefficient of each fold are printed.

    @param dataset_path: directory passed to model_utilities.get_path_list
    @param num_folds: number of folds (default 10)
    Returns: the correlation coefficient of the LAST fold only.
    Raises: ValueError if num_folds < 1 or the dataset has fewer files than folds.
    """
    if num_folds < 1:
        raise ValueError("num_folds must be at least 1, got %r" % (num_folds,))

    dataset = model_utilities.get_path_list(dataset_path)
    subset_size = len(dataset) // num_folds
    if subset_size == 0:
        # Otherwise every test set would be empty and the metrics meaningless.
        raise ValueError("dataset at %r has %d file(s), fewer than num_folds=%d"
                         % (dataset_path, len(dataset), num_folds))

    corrCoef = None
    for i in range(num_folds):
        start, stop = i * subset_size, (i + 1) * subset_size
        path_list_test = dataset[start:stop]
        path_list_training = dataset[:start] + dataset[stop:]

        print("Starting test with fold no. " + str(i))
        # Using the fold-results to learn and evaluate model.
        # Build both lists in ONE call - this is by far the most expensive step.
        prediction_list, actuals_list = create_eval_lists.create_modelWLIP_lists(1, path_list_test, path_list_training)
        global_rmse, mae = performance_measures.calc_error_for_data(prediction_list, actuals_list)
        corrCoef = performance_measures.corrCoef(prediction_list, actuals_list)[0]
        print("Finishing test with fold no. " + str(i))
        print(global_rmse, mae, corrCoef)
    return corrCoef

In [None]:
# Run the cross-validation on the files found under the given directory.
# NOTE(review): absolute, machine-specific path - consider a configurable
# data directory so the notebook can be re-run on another machine.
ten_fold('C:/Users/alexutza_a/Abschlussarbeit/DB_Verbmobil/Evaluation/Training')

Starting test with fold no. 0
