In [72]:
%matplotlib inline
from scipy.io import loadmat
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cmx
import pandas as pd
import numpy as np
import os
# import os.path
import seaborn as sns
import csv
import random

<h2> Process Multiple Files</h3>

<h3>Make F0 raw csv files per syllable

In [73]:
# time step is 0.005 seconds, but the syll time resolution is 4dp, so must round

# for each mat file:
#     load mat file
#     extract f0_raw, syll_label and syll_time into variables, and reshape
#     for each syll_label
#         if syll_label contains a '#' in it
#             get syll_time start and end times
#             divide each by 0.005 to get start and end indexes
#             extract f0_raw range using indexes
#             do the f0 rounding, shifting, casting as before
#             write to file, adding syllable loop index to end of filename
#         else
#             skip to next syllable

# must also modify combo source/target section that follows, to remove segmenting parts

# ###########################################

# set number of decimal places
dec = 0

# path to input files directory
directory_path_root = '/Users/robinson/Dropbox/anasynth/_data/emoVC/Olivia2006'
directory = os.fsencode(directory_path_root)
# path to output files directory
directory_path_f0raw = '/Users/robinson/Dropbox/anasynth/_data/emoVC/Olivia2006/f0_raw_syllable'
if not os.path.exists(directory_path_f0raw):
    os.mkdir(directory_path_f0raw)
    
# list to store all syllables in all files
all_syllables = []

# for each mat file in directory (each mat file has one sequence of f0 raw values in it)
for file in os.listdir(directory):
    filename = os.fsdecode(file)
    if filename.endswith('.mat'): 
        # build filepath (should use file var here really)
        filepath = os.path.join(directory_path_root, filename)
        # print(filepath)
        
        # load the file and extract f0 raw, syll_label and syll_time into variables, and reshape
        mat_dict = loadmat(filepath)
        f0_raw = mat_dict['f0_raw']
        f0_raw = f0_raw.reshape((f0_raw.shape[1],))
        
        syll_label = mat_dict['syll_label']
        syll_label = syll_label.reshape((syll_label.shape[1],))        
#         print(syll_label.shape)
        
#         for label in syll_label:
#             print(label[0])
        
        # reshape this to 2d, to preserve start/end relationship
        syll_time = mat_dict['syll_time']
        syll_time = syll_time.reshape((syll_time.shape[1],syll_time.shape[0]))    
#         print(syll_time.shape)
#         print(syll_time)

#         break #debug
        
        # for each syll in syll_label
        for i, syll in enumerate(syll_label):
            
            # if syll_label doesn't contain a '#' in it
            if '#' not in syll[0]:
                                
                # add syllable to a list
                all_syllables.append(syll[0])
                
                # get syll_time start and end times
                syll_start_time = syll_time[i,0]
                syll_end_time = syll_time[i,1]
                
#                 print(syll_start_time)
#                 print(syll_end_time)                
                
                # divide each by 0.005 to get start and end indexes
                syll_start_idx = (int)(syll_start_time // 0.005)
                syll_end_idx = (int)(syll_end_time // 0.005)
                
#                 print(syll_start_idx)
#                 print(syll_end_idx)
                
                # extract f0_raw range using indexes
                syll_f0 = f0_raw[syll_start_idx:syll_end_idx]

#                 debug
#                 if syll[0] == 't E t':
#                     print(filename)
#                     print('syll_start_time ', syll_start_time)
#                     print('syll_end_time ', syll_end_time)
#                     print('syll_start_idx ', syll_start_idx)
#                     print('syll_end_idx ', syll_end_idx)
#                     print('syll_f0 ', syll_f0)
            
#                 print(syll_f0)
                
#                 break #debug

                # create new array to hold rounded values
                syll_f0_dec = np.zeros(syll_f0.shape)
                # round all values to dec dp
                np.around(syll_f0, decimals=dec, out=syll_f0_dec)
                # multiply by 10^dec to shift dp dec places to the right
                syll_f0_dec = syll_f0_dec * (10**dec)
                # cast to int to ensure precise number representation in memory
                syll_f0_dec = syll_f0_dec.astype(int)

                # write out csv file of f0_raw values - specify format as %u for values to be written as int
                # add syllable loop index to end of filename
                filename_noext, _ = os.path.splitext(filename)
                output_file_extension = '.csv'
                output_file_name = ''.join([filename_noext, '.s', format(i, '02d'), '_', syll[0], 
                                            output_file_extension])  
                np.savetxt(os.path.join(directory_path_f0raw, output_file_name), syll_f0_dec, delimiter=',', fmt='%u')
            
            # syll_label contains a '#' in it (an unvoiced region), skip to next syllable
            else:
                continue        
    else:
        continue
        
print('done')

done


<h3> Make Combo Source and Target Syllable Input Files</h3>

In [74]:
# the above code makes output files with one row per syllable
# we can't use these as-is, as we need to read the lines in as pairs, so source and target must have equal num of rows
# next step is to pair the files using the phrase and intensities in the filenames

# source: 10 phrases of i00 intensity across e01 to e08 - each phrase is said 8 times, neutrally
# target: 10 phrases of i01-i05 intensity for e02 - each phrase is said 5 times, expressively (5 times)
# so for each utterance (8 of) of each 'p' source phrase (10 of), copy it 5 times, matched with i01-i05 of 'p' target
# P(10) > E(8) > I(5)

# build paths and open output files
# path to input files directories
# input_directory_path = '/Users/robinson/Dropbox/anasynth/_data/emoVC/Olivia2006/f0_raw'
input_directory_path = '/Users/robinson/Dropbox/anasynth/_data/emoVC/Olivia2006/f0_raw_syllable'
# define filename components
# Olivia2006.e02.p01.i01.csv
input_file_root = 'Olivia2006'
input_file_extension = '.csv'

# define output filenames and paths
output_directory = os.path.join(input_directory_path, 'out')
if not os.path.exists(output_directory):
    os.mkdir(output_directory)
# output filenames
filename_source = 'source.txt'
filename_target = 'target.txt'
filename_log = 'log.txt'
# open output files in subdirectory of input files directory (must create manually)
fs = open(os.path.join(output_directory, filename_source), 'w')
ft = open(os.path.join(output_directory, filename_target), 'w')
fo = open(os.path.join(output_directory, filename_log), 'w')


# pass it a symbol string 'p' / 'e' / 'i' with range, or a syllable code string
# it finds all files in a directory that have this in their filename, and returns their filenames as a set
def getSet(symbol, num_from=None, num_to=None):
    # path to input files directory
    directory = os.fsencode(input_directory_path)
    
    # filepath_list = []
    filename_list = []
    
    # for each csv file in directory (each csv file has one sequence of f0 raw values in it)
    for file in os.listdir(directory):
        filename = os.fsdecode(file)
        if filename.endswith('.csv'): 
            # build filepath (should use file var here really)
            # filepath = os.path.join(input_directory_path, filename)
            
            # if num_from is set, then it's either a p/e/i, so loop the range specified
            if num_from != None:
                for i in range(num_from, num_to + 1):
                    if ''.join(['.', symbol, format(i, '02d')]) in filename:
                        filename_list.append(filename)
            # if num_from is not set, then it's a syllable symbol specified
            else:
                if ''.join(['_', symbol, '.']) in filename:
                        filename_list.append(filename)

    # return a set of unique filenames that satisfy the given parameters
    return set(filename_list)


# #####################
# DEFINE PARAMETERS

# define phrase range
phrase_from = 1
phrase_to = 10
# define source and target emotion ranges
source_emotion_from = 1
source_emotion_to = 8
target_emotion_from = 2
target_emotion_to = 2
# define source and target intensity ranges
source_intensity_from = 0
source_intensity_to = 0
target_intensity_from = 1
target_intensity_to = 2

# END PARAMETERS
# #######################


# SOURCE
# create lists of sets for each phrase, emotion, intensity and syllable code
set_phrases = getSet('p', phrase_from, phrase_to)
set_source_emotions = getSet('e', source_emotion_from, source_emotion_to)
set_target_emotions = getSet('e', target_emotion_from, target_emotion_to)
set_source_intensities = getSet('i', source_intensity_from, source_intensity_to)
set_target_intensities = getSet('i', target_intensity_from, target_intensity_to)

# print(set_source_intensities)

# that do too, then I just  - do this for all syllablesfor each syllable, get set of source filenames 
# which satisfy the parameters, and a set of target filenames that do too, then I just make a set of filename pairs 
# with a loop (for each filename in source set, match with a filename in target set) - do this for all syllables

# get unique list of syllables
all_syllables_set = set([x for x in all_syllables])
# print(len(set_one_syllable))
# print(all_syllables_set)

# for each syllable
for syll in all_syllables_set:
    set_one_syllable = getSet(syll)
#     print(set_one_syllable)

    # get a set of source filenames which satisfy the parameters - note: returned set can be empty
    if set_one_syllable & set_phrases & set_source_emotions & set_source_intensities:
        set_sources = set.intersection(set_one_syllable, set_phrases, set_source_emotions, set_source_intensities)
#         print(set_sources)
    else:
        continue
    
    # get a set of target filenames which satisfy the parameters
    if set_one_syllable & set_phrases & set_target_emotions & set_target_intensities:
        set_targets = set.intersection(set_one_syllable, set_phrases, set_target_emotions, set_target_intensities)
#         print(set_targets)
    else:
        continue
    
    # make a set of filename pairs (for every filename in source set, match with every filename in target set)
    for source_file in set_sources:
        for target_file in set_targets:
            
            # build the source file path
            source_file_path = os.path.join(input_directory_path, source_file)           
            # build the target file path
            target_file_path = os.path.join(input_directory_path, target_file)
            # if this file doesn't exist, break out of syllable loop and try next one
            if not os.path.isfile(target_file_path) or os.stat(target_file_path).st_size== 0 or not os.path.isfile(source_file_path) or os.stat(source_file_path).st_size == 0:
                break
                

            # load the source file and extract vars
            source_f0_raw = np.loadtxt(source_file_path, dtype='int')
            # reshape to have two indices, the first being a constant so all values belong to the same 'row'
            source_f0_raw = source_f0_raw.reshape((1, source_f0_raw.shape[0]))                    
            # append it to output file as a new row, with space delimiter between elements, format unsigned int
            np.savetxt(fs, source_f0_raw, delimiter=' ', fmt='%u')

            # load the target file and extract vars
            target_f0_raw = np.loadtxt(target_file_path, dtype='int')
            # reshape to have two indices, the first being a constant so all values belong to the same 'row'
            target_f0_raw = target_f0_raw.reshape((1, target_f0_raw.shape[0]))                    
            # append it to output file as a new row, with space delimiter between elements, format unsigned int
            np.savetxt(ft, target_f0_raw, delimiter=' ', fmt='%u')
            

            # write input and output file pair to log file
            logstring = source_file_path + '   ' + target_file_path
            print(logstring, file=fo)

# close the output files
fs.close()
ft.close()
fo.close()

print('done')

done


In [75]:
# shuffle the source/target pairs and split them out into train/val/test files

# set ratios for train/val/test split e.g. 0.6, 0.2, 0.2
train_split = 0.6
val_split = 0.2
test_split = 0.2

# open source and target input files to read from
fs = open(os.path.join(output_directory, filename_source), 'r')
ft = open(os.path.join(output_directory, filename_target), 'r')

# get line counts of files (source and target will be the same, so just need to check one of them)
with open(os.path.join(output_directory, filename_source)) as f:
    f_lines = sum(1 for _ in f)
    # set index values for train, val and test
    train_lines = int(f_lines // (1 / train_split))
    val_lines = int(f_lines // (1 / val_split))
    test_lines = f_lines - train_lines - val_lines # whatever is left

# double check that source and target have the same number of lines
with open(os.path.join(output_directory, filename_target)) as f2:
    f_lines2 = sum(1 for _ in f2)
    if f_lines != f_lines2:
        raise ValueError('Not the same')
    
# open source and target input files to read from
fs = open(os.path.join(output_directory, filename_source), 'r')
ft = open(os.path.join(output_directory, filename_target), 'r')

# read the source and target input files line by line, stripping all whitespace and empty lines
source_data = fs.read().strip().split('\n')
# print(type(source_data))
# print(len(source_data)) #6597
target_data = ft.read().strip().split('\n')
# print(len(target_data)) #6597

# make a list of tuples, each holding a pair of source and target strings
merged_data = list(zip(source_data, target_data))
# shuffle the tuples (preserving the pairing) to ensure a good mix of p/e/i in each set
random.shuffle(merged_data)
# print(len(merged_data)) #6597

# seperate the tuples into two lists of source and target lines
train_data_source = [x[0] for x in merged_data[:train_lines]]
train_data_target = [x[1] for x in merged_data[:train_lines]]
val_data_source = [x[0] for x in merged_data[train_lines:(train_lines+val_lines)]]
val_data_target = [x[1] for x in merged_data[train_lines:(train_lines+val_lines)]]
test_data_source = [x[0] for x in merged_data[(train_lines+val_lines):]]
test_data_target = [x[1] for x in merged_data[(train_lines+val_lines):]]

print(len(train_data_source))
print(len(train_data_target))
# print(len(val_data_source))
# print(len(val_data_target))
# print(len(test_data_source))
# print(len(test_data_target))

# make train, test, dev, model directories
train_dir = os.path.join(output_directory, 'train')
dev_dir = os.path.join(output_directory, 'dev')
test_dir = os.path.join(output_directory, 'test')
model_dir = os.path.join(output_directory, 'model')
if not os.path.exists(train_dir):
    os.mkdir(train_dir)
if not os.path.exists(dev_dir):
    os.mkdir(dev_dir)
if not os.path.exists(test_dir):
    os.mkdir(test_dir)
if not os.path.exists(model_dir):
    os.mkdir(model_dir)
    
# open output files to write to
f_train_source = open(os.path.join(train_dir, 'train_source.txt'), 'w')
f_train_target = open(os.path.join(train_dir, 'train_target.txt'), 'w')
f_val_source = open(os.path.join(dev_dir, 'val_source.txt'), 'w')
f_val_target = open(os.path.join(dev_dir, 'val_target.txt'), 'w')
f_test_source = open(os.path.join(test_dir, 'test_source.txt'), 'w')
f_test_target = open(os.path.join(test_dir, 'test_target.txt'), 'w')

# print(train_data_source)

# write each of the lists to the opened files
print(len([line for line in train_data_source]))
print(len([line for line in train_data_target]))

[print(line, file=f_train_source) for line in train_data_source]
[print(line, file=f_train_target) for line in train_data_target]
[print(line, file=f_val_source) for line in val_data_source]
[print(line, file=f_val_target) for line in val_data_target]
[print(line, file=f_test_source) for line in test_data_source]
[print(line, file=f_test_target) for line in test_data_target]

# close the input source and target files
fs.close()
ft.close()

# close the output files
f_train_source.close()
f_train_target.close()
f_val_source.close()
f_val_target.close()
f_test_source.close()
f_test_target.close()

print('fs_lines = ' + str(f_lines))
print('train_lines = ' + str(train_lines))
print('val_lines = ' + str(val_lines))
print('test_lines = ' + str(test_lines))
print('done')

1592
1592
1592
1592
fs_lines = 2654
train_lines = 1592
val_lines = 530
test_lines = 532
done


<h3> Make Vocabulary Inputs

In [76]:
for file in [filename_source, filename_target]:

    # open output files in subdirectory of input files directory (must create manually)
    fs = open(os.path.join(output_directory, file), 'r')

    # read the source and target input files line by line, stripping all whitespace and empty lines
    source_data = fs.read().strip().split('\n')

    # set min and max initial values
    source_data_min = float('Inf')
    source_data_max = 0.0

    for i in range(len(source_data)):
        source_array = np.array([int(x) for x in source_data[i].split(' ')])
        if source_array.max() > source_data_max:
            source_data_max = source_array.max()
        if np.min(source_array[np.nonzero(source_array)]) < source_data_min:
            source_data_min = np.min(source_array[np.nonzero(source_array)])

    # print range of integers from min to max found in files
    range_size = (source_data_max - source_data_min) + 1
    samples = np.linspace(source_data_min, source_data_max, num=range_size, endpoint=True, retstep=False, dtype=int)
    print(samples)

    # save vocabulary input files to train_dir
    filename_noext, _ = os.path.splitext(file)
    np.savetxt(os.path.join(train_dir, filename_noext + '_vocab_input.txt'), samples, delimiter=' ', fmt='%u')

    
# delete the input source and target files
# os.remove(os.path.join(output_directory, filename_source))
# os.remove(os.path.join(output_directory, filename_target))

# now run the vocabulary script to make the proper vocab files

[ 50  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67
  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85
  86  87  88  89  90  91  92  93  94  95  96  97  98  99 100 101 102 103
 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193
 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283
 284 285 286 287 288 289 290 291 292 293 294 295 29