In [221]:
from pathlib import Path
import os
import shutil

# Load up sample paths and VCTK wav paths

In [222]:
main_folder = Path("/Users/jonojace/github/jonojace.github.io/IS22-speech-audio-corrector-samples/")

tts_g = main_folder / "vanillatts-grapheme-input"
sac_g = main_folder / "SAC-grapheme-input"
sac_us = main_folder / "SAC-speechcode-input-us-fem"
sac_scot = main_folder / "SAC-speechcode-input-scot-fem"

In [223]:
def num_from_posixpath(posixpath):
    p = str(posixpath.stem)
    if 'vanillatts' in str(posixpath.stem):
        num = p.split(' ')[0]
    else:
        num = p.split('-')[0]
    return int(num)

In [224]:
def sorted_paths(path_to_wav_folder, expected_num=78):
    # get wav paths
    paths = []
    for wav_path in path_to_wav_folder.glob('*.wav'):
        paths.append(wav_path)
    assert len(paths) == expected_num
    # sort by number 
    return sorted(paths, key=lambda x: num_from_posixpath(x))

In [225]:
def path_to_vctk_path(path, indir):
    path
    utt_id = str(path).split('|')[0].split('-')[-1]
    speaker = utt_id.split('_')[0]
    source_wav_path = indir / speaker / (utt_id + ".wav")
    return source_wav_path, utt_id + ".wav"

In [226]:
# copy vctk files from original vctk folder into new one
indir = Path("/Users/jonojace/VCTK/wav48")
sac_us_outdir = main_folder / "SAC-us-sourcewavs"
sac_scot_outdir = main_folder / "SAC-scot-sourcewavs"

for tts_g_path, sac_g_path, sac_us_path, sac_scot_path in zip(sorted_paths(tts_g), sorted_paths(sac_g), sorted_paths(sac_us), sorted_paths(sac_scot)):
    assert num_from_posixpath(tts_g_path) == num_from_posixpath(sac_g_path) == num_from_posixpath(sac_us_path) == num_from_posixpath(sac_scot_path)
    
    # get relevant info
    target_word = str(tts_g_path).split('vanillatts-')[-1].split('.wav')[0]
    sac_us_source_utt_id = str(sac_us_path).split('|')[0].split('-')[-1]
    sac_scot_source_utt_id = str(sac_scot_path).split('|')[0].split('-')[-1]
    
    # get path to vctk source wavs for sac us and sac scot generated sentences
    sac_us_source_full_path, sac_us_source_wav_file = path_to_vctk_path(sac_us_path, indir)
    sac_scot_source_full_path, sac_scot_source_wav_file = path_to_vctk_path(sac_scot_path, indir)
    
    sac_us_source_wav_path = sac_us_outdir / sac_us_source_wav_file
    sac_scot_source_wav_path = sac_us_outdir / sac_scot_source_wav_file
    
    # copy source wavs to folder
    shutil.copyfile(sac_us_source_full_path, sac_us_source_wav_path)
    shutil.copyfile(sac_scot_source_full_path, sac_scot_source_wav_path)
    
    # print html for this row
    columns = []
    columns.append('<tr>')
    # target word
    columns.append(f'<td>{target_word}</td>')
    #tts_g
    columns.append(f'<td><audio controls="controls"><source src="{get_rel_path(str(tts_g_path))}" type="audio/wav" /></audio></td>')
    #sac_g
    columns.append(f'<td><audio controls="controls"><source src="{get_rel_path(str(sac_g_path))}" type="audio/wav" /></audio></td>')
    #sac_us 
    columns.append(f'<td><audio controls="controls"><source src="{get_rel_path(str(sac_us_path))}" type="audio/wav" /></audio><audio controls="controls"><source src="{get_rel_path(str(sac_us_source_wav_path))}" type="audio/wav" /></audio> {sac_us_source_utt_id}</td>')
    #sac_scot
    columns.append(f'<td><audio controls="controls"><source src="{get_rel_path(str(sac_scot_path))}" type="audio/wav" /></audio><audio controls="controls"><source src="{get_rel_path(str(sac_scot_source_wav_path))}" type="audio/wav" /></audio> {sac_scot_source_utt_id}</td>')
    columns.append('</tr>')
    print(" ".join(columns))

<tr> <td>afghan</td> <td><audio controls="controls"><source src="IS22-speech-audio-corrector-samples/vanillatts-grapheme-input/1 vanillatts-afghan.wav" type="audio/wav" /></audio></td> <td><audio controls="controls"><source src="IS22-speech-audio-corrector-samples/SAC-grapheme-input/1-how is afghan pronounced.wav" type="audio/wav" /></audio></td> <td><audio controls="controls"><source src="IS22-speech-audio-corrector-samples/SAC-speechcode-input-us-fem/1-how is <afghan> pronounced-p330_400|1.wav" type="audio/wav" /></audio><audio controls="controls"><source src="IS22-speech-audio-corrector-samples/SAC-us-sourcewavs/p330_400.wav" type="audio/wav" /></audio> p330_400</td> <td><audio controls="controls"><source src="IS22-speech-audio-corrector-samples/SAC-speechcode-input-scot-fem/1-how is <afghan> pronounced-p249_050|1.wav" type="audio/wav" /></audio><audio controls="controls"><source src="IS22-speech-audio-corrector-samples/SAC-us-sourcewavs/p249_050.wav" type="audio/wav" /></audio> p24

# Create html for one row of table (a target word)

In [227]:
def get_rel_path(local_path):
    """get relative path from local path"""
    return local_path[local_path.find("IS22"):]

In [228]:
columns = []
columns.append('<tr>')
# target word
columns.append(f'<td>{target_word}</td>')
#tts_g
columns.append(f'<td><audio controls="controls"><source src="{get_rel_path(str(tts_g_path))}" type="audio/wav" /></audio></td>')
#sac_g
columns.append(f'<td><audio controls="controls"><source src="{get_rel_path(str(sac_g_path))}" type="audio/wav" /></audio></td>')
#sac_us 
columns.append(f'<td><audio controls="controls"><source src="{get_rel_path(str(sac_us_path))}" type="audio/wav" /></audio><audio controls="controls"><source src="{get_rel_path(str(sac_us_source_wav_path))}" type="audio/wav" /></audio> {sac_us_source_utt_id}</td>')
#sac_scot
columns.append(f'<td><audio controls="controls"><source src="{get_rel_path(str(sac_scot_path))}" type="audio/wav" /></audio><audio controls="controls"><source src="{get_rel_path(str(sac_scot_source_wav_path))}" type="audio/wav" /></audio> {sac_scot_source_utt_id}</td>')
columns.append('</tr>')

In [229]:
" ".join(columns)

'<tr> <td>zone</td> <td><audio controls="controls"><source src="IS22-speech-audio-corrector-samples/vanillatts-grapheme-input/78 vanillatts-zone.wav" type="audio/wav" /></audio></td> <td><audio controls="controls"><source src="IS22-speech-audio-corrector-samples/SAC-grapheme-input/78-how is zone pronounced.wav" type="audio/wav" /></audio></td> <td><audio controls="controls"><source src="IS22-speech-audio-corrector-samples/SAC-speechcode-input-us-fem/78-how is <zone> pronounced-p341_363|1.wav" type="audio/wav" /></audio><audio controls="controls"><source src="IS22-speech-audio-corrector-samples/SAC-us-sourcewavs/p341_363.wav" type="audio/wav" /></audio> p341_363</td> <td><audio controls="controls"><source src="IS22-speech-audio-corrector-samples/SAC-speechcode-input-scot-fem/78-how is <zone> pronounced-p299_360|1.wav" type="audio/wav" /></audio><audio controls="controls"><source src="IS22-speech-audio-corrector-samples/SAC-us-sourcewavs/p299_360.wav" type="audio/wav" /></audio> p299_360

In [230]:
sac_us_source_wav_path

PosixPath('/Users/jonojace/github/jonojace.github.io/IS22-speech-audio-corrector-samples/SAC-us-sourcewavs/p341_363.wav')

# Save html to disk