In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from utils import parse_filelist
from text import cmudict
from text.converters import text_to_arpabet, check_arpabet
from paths import DATA_DIR

# Root of the LJSpeech corpus; metadata.csv and the wav files are read from here below.
data_dir = DATA_DIR / "LJSpeech-1.1"
# NOTE(review): resolved against the current working directory, so the notebook
# presumably has to be launched from the repository root — confirm.
splits_dir = Path.cwd() / "resources/filelists/ljspeech"
cmudict_path = 'resources/cmu_dictionary'

# Pronunciation dictionary handed to the text -> ARPAbet / IPA converters below.
dictionary = cmudict.CMUDict(cmudict_path)

# Create new filelists (ARPAbet-convertible samples)

In [None]:
# metadata.csv, parsed into the columns: id | transcript | normalized transcript
filepaths_and_text = parse_filelist(data_dir / "metadata.csv", split_char='|')
df = pd.DataFrame(np.array(filepaths_and_text), columns=["id", "transcript", "norm_transcript"])
# Among the first 10 rows, print those whose raw and normalized transcripts differ.
for idx in range(10):
    id = df.iloc[idx]["id"]
    if df.loc[idx, "transcript"] != df.loc[idx, "norm_transcript"]:
        print(f"ID: {id}, idx: {idx}")
        print(f"Original: {df.loc[idx, 'transcript']}")
        print(f"Normalized: {df.loc[idx, 'norm_transcript']}")
df.head()

In [None]:
def get_split_df(filename:str="train.txt"):
    """
    Load one split filelist from `splits_dir` into a dataframe.

    Args:
        filename: name of the '|'-separated filelist inside `splits_dir`.

    Returns:
        DataFrame with columns "id" and "text", where "id" is the file path
        reduced to its last path component, cut at the first dot
        (e.g. "DUMMY/LJ001-0001.wav" -> "LJ001-0001").
    """
    def _path_to_id(path):
        # Keep the last path component, then drop everything from the first dot on.
        return path.split("/")[-1].split(".")[0]

    rows = parse_filelist(splits_dir / filename, split_char='|')
    frame = pd.DataFrame(np.array(rows), columns=["id", "text"])
    frame["id"] = frame["id"].apply(_path_to_id)
    return frame

train_df = get_split_df("train.txt")
valid_df = get_split_df("valid.txt")
test_df = get_split_df("test.txt")

# Sanity check: the split filelists carry the *normalized* transcript of metadata.csv.
# The merged frame gets an explicit name rather than `_`: in IPython `_` is the
# last-output variable, and assigning to it stops IPython from updating it.
train_with_meta_df = train_df.merge(df, on="id", how="left")
print("splits 'text' is metadata 'norm_transcript': ",
      np.all(train_with_meta_df["norm_transcript"] == train_with_meta_df["text"]))

# Total number of samples, computed once and reused for the three ratios.
n_total = len(train_df) + len(valid_df) + len(test_df)
print(f"Train samples: {len(train_df)}, Validation samples: {len(valid_df)}, Test samples: {len(test_df)}")
print(f"Train ratio: {len(train_df) / n_total:.3f}, "
      f"Validation ratio: {len(valid_df) / n_total:.3f}, "
      f"Test ratio: {len(test_df) / n_total:.3f}")

In [None]:
def good_bad_df(split_df):
    """
    Transcribe the samples from the dataframe to ARPAbet
    and return two dataframes:
    1. good_samples_df: samples with valid ARPAbet
    2. bad_samples_df: samples with invalid ARPAbet

    Args:
        split_df: dataframe with at least the columns "id" and "text".

    Returns:
        (good_samples_df, bad_samples_df), both with the columns
        "id", "text" and "arpabets". The columns are present even when a
        frame is empty, so downstream code can always select them.
    """
    columns = ["id", "text", "arpabets"]
    cleaner_names = ["english_cleaners_v2"]
    good_samples, bad_samples = [], []
    # Iterate the two columns row by row; this does not depend on the index labels.
    for sample_id, text in zip(split_df["id"], split_df["text"]):
        arpabets = text_to_arpabet(text, dictionary, cleaner_names)
        arpabets = check_arpabet(arpabets, remove_punctuation=True)
        record = {"id": sample_id, "text": text, "arpabets": arpabets}
        # A None result from check_arpabet marks the sample as not convertible.
        if arpabets is None:
            bad_samples.append(record)
        else:
            good_samples.append(record)
    good_samples_df = pd.DataFrame(good_samples, columns=columns)
    bad_samples_df = pd.DataFrame(bad_samples, columns=columns)
    return good_samples_df, bad_samples_df

train_good_df, train_bad_df = good_bad_df(train_df)
valid_good_df, valid_bad_df = good_bad_df(valid_df)
test_good_df, test_bad_df = good_bad_df(test_df)
# Adjacent f-strings are used instead of a backslash continuation inside the
# literal, which put the source indentation into the printed message.
print(f"Train good samples: {len(train_good_df)}, "
      f"Train conversion rate: {len(train_good_df) / len(train_df):.3f}")
print(f"Validation good samples: {len(valid_good_df)}, "
      f"Validation conversion rate: {len(valid_good_df) / len(valid_df):.3f}")
print(f"Test good samples: {len(test_good_df)}, "
      f"Test conversion rate: {len(test_good_df) / len(test_df):.3f}")

# Split proportions among the convertible samples only.
n_valid = (len(train_good_df) + len(valid_good_df) + len(test_good_df))
print(f"Train ratio: {len(train_good_df) / n_valid:.3f}, "
      f"Validation ratio: {len(valid_good_df) / n_valid:.3f}, "
      f"Test ratio: {len(test_good_df) / n_valid:.3f}")

In [None]:
# Peek at the first convertible training samples and their ARPAbet transcription.
train_good_df.head(10)

In [None]:
def write_split_file(filepath, splits_df):
    """
    Write a split dataframe as a '|'-separated filelist.

    Each row becomes one line of the form "DUMMY/<id>.wav|<text>".

    Args:
        filepath: destination path; an existing file is overwritten.
        splits_df: dataframe with at least the columns "id" and "text".
    """
    lines = [f"DUMMY/{sample_id}.wav|{text}\n"
             for sample_id, text in splits_df[["id", "text"]].values]
    # Explicit UTF-8 so the output does not depend on the platform default encoding.
    with open(filepath, "w", encoding="utf-8") as file:
        file.writelines(lines)
    print(f"Filelist written to {filepath}")


#write_split_file(splits_dir / "train_v0.txt", train_good_df)
#write_split_file(splits_dir / "valid_v0.txt", valid_good_df)
#write_split_file(splits_dir / "test_v0.txt", test_good_df)

# ARPAbet to IPA ternary traits

In [None]:
# Reload the splits from the *_v0 filelists (the ones the commented-out
# write_split_file calls above would produce). This rebinds train_df / valid_df /
# test_df, which earlier cells bound to the original splits.
train_df = get_split_df("train_v0.txt")
valid_df = get_split_df("valid_v0.txt")
test_df = get_split_df("test_v0.txt")
print(f"Train samples: {len(train_df)}, Validation samples: {len(valid_df)}, Test samples: {len(test_df)}")

In [None]:
import panphon
from utils import intersperse
from text.cleaners import _punctuation_list
from text.converters import arpabet2ipa

ft = panphon.FeatureTable()

# 'ɚ' and 'ɝ' were not recognized by panphon, so the arpabet2ipa dictionary uses
# base vowel + rhotic hook instead.
# NOTE(review): phonetically 'ɚ' corresponds to 'ə˞' and 'ɝ' to 'ɜ˞'; the original
# note listed the replacements in the opposite order — confirm against arpabet2ipa.
print(ft.validate_word('ɚ'))
print(ft.validate_word('ɝ'))
print(ft.validate_word('ɜ˞'))
print(ft.validate_word('ə˞'))

# Print every IPA value of the arpabet2ipa mapping that panphon does not validate
# (no output means every mapped symbol is accepted).
for k, v in arpabet2ipa.items():
    if not ft.validate_word(v):
        print(v)

In [None]:
from text.converters import text_to_ipa, ipa_to_ternary
from utils import intersperse

# When True, the cells below intersperse a " " token into the IPA sequence
# before converting it to ternary features.
add_blank = True

In [None]:
# Walk one training sample through the pipeline: text -> IPA -> ternary features.
id = 'LJ001-0001'
text = train_df.loc[train_df["id"] == id, "text"].values[0]
print(text)
ipawords_list = text_to_ipa(text, dictionary, cleaner_names=["english_cleaners_v2"], remove_punctuation=False)
if add_blank:
    # Insert a " " token between the elements of the IPA sequence.
    ipawords_list = intersperse(ipawords_list, " ")
print(ipawords_list)
ternary_emb = ipa_to_ternary(ipawords_list)
print(ternary_emb.shape)
print(ternary_emb)

In [None]:
#for id in train_df["id"].values:
#    text = train_df.loc[train_df["id"] == id, "text"].values[0]
#    ipawords_list = text_to_ipa(text, dictionary, cleaner_names=["english_cleaners_v2"], remove_punctuation=False)
#    if add_blank:
#        ipawords_list = intersperse(ipawords_list, " ")
#    ternary_emb = ipa_to_ternary(ipawords_list)

In [None]:
# Smoke test: run every validation transcript through text -> IPA -> ternary.
# The results are discarded; the point is that no sample raises.
# Rows are iterated directly instead of re-filtering the frame by id for each
# sample, which was quadratic and only checked the first row of a duplicated id.
for id, text in zip(valid_df["id"].values, valid_df["text"].values):
    ipawords_list = text_to_ipa(text, dictionary, cleaner_names=["english_cleaners_v2"], remove_punctuation=False)
    if add_blank:
        ipawords_list = intersperse(ipawords_list, " ")
    ternary_emb = ipa_to_ternary(ipawords_list)

In [None]:
# Smoke test: run every test transcript through text -> IPA -> ternary.
# The results are discarded; the point is that no sample raises.
# Rows are iterated directly instead of re-filtering the frame by id for each
# sample, which was quadratic and only checked the first row of a duplicated id.
for id, text in zip(test_df["id"].values, test_df["text"].values):
    ipawords_list = text_to_ipa(text, dictionary, cleaner_names=["english_cleaners_v2"], remove_punctuation=False)
    if add_blank:
        ipawords_list = intersperse(ipawords_list, " ")
    ternary_emb = ipa_to_ternary(ipawords_list)

# Audio to articulatory features

In [None]:
import torch
from sparc import load_model
from huggingface_hub import hf_hub_download

# Since we don't have internet access on jean zay, 
# we download the model checkpoint from HuggingFace

def download_huggingface(file_name):
    """
    Fetch `file_name` from the Speech-Articulatory-Coding HuggingFace repository.

    Returns whatever `hf_hub_download` returns for that file.
    """
    repo = "cheoljun95/Speech-Articulatory-Coding"
    downloaded = hf_hub_download(repo_id=repo, filename=file_name)
    return downloaded

model_name = "model_english_1500k"
# Keep the downloaded path and the loaded checkpoint under separate names.
ckpt_path = download_huggingface(f"{model_name}.ckpt")
# map_location="cpu": the checkpoint is only inspected and patched here, so it must
# load on a machine without a GPU even if its tensors were saved from one.
ckpt = torch.load(ckpt_path, map_location="cpu")

# Also need to download the speech model
from transformers import WavLMModel

# Fetch the WavLM weights from the hub; the commented line below stores a local copy.
speech_model = WavLMModel.from_pretrained("microsoft/wavlm-large")
#speech_model.save_pretrained("ckpt/wavlm-large")

# Modify the default ckpt so that the speech model is loaded from a local path on Jean Zay
ckpt["config"]["speech_model"] = "./ckpt/wavlm-large"
# Uncomment to persist the patched checkpoint; the next cell loads "ckpt/sparc_en.ckpt".
#torch.save(ckpt, "ckpt/sparc_en.ckpt")

In [None]:
from sparc import load_model
from IPython.display import Audio

# Load the coder from the locally saved checkpoint. The file must already exist:
# the torch.save call that writes it is commented out in the previous cell.
coder = load_model(ckpt="ckpt/sparc_en.ckpt", device= "cpu")
#coder = load_model("feature_extraction", device= "cpu")  # returns 1024 spk_emb...

In [None]:
# Encode a single wav and list the shape of every entry of the returned mapping.
audio_fp = data_dir / "LJ001-0001.wav"
code = coder.encode(audio_fp, concat=True)
#features = 12 EMA + pitch + loudness + periodicity
for name, values in code.items():
    print(f"{name}: {values.shape}")

In [None]:
import random
import numpy as np

import torch
import torchaudio as ta

from typing import List, Tuple
from pathlib import Path

from sparc import load_model

from text import cmudict
from text.converters import text_to_ipa, ipa_to_ternary
from text.symbols import symbols
from utils import parse_filelist, intersperse
#from model.utils import fix_len_compatibility
from configs.params_v0 import seed as random_seed
from configs.params_v0 import (wavs_dir, artic_dir,
                               sparc_ckpt_path)

#import sys
#sys.path.insert(0, 'hifi-gan')
#from meldataset import mel_spectrogram

device = "cuda" if torch.cuda.is_available() else "cpu"
# Output directories for the encoder results. parents=True matches the mkdir calls
# in TextArticDataset.get_art and avoids a FileNotFoundError when `artic_dir`
# itself does not exist yet.
spk_emb_save_dir = Path(artic_dir)/"spk_emb"
spk_emb_save_dir.mkdir(parents=True, exist_ok=True)
ft_save_dir = Path(artic_dir)/"emasrc"
ft_save_dir.mkdir(parents=True, exist_ok=True)
# Module-level encoder, used by TextArticDataset.get_art when features are not precomputed.
coder = load_model(ckpt=sparc_ckpt_path, 
                    device=device)

class TextArticDataset(torch.utils.data.Dataset):
    """
    Dataset of (text features, articulatory features) pairs read from a filelist.

    Each filelist entry is [filepath, text]. Items are dicts {'y': art, 'x': text}.
    The entries are shuffled once at construction with the configured seed.
    """

    def __init__(self, filelist_path, cmudict_path, add_blank=True,
                 sample_rate=22050,
                 ):
        """
        Args:
            filelist_path: filelist parsed with `parse_filelist`.
            cmudict_path: path handed to `cmudict.CMUDict`.
            add_blank: forwarded to `get_text` by `get_pair`.
            sample_rate: stored on the instance; not read anywhere in this class.
        """
        self.filepaths_and_text = parse_filelist(filelist_path)
        self.cmudict = cmudict.CMUDict(cmudict_path)
        self.add_blank = add_blank
        self.sample_rate = sample_rate
        # Seeds the global `random` module, then shuffles the entries in place.
        random.seed(random_seed)
        random.shuffle(self.filepaths_and_text)

    def get_pair(self,
                 filepath_and_text:List[str],
                 from_preprocessed:bool=True,
                 )-> Tuple[torch.IntTensor, torch.FloatTensor]: # shape: (n_ipa_feats, seq_len), (n_art_feats, T)
        """Return (text features, articulatory features) for one filelist entry."""
        filepath = filepath_and_text[0]
        raw_text = filepath_and_text[1]
        text_feats = self.get_text(raw_text, add_blank=self.add_blank)
        art_feats = self.get_art(filepath, from_preprocessed=from_preprocessed)
        return (text_feats, art_feats)

    def get_text(self,
                 text:str,
                 add_blank:bool=True
                 )-> torch.IntTensor: # shape: (n_ipa_feats, seq_len)
        """Convert a transcript to IPA, then to a transposed ternary feature tensor."""
        ipa_symbols = text_to_ipa(text, dictionary=self.cmudict,
                                  cleaner_names=["english_cleaners_v2"],
                                  remove_punctuation=False)
        if add_blank:
            # Put a " " token between consecutive elements.
            ipa_symbols = intersperse(ipa_symbols, " ")
        return torch.IntTensor(ipa_to_ternary(ipa_symbols)).T  # shape: (n_ipa_feats, seq_len)

    def get_art(self,
                filepath:str,
                from_preprocessed:bool=True
                )-> torch.FloatTensor: #shape: (n_art_feats, T)
        """
        Return the first 14 feature columns for `filepath`, transposed.

        With `from_preprocessed=True` the features are read from
        `<artic_dir>/emasrc/<stem>.npy`; a missing file raises FileNotFoundError.
        Otherwise the wav is encoded with the module-level `coder`, and the full
        features and the speaker embedding are saved as .npy before returning.
        """
        art_filename = f"{Path(filepath).stem}.npy"

        if from_preprocessed: # Favor loading precomputed features
            preprocessed_fp = Path(artic_dir) / "emasrc" / art_filename
            if not preprocessed_fp.exists():
                raise FileNotFoundError(f"Preprocessed file {preprocessed_fp} does not exist.")
            # Keep only the first 14 articulatory features
            return torch.FloatTensor(np.load(preprocessed_fp)[:, :14]).T # shape: (n_art_feats, T)

        # Encoding is slow; precomputing the features is the preferred route.
        filepath = filepath.replace("DUMMY/", str(wavs_dir) + "/")
        with torch.no_grad():
            outputs = coder.encode(filepath, concat=True)
        # Save the outputs to avoid recomputing
        for save_dir in (ft_save_dir, spk_emb_save_dir):
            if not save_dir.exists():
                save_dir.mkdir(parents=True, exist_ok=True)
        np.save(ft_save_dir/art_filename, outputs["features"])
        np.save(spk_emb_save_dir/art_filename, outputs["spk_emb"])
        # Keep only the first 14 features
        return torch.FloatTensor(outputs["features"][:, :14]).T # shape: (n_art_feats, T)

    def __getitem__(self, index):
        """Return {'y': art, 'x': text} for the entry at `index`, from precomputed features."""
        text, art = self.get_pair(self.filepaths_and_text[index], from_preprocessed=True)
        return {'y': art, 'x': text}

    def __len__(self):
        """Number of filelist entries."""
        return len(self.filepaths_and_text)

    def sample_test_batch(self, size):
        """Return `size` distinct items drawn at random with `np.random.choice`."""
        picked = np.random.choice(range(len(self)), size=size, replace=False)
        return [self.__getitem__(index) for index in picked]


In [None]:
from configs.params_v0 import (cmudict_path,
                               train_filelist_path,
                               valid_filelist_path, test_filelist_path)

# Build the three datasets with identical settings, in train / valid / test order.
train_dataset, valid_dataset, test_dataset = (
    TextArticDataset(
        filelist_path=split_filelist_path,
        cmudict_path=cmudict_path,
        add_blank=True,
    )
    for split_filelist_path in (train_filelist_path,
                                valid_filelist_path,
                                test_filelist_path)
)

In [None]:
# Locate the filelist entry of LJ001-0001 in the shuffled training set.
# There is no break, so the last matching entry wins. If nothing matches,
# `fp_and_txt` and `idx` are not assigned by this cell, and the cells below
# that use them fail or pick up stale values.
for i,e in enumerate(train_dataset.filepaths_and_text):
    if "LJ001-0001.wav" in e[0]:
        print("found")
        fp_and_txt = e
        idx = i

In [None]:
# from_preprocessed=False encodes the wav with the coder and saves the features and
# speaker embedding as .npy files (see TextArticDataset.get_art).
(text, art) = train_dataset.get_pair(fp_and_txt, from_preprocessed=False)

In [None]:
# Shapes of the text and articulatory tensors for the sample above.
text.shape, art.shape

In [None]:
# One-item batch read through __getitem__, i.e. from the preprocessed .npy file.
batch = [train_dataset[idx]]

In [None]:
from model.utils import fix_len_compatibility

# Manual collation of `batch`: zero-pad every item along the last axis to a common length.
B = len(batch)
y_max_length = max([item['y'].shape[-1] for item in batch])
# Adjust the padded target length with the model helper (see model.utils).
y_max_length = fix_len_compatibility(y_max_length)
x_max_length = max([item['x'].shape[-1] for item in batch])
# Feature dimensions are taken from the first item.
n_feats = batch[0]['y'].shape[-2]
n_ipa_feats = batch[0]['x'].shape[-2]

y = torch.zeros((B, n_feats, y_max_length), dtype=torch.float32)
x = torch.zeros((B, n_ipa_feats, x_max_length), dtype=torch.long)
y_lengths, x_lengths = [], []

for i, item in enumerate(batch):
    y_, x_ = item['y'], item['x']
    # Record the unpadded lengths, then copy each item into the left part of its row.
    y_lengths.append(y_.shape[-1])
    x_lengths.append(x_.shape[-1])
    y[i, :, :y_.shape[-1]] = y_
    x[i, :, :x_.shape[-1]] = x_

y_lengths = torch.LongTensor(y_lengths)
x_lengths = torch.LongTensor(x_lengths)

In [None]:
# Unpadded lengths and padded shapes produced by the manual collation above.
x_lengths, y_lengths, x.shape, y.shape

In [None]:
from model.utils import fix_len_compatibility

class TextArticBatchCollate(object):
    """
    Collate function that zero-pads a list of {'x': ..., 'y': ...} items into batch tensors.

    'y' items are (n_feats, T) tensors. 'x' items may be (n_ipa_feats, seq_len)
    tensors, as produced by TextArticDataset, or plain 1-D (seq_len,) sequences.
    Both are padded along their last axis.
    """

    def __call__(self, batch):
        """
        Args:
            batch: non-empty list of dicts with tensor entries 'x' and 'y'.

        Returns:
            dict with
              'x': long tensor (B, *x_feature_dims, x_max_length),
              'x_lengths': long tensor (B,) of unpadded 'x' lengths,
              'y': float32 tensor (B, n_feats, y_max_length),
              'y_lengths': long tensor (B,) of unpadded 'y' lengths.
        """
        B = len(batch)
        y_max_length = max(item['y'].shape[-1] for item in batch)
        y_max_length = fix_len_compatibility(y_max_length)
        x_max_length = max(item['x'].shape[-1] for item in batch)
        n_feats = batch[0]['y'].shape[-2]
        # Leading (feature) dimensions of 'x'; empty for 1-D token sequences.
        # A fixed 2-D buffer cannot hold the (n_ipa_feats, seq_len) items of the dataset.
        x_feat_dims = tuple(batch[0]['x'].shape[:-1])

        y = torch.zeros((B, n_feats, y_max_length), dtype=torch.float32)
        x = torch.zeros((B, *x_feat_dims, x_max_length), dtype=torch.long)
        y_lengths, x_lengths = [], []

        for i, item in enumerate(batch):
            y_, x_ = item['y'], item['x']
            y_lengths.append(y_.shape[-1])
            x_lengths.append(x_.shape[-1])
            y[i, :, :y_.shape[-1]] = y_
            # Ellipsis covers zero or more feature axes, so 1-D and 2-D items both fit.
            x[i, ..., :x_.shape[-1]] = x_

        y_lengths = torch.LongTensor(y_lengths)
        x_lengths = torch.LongTensor(x_lengths)
        return {'x': x, 'x_lengths': x_lengths, 'y': y, 'y_lengths': y_lengths}
    
# Collate callable handed to the DataLoader below.
batch_collate = TextArticBatchCollate()

from torch.utils.data import DataLoader

batch_size = 1

# shuffle=False: the dataset already shuffles its filelist once at construction.
# drop_last=True discards a final incomplete batch.
loader = DataLoader(dataset=train_dataset, batch_size=batch_size,
                        collate_fn=batch_collate, drop_last=True,
                        num_workers=4, shuffle=False)

# Create v4 filelists

In [None]:
def get_filestem(filepath:str) -> str:
    """
    Return the file stem of a '/'-separated path.

    The stem is the last path component cut at its FIRST dot, so
    "a/b/c.tar.gz" gives "c".
    """
    basename = filepath.rsplit("/", 1)[-1]
    stem, _sep, _rest = basename.partition(".")
    return stem

def art_filepath(filestem:str, prefix:str) -> str:
    """Return `prefix` followed by "<filestem>.npy" (no separator is inserted)."""
    npy_name = f"{filestem}.npy"
    return prefix + npy_name

def write_filelist(filepath, filelist, sep='|'):
    """
    Write a filelist with one "<path><sep><text>" line per entry.

    Args:
        filepath: destination path; an existing file is overwritten.
        filelist: iterable of entries whose first two elements are path and text.
        sep: field separator written between path and text.
    """
    lines = [f"{e[0]}{sep}{e[1]}\n" for e in filelist]
    # Explicit UTF-8: transcripts may contain non-ASCII characters, and the
    # platform default encoding is not guaranteed to handle them.
    with open(filepath, "w", encoding="utf-8") as file:
        file.writelines(lines)
    print(f"Filelist written to {filepath}")

In [None]:
#For LJSpeech dataset
from paths import FILELISTS_DIR
from utils import parse_filelist

dataset = "ljspeech"
DATASET = "LJSpeech-1.1"
# Prefix placed in front of every "<stem>.npy" entry; it is the same for all splits.
art_prefix = f"DUMMY/{DATASET}/encoded_audio_en/emasrc/"

for split in ["train", "valid", "test"]:
    v0_filelist_path = FILELISTS_DIR / f"{dataset}/{split}_v0.txt"
    v0_filelist = parse_filelist(v0_filelist_path, split_char='|')
    # Same transcripts as v0, with each audio path replaced by its .npy path.
    v4_filelist = [
        [art_filepath(get_filestem(wav_path), art_prefix), transcript]
        for wav_path, transcript in v0_filelist
    ]
    v4_filelist_path = FILELISTS_DIR / f"{dataset}/{split}_v4.txt"
    #write_filelist(v4_filelist_path, v4_filelist, sep='|')

In [None]:
from utils_dataset.mngu0 import get_mngu0_sentence
from pathlib import Path
from paths import DATA_DIR, FILELISTS_DIR
from utils import parse_filelist

dataset = "MNGU0"
speakers=["s1"]

# Build, per speaker, a v4 filelist of [feature .npy path, sentence] from the v1 filelist.
for spk in speakers:
    sentence_dir = DATA_DIR / dataset / "src_data" / spk / "phone_labels"
    v1_filelist_path = FILELISTS_DIR / f"{dataset}/{spk}_v1.txt"
    v1_filelist = parse_filelist(v1_filelist_path, split_char='|')
    filelist = []
    for e in v1_filelist:
        wav_fp = e[0]
        filestem = get_filestem(wav_fp)
        # The sentence is read from the "<stem>.utt" file of this sample.
        sentence = get_mngu0_sentence(sentence_dir / f"{filestem}.utt")
        ema_fp = f"DUMMY/{dataset}/arttts/{spk}/encoded_audio_en/emasrc/{filestem}.npy"
        filelist.append([ema_fp, sentence])
    v4_filelist_path = FILELISTS_DIR / f"{dataset}/{spk}_v4.txt"
    # Writing is disabled; uncomment to (re)generate the file.
    #write_filelist(v4_filelist_path, filelist, sep='|')

In [None]:
import joblib
import utils_ema.ema_dataset
from pathlib import Path
from paths import DATA_DIR, FILELISTS_DIR
from utils import parse_filelist

dataset = "mocha_timit"
speakers=["faet0", "ffes0", "fsew0", "maps0", "mjjn0", "msak0"]
processed_data_dir = DATA_DIR / dataset / "processed_data"
spkmetadata_filename = "mixed_speaker_metadata_100Hz.joblib"


for spk in speakers:
    #get sentences dict (filestem -> sentence) for the valid ids of this speaker
    # NOTE: joblib.load unpickles arbitrary objects; only load metadata files
    # produced by this project.
    spkmeta = joblib.load(processed_data_dir / f"{spk}/{spkmetadata_filename}")
    ids = spkmeta.list_valid_ids()
    # Built without a top-level loop variable named `id`, which would shadow the
    # builtin for every later cell.
    sentences_dict = {
        sentencemeta.filestem: sentencemeta.sentence
        for sentencemeta in (spkmeta.sentence_info[sentence_id] for sentence_id in ids)
    }
    #get filelist samples
    v1_filelist_path = FILELISTS_DIR / f"{dataset}/{spk}_v1.txt"
    v1_filelist = parse_filelist(v1_filelist_path, split_char='|')
    filelist = []
    missing_stems = []
    for e in v1_filelist:
        wav_fp = e[0]
        filestem = get_filestem(wav_fp)
        ema_fp = f"DUMMY/{dataset}/arttts/{spk}/encoded_audio_en/emasrc/{filestem}.npy"
        if filestem in sentences_dict:
            sentence = sentences_dict[filestem]
        else:
            # Keep the placeholder entry, but record that it happened.
            sentence = "No sentence found"
            missing_stems.append(filestem)
        filelist.append([ema_fp, sentence])
    if missing_stems:
        # Report placeholders so they are not mistaken for real transcripts.
        print(f"[{dataset}/{spk}] {len(missing_stems)} sample(s) without a sentence "
              f"(placeholder used), e.g. {missing_stems[:5]}")
    v4_filelist_path = FILELISTS_DIR / f"{dataset}/{spk}_v4.txt"
    #write_filelist(v4_filelist_path, filelist, sep='|')

In [None]:
import joblib
import utils_ema.ema_dataset
from pathlib import Path
from paths import DATA_DIR, FILELISTS_DIR
from utils import parse_filelist

dataset = "MSPKA_EMA_ita"
speakers=["cnz", "lls", "olm"]
processed_data_dir = DATA_DIR / dataset / "processed_data"
spkmetadata_filename = "mixed_speaker_metadata_100Hz.joblib"

for spk in speakers:
    #get sentences dict (filestem -> sentence) for the valid ids of this speaker
    # NOTE: joblib.load unpickles arbitrary objects; only load metadata files
    # produced by this project.
    spkmeta = joblib.load(processed_data_dir / f"{spk}/{spkmetadata_filename}")
    ids = spkmeta.list_valid_ids()
    # Built without a top-level loop variable named `id`, which would shadow the
    # builtin for every later cell.
    sentences_dict = {
        sentencemeta.filestem: sentencemeta.sentence
        for sentencemeta in (spkmeta.sentence_info[sentence_id] for sentence_id in ids)
    }
    #get filelist samples
    v1_filelist_path = FILELISTS_DIR / f"{dataset}/{spk}_v1.txt"
    v1_filelist = parse_filelist(v1_filelist_path, split_char='|')
    filelist = []
    missing_stems = []
    for e in v1_filelist:
        wav_fp = e[0]
        filestem = get_filestem(wav_fp)
        ema_fp = f"DUMMY/{dataset}/arttts/{spk}/encoded_audio_en/emasrc/{filestem}.npy"
        if filestem in sentences_dict:
            sentence = sentences_dict[filestem]
        else:
            # Keep the placeholder entry, but record that it happened.
            sentence = "No sentence found"
            missing_stems.append(filestem)
        filelist.append([ema_fp, sentence])
    if missing_stems:
        # Report placeholders so they are not mistaken for real transcripts.
        print(f"[{dataset}/{spk}] {len(missing_stems)} sample(s) without a sentence "
              f"(placeholder used), e.g. {missing_stems[:5]}")
    v4_filelist_path = FILELISTS_DIR / f"{dataset}/{spk}_v4.txt"
    #write_filelist(v4_filelist_path, filelist, sep='|')

# Create v2 filelists

In [None]:
def get_filestem(filepath:str) -> str:
    """
    Return the file stem of a '/'-separated path.

    The stem is the last path component cut at its FIRST dot, so
    "a/b/c.tar.gz" gives "c".
    """
    _head, _sep, basename = filepath.rpartition("/")
    return basename.partition(".")[0]

def wav_filepath(filestem:str, prefix:str) -> str:
    """Return `prefix` followed by "<filestem>.wav" (no separator is inserted)."""
    wav_name = f"{filestem}.wav"
    return prefix + wav_name

def write_filelist(filepath, filelist, sep='|'):
    """
    Write a filelist with one "<path><sep><text>" line per entry.

    Args:
        filepath: destination path; an existing file is overwritten.
        filelist: iterable of entries whose first two elements are path and text.
        sep: field separator written between path and text.
    """
    lines = [f"{e[0]}{sep}{e[1]}\n" for e in filelist]
    # Explicit UTF-8: transcripts may contain non-ASCII characters, and the
    # platform default encoding is not guaranteed to handle them.
    with open(filepath, "w", encoding="utf-8") as file:
        file.writelines(lines)
    print(f"Filelist written to {filepath}")

In [None]:
#For LJSpeech dataset
from paths import FILELISTS_DIR
from utils import parse_filelist

dataset = "ljspeech"
DATASET = "LJSpeech-1.1"
# Prefix placed in front of every "<stem>.wav" entry; it is the same for all splits.
wav_prefix = f"DUMMY/{DATASET}/wavs/"

for split in ["train", "valid", "test"]:
    v0_filelist_path = FILELISTS_DIR / f"{dataset}/{split}_v0.txt"
    v0_filelist = parse_filelist(v0_filelist_path, split_char='|')
    # Same transcripts as v0, with each path rewritten under the wav prefix.
    v2_filelist = [
        [wav_filepath(get_filestem(wav_path), wav_prefix), transcript]
        for wav_path, transcript in v0_filelist
    ]
    v2_filelist_path = FILELISTS_DIR / f"{dataset}/{split}_v2.txt"
    #write_filelist(v2_filelist_path, v2_filelist, sep='|')

In [None]:
#For LJSpeech dataset
from paths import FILELISTS_DIR
from utils import parse_filelist

dataset = "ljspeech"
DATASET = "LJSpeech-1.1"
# Prefix placed in front of every "<stem>.wav" entry; it is the same for all splits.
wav_prefix = f"DUMMY/{DATASET}/wavs/"

for split in ["train", "valid", "test"]:
    # Despite the variable names, this reads the original "<split>.txt" filelist,
    # not the "_v0" one; the result goes to "<split>_v2_full.txt".
    v0_filelist_path = FILELISTS_DIR / f"{dataset}/{split}.txt"
    v0_filelist = parse_filelist(v0_filelist_path, split_char='|')
    v2_filelist = [
        [wav_filepath(get_filestem(wav_path), wav_prefix), transcript]
        for wav_path, transcript in v0_filelist
    ]
    v2_filelist_path = FILELISTS_DIR / f"{dataset}/{split}_v2_full.txt"
    #write_filelist(v2_filelist_path, v2_filelist, sep='|')

In [None]:
from utils_dataset.mngu0 import get_mngu0_sentence
from pathlib import Path
from paths import DATA_DIR, FILELISTS_DIR
from utils import parse_filelist

dataset = "MNGU0"
speakers=["s1"]

# Build, per speaker, a v2 filelist of [wav path, sentence] from the v1 filelist.
for spk in speakers:
    sentence_dir = DATA_DIR / dataset / "src_data" / spk / "phone_labels"
    v1_filelist_path = FILELISTS_DIR / f"{dataset}/{spk}_v1.txt"
    v1_filelist = parse_filelist(v1_filelist_path, split_char='|')
    filelist = []
    for e in v1_filelist:
        wav_fp = e[0]
        filestem = get_filestem(wav_fp)
        # The sentence is read from the "<stem>.utt" file of this sample.
        sentence = get_mngu0_sentence(sentence_dir / f"{filestem}.utt")
        prefix = f"DUMMY/{dataset}/src_data/{spk}/wav_16kHz/"
        wav_fp = wav_filepath(filestem, prefix)
        filelist.append([wav_fp, sentence])
    v2_filelist_path = FILELISTS_DIR / f"{dataset}/{spk}_v2.txt"
    # NOTE(review): this is the only filelist cell whose write is NOT commented out,
    # so running the notebook overwrites the v2 file — confirm this is intended.
    write_filelist(v2_filelist_path, filelist, sep='|')

In [None]:
import joblib
import utils_ema.ema_dataset
from pathlib import Path
from paths import DATA_DIR, FILELISTS_DIR
from utils import parse_filelist

dataset = "mocha_timit"
speakers=["faet0", "ffes0", "fsew0", "maps0", "mjjn0", "msak0"]
processed_data_dir = DATA_DIR / dataset / "processed_data"
spkmetadata_filename = "mixed_speaker_metadata_100Hz.joblib"


for spk in speakers:
    #get sentences dict (filestem -> sentence) for the valid ids of this speaker
    # NOTE: joblib.load unpickles arbitrary objects; only load metadata files
    # produced by this project.
    spkmeta = joblib.load(processed_data_dir / f"{spk}/{spkmetadata_filename}")
    ids = spkmeta.list_valid_ids()
    # Built without a top-level loop variable named `id`, which would shadow the
    # builtin for every later cell.
    sentences_dict = {
        sentencemeta.filestem: sentencemeta.sentence
        for sentencemeta in (spkmeta.sentence_info[sentence_id] for sentence_id in ids)
    }
    #get filelist samples
    v1_filelist_path = FILELISTS_DIR / f"{dataset}/{spk}_v1.txt"
    v1_filelist = parse_filelist(v1_filelist_path, split_char='|')
    filelist = []
    missing_stems = []
    for e in v1_filelist:
        wav_fp = e[0]
        filestem = get_filestem(wav_fp)
        if filestem in sentences_dict:
            sentence = sentences_dict[filestem]
        else:
            # Keep the placeholder entry, but record that it happened.
            sentence = "No sentence found"
            missing_stems.append(filestem)
        # NOTE(review): unlike the other corpora, this prefix has no {spk} component —
        # confirm it matches the on-disk layout.
        prefix = f"DUMMY/{dataset}/src_data/wavs/"
        wav_fp = wav_filepath(filestem, prefix)
        filelist.append([wav_fp, sentence])
    if missing_stems:
        # Report placeholders so they are not mistaken for real transcripts.
        print(f"[{dataset}/{spk}] {len(missing_stems)} sample(s) without a sentence "
              f"(placeholder used), e.g. {missing_stems[:5]}")
    v2_filelist_path = FILELISTS_DIR / f"{dataset}/{spk}_v2.txt"
    #write_filelist(v2_filelist_path, filelist, sep='|')

In [None]:
import joblib
import utils_ema.ema_dataset
from pathlib import Path
from paths import DATA_DIR, FILELISTS_DIR
from utils import parse_filelist

dataset = "MSPKA_EMA_ita"
speakers=["cnz", "lls", "olm"]
processed_data_dir = DATA_DIR / dataset / "processed_data"
spkmetadata_filename = "mixed_speaker_metadata_100Hz.joblib"

for spk in speakers:
    #get sentences dict (filestem -> sentence) for the valid ids of this speaker
    # NOTE: joblib.load unpickles arbitrary objects; only load metadata files
    # produced by this project.
    spkmeta = joblib.load(processed_data_dir / f"{spk}/{spkmetadata_filename}")
    ids = spkmeta.list_valid_ids()
    # Built without a top-level loop variable named `id`, which would shadow the
    # builtin for every later cell.
    sentences_dict = {
        sentencemeta.filestem: sentencemeta.sentence
        for sentencemeta in (spkmeta.sentence_info[sentence_id] for sentence_id in ids)
    }
    #get filelist samples
    v1_filelist_path = FILELISTS_DIR / f"{dataset}/{spk}_v1.txt"
    v1_filelist = parse_filelist(v1_filelist_path, split_char='|')
    filelist = []
    missing_stems = []
    for e in v1_filelist:
        wav_fp = e[0]
        filestem = get_filestem(wav_fp)
        if filestem in sentences_dict:
            sentence = sentences_dict[filestem]
        else:
            # Keep the placeholder entry, but record that it happened.
            sentence = "No sentence found"
            missing_stems.append(filestem)
        prefix = f"DUMMY/{dataset}/src_data/{spk}/"
        wav_fp = wav_filepath(filestem, prefix)
        filelist.append([wav_fp, sentence])
    if missing_stems:
        # Report placeholders so they are not mistaken for real transcripts.
        print(f"[{dataset}/{spk}] {len(missing_stems)} sample(s) without a sentence "
              f"(placeholder used), e.g. {missing_stems[:5]}")
    v2_filelist_path = FILELISTS_DIR / f"{dataset}/{spk}_v2.txt"
    #write_filelist(v2_filelist_path, filelist, sep='|')