In [None]:
from paths import DATA_DIR, LJLISTS_DIR, LOGS_DIR, FILELISTS_DIR
from pathlib import Path
import numpy as np
import re
import torch

from text import cmudict
from utils import parse_filelist, intersperse

from text.converters import ipa_to_ternary, text_to_ipa, traits_list
from panphon import FeatureTable

# CMU pronouncing dictionary handed to text_to_ipa in the cells below.
cmudict_path = "resources/cmu_dictionary"
dictionary_cmu = cmudict.CMUDict(cmudict_path)
cleaner_names = ["english_cleaners_v2"]

# panphon feature table; its validate_word() is used below to check IPA strings.
ft = FeatureTable()

## MNGU0

In [None]:
# Directory holding the MNGU0 speaker "s1" phone-label files.
mngu0_dir = DATA_DIR / 'MNGU0' / "src_data" / "s1" / "phone_labels"

# sorted() already returns a list, so the glob iterators are passed directly.
utt_files = sorted(mngu0_dir.glob('*.utt'))
lab_files = sorted(mngu0_dir.glob('*.lab'))

In [None]:
# Use the first .utt / .lab file (in sorted order) as the worked example.
utt_file, lab_file = utt_files[0], lab_files[0]

In [None]:
from utils_dataset.mngu0 import mngu02ipa, get_mngu0_sentence, get_mngu0_phnm3

sentence = get_mngu0_sentence(utt_file)
ipa_phnm3 = get_mngu0_phnm3(lab_file)

# The third field of every entry is the phone label; all labels are joined
# into a single "%"-separated string before being embedded.
phone_labels = [entry[2] for entry in ipa_phnm3]
ipawords_list = ['%'.join(phone_labels)]
ternary_emb_phnm = torch.FloatTensor(
    ipa_to_ternary(ipawords_list, merge_diphtongues=True)
).T

print(f"Sentence: {sentence}")
print(f"IPA Phones: {ipa_phnm3}")
print(f"IPA Phones: {len(ipa_phnm3)}")
print(f"Ternary Phones: {ternary_emb_phnm.shape}")

In [None]:
# Embed the same sentence starting from its orthographic text instead of the
# aligned phone labels.
text = sentence
add_blank = False  # insert a " " token between IPA symbols when True
merge_diphtongues = True  # forwarded to ipa_to_ternary

ipawords_list = text_to_ipa(
    text,
    dictionary=dictionary_cmu,
    cleaner_names=["english_cleaners_v2"],
    remove_punctuation=False,
)
if add_blank:
    ipawords_list = intersperse(ipawords_list, " ")

# Transposed so that the sequence axis comes last.
ternary_emb = torch.FloatTensor(
    ipa_to_ternary(ipawords_list, merge_diphtongues=merge_diphtongues)
).T
ternary_emb.shape

In [None]:
# Every MNGU0 -> IPA mapping value should embed as exactly one row; report the rest.
for k, v in mngu02ipa.items():
    emb = ipa_to_ternary([v], merge_diphtongues=True)
    if emb.shape[0] == 1:
        continue
    print(f"emb shape: {emb.shape}")
    print(f"Invalid IPA: {v} for MNGU0 phone {k}")

## Mocha

In [None]:
mocha_dir = DATA_DIR / 'mocha_timit'

# Transcription (.trans) and phone-label (.phnm) files, in sorted order.
mocha_trans_files = sorted(mocha_dir.glob('*.trans'))
mocha_phnm_files = sorted(mocha_dir.glob('*.phnm'))

# First file of each kind serves as the worked example.
trans_file, phnm_file = mocha_trans_files[0], mocha_phnm_files[0]

from utils_dataset.mocha import get_mocha_sentence, get_mocha_phnm3

# Load the example sentence and its phone entries, then show both.
sentence = get_mocha_sentence(trans_file)
ipa_phnm3 = get_mocha_phnm3(phnm_file)
print(f"Sentence: {sentence}", f"Phonemes: {ipa_phnm3}", sep="\n")

In [None]:
# Join the phone labels (third field of each entry) into one "%"-separated
# string and embed it.
phone_labels = [entry[2] for entry in ipa_phnm3]
ipawords_list = ['%'.join(phone_labels)]
ternary_emb_phnm = torch.FloatTensor(
    ipa_to_ternary(ipawords_list, merge_diphtongues=True)
).T

print(f"Sentence: {sentence}")
print(f"IPA Phones: {ipa_phnm3}")
print(f"IPA Phones: {len(ipa_phnm3)}")
print(f"Ternary Phones: {ternary_emb_phnm.shape}")

In [None]:
# Embed the Mocha example sentence starting from its orthographic text.
text = sentence
add_blank = False  # insert a " " token between IPA symbols when True
merge_diphtongues = True  # forwarded to ipa_to_ternary

ipawords_list = text_to_ipa(
    text,
    dictionary=dictionary_cmu,
    cleaner_names=["english_cleaners_v2"],
    remove_punctuation=False,
)
if add_blank:
    ipawords_list = intersperse(ipawords_list, " ")

# Transposed so that the sequence axis comes last.
ternary_emb = torch.FloatTensor(
    ipa_to_ternary(ipawords_list, merge_diphtongues=merge_diphtongues)
).T
ternary_emb.shape

In [None]:
# Gather every distinct phone label found in the preprocessed Mocha phnm3
# arrays and report the ones panphon does not accept ("." is exempt).
phonems = set()
mocha_phnm_files = sorted(mocha_dir.glob('*arttts/*/phnm3/*_phnm3.npy'))
for phnm_file in mocha_phnm_files:
    phnm3 = np.load(phnm_file)
    for s, e, phone in phnm3:
        phonems.add(phone)
        if phone == ".":
            continue
        if not ft.validate_word(phone):
            print(f"Invalid IPA: {phone} in file {phnm_file.name}")

In [None]:
# Every phone collected from the Mocha phnm3 files should embed as exactly one
# row. The message names only the phone itself: this loop has no mapping key,
# and the previous text relied on a `k` left over from the MNGU0 cell.
for v in phonems:
    emb = ipa_to_ternary([v], merge_diphtongues=True)
    if emb.shape[0] != 1:
        print(f"emb shape: {emb.shape}")
        print(f"Invalid IPA: {v} in Mocha phoneme set")

## MSPKA

In [None]:
from utils_dataset.mspka import get_mspka_sentence, get_mspka_phnm3, mspka2ipa

mspka_dir = DATA_DIR / 'MSPKA_EMA_ita'

# Phone-label files, plus whatever matches the literal name 'list_sentences'.
mspka_lab_files = sorted(mspka_dir.glob('*.lab'))
mspka_sent_files = sorted(mspka_dir.glob('list_sentences'))

lab_file = mspka_lab_files[0]  # example label file
sent_file = mspka_sent_files[0]  # example sentence-list file

# NOTE(review): later cells call get_mspka_sentence(lab_file)[0] instead of
# passing sent_file -- confirm which argument the helper expects.
sentence = get_mspka_sentence(sent_file)
ipa_phnm3 = get_mspka_phnm3(lab_file)

#with open(sent_file, 'r', encoding='utf-8') as f:
#    sentence = f.read().strip()
#print(f"Sentence from file: {sentence}")

#mspka_phnms = set()
#
#for lab_file in mspka_lab_files:
#    sent, phnm3 = get_mspka_sentence_phnm3(lab_file)
#    for s, e, phone in phnm3:
#        mspka_phnms.add(phone)
#mspka_phnms

In [None]:
# Report every MSPKA phone that panphon does not accept ("." is exempt).
# `sentence` and `lab_file` stay bound to the last file after the loop.
for lab_file in mspka_lab_files:
    sentence = get_mspka_sentence(lab_file)[0]
    phnm3 = get_mspka_phnm3(lab_file)
    for s, e, phone in phnm3:
        if phone == ".":
            continue
        if not ft.validate_word(phone):
            print(f"Invalid IPA: {phone} in file {lab_file.name}")

In [None]:
# Join the phone labels (third field of each entry) into one "%"-separated
# string and embed it.
phone_labels = [entry[2] for entry in ipa_phnm3]
ipawords_list = ['%'.join(phone_labels)]
ternary_emb_phnm = torch.FloatTensor(
    ipa_to_ternary(ipawords_list, merge_diphtongues=True)
).T

print(f"Sentence: {sentence}")
print(f"IPA Phones: {ipa_phnm3}")
print(f"IPA Phones: {len(ipa_phnm3)}")
print(f"Ternary Phones: {ternary_emb_phnm.shape}")

In [None]:
# Every MSPKA -> IPA mapping value should embed as exactly one row; report the
# rest. The check only inspects the mapping, so it no longer rebinds the
# notebook-level `sentence` from a leftover `lab_file`.
for k, v in mspka2ipa.items():
    emb = ipa_to_ternary([v], merge_diphtongues=True)
    if emb.shape[0] != 1:
        print(f"emb shape: {emb.shape}")
        print(f"Invalid IPA: {v} for MSPKA phone {k}")


In [None]:
# Collect the sentence and phone array of every MSPKA file that contains at
# least one phone (other than ".") whose embedding is not exactly one row.
sentences, phnm3s = [], []
for lab_file in mspka_lab_files:
    sentence = get_mspka_sentence(lab_file)[0]
    phnm3 = get_mspka_phnm3(lab_file)
    has_invalid_phone = False
    for s, e, phone in phnm3:
        emb = ipa_to_ternary([phone], merge_diphtongues=True)
        if phone == "." or emb.shape[0] == 1:
            continue
        print(f"Invalid IPA: {phone} in file {lab_file.name}")
        print(sentence)
        has_invalid_phone = True
    if has_invalid_phone:
        sentences.append(sentence)
        phnm3s.append(phnm3)

## PB2007

In [None]:
pb_dir = DATA_DIR / 'pb2007'
# All PB2007 phone-label files, in sorted order.
pb_phone_files = sorted(pb_dir.glob('*.phone'))

from utils_dataset.pb2007 import pb20072ipa, get_pb2007_phnm3

In [None]:
def get_pb2007_phnm3_ori(phone_file: str, frames_per_second: float = 100.0) -> np.ndarray:
    """Read a PB2007 ``.phone`` label file into a structured array.

    Each usable line holds three whitespace-separated fields: start frame,
    end frame and phone label. Frame indices are converted to seconds by
    dividing by ``frames_per_second``. Blank lines and lines that do not have
    exactly three fields are skipped.

    Args:
        phone_file: Path of the label file (anything ``open`` accepts); it is
            read as UTF-8.
        frames_per_second: Divisor applied to the frame indices. Defaults to
            100, the value that was previously hard-coded.

    Returns:
        A 1-D structured array with fields ``start`` (f4), ``end`` (f4) and
        ``phone`` (U10, so labels longer than 10 characters are truncated).

    Raises:
        ValueError: If a start or end field of a three-field line is not numeric.
    """
    phnm3 = []
    with open(phone_file, "r", encoding="utf-8") as f:
        for line in f:
            # split() with no argument tolerates tabs and repeated spaces,
            # which split(" ") turned into extra fields (dropping the line).
            fields = line.split()
            if len(fields) != 3:
                continue
            s_frame, e_frame, phone = fields
            phnm3.append(
                (
                    float(s_frame) / frames_per_second,
                    float(e_frame) / frames_per_second,
                    phone,
                )
            )
    return np.array(phnm3, dtype=[("start", "f4"), ("end", "f4"), ("phone", "U10")])

In [None]:
# Keep the phone array and path of every PB2007 file that contains the raw
# label "e~".
phnm3s, phone_files = [], []
for phone_file in pb_phone_files:
    phnm3 = get_pb2007_phnm3_ori(phone_file)
    if any(phone == "e~" for _, _, phone in phnm3):
        phnm3s.append(phnm3)
        phone_files.append(phone_file)

In [None]:
# Use the last phone file (in sorted order) as the worked example.
phone_file = pb_phone_files[-1]
ipa_phnm3 = get_pb2007_phnm3(phone_file)
print(f"Phonemes: {ipa_phnm3}")

# Join the phone labels (third field of each entry) into one "%"-separated
# string and embed it.
phone_labels = [entry[2] for entry in ipa_phnm3]
ipawords_list = ['%'.join(phone_labels)]
ternary_emb_phnm = torch.FloatTensor(
    ipa_to_ternary(ipawords_list, merge_diphtongues=True)
).T

print(f"IPA Phones: {ipa_phnm3}")
print(f"IPA Phones: {len(ipa_phnm3)}")
print(f"Ternary Phones: {ternary_emb_phnm.shape}")

In [None]:
# Every PB2007 -> IPA mapping value should embed as exactly one row; report the
# rest, naming the right corpus in the message.
for k, v in pb20072ipa.items():
    emb = ipa_to_ternary([v], merge_diphtongues=True)
    if emb.shape[0] != 1:
        print(f"emb shape: {emb.shape}")
        print(f"Invalid IPA: {v} for PB2007 phone {k}")

## LJSpeech

In [None]:
from configs import params_v1

# CMU dictionary used by get_phonemes (same resource path as the setup cell).
cmudict_path = "resources/cmu_dictionary"
dictionary_cmu = cmudict.CMUDict(cmudict_path)

# Diphthong handling comes from the v1 training configuration.
merge_diphtongues = params_v1.merge_diphtongues

def get_phonemes(
        text: str, add_blank: bool = False, # False to stay uniform with the other aligned datasets, which use no blanks
    ) -> list:  # list of str symbols (result of str.split), not a tensor
    """Convert text into a flat list of symbols framed by "." markers.

    The text is passed through ``text_to_ipa`` with the notebook-level
    ``dictionary_cmu`` and punctuation removal enabled. The resulting items
    are joined with "%", a leading and a trailing "." item are added, and the
    string is split on "%" again, so any "%" inside an item also acts as a
    separator.

    Args:
        text: Input text to convert.
        add_blank: When True, ``intersperse`` is applied to the IPA items with
            " " as the filler before joining.

    Returns:
        The list of strings produced by splitting on "%"; its first and last
        elements are ".".
    """
    ipawords_list = text_to_ipa(
        text,
        dictionary=dictionary_cmu,
        cleaner_names=["english_cleaners_v2"],
        remove_punctuation=True, # Remove punctuation from the text to stay uniform with the other aligned datasets
    )
    if add_blank:
        ipawords_list = intersperse(ipawords_list, " ")
    phnm_string = '%'.join(ipawords_list)
    # NOTE(review): an empty ipawords_list yields [".", "", "."] here -- confirm
    # whether callers can receive text that produces no IPA items.
    phnm_string = ".%" + phnm_string + "%."
    return phnm_string.split("%")

In [None]:
#from paths import FILELISTS_DIR
#
#filelist_fp = FILELISTS_DIR / "ljspeech" / "valid_v0.txt"
#lines = parse_filelist(filelist_fp)
#save_dir = DATA_DIR / "LJSpeech-1.1" / "phnm3"
#for fp, text in lines:
#    filestem = Path(fp).stem
#    ipawords_list = get_phonemes(text, add_blank=False)
#    phnm3_name = filestem + "_phnm3.npy"
#    phones = []
#    for phone in ipawords_list:
#        start_time = float("nan")
#        end_time = float("nan")
#        phones.append((start_time, end_time, phone))
#    phones = np.array(phones, dtype=[("start", "f4"), ("end", "f4"), ("phone", "U10")])
#    phnm3_fp = save_dir / phnm3_name
#    np.save(phnm3_fp, phones)

## Dataset, dataloader

In [None]:
from data_phnm import PhnmArticDataset, PhnmArticBatchCollate
from torch.utils.data import DataLoader
from paths import DATA_DIR

from configs import params_v1

batch_size = 1

# Keyword arguments shared by the train and validation datasets.
dataset_kwargs = dict(
    data_root_dir=DATA_DIR,
    load_coder=False,
    shuffle=False,
    merge_diphtongues=True,
)
train_dataset = PhnmArticDataset(params_v1.train_filelist_path, **dataset_kwargs)
valid_dataset = PhnmArticDataset(params_v1.valid_filelist_path, **dataset_kwargs)

# Keep only the first 10 entries of each split for this quick check.
for dataset in (train_dataset, valid_dataset):
    dataset.filepaths_list = dataset.filepaths_list[:10]
print("train_size", len(train_dataset), "valid_size", len(valid_dataset))

batch_collate = PhnmArticBatchCollate()

# Keyword arguments shared by both loaders.
loader_kwargs = dict(
    batch_size=batch_size,
    collate_fn=batch_collate,
    drop_last=True,
    shuffle=False,
)
train_loader = DataLoader(dataset=train_dataset, **loader_kwargs)
valid_loader = DataLoader(dataset=valid_dataset, **loader_kwargs)

In [None]:
import torch
from model import ArtTTS
from configs import params_v0

# No speaker-embedding size is passed when the config declares one speaker.
# NOTE(review): the datasets above are built from params_v1 while the model
# uses params_v0 -- confirm the two configurations are meant to be mixed.
spk_emb_dim = None if params_v0.n_spks == 1 else params_v0.spk_emb_dim

model = ArtTTS(
    params_v0.n_ipa_feats,
    params_v0.n_spks,
    spk_emb_dim,
    params_v0.n_enc_channels,
    params_v0.filter_channels,
    params_v0.filter_channels_dp,
    params_v0.n_heads,
    params_v0.n_enc_layers,
    params_v0.enc_kernel,
    params_v0.enc_dropout,
    params_v0.window_size,
    params_v0.n_feats,
    params_v0.dec_dim,
    params_v0.beta_min,
    params_v0.beta_max,
    params_v0.pe_scale,
)

# Load the checkpoint onto the CPU and restore the weights.
version = 'v0_es_ema_200'
grad_filename = 'grad_10.pt'
ckpt_path = LOGS_DIR / version / grad_filename
ckpt_state_dict = torch.load(ckpt_path, map_location=torch.device('cpu'))
model.load_state_dict(ckpt_state_dict)

In [None]:
# Run the model on the first validation batch only, without gradients.
model.eval()
with torch.no_grad():
    for i, item in enumerate(valid_loader):
        x = item["x"].to(torch.float32)
        # A single length entry taken from the last axis of x.
        last_axis_len = x.shape[-1]
        x_lengths = torch.LongTensor([last_axis_len])
        y_enc, y_dec, attn = model(x, x_lengths, n_timesteps=50)
        break  # one batch is enough for this smoke test

## Just in case: archived filelist-generation snippets

In [None]:
#mngu0_dir = LJLISTS_DIR / "../mngu0"
#mocha_dir = LJLISTS_DIR / "../mocha"
#mspka_dir = LJLISTS_DIR / "../mspka"
#pb2007_dir = LJLISTS_DIR / "../pb2007"
#
#flist_dir = pb2007_dir
#
#with open(flist_dir / "file_list.txt", "r") as f:
#    lines = [line.strip() for line in f.readlines()]
#
#data_prefix = "DUMMY/"
#data_prefix += "pb2007" # or "mocha_timit" or "MSPKA_EMA_ita" or "pb2007"
#data_prefix += "/arttts"
#
#new_lines = []
#for line in lines:
#    _, spk, phnm3, filename = line.split("/")
#    spk_prefix = data_prefix + "/" + spk
#    phnm3_prefix = spk_prefix + "/phnm3"
#    phnm3_fp = phnm3_prefix +"/" + filename
#    wav_fp = spk_prefix + "/wavs" + "/" + filename.replace("_phnm3.npy", ".wav")
#    new_line = f"{wav_fp}|{phnm3_fp}"
#    new_lines.append(new_line)
#new_lines = sorted(new_lines)
#
#with open(flist_dir / "total_v1.txt", "w") as f:
#    for line in new_lines:
#        f.write(line + "\n")
#print(f"Total lines: {len(new_lines)}")

In [None]:
#import numpy as np
#
#dataset = "pb2007"
#for speaker in ["spk1"]:
#    with open(f"resources/filelists/{dataset}/total_v1.txt", "r") as f:
#        total_v1 = f.read().splitlines()
#    total_v1 = np.array(total_v1)
#    speakers = np.array([e.split("/")[3] for e in total_v1])
#    speaker_v1 = total_v1[np.where(np.array(speakers) == speaker)[0]]
#
#    with open(f"resources/filelists/{dataset}/{speaker}_v1.txt", "w") as file:
#        file.writelines([e + '\n' for e in speaker_v1])