In [42]:
import pandas as pd
import numpy as np
from pathlib import Path
from paths import DATA_DIR, FILELISTS_DIR
from voxcommunis.io import read_alignment, read_manifest
from voxcommunis.data import (PanPhonInventory, FeatureTokenizer, PhoneticFeatureDataset,
                              SAMPLE_RATE, ALIGNMENT_FREQ, MODEL_FREQ, SUBSAMPLE
)
from voxcommunis.decoder import UniqueSegmentFeature, UniversalUniqueSegmentFeature, FeatureDecoder
import matplotlib.pyplot as plt
from tqdm import tqdm
import random
import soundfile as sf


# Folder named "VoxCommunis" located directly under DATA_DIR.
vc_dir = Path(DATA_DIR).joinpath("VoxCommunis")

# Create new manifests for custom datasets

In [44]:
def create_custom_manifest(dataset, root_dir="/lustre/fsn1/projects/rech/rec/commun/data"):
    """Write a tab-separated manifest listing every audio file of ``dataset``.

    The audio paths are taken from ``FILELISTS_DIR/<dataset>/total_v1.txt``:
    for each non-empty line, the text before the first ``|`` is kept and any
    ``DUMMY/`` substring is removed from it. The manifest is written to
    ``DATA_DIR/VoxCommunis/<dataset>/manifests/<dataset>.tsv`` and replaces any
    existing file. Its first line is ``root_dir``; every following line is
    ``<path>\\t<number of audio frames>``.

    Parameters
    ----------
    dataset : str
        Name of the dataset; used both as a folder name and as the file stem.
    root_dir : str, optional
        Text written as the manifest's header line. Presumably the root
        directory the manifest reader resolves the listed paths against --
        TODO confirm. Note that the frame counts are read from
        ``DATA_DIR / <path>``, independently of this value.

    Side effects
    ------------
    Creates the ``manifests`` and ``alignments`` folders of the dataset if
    they do not exist yet (the alignment file itself is not written here).
    """
    filelist_fp = FILELISTS_DIR / f"{dataset}/total_v1.txt"
    manifest_fp = DATA_DIR / f"VoxCommunis/{dataset}/manifests/{dataset}.tsv"
    alignment_fp = DATA_DIR / f"VoxCommunis/{dataset}/alignments/{dataset}.align"
    alignment_fp.parent.mkdir(parents=True, exist_ok=True)
    manifest_fp.parent.mkdir(parents=True, exist_ok=True)

    with open(filelist_fp) as f:
        # Drop blank lines: an empty entry would turn into the path "." and
        # make the audio lookup below fail on a directory.
        entries = [line.strip() for line in f if line.strip()]
    filepaths = [
        str(Path(entry.split("|")[0])).replace("DUMMY/", "")
        for entry in entries
    ]

    lines = [f"{root_dir}\n"]
    for fp in filepaths:
        # Only the frame count is needed, so read the file header instead of
        # decoding the whole signal into memory.
        n_frames = sf.info(DATA_DIR / fp).frames
        lines.append(f"{fp}\t{n_frames}\n")

    with open(manifest_fp, 'w') as f:
        f.writelines(lines)
    
# Build one manifest per custom dataset, in this order.
for dataset_name in ("pb2007", "mocha_timit", "MSPKA_EMA_ita", "MNGU0"):
    create_custom_manifest(dataset_name)

# Create alignments for the new manifests

In [58]:
def create_custom_alignments(dataset):
    """Write the frame-level alignment file for every sample of ``dataset``.

    Sample paths are read from the first column of
    ``DATA_DIR/VoxCommunis/<dataset>/manifests/<dataset>.tsv`` (the first line
    of that file is skipped as a header). For each sample, the matching
    segmentation is loaded from the path obtained by replacing ``/wavs/`` with
    ``/phnm3/`` and ``.wav`` with ``_phnm3.npy``; each of its rows is unpacked
    as ``(start, end, label)``.

    Every segment is expanded into one label per alignment frame, where the
    frame index of a boundary is ``round(time * ALIGNMENT_FREQ)``. The label
    ``"."`` is written as ``"SIL"``. One line per sample is written to
    ``DATA_DIR/VoxCommunis/<dataset>/alignments/<dataset>.align`` in the form
    ``<file stem>\\t<label> <label> ...``.

    The output file is overwritten, so calling this function again does not
    duplicate entries.

    Parameters
    ----------
    dataset : str
        Name of the dataset; used both as a folder name and as the file stem.
    """
    manifest_fp = DATA_DIR / f"VoxCommunis/{dataset}/manifests/{dataset}.tsv"
    alignment_fp = DATA_DIR / f"VoxCommunis/{dataset}/alignments/{dataset}.align"
    alignment_fp.parent.mkdir(parents=True, exist_ok=True)
    with open(manifest_fp, 'r') as f:
        lines = f.readlines()
    # Skip the header line and ignore blank lines.
    wav_fps = [e.strip().split("\t")[0] for e in lines[1:] if e.strip()]

    # Open the output once in write mode: appending would duplicate every
    # sample each time the cell is re-run.
    with open(alignment_fp, 'w') as f:
        for wav_fp in wav_fps:
            sample_id = Path(wav_fp).stem
            phnm3_fp = wav_fp.replace("/wavs/", "/phnm3/")
            phnm3_fp = phnm3_fp.replace(".wav", "_phnm3.npy")
            phnm3 = np.load(DATA_DIR / phnm3_fp)

            labels = []
            for (start, end, phnm) in phnm3:
                # Convert both boundaries to frame indices before subtracting.
                # Truncating the duration instead loses frames to float error
                # (e.g. 0.3 - 0.2 at 100 Hz gives 9, not 10) and the loss adds
                # up over the segments of an utterance.
                # assumes start/end and ALIGNMENT_FREQ use matching units
                # (e.g. seconds and Hz) -- TODO confirm
                first_frame = int(round(float(start) * ALIGNMENT_FREQ))
                last_frame = int(round(float(end) * ALIGNMENT_FREQ))
                if phnm == ".":
                    phnm = "SIL"
                # A non-positive count yields no labels for this segment.
                labels.extend([phnm] * (last_frame - first_frame))

            line = (sample_id + '\t' + ' '.join(labels)).strip() + '\n'
            f.write(line)

# Build the alignment file of each custom dataset, in this order.
for dataset_name in ("pb2007", "mocha_timit", "MSPKA_EMA_ita", "MNGU0"):
    create_custom_alignments(dataset_name)