In [5]:

import numpy as np


# Inspect one training utterance: load its phoneme transcript and its MFCC
# features from the matching .npy files and report how many frames it has.
# NOTE(review): both paths are absolute and machine-specific; point them at the
# local copy of the dataset before running.
phoneme_labels_file = 'E:/cv_data/cmu11785_data/HW3P2_data/11-785-s24-hw3p2/train-clean-100/transcript/19-198-0000.npy'  # phoneme transcript of the utterance
mfcc_features_file = 'E:/cv_data/cmu11785_data/HW3P2_data/11-785-s24-hw3p2/train-clean-100/mfcc/19-198-0000.npy'  # MFCC features of the same utterance

# Read both arrays from disk.
phoneme_labels = np.load(phoneme_labels_file)
mfcc_features = np.load(mfcc_features_file)

# Show the transcript followed by the shape of the feature array.
print("音素标签内容：")
print(phoneme_labels)
# The features come from a single utterance file, and axis 0 is read as the
# frame count just below, so the label names two axes (frames, feature
# dimension) rather than claiming a leading sample axis.
print("\nMFCC数据的形状（帧数，特征维度）：")
print(mfcc_features.shape)

# Axis 0 of the MFCC array is the number of frames.
mfcc_frame_count = mfcc_features.shape[0]
print(f"\nMFCC数据的帧数：{mfcc_frame_count}")

音素标签内容：
['[SOS]' '[SIL]' 'N' 'AO' 'R' 'TH' 'AH' 'N' 'JH' 'ER' 'AE' 'B' 'IY'
 '[SIL]' '[EOS]']

MFCC数据的形状（样本数，帧数，特征维度）：
(192, 27)

MFCC数据的帧数：192


In [7]:
import os
import numpy as np

# Directory holding the phoneme-transcript .npy files of the validation split.
# NOTE(review): absolute, machine-specific path; adjust it to the local dataset copy.
phoneme_labels_dir = 'E:/cv_data/cmu11785_data/HW3P2_data/11-785-s24-hw3p2/dev-clean/transcript/'  # directory of validation transcripts

# Lookup table from phoneme labels (plus the empty string and the
# [SIL]/[SOS]/[EOS] markers) to output symbols. In this cell it is only used
# for membership checks on its keys.
CMUdict_ARPAbet = {
    "" : "-",
    "[SIL]": "|", "NG": "G", "F" : "f", "M" : "m", "AE": "@",
    "R"    : "r", "UW": "u", "N" : "n", "IY": "i", "AW": "W",
    "V"    : "v", "UH": "U", "OW": "o", "AA": "a", "ER": "R",
    "HH"   : "h", "Z" : "z", "K" : "k", "CH": "C", "W" : "w",
    "EY"   : "e", "ZH": "Z", "T" : "t", "EH": "E", "Y" : "y",
    "AH"   : "A", "B" : "b", "P" : "p", "TH": "T", "DH": "D",
    "AO"   : "c", "G" : "g", "L" : "l", "JH": "j", "OY": "O",
    "SH"   : "S", "D" : "d", "AY": "Y", "S" : "s", "IH": "I",
    "[SOS]": "[SOS]", "[EOS]": "[EOS]"
}

# Every distinct label seen across the validation transcripts.
unique_labels = set()

# Walk over every file in the transcript directory.
for phoneme_file in os.listdir(phoneme_labels_dir):
    # Only the .npy transcript files are of interest.
    if not phoneme_file.endswith('.npy'):
        continue

    # Load the label sequence of this utterance.
    phoneme_labels = np.load(os.path.join(phoneme_labels_dir, phoneme_file))

    # Drop a leading [SOS] and a trailing [EOS]. The size checks keep an empty
    # transcript (or one that holds only [SOS]) from raising IndexError.
    if phoneme_labels.size > 0 and phoneme_labels[0] == '[SOS]':
        phoneme_labels = phoneme_labels[1:]
    if phoneme_labels.size > 0 and phoneme_labels[-1] == '[EOS]':
        phoneme_labels = phoneme_labels[:-1]

    # tolist() converts the entries to built-in Python values, so the set (and
    # anything printed from it) holds ordinary str objects instead of NumPy
    # string scalars.
    unique_labels.update(phoneme_labels.tolist())

# Report how many distinct labels were found.
print(f"验证集中所有文件的唯一标签数量（去除[SOS]和[EOS]后）：{len(unique_labels)}")

# Labels that have no entry in CMUdict_ARPAbet; sorted so the report does not
# depend on set iteration order.
missing_labels = sorted(label for label in unique_labels if label not in CMUdict_ARPAbet)

if not missing_labels:
    print("所有标签都存在于 CMUdict_ARPAbet 中。")
else:
    print(f"以下标签不在 CMUdict_ARPAbet 中：{missing_labels}")


验证集中所有文件的唯一标签数量（去除[SOS]和[EOS]后）：40
所有标签都存在于 CMUdict_ARPAbet 中。


In [2]:
import torch
from ym_dataset import *
from ym_config import *

# Build the validation dataset and wrap it in a non-shuffling DataLoader that
# batches with the dataset's own collate function.
val_data = AudioDataset(partition="dev-clean", use_cmn=True, audio_transformation=None)
val_loader = torch.utils.data.DataLoader(
    dataset=val_data,
    batch_size=config["batch_size"],
    shuffle=False,
    collate_fn=val_data.collate_fn,
    num_workers=1,
    persistent_workers=True,
    pin_memory=True,
)

# Distinct label values seen anywhere in the validation batches.
unique_labels = set()

# Each batch is a 4-tuple; only its second element (the labels) is used here.
# NOTE(review): if collate_fn pads y, the pad value is counted as a label as
# well -- confirm against the dataset implementation.
for _, y, _, _ in val_loader:
    # Flatten the batch and merge all of its values into the set in one call.
    unique_labels.update(y.flatten().tolist())

# Report how many distinct labels the validation set contains.
print("Number of unique labels in validation set:", len(unique_labels))

# List the distinct labels in ascending order.
print("Unique labels:", sorted(unique_labels))


100%|██████████| 2703/2703 [00:00<00:00, 3263.59it/s]


2703 2703 2703
Number of unique labels in validation set: 41
Unique labels: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40]


None
