In [2]:
import librosa
import numpy as np
import os
import pandas as pd
import pandas as pd
from tqdm.notebook import tqdm
# import cv2

In [3]:
# Metadata table listing the audio files to convert. Point this at your own CSV
# to transform a different dataset; later cells read its 'absolute_path' and
# 'relative_path' columns.
meta = pd.read_csv("../Data/meta_final.csv")
# meta = pd.read_csv("../Data/11lab.csv")  # alternative: real out-of-distribution (OOD) test set

In [4]:
len(meta)

499778

In [5]:
meta.head()

Unnamed: 0,data_source,speaker,absolute_path,relative_path,label,Set
0,itw,Donald Trump,/home/jupyter/Data/inthewild/release_in_the_wi...,release_in_the_wild/18212.wav,1,train
1,itw,Barack Obama,/home/jupyter/Data/inthewild/release_in_the_wi...,release_in_the_wild/18386.wav,1,train
2,itw,Louis C.K.,/home/jupyter/Data/inthewild/release_in_the_wi...,release_in_the_wild/18240.wav,0,train
3,itw,Winston Churchill,/home/jupyter/Data/inthewild/release_in_the_wi...,release_in_the_wild/18461.wav,0,test
4,itw,Winston Churchill,/home/jupyter/Data/inthewild/release_in_the_wi...,release_in_the_wild/18211.wav,1,test


In [6]:
# Root directory under which the generated .npy files are saved
npy_root = '/home/jupyter/npy/'

In [9]:
class MFCCExtractor:
    """Compute MFCC matrices with a fixed ``(n_mfcc, target_frames)`` shape.

    The raw MFCC matrix returned by ``librosa.feature.mfcc`` is cropped and/or
    zero-padded along both axes so every result has the same shape, then cast
    to ``float32``.
    """

    def __init__(self, target_sr=16000, n_mfcc=40, hop_length=512, n_fft=1024, target_frames=400, **kwargs):
        """Store the extraction settings.

        Args:
            target_sr: Sample rate recorded on the instance (``extract_features``
                uses the ``sr`` argument it receives, not this attribute).
            n_mfcc: Number of MFCC coefficients, i.e. output rows.
            hop_length: Hop length forwarded to ``librosa.feature.mfcc``.
            n_fft: FFT window size forwarded to ``librosa.feature.mfcc``.
            target_frames: Number of time frames, i.e. output columns.
            **kwargs: Extra keyword arguments forwarded verbatim to
                ``librosa.feature.mfcc``.
        """
        self.kwargs = kwargs
        self.target_frames = target_frames
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.n_mfcc = n_mfcc
        self.target_sr = target_sr

    def extract_features(self, audio, sr):
        """Return the MFCCs of ``audio`` as a ``float32`` array of fixed shape.

        Args:
            audio: Waveform passed to ``librosa.feature.mfcc`` as ``y``.
            sr: Sample rate of ``audio``.

        Returns:
            Array of shape ``(self.n_mfcc, self.target_frames)``; axes that were
            too short are zero-padded at the end, axes that were too long are
            truncated.
        """
        features = librosa.feature.mfcc(
            y=audio,
            sr=sr,
            n_mfcc=self.n_mfcc,
            hop_length=self.hop_length,
            n_fft=self.n_fft,
            **self.kwargs
        )
        wanted_rows, wanted_cols = self.n_mfcc, self.target_frames
        have_rows, have_cols = features.shape

        # Crop: slicing is a no-op on any axis that is already short enough.
        features = features[:wanted_rows, :wanted_cols]

        # Pad: append zeros on whichever axes are still too short.
        missing_rows = max(wanted_rows - have_rows, 0)
        missing_cols = max(wanted_cols - have_cols, 0)
        if missing_rows or missing_cols:
            features = np.pad(features, ((0, missing_rows), (0, missing_cols)), mode='constant')

        return features.astype(np.float32)


# Feature-extraction settings (adjust as needed)
target_sr, n_mfcc = 16000, 40
hop_length, n_fft = 512, 1024
target_frames = 400

# Build the MFCCExtractor instance from the settings above
mfcc_extractor = MFCCExtractor(
    target_sr=target_sr,
    n_mfcc=n_mfcc,
    hop_length=hop_length,
    n_fft=n_fft,
    target_frames=target_frames,
)

In [10]:
# 定义处理单个文件的函数
def process_file(row):
    """Convert one audio file to an MFCC matrix and save it as a .npy file.

    The output location mirrors ``row['relative_path']`` under ``npy_root`` with
    the extension replaced by ``.npy`` (e.g. ``release_in_the_wild/18212.wav``
    becomes ``<npy_root>/release_in_the_wild/18212.npy``).

    Args:
        row: Mapping-like record providing ``'absolute_path'`` (audio file to
            read) and ``'relative_path'`` (used to derive the output path).

    Returns:
        ``None`` on success, otherwise a string of the form
        ``"Error processing <absolute_path>: <exception>"``.
    """
    absolute_path = row['absolute_path']

    # Everything that can fail lives inside the try block: this function runs
    # in worker threads, where a raised exception is only stored on the Future
    # and is easy to lose. Returning the message keeps failures visible.
    try:
        relative_path = row['relative_path']  # e.g. "release_in_the_wild/18212.wav"

        # Same relative layout under npy_root, extension swapped for .npy
        new_relative = os.path.splitext(relative_path)[0] + '.npy'
        save_path = os.path.join(npy_root, new_relative)

        # Make sure the destination folder exists
        os.makedirs(os.path.dirname(save_path), exist_ok=True)

        # Load the audio resampled to target_sr and mixed down to mono
        audio, sr = librosa.load(absolute_path, sr=target_sr, mono=True)
        # Extract fixed-shape MFCC features
        mfcc_features = mfcc_extractor.extract_features(audio, sr)
        # Persist as .npy
        np.save(save_path, mfcc_features)
        return None
    except Exception as e:
        return f"Error processing {absolute_path}: {e}"

In [11]:
from concurrent.futures import ThreadPoolExecutor, as_completed

# Convert every file listed in `meta` using a pool of worker threads.
# process_file returns None on success or an error string on failure; those
# strings (and any exception stored on a future) are collected so that failed
# conversions are reported instead of being silently dropped.
processing_errors = []

with ThreadPoolExecutor(max_workers=48) as executor:
    futures = [executor.submit(process_file, row) for _, row in meta.iterrows()]
    for future in tqdm(as_completed(futures), total=len(futures), desc="Processing files"):
        try:
            outcome = future.result()
        except Exception as exc:
            # An exception raised inside the worker only surfaces via result()
            outcome = f"Unexpected worker failure: {exc}"
        if outcome is not None:
            processing_errors.append(outcome)

if processing_errors:
    print(f"Files that failed to process: {len(processing_errors)}")
    for message in processing_errors:
        print(message)

Processing files:   0%|          | 0/631 [00:00<?, ?it/s]

In [12]:
def process_npy_path(row):
    """Return the .npy path under ``npy_root`` that corresponds to ``row``.

    ``row['relative_path']`` has its extension replaced by ``.npy`` and is
    joined onto ``npy_root``, e.g. ``release_in_the_wild/18212.wav`` becomes
    ``<npy_root>/release_in_the_wild/18212.npy``.
    """
    stem, _extension = os.path.splitext(row['relative_path'])
    return os.path.join(npy_root, stem + '.npy')


# Add a column holding the .npy path derived from each row (with a progress bar)
meta['npy_path'] = list(
    map(
        process_npy_path,
        (row for _, row in tqdm(meta.iterrows(), total=len(meta), desc="Processing npy paths")),
    )
)

Processing npy paths:   0%|          | 0/631 [00:00<?, ?it/s]

In [13]:
meta.head()

Unnamed: 0,data_source,speaker,absolute_path,relative_path,label,Set,npy_path
0,11lab,11lab,/home/jupyter/Data/11Lab/2cc45e80-c7a4-4be0-ba...,2cc45e80-c7a4-4be0-baa6-fc007e01df04.wav,0,test,/home/jupyter/npy/11lab/2cc45e80-c7a4-4be0-baa...
1,11lab,11lab,/home/jupyter/Data/11Lab/c88addad-0f07-437b-ac...,c88addad-0f07-437b-acdb-57aeb627edeb.wav,0,test,/home/jupyter/npy/11lab/c88addad-0f07-437b-acd...
2,11lab,11lab,/home/jupyter/Data/11Lab/f47e5bd1-9885-45cb-87...,f47e5bd1-9885-45cb-87e4-1599bac242de.wav,0,test,/home/jupyter/npy/11lab/f47e5bd1-9885-45cb-87e...
3,11lab,11lab,/home/jupyter/Data/11Lab/bf7fdbd6-5391-49ac-b4...,bf7fdbd6-5391-49ac-b447-98f70fc0102d.wav,0,test,/home/jupyter/npy/11lab/bf7fdbd6-5391-49ac-b44...
4,11lab,11lab,/home/jupyter/Data/11Lab/cca038cb-8d77-44ac-99...,cca038cb-8d77-44ac-99a7-0cbf6684e404.wav,0,test,/home/jupyter/npy/11lab/cca038cb-8d77-44ac-99a...


In [14]:
# Collect every path in meta['npy_path'] that does not exist on disk
missing_files = [
    candidate
    for candidate in tqdm(meta['npy_path'], total=len(meta), desc="Checking npy files")
    if not os.path.exists(candidate)
]

# Report the count, then list the missing paths one per line (if any)
print(f"Missing files count: {len(missing_files)}")
if missing_files:
    print("Missing files:")
    print(*missing_files, sep="\n")

Checking npy files:   0%|          | 0/631 [00:00<?, ?it/s]

Missing files count: 0


In [13]:
# output_csv = '/home/jupyter/Data/meta_npy.csv'
# meta.to_csv(output_csv, index=False)
# print(f"Updated CSV saved to {output_csv}")

Updated CSV saved to /home/jupyter/Data/meta_npy.csv


In [15]:
# Write the metadata (now including the 'npy_path' column) to CSV, without the index
output_csv = '../Data/meta_npy.csv'
meta.to_csv(path_or_buf=output_csv, index=False)
print(f"Updated CSV saved to {output_csv}")

Updated CSV saved to /home/jupyter/Data/11lab_npy.csv
