In [12]:
import librosa
import librosa.display
from glob import glob
from tqdm import tqdm
import pandas as pd
import numpy as np
import os

In [2]:
def extract_mfcc(file_path, max_pad_len=44, n_mfcc=13):
    """Load an audio file and return its MFCC matrix with a fixed frame count.

    Parameters
    ----------
    file_path : str
        Path of the audio file; passed straight to ``librosa.load``.
    max_pad_len : int, default 44
        Number of frames (columns) in the returned matrix. Longer inputs are
        truncated, shorter ones are zero-padded on the right.
    n_mfcc : int, default 13
        Number of MFCC coefficients (rows) to compute.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_mfcc, max_pad_len)``.
    """
    # Load as mono; sr=None keeps the file's native sampling rate.
    audio, sample_rate = librosa.load(file_path, mono=True, sr=None)
    # Compute the MFCCs (rows are coefficients, columns are frames).
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

    n_frames = mfccs.shape[1]
    if n_frames > max_pad_len:
        # Too long: keep only the first max_pad_len frames.
        return mfccs[:, :max_pad_len]
    # Too short (or exact): append zero-valued frames up to max_pad_len.
    missing = max_pad_len - n_frames
    return np.pad(mfccs, pad_width=((0, 0), (0, missing)), mode='constant')

In [5]:
# Collect the male recordings.
# - os.path.join builds the pattern with the platform's separator (on Windows
#   the resulting paths are the same backslash-separated strings as before).
# - "*.wav" skips any non-audio file that happens to sit in the folder.
# - sorted() pins the order, since glob() returns matches in arbitrary order.
male_wav_files = sorted(glob(os.path.join("voice_source", "male_wav", "*.wav")))
print(len(male_wav_files))
male_wav_files

1467


['voice_source\\male_wav\\M_a.wav',
 'voice_source\\male_wav\\M_a2.wav',
 'voice_source\\male_wav\\M_a4.wav',
 'voice_source\\male_wav\\M_a5.wav',
 'voice_source\\male_wav\\M_ai.wav',
 'voice_source\\male_wav\\M_ai2.wav',
 'voice_source\\male_wav\\M_ai3.wav',
 'voice_source\\male_wav\\M_ai4.wav',
 'voice_source\\male_wav\\M_an.wav',
 'voice_source\\male_wav\\M_an2.wav',
 'voice_source\\male_wav\\M_an3.wav',
 'voice_source\\male_wav\\M_an4.wav',
 'voice_source\\male_wav\\M_ang.wav',
 'voice_source\\male_wav\\M_ang2.wav',
 'voice_source\\male_wav\\M_ang3.wav',
 'voice_source\\male_wav\\M_ang4.wav',
 'voice_source\\male_wav\\M_ao.wav',
 'voice_source\\male_wav\\M_ao2.wav',
 'voice_source\\male_wav\\M_ao3.wav',
 'voice_source\\male_wav\\M_ao4.wav',
 'voice_source\\male_wav\\M_ba.wav',
 'voice_source\\male_wav\\M_ba2.wav',
 'voice_source\\male_wav\\M_ba3.wav',
 'voice_source\\male_wav\\M_ba4.wav',
 'voice_source\\male_wav\\M_ba5.wav',
 'voice_source\\male_wav\\M_bai.wav',
 'voice_source\\ma

In [8]:
# Extract one MFCC matrix per male recording (tqdm draws the progress bar)
# and combine the per-file results into a single NumPy array.
male_mfcc_list = np.array(
    [extract_mfcc(wav_path) for wav_path in tqdm(male_wav_files)]
)

print(male_mfcc_list.shape)
male_mfcc_list

100%|██████████| 1467/1467 [00:13<00:00, 112.51it/s]

(1467, 13, 44)





array([[[-5.97088501e+02, -5.76611755e+02, -5.41859863e+02, ...,
         -1.52202866e+02, -1.52455399e+02, -1.55038544e+02],
        [ 0.00000000e+00,  2.79701996e+01,  6.61757050e+01, ...,
          1.33019501e+02,  1.28277695e+02,  1.25102844e+02],
        [ 0.00000000e+00,  2.52507095e+01,  4.12896652e+01, ...,
         -7.65459442e+01, -7.60506439e+01, -7.45947571e+01],
        ...,
        [ 0.00000000e+00,  4.11769056e+00,  2.98013878e+00, ...,
         -1.73794479e+01, -1.73384228e+01, -1.66952801e+01],
        [ 0.00000000e+00,  3.45481348e+00,  2.06819582e+00, ...,
          1.61053467e+01,  1.42304916e+01,  1.29429541e+01],
        [ 0.00000000e+00,  3.15862393e+00,  4.72926569e+00, ...,
         -5.84750843e+00, -5.16071129e+00, -4.71365595e+00]],

       [[-6.09800903e+02, -5.88575378e+02, -5.10379333e+02, ...,
         -1.47877945e+02, -1.44981659e+02, -1.40031845e+02],
        [ 0.00000000e+00,  2.51225815e+01,  7.75164719e+01, ...,
          1.45634277e+02,  1.49698425e

In [10]:
# Derive the output .npy path of every male recording by rewriting its input
# path: "voice_" -> "mfcc_", "male_wav" -> "male_mfcc", ".wav" -> ".npy".
male_mfcc_npy_paths = [
    wav_path.replace("voice_", "mfcc_")
            .replace("male_wav", "male_mfcc")
            .replace(".wav", ".npy")
    for wav_path in male_wav_files
]

print(len(male_mfcc_npy_paths))
male_mfcc_npy_paths

1467


['mfcc_source\\male_mfcc\\M_a.npy',
 'mfcc_source\\male_mfcc\\M_a2.npy',
 'mfcc_source\\male_mfcc\\M_a4.npy',
 'mfcc_source\\male_mfcc\\M_a5.npy',
 'mfcc_source\\male_mfcc\\M_ai.npy',
 'mfcc_source\\male_mfcc\\M_ai2.npy',
 'mfcc_source\\male_mfcc\\M_ai3.npy',
 'mfcc_source\\male_mfcc\\M_ai4.npy',
 'mfcc_source\\male_mfcc\\M_an.npy',
 'mfcc_source\\male_mfcc\\M_an2.npy',
 'mfcc_source\\male_mfcc\\M_an3.npy',
 'mfcc_source\\male_mfcc\\M_an4.npy',
 'mfcc_source\\male_mfcc\\M_ang.npy',
 'mfcc_source\\male_mfcc\\M_ang2.npy',
 'mfcc_source\\male_mfcc\\M_ang3.npy',
 'mfcc_source\\male_mfcc\\M_ang4.npy',
 'mfcc_source\\male_mfcc\\M_ao.npy',
 'mfcc_source\\male_mfcc\\M_ao2.npy',
 'mfcc_source\\male_mfcc\\M_ao3.npy',
 'mfcc_source\\male_mfcc\\M_ao4.npy',
 'mfcc_source\\male_mfcc\\M_ba.npy',
 'mfcc_source\\male_mfcc\\M_ba2.npy',
 'mfcc_source\\male_mfcc\\M_ba3.npy',
 'mfcc_source\\male_mfcc\\M_ba4.npy',
 'mfcc_source\\male_mfcc\\M_ba5.npy',
 'mfcc_source\\male_mfcc\\M_bai.npy',
 'mfcc_source\\mal

In [13]:
# Save every male MFCC matrix to its own .npy file.
# os.path.join keeps the folder name portable; on Windows it is the same
# backslash-separated string as before.
dir_path = os.path.join("mfcc_source", "male_mfcc")
print(f"'{dir_path}' exiet? {os.path.exists(dir_path)}")
if not os.path.exists(dir_path):
    # makedirs also creates a missing parent folder (os.mkdir would raise
    # FileNotFoundError), and exist_ok tolerates the folder appearing between
    # the check above and this call.
    os.makedirs(dir_path, exist_ok=True)
    print(f"make '{dir_path}'")

# Paths and matrices are paired by position, so their counts must match;
# fail loudly instead of silently writing a partial or misaligned set.
assert len(male_mfcc_npy_paths) == len(male_mfcc_list), \
    "male_mfcc_npy_paths and male_mfcc_list have different lengths"

for file_path, mfcc in zip(tqdm(male_mfcc_npy_paths), male_mfcc_list):
    np.save(file=file_path, arr=mfcc)

'mfcc_source\male_mfcc' exiet? False
make 'mfcc_source\male_mfcc'


100%|██████████| 1467/1467 [00:00<00:00, 3980.68it/s]


In [6]:
# Collect the female recordings.
# - os.path.join builds the pattern with the platform's separator (on Windows
#   the resulting paths are the same backslash-separated strings as before).
# - "*.wav" skips any non-audio file that happens to sit in the folder.
# - sorted() pins the order, since glob() returns matches in arbitrary order.
female_wav_files = sorted(glob(os.path.join("voice_source", "female_wav", "*.wav")))
print(len(female_wav_files))
female_wav_files

1467


['voice_source\\female_wav\\F_a.wav',
 'voice_source\\female_wav\\F_a2.wav',
 'voice_source\\female_wav\\F_a4.wav',
 'voice_source\\female_wav\\F_a5.wav',
 'voice_source\\female_wav\\F_ai.wav',
 'voice_source\\female_wav\\F_ai2.wav',
 'voice_source\\female_wav\\F_ai3.wav',
 'voice_source\\female_wav\\F_ai4.wav',
 'voice_source\\female_wav\\F_an.wav',
 'voice_source\\female_wav\\F_an2.wav',
 'voice_source\\female_wav\\F_an3.wav',
 'voice_source\\female_wav\\F_an4.wav',
 'voice_source\\female_wav\\F_ang.wav',
 'voice_source\\female_wav\\F_ang2.wav',
 'voice_source\\female_wav\\F_ang3.wav',
 'voice_source\\female_wav\\F_ang4.wav',
 'voice_source\\female_wav\\F_ao.wav',
 'voice_source\\female_wav\\F_ao2.wav',
 'voice_source\\female_wav\\F_ao3.wav',
 'voice_source\\female_wav\\F_ao4.wav',
 'voice_source\\female_wav\\F_ba.wav',
 'voice_source\\female_wav\\F_ba2.wav',
 'voice_source\\female_wav\\F_ba3.wav',
 'voice_source\\female_wav\\F_ba4.wav',
 'voice_source\\female_wav\\F_ba5.wav',
 'voic

In [14]:
# Extract one MFCC matrix per female recording (tqdm draws the progress bar)
# and combine the per-file results into a single NumPy array.
female_mfcc_list = np.array(
    [extract_mfcc(wav_path) for wav_path in tqdm(female_wav_files)]
)

print(female_mfcc_list.shape)
female_mfcc_list

100%|██████████| 1467/1467 [00:09<00:00, 151.62it/s]


(1467, 13, 44)


array([[[-5.68577087e+02, -5.68577087e+02, -5.65725281e+02, ...,
         -3.27552246e+02, -3.54149933e+02, -3.70087219e+02],
        [ 0.00000000e+00,  0.00000000e+00,  4.02585506e+00, ...,
          1.31978851e+02,  1.18856598e+02,  1.13728714e+02],
        [ 0.00000000e+00,  0.00000000e+00,  4.00415897e+00, ...,
         -9.31504669e+01, -8.27313080e+01, -7.43676453e+01],
        ...,
        [ 0.00000000e+00,  0.00000000e+00,  3.35119772e+00, ...,
         -1.07087851e+01, -6.95665884e+00, -9.89547634e+00],
        [ 0.00000000e+00,  0.00000000e+00,  3.21864986e+00, ...,
          1.83107319e+01,  1.77170315e+01,  1.52893028e+01],
        [ 0.00000000e+00,  0.00000000e+00,  3.07752848e+00, ...,
         -2.72973557e+01, -2.72603683e+01, -2.39279118e+01]],

       [[-6.13480225e+02, -6.11797607e+02, -6.07194763e+02, ...,
         -2.43488083e+02, -2.53922440e+02, -2.68048767e+02],
        [ 0.00000000e+00,  2.37782121e+00,  8.87556458e+00, ...,
          1.81816559e+02,  1.82753723e

In [15]:
# Derive the output .npy path of every female recording by rewriting its input
# path: "voice_" -> "mfcc_", "female_wav" -> "female_mfcc", ".wav" -> ".npy".
female_mfcc_npy_paths = [
    wav_path.replace("voice_", "mfcc_")
            .replace("female_wav", "female_mfcc")
            .replace(".wav", ".npy")
    for wav_path in female_wav_files
]

print(len(female_mfcc_npy_paths))
female_mfcc_npy_paths

1467


['mfcc_source\\female_mfcc\\F_a.npy',
 'mfcc_source\\female_mfcc\\F_a2.npy',
 'mfcc_source\\female_mfcc\\F_a4.npy',
 'mfcc_source\\female_mfcc\\F_a5.npy',
 'mfcc_source\\female_mfcc\\F_ai.npy',
 'mfcc_source\\female_mfcc\\F_ai2.npy',
 'mfcc_source\\female_mfcc\\F_ai3.npy',
 'mfcc_source\\female_mfcc\\F_ai4.npy',
 'mfcc_source\\female_mfcc\\F_an.npy',
 'mfcc_source\\female_mfcc\\F_an2.npy',
 'mfcc_source\\female_mfcc\\F_an3.npy',
 'mfcc_source\\female_mfcc\\F_an4.npy',
 'mfcc_source\\female_mfcc\\F_ang.npy',
 'mfcc_source\\female_mfcc\\F_ang2.npy',
 'mfcc_source\\female_mfcc\\F_ang3.npy',
 'mfcc_source\\female_mfcc\\F_ang4.npy',
 'mfcc_source\\female_mfcc\\F_ao.npy',
 'mfcc_source\\female_mfcc\\F_ao2.npy',
 'mfcc_source\\female_mfcc\\F_ao3.npy',
 'mfcc_source\\female_mfcc\\F_ao4.npy',
 'mfcc_source\\female_mfcc\\F_ba.npy',
 'mfcc_source\\female_mfcc\\F_ba2.npy',
 'mfcc_source\\female_mfcc\\F_ba3.npy',
 'mfcc_source\\female_mfcc\\F_ba4.npy',
 'mfcc_source\\female_mfcc\\F_ba5.npy',
 'mfcc

In [None]:
# Save every female MFCC matrix to its own .npy file.
# os.path.join keeps the folder name portable; on Windows it is the same
# backslash-separated string as before.
dir_path = os.path.join("mfcc_source", "female_mfcc")
print(f"'{dir_path}' exiet? {os.path.exists(dir_path)}")
if not os.path.exists(dir_path):
    # makedirs also creates a missing parent folder (os.mkdir would raise
    # FileNotFoundError), and exist_ok tolerates the folder appearing between
    # the check above and this call.
    os.makedirs(dir_path, exist_ok=True)
    print(f"make '{dir_path}'")

# Paths and matrices are paired by position, so their counts must match;
# fail loudly instead of silently writing a partial or misaligned set.
assert len(female_mfcc_npy_paths) == len(female_mfcc_list), \
    "female_mfcc_npy_paths and female_mfcc_list have different lengths"

for file_path, mfcc in zip(tqdm(female_mfcc_npy_paths), female_mfcc_list):
    np.save(file=file_path, arr=mfcc)