In [1]:
!pip install librosa numpy scipy dtw-python

Collecting librosa
  Downloading librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting dtw-python
  Downloading dtw_python-1.5.3-cp312-cp312-win_amd64.whl.metadata (48 kB)
Collecting audioread>=2.1.9 (from librosa)
  Downloading audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting numba>=0.51.0 (from librosa)
  Downloading numba-0.61.2-cp312-cp312-win_amd64.whl.metadata (2.9 kB)
Collecting scikit-learn>=1.1.0 (from librosa)
  Downloading scikit_learn-1.7.0-cp312-cp312-win_amd64.whl.metadata (14 kB)
Collecting joblib>=1.0 (from librosa)
  Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting soundfile>=0.12.1 (from librosa)
  Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl.metadata (16 kB)
Collecting pooch>=1.1 (from librosa)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-0.5.0.post1-cp312-abi3-win_amd64.whl.metadata (5.6 kB)
Collecting lazy_loader>=0.1 (from librosa)
  Downloa

In [37]:
!pip install fastdtw

Collecting fastdtw
  Downloading fastdtw-0.3.4.tar.gz (133 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: fastdtw
  Building wheel for fastdtw (setup.py): started
  Building wheel for fastdtw (setup.py): finished with status 'done'
  Created wheel for fastdtw: filename=fastdtw-0.3.4-py3-none-any.whl size=3631 sha256=b14213d2335a0bcabfeef07924afba5faf86218a4b99f919da0c510fdcd66d16
  Stored in directory: c:\users\user\appdata\local\pip\cache\wheels\ab\d0\26\b82cb0f49ae73e5e6bba4e8462fff2c9851d7bd2ec64f8891e
Successfully built fastdtw
Installing collected packages: fastdtw
Successfully installed fastdtw-0.3.4


  DEPRECATION: Building 'fastdtw' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'fastdtw'. Discussion can be found at https://github.com/pypa/pip/issues/6334


In [38]:
!pip install --upgrade librosa



In [40]:
from fastdtw import fastdtw
from scipy.spatial.distance import euclidean
import numpy as np

In [41]:

def compute_mcd(mfcc_ref, mfcc_syn, use_dtw=True):
    # 移除第 0 維（通常是能量）
    mfcc_ref = mfcc_ref[1:]
    mfcc_syn = mfcc_syn[1:]

    if use_dtw:
        # 使用 fastdtw 進行對齊（近似 DTW）
        distance, path = fastdtw(mfcc_ref.T, mfcc_syn.T, dist=euclidean)

        # 轉換 path 為對應 index
        ref_aligned = mfcc_ref[:, [i for i, j in path]]
        syn_aligned = mfcc_syn[:, [j for i, j in path]]
    else:
        # 使用最小長度對齊
        min_len = min(mfcc_ref.shape[1], mfcc_syn.shape[1])
        ref_aligned = mfcc_ref[:, :min_len]
        syn_aligned = mfcc_syn[:, :min_len]

    # 計算 MCD
    diff = ref_aligned - syn_aligned
    mcd = (10.0 / np.log(10)) * np.sqrt((diff ** 2).sum(axis=0).mean())

    return mcd

In [56]:
# 讀取語音檔
ref_audio, sr = librosa.load("reference.wav", sr=None)
syn_audio, _ = librosa.load("synthesized.wav", sr=sr)

In [57]:
mfcc_ref = librosa.feature.mfcc(y=ref_audio, sr=sr, n_mfcc=13)
mfcc_syn = librosa.feature.mfcc(y=syn_audio, sr=sr, n_mfcc=13)

In [58]:
# 計算 MCD
mcd_value = compute_mcd(mfcc_ref, mfcc_syn)

In [59]:
# 品質說明
def quality_description(mcd):
    if mcd > 10:
        return "❌ 非常差：MCD > 10"
    elif mcd > 6:
        return "⚠️ 尚可：6 < MCD ≤ 10"
    elif mcd > 4:
        return "✅ 良好：4 < MCD ≤ 6"
    else:
        return "🌟 非常高品質：MCD ≤ 4"

In [60]:
print(f"MCD: {mcd_value:.2f}")
print("品質評估:", quality_description(mcd_value))

MCD: 215.06
品質評估: ❌ 非常差：MCD > 10


## Compare same audio

In [61]:
ref_audio, sr = librosa.load("reference.wav", sr=None)
syn_audio, _ = librosa.load("reference.wav", sr=sr)

In [62]:
mfcc_ref = librosa.feature.mfcc(y=ref_audio, sr=sr, n_mfcc=13).astype(np.float64)
mfcc_syn = librosa.feature.mfcc(y=syn_audio, sr=sr, n_mfcc=13).astype(np.float64)

In [63]:
mcd_value = compute_mcd(mfcc_ref, mfcc_syn)

In [64]:
print(f"MCD: {mcd_value:.2f}")
print("品質評估:", quality_description(mcd_value))

MCD: 0.00
品質評估: 🌟 非常高品質：MCD ≤ 4
