In [None]:
import numpy as np
from corpus import Synchronizer

def calculate_wps(origin_path: str, cover_path: str, song_dir: str, lambda_val: float = 0.1) -> float:
    """Compute the WPS score of a cover recording against its original.

    The score is ``exp(-lambda_val * sigma)``, where ``sigma`` is the standard
    deviation of the time offset (original time minus cover time) sampled at
    every step of the warping path returned by ``Synchronizer.get_wp``.  A
    constant offset along the whole path therefore scores 1.0.

    Args:
        origin_path: Path of the original recording, forwarded to ``get_wp``.
        cover_path: Path of the cover recording, forwarded to ``get_wp``.
        song_dir: Song directory, forwarded to ``get_wp``.
        lambda_val: Decay rate applied to the offset standard deviation.

    Returns:
        The score as a plain ``float``.  ``0.0`` is returned (after printing
        the reason) whenever the score cannot be computed, including an empty
        or malformed warping path and a non-finite standard deviation.
    """
    try:
        synchronizer = Synchronizer()
        wp = synchronizer.get_wp(origin_path, cover_path, song_dir)

        # The timestamp arrays are read only after get_wp has run.
        # NOTE(review): this code treats t1 as the cover timeline and t2 as the
        # original's - confirm against Synchronizer.
        t_cover = synchronizer.t1
        t_orig = synchronizer.t2

        if t_cover is None or t_orig is None:
            raise ValueError("時間戳序列未能成功在 Synchronizer 物件中生成。")

        # Row 0 indexes the cover timestamps, row 1 the original timestamps.
        wp_int = np.asarray(wp).astype(int)
        if wp_int.ndim != 2 or wp_int.shape[0] < 2 or wp_int.shape[1] == 0:
            # An empty path would otherwise produce NaN, and a 1-D path a
            # meaningless perfect score.
            raise ValueError(f"invalid warping path shape: {wp_int.shape}")

        time_diff_sequence = t_orig[wp_int[1]] - t_cover[wp_int[0]]

        sigma_d = np.std(time_diff_sequence)
        if not np.isfinite(sigma_d):
            raise ValueError(f"non-finite offset deviation: {sigma_d}")
        print(f"wp-std: {sigma_d}")

        return float(np.exp(-lambda_val * sigma_d))

    except Exception as e:
        # Best-effort by design: batch callers treat 0.0 as "no score".
        print(f"計算 WPS 分數時發生錯誤：{e}")
        return 0.0

# Script entry point: print the WPS score of each cover version of one evaluation song.
if __name__ == '__main__':
    target_dir = "CPOP4"  # song folder under ./dataset/eval to evaluate
    song_dir = f"./dataset/eval/{target_dir}"
    original_audio = f"{song_dir}/origin.wav"
    versions = ["picogen", "amtapc", "music2midi", "human"]

    for v in versions:
        cover_audio = f"{song_dir}/{v}.wav"
        # 0.25 overrides calculate_wps's default lambda_val of 0.1.
        wps_score = calculate_wps(original_audio, cover_audio, song_dir, 0.25)

        # calculate_wps returns 0.0 when it fails, so non-positive scores are not printed.
        if wps_score > 0:
            print(f"WPS of {v}: {wps_score:.4f}")

In [None]:
import json
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from corpus import Synchronizer

def calculate_nwpd(origin_path: str, cover_path: str, song_dir: str, lambda_nwpd: float = 1.0) -> float:
    """Compute the Normalized Warp Path Deviation (NWPD) score.

    A straight line ``t_orig = a * t_cover + b`` is least-squares fitted to the
    timestamps visited by the warping path, and the score is
    ``exp(-lambda_nwpd * sigma)``, where ``sigma`` is the standard deviation of
    the residuals of that fit.  The warping path is indexed as shape ``(2, L)``:
    row 0 into the cover timestamps, row 1 into the original timestamps.

    Args:
        origin_path: Path of the original recording, forwarded to ``get_wp``.
        cover_path: Path of the cover recording, forwarded to ``get_wp``.
        song_dir: Song directory, forwarded to ``get_wp``.
        lambda_nwpd: Decay rate applied to the residual standard deviation.

    Returns:
        The score as a plain ``float``.  ``0.0`` is returned (after printing
        the reason) whenever the score cannot be computed, including a path
        with fewer than two points and a non-finite residual deviation.
    """
    try:
        synchronizer = Synchronizer()
        wp = synchronizer.get_wp(origin_path, cover_path, song_dir)

        # The timestamp arrays are read only after get_wp has run.
        # NOTE(review): this code treats t1 as the cover timeline and t2 as the
        # original's - confirm against Synchronizer.
        t_cover = synchronizer.t1
        t_orig = synchronizer.t2

        if t_cover is None or t_orig is None:
            raise ValueError("時間戳序列未能成功在 Synchronizer 物件中生成。")

        wp_int = np.asarray(wp).astype(int)
        if wp_int.ndim != 2 or wp_int.shape[0] < 2 or wp_int.shape[1] < 2:
            # A line through fewer than two points fits exactly and would
            # yield a spurious perfect score.
            raise ValueError(f"warping path too short or malformed: shape {wp_int.shape}")

        path_t_cover = t_cover[wp_int[0]]
        path_t_orig = t_orig[wp_int[1]]

        # Degree-1 fit: polyfit returns the slope first, then the intercept.
        slope, intercept = np.polyfit(path_t_cover, path_t_orig, 1)

        deviation = path_t_orig - (slope * path_t_cover + intercept)
        sigma_dev = np.std(deviation)
        if not np.isfinite(sigma_dev):
            raise ValueError(f"non-finite path deviation: {sigma_dev}")

        return float(np.exp(-lambda_nwpd * sigma_dev))
    except Exception as e:
        # Best-effort by design: batch callers treat 0.0 as "no score".
        print(f"計算 NWPD 分數時發生錯誤：{e}")
        return 0.0


def analyze_and_visualize_scores(
    base_dir: str = os.path.join(".", "dataset", "eval"),
    versions=("human", "picogen", "amtapc", "music2midi"),
    lambda_val: float = 0.5,
    output_image_path: str = "nwpd_score_distribution.png",
) -> None:
    """Score every version of every song with NWPD, print averages and plot distributions.

    Songs are listed in ``<base_dir>/metadata.json``; each entry's ``dir_name``
    names a folder under ``base_dir`` holding ``origin.wav`` and one
    ``<version>.wav`` per version.  Only scores greater than 0 are kept, since
    ``calculate_nwpd`` returns 0.0 on failure.

    Args:
        base_dir: Evaluation dataset root containing ``metadata.json``.
        versions: Version names to score; each maps to ``<version>.wav``.
        lambda_val: Passed to ``calculate_nwpd`` as ``lambda_nwpd``.
        output_image_path: Where the KDE plot is saved.

    Returns:
        None.  Results are printed and the figure is written to disk.  If
        ``metadata.json`` is missing, an error is printed and nothing else happens.
    """
    # --- 1. Setup ---
    metadata_path = os.path.join(base_dir, "metadata.json")
    origin_filename = "origin.wav"

    # Collected scores, keyed by version name.
    scores_by_version = {v: [] for v in versions}

    # --- 2. Read metadata and compute scores ---
    try:
        with open(metadata_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
    except FileNotFoundError:
        print(f"❌ 錯誤：找不到 metadata.json 檔案。")
        return

    for song_data in metadata:
        dir_name = song_data.get("dir_name")
        if not dir_name:
            continue

        song_dir = os.path.join(base_dir, dir_name)
        origin_path = os.path.join(song_dir, origin_filename)
        if not os.path.exists(origin_path):
            print(f"  ↪️ 已跳過 (找不到 origin.wav)")
            continue

        for v in versions:
            cover_path = os.path.join(song_dir, f"{v}.wav")
            if not os.path.exists(cover_path):
                print(f"  ↪️ 已跳過版本 '{v}' (找不到 {v}.wav) {song_dir}")
                continue

            score = calculate_nwpd(origin_path, cover_path, song_dir, lambda_nwpd=lambda_val)
            # 0.0 is calculate_nwpd's failure value, so it is excluded from the statistics.
            if score > 0:
                scores_by_version[v].append(score)

    # --- 3. Print the mean score per version ---
    print("\n\n--- 平均分數統計 ---")
    for version, scores in scores_by_version.items():
        if scores:
            avg_score = np.mean(scores)
            print(f"版本 {version:<12}: 平均 NWPD 分數 = {avg_score:.4f} (基於 {len(scores)} 個樣本)")
        else:
            print(f"版本 {version:<12}: 無有效分數可供計算。")

    # --- 4. Visualisation ---
    print("\n🎨 正在生成分數分佈圖...")

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, ax = plt.subplots(figsize=(12, 7))

    # One KDE curve per version that has at least one score.
    for version, scores in scores_by_version.items():
        if scores:
            sns.kdeplot(scores, label=version, fill=True, alpha=0.5, ax=ax, lw=2.5)

    ax.set_title('NWPD Score Distribution by Version', fontsize=16, pad=20)
    ax.set_xlabel('NWPD Score', fontsize=12)
    ax.set_ylabel('Density', fontsize=12)
    ax.set_xlim(0, 1.05)
    ax.legend(title='Version', fontsize=10)

    # Save through the figure object rather than pyplot's implicit current figure.
    fig.savefig(output_image_path, dpi=300, bbox_inches='tight')

    print(f"✅ 分數分佈圖已成功儲存至: {output_image_path}")


# Run the NWPD analysis when this cell/script is executed as the main module.
if __name__ == '__main__':
    analyze_and_visualize_scores()

In [None]:
from evaluation import IPECalculator

# Initialise the calculator. The mu and sigma values here should be set according to your dataset.
# n_clusters is also an important hyperparameter; it affects the granularity of the symbols.
ipe_calc = IPECalculator(n_gram=8, n_clusters=16, mu_entropy=4.5, sigma_entropy=0.5)

# Assume you have a list of MIDI files.
midi_files = ["./dataset/eval/JPOP1/cover.mid", "./dataset/eval/JPOP1/picogen.mid", "./dataset/eval/JPOP1/amtapc.mid", "./dataset/eval/JPOP1/music2midi.mid"]

for midi_file in midi_files:
    print(f"\nAnalyzing {midi_file}...")
    results = ipe_calc.calculate_ipe(midi_file)

    # The result is checked for an "error" key before any score is read from it.
    if "error" in results:
        print(f"  Error: {results['error']}")
    else:
        # Print the full set of computed results.
        print("  IPE Score: {:.4f}".format(results['ipe_score']))
        print("  Shannon Entropy: {:.4f}".format(results['shannon_entropy']))
        print("  N-gram Count: {}".format(results['n_gram_count']))

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm # 用於顯示進度條

from evaluation import IPECalculator

def analyze_dataset_for_ipe_params(
    dataset_dir: str = "./dataset/synced/",
    n_gram: int = 8,
    n_clusters: int = 16,
    output_image_path: str = "ipe_entropy_distribution.png",
) -> None:
    """Collect Shannon entropies over a dataset and suggest IPE parameters.

    Every immediate sub-directory of ``dataset_dir`` that contains a
    ``cover.json`` is passed to ``IPECalculator.calculate_ipe``; results that
    carry a ``"shannon_entropy"`` key are collected.  The mean and standard
    deviation of the collected values are printed as suggested ``mu_entropy``
    and ``sigma_entropy``, and a histogram is saved.

    Args:
        dataset_dir: Directory whose sub-directories each hold one song.
        n_gram: Forwarded to ``IPECalculator``.
        n_clusters: Forwarded to ``IPECalculator``.
        output_image_path: Where the histogram is saved.

    Returns:
        None.  Returns early (after printing a message) when ``dataset_dir``
        does not exist or no entropy value could be collected.
    """
    if not os.path.exists(dataset_dir):
        print(f"錯誤：找不到資料集目錄 {dataset_dir}")
        return

    # mu and sigma are left at the calculator's defaults; only the entropy is read here.
    ipe_calculator = IPECalculator(n_gram=n_gram, n_clusters=n_clusters)

    # Materialise the sub-directory list inside the context manager so the
    # directory handle is released promptly.
    with os.scandir(dataset_dir) as entries:
        subdirectories = [entry for entry in entries if entry.is_dir()]

    print(f"正在分析 {len(subdirectories)} 首人類演奏歌曲...")

    entropy_values = []
    for entry in tqdm(subdirectories, desc="Analyzing songs"):
        json_path = os.path.join(entry.path, "cover.json")
        if not os.path.exists(json_path):
            continue

        results = ipe_calculator.calculate_ipe(json_path)
        # Results without an entropy value are skipped.
        if "shannon_entropy" in results:
            entropy_values.append(results["shannon_entropy"])

    if not entropy_values:
        print("未能在資料集中計算出任何熵值。")
        return

    # --- Statistics ---
    entropy_series = pd.Series(entropy_values)
    stats = entropy_series.describe()

    mean_entropy = stats['mean']
    std_entropy = stats['std']  # NaN when only one value was collected

    print("\n\n--- 人類演奏資料集熵值統計分析 ---")
    print(stats)

    print("\n--- 建議的 IPE 參數值 ---")
    print(f"建議的 𝜇_Hn (mu_entropy): {mean_entropy:.4f}")
    print(f"建議的 σ_c (sigma_entropy): {std_entropy:.4f}  (這是一個好的起始點，您可以根據需要調整)")

    # --- Visualisation ---
    print("\n正在生成熵值分佈圖...")
    plt.style.use('seaborn-v0_8-whitegrid')
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.histplot(entropy_series, kde=True, bins=50, ax=ax)
    ax.axvline(mean_entropy, color='r', linestyle='--', label=f'Mean: {mean_entropy:.2f}')
    if pd.notna(std_entropy):
        # The +/-1 sigma guides are only meaningful when a deviation exists.
        ax.axvline(mean_entropy + std_entropy, color='g', linestyle=':', label=f'+1 Std Dev: {mean_entropy + std_entropy:.2f}')
        ax.axvline(mean_entropy - std_entropy, color='g', linestyle=':', label=f'-1 Std Dev: {mean_entropy - std_entropy:.2f}')
    ax.set_title('人類演奏資料集的香農熵 (H_n) 分佈', fontsize=16)
    ax.set_xlabel('Shannon Entropy', fontsize=12)
    ax.set_ylabel('歌曲數量 (Count)', fontsize=12)
    ax.legend()
    fig.savefig(output_image_path, dpi=300)
    print(f"✅ 熵值分佈圖已儲存為 {output_image_path}")


if __name__ == '__main__':
    # Install the required libraries first:
    # pip install pandas matplotlib seaborn tqdm
    analyze_dataset_for_ipe_params()

In [None]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# 假設您的 IpeCalculator 類別儲存在 evaluation/IPE.py
from evaluation.IPE import IPECalculator 

def evaluate_models_with_ipe(
    eval_dir: str = "./dataset/eval",
    versions=("cover", "picogen", "amtapc", "music2midi"),
    mu_entropy: float = 10.8956,
    sigma_entropy: float = 0.6923,
    n_gram: int = 8,
    n_clusters: int = 16,
    output_image_path: str = "ipe_evaluation_results.png",
) -> None:
    """Evaluate each version of every evaluation song with calibrated IPE parameters.

    Songs are listed in ``<eval_dir>/metadata.json``; each entry's ``dir_name``
    names a folder under ``eval_dir`` holding one ``<version>.mid`` per version.
    Results without an ``"error"`` key are gathered into a DataFrame, summarised
    per version, and drawn as a box plot.

    Args:
        eval_dir: Evaluation dataset root containing ``metadata.json``.
        versions: Version names to score; each maps to ``<version>.mid``.
        mu_entropy: Target entropy passed to ``IPECalculator``.  The default,
            like ``sigma_entropy``, is the empirical value obtained from
            analysing 4751 songs.
        sigma_entropy: Entropy spread passed to ``IPECalculator``.
        n_gram: Forwarded to ``IPECalculator``; keep it equal to the value
            used when calibrating ``mu_entropy``/``sigma_entropy``.
        n_clusters: Forwarded to ``IPECalculator``; same caveat as ``n_gram``.
        output_image_path: Where the box plot is saved.

    Returns:
        None.  Returns early (after printing a message) when ``metadata.json``
        is missing or no valid score was produced.
    """
    # --- 1. Setup ---
    metadata_path = os.path.join(eval_dir, "metadata.json")

    ipe_calculator = IPECalculator(
        mu_entropy=mu_entropy,
        sigma_entropy=sigma_entropy,
        n_gram=n_gram,
        n_clusters=n_clusters,
    )

    # --- 2. Read metadata and compute scores ---
    try:
        with open(metadata_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
        print(f"✅ 成功讀取 metadata.json，將分析 {len(metadata)} 首歌曲。")
    except FileNotFoundError:
        print(f"❌ 錯誤：找不到 metadata.json 檔案。")
        return

    results_list = []
    for song_data in tqdm(metadata, desc="Evaluating Songs"):
        dir_name = song_data.get("dir_name")
        if not dir_name:
            continue

        song_dir = os.path.join(eval_dir, dir_name)

        for version in versions:
            midi_path = os.path.join(song_dir, f"{version}.mid")
            if not os.path.exists(midi_path):
                continue

            results = ipe_calculator.calculate_ipe(midi_path)
            # Results flagged with an "error" key are left out of the statistics.
            if "error" not in results:
                results_list.append({
                    "song": dir_name,
                    "version": version,
                    "ipe_score": results["ipe_score"],
                    "entropy": results["shannon_entropy"]
                })

    # --- 3. Statistics with pandas ---
    if not results_list:
        print("未計算出任何有效分數。")
        return

    df = pd.DataFrame(results_list)

    print("\n\n--- 各版本 IPE 分數統計摘要 ---")
    # Sorted by mean score, best first.
    summary = df.groupby('version')['ipe_score'].describe().sort_values('mean', ascending=False)
    print(summary)

    # The reference value in the header follows mu_entropy instead of a hard-coded literal.
    print(f"\n--- 各版本平均熵值 (與理想值 {mu_entropy:.4f} 比較) ---")
    mean_entropy = df.groupby('version')['entropy'].mean().sort_values(ascending=False)
    print(mean_entropy)

    # --- 4. Visualisation ---
    print("\n🎨 正在生成分數分佈的箱形圖 (Box Plot)...")

    # Plot versions in the same best-first order as the summary table.
    order = summary.index

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, ax = plt.subplots(figsize=(12, 8))

    # NOTE(review): newer seaborn releases deprecate `palette` without `hue`;
    # confirm the installed version before changing this call.
    sns.boxplot(data=df, x='ipe_score', y='version', order=order, palette='viridis', ax=ax)

    ax.set_title('各版本 IPE 分數分佈比較', fontsize=18, pad=20)
    ax.set_xlabel('IPE Score (越高越好)', fontsize=14)
    ax.set_ylabel('版本 (Version)', fontsize=14)
    ax.set_xlim(-0.05, 1.05)
    ax.grid(axis='x', linestyle='--', alpha=0.7)

    fig.savefig(output_image_path, dpi=300, bbox_inches='tight')

    print(f"✅ 評估結果圖表已成功儲存至: {output_image_path}")


# Run the IPE model evaluation when this cell/script is executed as the main module.
if __name__ == '__main__':
    evaluate_models_with_ipe()