In [None]:
import os
from typing import List
from tqdm import tqdm
from corpus import extract

def batch_extract(root_dir: str, path_model: str) -> None:
    """Run ``extract`` on every folder under ``root_dir`` that still needs it.

    A folder qualifies when it directly contains ``origin.wav`` but does not
    yet contain ``extract.json``; folders that already hold the JSON are
    skipped, so the batch can be re-run after an interruption.

    Args:
        root_dir: Root of the directory tree to search (walked recursively).
        path_model: Forwarded unchanged as the last argument of ``extract``.
    """
    # Build the complete work list before processing anything, then iterate
    # over it with a progress bar.
    pending: List[str] = [
        folder
        for folder, _subdirs, names in os.walk(root_dir)
        if 'origin.wav' in names and 'extract.json' not in names
    ]

    progress = tqdm(pending, desc="Processing origin.wav", unit="folder")
    for folder in progress:
        wav_path = os.path.join(folder, 'origin.wav')
        json_path = os.path.join(folder, 'extract.json')
        # The third positional argument is deliberately an empty string —
        # presumably an optional MIDI output path; verify against corpus.extract.
        extract(wav_path, json_path, "", path_model)

# Directory tree to scan, and the checkpoint path handed to batch_extract.
ROOT = "dataset/synced/"
MODEL = "checkpoint/extractor/9.pth"

batch_extract(root_dir=ROOT, path_model=MODEL)

In [None]:
import os
from typing import List
from tqdm import tqdm
from corpus.tempo import TempoInfoGenerator

def batch_generate_tempo(root_dir: str) -> None:
    """Write ``tempo.json`` for every folder under ``root_dir`` that lacks one.

    A folder qualifies when it directly contains ``beat_pred.json`` but no
    ``tempo.json`` yet; folders that already hold the output are skipped.

    Args:
        root_dir: Root of the directory tree to search (walked recursively).
    """
    # Build the complete work list before generating anything, then iterate
    # over it with a progress bar.
    pending: List[str] = [
        folder
        for folder, _subdirs, names in os.walk(root_dir)
        if 'beat_pred.json' in names and 'tempo.json' not in names
    ]

    for folder in tqdm(pending, desc="Generating tempo info", unit="folder"):
        beat_path = os.path.join(folder, "beat_pred.json")
        tempo_path = os.path.join(folder, "tempo.json")

        # One generator per folder, constructed from that folder's beat file.
        generator = TempoInfoGenerator(beat_path, verbose=False)
        generator.generate_tempo_info(tempo_path)


# Directory tree whose folders are scanned for beat_pred.json files.
ROOT_DIR = "dataset/synced/"

batch_generate_tempo(root_dir=ROOT_DIR)

In [None]:
import json
import os
import yt_dlp

# --- Base paths ---
# Assumes the 'dataset' folder sits in the current working directory.
base_dir = os.path.join(".", "dataset", "eval")
metadata_path = os.path.join(base_dir, "metadata.json")

# --- Load metadata.json ---
# On failure, report the problem and stop with SystemExit(1). The bare
# ``exit()`` helper is avoided: inside a Jupyter kernel it asks the kernel
# itself to shut down (losing all notebook state), and in a plain script it
# would exit with status 0 even though loading failed.
try:
    with open(metadata_path, 'r', encoding='utf-8') as f:
        metadata = json.load(f)
    print(f"成功讀取 {metadata_path}，共找到 {len(metadata)} 首歌曲。")
except FileNotFoundError:
    print(f"錯誤：找不到 metadata.json 檔案，請確認路徑 '{metadata_path}' 是否正確。")
    raise SystemExit(1) from None
except json.JSONDecodeError:
    print(f"錯誤：{metadata_path} 的 JSON 格式不正確，請檢查檔案內容。")
    raise SystemExit(1) from None

# --- Walk every song entry and download its audio ---
for i, song_data in enumerate(metadata):
    try:
        dir_name = song_data["dir_name"]
        song_name = song_data["song_name"]
        url = song_data["cover_url"]  # the cover_url field is the download source

        print(f"\n--- 進度: {i+1}/{len(metadata)} ---")
        print(f"檢查中: {song_name}")

        # 1. Work out the target directory and the final file path.
        output_dir = os.path.join(base_dir, dir_name)
        target_file_path = os.path.join(output_dir, "cover.wav")

        # 2. Skip entries whose audio is already on disk.
        if os.path.exists(target_file_path):
            print(f"檔案已存在，跳過下載: {target_file_path}")
            continue

        print("目標檔案不存在，準備下載...")

        # 3. Create the output directory if it does not exist yet.
        os.makedirs(output_dir, exist_ok=True)

        # 4. Configure yt_dlp.
        # The template carries an explicit %(ext)s placeholder so the raw
        # download keeps its own extension and the FFmpegExtractAudio step can
        # replace it with .wav, yielding cover.wav. (The yt-dlp command line
        # refuses an extension-less template together with audio extraction.)
        output_template = os.path.join(output_dir, "cover.%(ext)s")

        ydl_opts = {
            "format": "bestaudio/best",
            "outtmpl": {"default": output_template},
            "postprocessors": [
                {
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "wav",
                    "preferredquality": "192",
                }
            ],
            "ignoreerrors": True,
            "overwrites": True,  # kept as a safeguard despite the existence check above
        }

        # 5. Run the download.
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            print(f"正在從 {url} 下載音訊...")
            ydl.download([url])

        # With "ignoreerrors" enabled a failed download does not raise, so
        # success is only reported once the expected file is really on disk.
        if os.path.exists(target_file_path):
            print(f"成功儲存音訊至: {target_file_path}")
        else:
            print(f"下載失敗，未產生檔案: {target_file_path}")

    except KeyError as e:
        print(f"警告：第 {i+1} 筆資料缺少必要的鍵 {e}，已跳過。")
    except Exception as e:
        # Deliberate best effort: one bad entry must not abort the whole batch.
        print(f"處理第 {i+1} 筆資料時發生未知錯誤：{e}")


print("\n--- 所有檢查與下載任務已完成 ---")

In [None]:
from corpus import extract

# One-off run of extract on origin.wav in the current working directory with
# the 9.pth checkpoint. The second and third arguments are presumably the JSON
# and MIDI output paths — TODO confirm against corpus.extract.
extract("origin.wav", "extract.json", "extract.mid", "checkpoint/extractor/9.pth")

In [None]:
from utils.midi_tool import json_to_midi

# Convert extract.json to extract.mid (both relative to the working directory).
# NOTE(review): argument order assumed to be (input JSON, output MIDI) from the
# helper's name — confirm against utils.midi_tool.
json_to_midi("extract.json", "extract.mid")

In [None]:
from midi_player import MIDIPlayer
from midi_player.stylers import basic

# MIDI file to preview, relative to the current working directory.
midi_file = "extract.mid"

# Keep the player as the cell's final expression so the notebook renders it.
MIDIPlayer(
    url_or_file=midi_file,
    height=600,
    styler=basic,
    title='My Player',
)