## Import

In [1]:
import pytorch_lightning as pl
import sys 
# Make the project root importable. The membership guard keeps the cell
# idempotent: re-running it no longer appends a duplicate entry each time.
if '/app/' not in sys.path:
    sys.path.append('/app/')

# Single seed for the whole notebook; seed_everything reports the value it set.
RANDOM_SEED = 42
pl.seed_everything(seed=RANDOM_SEED)
print("done")

Global seed set to 42


done


# Train Segmentation model (Transformer) 

In [2]:
import logging
import torch
# Suppress every log record up to and including CRITICAL for this session.
logging.disable(logging.CRITICAL)

# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Experiment-level options (forwarded to the training script as CLI flags).
exp_args = dict(encoder="root-interval")

# Training options. Insertion order is kept because the command line built
# from this dict lists the flags in the same order.
# Optional flags that are currently left unset:
#   "test_mode": False, "full_chord": False, "disable_wandb": False
segmentation_train_args = dict(
    seed=42,
    wandb_run_name="transformer_run_1",
    out="/app/segmentation_out",
    source_input_dim=3,
    model_dim=16,             # hidden dimension used inside the Transformer
    feedforward_dim=32,       # dimension of the Transformer feed-forward network
    num_classes=14,           # number of predicted classes (section label classes)
    num_heads=8,              # number of heads in multi-head attention
    num_layers=3,             # number of encoder and decoder layers
    decoder_max_length=500,   # maximum decoder sequence length
    init_method="xavier",     # or orthogonal
    max_epochs=150,
    batch_size=32,
    device=device,            # device used to train the model
    lr=0.001,                 # learning rate
    warmup=100,               # number of learning-rate warmup steps
    factor=0.5,
    patience=5,
    max_iters=1,              # maximum number of training iterations
    dropout=0.1,              # dropout rate applied inside the model
    input_dropout=0.1,        # dropout rate applied to the input features
)

print("done")

done


In [3]:
# Auto-generate the shell command that launches the training script.
import shlex

command_parts = ["python /app/tasks/segmentation/trainning_function/transformer_train.py"]

# Experiment flags first, then training flags, each rendered as "--name value".
for arg, value in [*exp_args.items(), *segmentation_train_args.items()]:
    # Quote the value so that spaces or shell metacharacters (e.g. in a run
    # name or output path) cannot split or alter the command. Values made only
    # of safe characters are left untouched by shlex.quote.
    command_parts.append(f"--{arg} {shlex.quote(str(value))}")

command = " ".join(command_parts)
print(command)

print("done!")



python /app/tasks/segmentation/trainning_function/transformer_train.py --encoder root-interval --seed 42 --wandb_run_name transformer_run_1 --out /app/segmentation_out --source_input_dim 3 --model_dim 16 --feedforward_dim 32 --num_classes 14 --num_heads 8 --num_layers 3 --decoder_max_length 500 --init_method xavier --max_epochs 150 --batch_size 32 --device cuda --lr 0.001 --warmup 100 --factor 0.5 --patience 5 --max_iters 1 --dropout 0.1 --input_dropout 0.1
done!


In [4]:
# Execute the generated shell command through IPython's `!` escape;
# {command} is interpolated from the Python variable `command`.
!{command}
print("done!")

[34m[1mwandb[0m: Currently logged in as: [33mcretaceousmart[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: wandb version 0.16.0 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.15.12
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/app/tasks/segmentation/wandb/run-20231123_170237-t6nte5ns[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mtransformer_run_1[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/cretaceousmart/Segmentation_with_Transformer[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/cretaceousmart/Segmentation_with_Transformer/runs/t6nte5ns[0m
 67%|██████████████████████████▋             | 595/890 [00:00<00:00, 846.81it/s]Track 974 not parsable
100%|████████████████████████████████████████| 890/890 [00:01<00:00, 831.19it/s]
  rank_zero_warn(

### End-to-end evaluation (must be run in a Madmom environment)

In [1]:
from tqdm.notebook import tqdm

import pandas as pd
import numpy as np
import pytorch_lightning as pl
from scipy.spatial.distance import cosine

import sys
sys.path.insert(0, '/app')
from evaluate import load_pitchclass2vec_model

from collections import defaultdict
from itertools import groupby
import re
# Fix the global seed through Lightning so the evaluation run is repeatable.
RANDOM_SEED = 42
pl.seed_everything(seed=RANDOM_SEED)

from madmom.features.chords import CNNChordFeatureProcessor, CRFChordRecognitionProcessor
from madmom.processors import SequentialProcessor

# Path of the audio file to analyse.
audio_file = r"/app/jie_test_music/Drake - Passionfruit.mp3"

# Two madmom stages: a CNN feature extractor followed by a CRF chord recogniser.
feature_processor = CNNChordFeatureProcessor()
chord_recognizer = CRFChordRecognitionProcessor()

# Chain both stages so a single call takes the audio file through to chords.
sequential_processor = SequentialProcessor([feature_processor, chord_recognizer])

# Run the pipeline on the audio file to recognise its chords.
chords = sequential_processor(audio_file)
print("done")

Global seed set to 42


done
done


In [2]:

from pitchclass2vec.pitchclass2vec import NaiveEmbeddingModel
import pitchclass2vec.encoding as encoding
import torch

def prepare_input_seq(chord_list,embedding_model):
    """Look up an embedding for every chord, falling back to the "N" entry.

    Args:
        chord_list: Iterable of chord labels used as keys into ``embedding_model``.
        embedding_model: Object supporting ``embedding_model[label]`` lookups;
            it must provide an entry for ``"N"``, which is used as the fallback.

    Returns:
        list: One embedding per input chord, in input order.
    """
    embedded_chords = []
    for chord in chord_list:
        try:
            embedded_chords.append(embedding_model[chord])
        except Exception:
            # Best-effort fallback for labels the model cannot embed.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate instead of being silently swallowed.
            embedded_chords.append(embedding_model["N"])
    return embedded_chords


# Keep only the element at index 2 of every recognised segment
# (presumably the chord label of a (start, end, label) tuple — confirm against madmom).
chord_list = [e[2] for e in chords]
encoder = encoding.RootIntervalDataset
embedding_model = NaiveEmbeddingModel(encoder)

# torch.tensor already copies its input and returns a tensor with no autograd
# history, so the former .clone().detach() was redundant. Building the tensor
# in one step also avoids binding `source` to a list first and a tensor later.
source = torch.tensor(
    prepare_input_seq(chord_list=chord_list, embedding_model=embedding_model)
)

In [3]:
# Sanity check of the input tensor shape — presumably (num_chords, embedding_dim); confirm.
source.shape

torch.Size([117, 3])

In [19]:
# Load the pretrained segmentation model
from tasks.segmentation.deeplearning_models.transformer import TransformerModel

# Pick the device at runtime instead of hard-coding 'cuda', so the cell also
# works on CPU-only machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# map_location remaps the checkpoint's tensors onto the chosen device while loading.
model = TransformerModel.load_from_checkpoint(
    checkpoint_path='/app/segmentation_out/transformer_run_1.ckpt',
    map_location=device,
)
model.eval()  # switch to evaluation mode
model.to(device)
print("done")

done


In [20]:
# Prepare the input sequence: add a leading batch dimension and place it on
# whatever device the model lives on (instead of a hard-coded 'cuda').
source_seq = source.unsqueeze(0).to(model.device)

# Generate the sequence; gradients are not needed for inference.
with torch.no_grad():
    predicted_seq = model.generate_sequence(source_seq)

In [30]:
# Label vocabulary: three special tokens plus the section labels.
label_to_int = dict([
    ('<PAD>', 0),
    ('<SOS>', 1),
    ('<EOS>', 13),
    ('bridge', 2),
    ('chorus', 3),
    ('instrumental', 4),
    ('interlude', 5),
    ('intro', 6),
    ('other', 7),
    ('outro', 8),
    ('refrain', 9),
    ('theme', 10),
    ('transition', 11),
    ('verse', 12),
])

# Reverse lookup: class index -> label name.
int_to_label = dict((idx, label) for label, idx in label_to_int.items())

# Decode the prediction into label names; squeeze(0) drops the leading
# dimension (presumably the batch axis of size 1 — confirm).
list(map(int_to_label.__getitem__, predicted_seq.squeeze(0).tolist()))

# int_to_label



['verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'verse',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'chorus',
 'ch