In [None]:
from tqdm.notebook import tqdm

import pandas as pd
import numpy as np

import pytorch_lightning as pl
from pitchclass2vec import encoding, model
from pitchclass2vec.pitchclass2vec import Pitchclass2VecModel
from tasks.segmentation.data import BillboardDataset, SegmentationDataModule
from tasks.segmentation.functional import LSTMBaselineModel

from evaluate import load_pitchclass2vec_model

# Fixed seed shared by the whole notebook so that reruns are repeatable.
RANDOM_SEED = 42

# Hand the seed to PyTorch Lightning's global seeding helper.
pl.seed_everything(RANDOM_SEED)

print("done")

In [30]:
# jams.load reference:
# https://jams.readthedocs.io/en/stable/generated/jams.load.html
# For this file len(jam.annotations) is 3: ['chord_harte', 'key_mode', 'timesig']

import jams

path = "/app/choco_dataset/v1.0.0/jams/ireal-pro_1941.jams"

# Load the JAMS file with validation turned off.
jam = jams.load(path, validate=False)

# Namespace name of every annotation, in the order they appear in the file.
namespaces = [str(ann.namespace) for ann in jam.annotations]

# Prefer the "chord_harte" namespace; otherwise fall back to "chord".
if "chord_harte" in namespaces:
    chord_namespace = "chord_harte"
else:
    chord_namespace = "chord"

# Pick the first annotation whose namespace matches the chosen one.
target_annotation_idx = namespaces.index(chord_namespace)
annotation = jam.annotations[target_annotation_idx]



## Train

In [None]:
import os

import pitchclass2vec.model as model
import pitchclass2vec.encoding as encoding
from pitchclass2vec.data import ChocoDataModule

# Set the environment variable, i.e. pick option 3 (do not use a WandB account).
# `os` is already imported at the top of this cell.
os.environ["WANDB_MODE"] = "disabled"

# Arguments that a later cell turns into the `train.py` command line.
train_args = {
    "choco_arg": "/app/choco_dataset/v1.0.0/",
    "out_arg": "/app/out",
    "encoding_arg": "root-interval",
    "model_arg": "fasttext",
}

print("done")

In [None]:
# Pull the four training arguments out of `train_args` in one step.
choco_arg, out_arg, encoding_arg, model_arg = (
    train_args[key]
    for key in ("choco_arg", "out_arg", "encoding_arg", "model_arg")
)

# Assemble the shell command that launches the training script.
command = "python /app/train.py --choco {} --out {} --encoding {} --model {}".format(
    choco_arg, out_arg, encoding_arg, model_arg
)
print(command)


In [None]:
# IPython shell escape: `{command}` is replaced by the value of the Python
# variable `command`, and the resulting line is executed as a shell command.
!{command}
# Printed once the shell command has returned.
print("done")

# Segmentation baseline

In [None]:
# Experiments to run, one (encoding, model, checkpoint path) tuple each.
# Commented-out entries are alternative configurations that are currently disabled.
EXP = [
    # ("text", "fasttext", "out/fasttext_best/model.ckpt"),
    (
        "timed-root-interval",
        "emb-weighted-fasttext",
        "out/rootinterval_best/model.ckpt",
    ),
    # ("rdf", "randomwalk-rdf2vec", "out/rdf2vec_best/model.ckpt"),
]

# Empty results table: three columns identifying the experiment, followed by
# the test metric columns.
experiments_df = pd.DataFrame(
    columns=[
        "encoding",
        "model",
        "path",
        "test_p_precision",
        "test_p_recall",
        "test_p_f1",
        "test_under",
        "test_over",
        "test_under_over_f1",
    ]
)

In [None]:
import logging

# Suppress every logging call of severity CRITICAL and below (i.e. all
# standard levels) so the training loop does not flood the cell output.
# This is process-wide and stays in effect for the rest of the kernel session.
logging.disable(logging.CRITICAL)


# Train and evaluate the LSTM segmentation baseline once per entry of EXP,
# adding one row of test metrics per experiment to `experiments_df`.
for exp in tqdm(EXP):
    # exp is an (encoding, model, checkpoint path) tuple.
    p2v = load_pitchclass2vec_model(*exp)
    # NOTE(review): 256 is passed positionally; presumably the batch size — confirm.
    data = SegmentationDataModule(BillboardDataset, p2v, 256)

    lstm_model = LSTMBaselineModel(
        embedding_dim=p2v.vector_size,
        hidden_size=256,
        num_layers=5,
        dropout=0.2,
        learning_rate=0.001,
    )
    trainer = pl.Trainer(
        max_epochs=150,
        accelerator="auto",
        devices=1,
        enable_progress_bar=False,
    )
    trainer.fit(lstm_model, data)
    test_metrics = trainer.test(lstm_model, data)

    # Only the first entry of the list returned by `trainer.test` is recorded.
    result_row = {
        "encoding": exp[0],
        "model": exp[1],
        "path": exp[2],
        **test_metrics[0],
    }
    # `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the
    # supported replacement. The concat is done inside the loop on purpose:
    # each run is expensive, so finished results are kept in `experiments_df`
    # even if a later experiment fails.
    experiments_df = pd.concat(
        [experiments_df, pd.DataFrame([result_row])],
        ignore_index=True,
    )

In [None]:
# Bare last expression: the notebook renders the collected results table.
experiments_df