# Model Exploration

In [1]:
import os
import pickle
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from sklearn.preprocessing import StandardScaler

from evaluation.experiment import Experiment
from integrations.databricks.auth import setup_mlflow
from models.config import *
from models.setup import experiment_setup
from utils.dataset import Dataset

# Load variables from a .env file into the environment (python-dotenv) so the
# os.getenv lookups in this notebook can see them.
load_dotenv()

# Project helper from integrations.databricks.auth; presumably configures the
# MLflow tracking connection — TODO confirm what exactly it sets up.
setup_mlflow()
# Value of MLFLOW_MODEL_EXP (None when unset); run_cv_experiments forwards it to
# Experiment.run_experiment.
mlflow_path = os.getenv('MLFLOW_MODEL_EXP')
# Suppress every UserWarning for the rest of the session.
warnings.simplefilter(action='ignore', category=UserWarning)

# Default random_state of build_cv_experiments.
SEED = 42

In [2]:
# Resolve the project directories from PROJECT_ROOT (looked up once instead of per path).
_project_root = os.getenv('PROJECT_ROOT')
if _project_root is None:
    # Path(None, ...) would otherwise fail with an opaque TypeError.
    raise EnvironmentError("PROJECT_ROOT is not set; define it in the environment or in the .env file")

DATA_PATH = Path(_project_root) / 'data'
EXPERIMENT_PATH = Path(_project_root) / 'experiments'
TRACK_DATASET_FILE = DATA_PATH / 'track_dataset_nk.pkl'

# NOTE: unpickling can execute arbitrary code stored in the file; only load
# dataset pickles that were produced by this project.
with open(TRACK_DATASET_FILE, 'rb') as file:
    track_dataset = pickle.load(file)

In [3]:
def scale_numeric_data(train_array: np.ndarray, test_array: np.ndarray, feature_names: list, scaled_feature_names: list = None) -> tuple:
    """Standardise selected feature columns of a train/test pair.

    A ``StandardScaler`` is fitted on the training columns only; the same fitted
    transform is then applied to the matching test columns.

    Args:
        train_array: Training data whose columns line up with ``feature_names``.
        test_array: Test data with the same column layout.
        feature_names: Column labels used for both arrays.
        scaled_feature_names: Columns to standardise. ``None`` selects
            ``['duration_ms', 'tempo', 'loudness']``.

    Returns:
        ``(train, test)`` as NumPy arrays in which only the selected columns
        have been passed through the scaler.
    """
    columns_to_scale = scaled_feature_names
    if columns_to_scale is None:
        columns_to_scale = ['duration_ms', 'tempo', 'loudness']

    # Label the columns so the ones to scale can be addressed by name.
    train_frame = pd.DataFrame(train_array, columns=feature_names)
    test_frame = pd.DataFrame(test_array, columns=feature_names)

    standardiser = StandardScaler()
    scaled_train_columns = standardiser.fit_transform(train_frame[columns_to_scale])
    train_frame[columns_to_scale] = scaled_train_columns
    # The test split reuses the statistics learned from the training split.
    scaled_test_columns = standardiser.transform(test_frame[columns_to_scale])
    test_frame[columns_to_scale] = scaled_test_columns

    return train_frame.to_numpy(), test_frame.to_numpy()

In [4]:
def build_cv_experiments(n_splits: int, dataset: Dataset, random_state=SEED):
    """Create one ``Experiment`` per cross-validation fold of ``dataset``.

    The folds come from ``dataset.split_data`` (called with ``cross_val=True``,
    ``iterative=True`` and ``force_split=True``). Each fold's train/test arrays
    are passed through ``scale_numeric_data`` and written back into the lists
    returned by ``split_data`` before the ``Experiment`` is built.

    Args:
        n_splits: Number of folds to request and iterate over.
        dataset: Dataset providing ``split_data``, ``feature_names`` and
            ``get_dataset_characteristics``.
        random_state: Forwarded to ``split_data``.

    Returns:
        List of ``Experiment`` objects in fold order.
    """
    split = dataset.split_data(
        cross_val=True,
        n_splits=n_splits,
        iterative=True,
        random_state=random_state,
        force_split=True,
    )
    train_data, train_labels, _, test_data, test_labels, _ = split

    experiments = []
    for fold in range(n_splits):
        scaled_train, scaled_test = scale_numeric_data(train_data[fold], test_data[fold], dataset.feature_names)
        # Store the scaled arrays back into the split lists, replacing the raw fold data.
        train_data[fold] = scaled_train
        test_data[fold] = scaled_test

        characteristics = dataset.get_dataset_characteristics(fold)
        experiments.append(
            Experiment(train_data[fold], train_labels[fold], test_data[fold], test_labels[fold], characteristics)
        )

    return experiments

In [5]:
def run_cv_experiments(experiments, experiment_configs, model_names, tags, save_models, log_results):
    """Run every model configuration on each cross-validation fold.

    For fold ``i`` (1-based) each model name gets the suffix ``_i`` and its tags
    gain a ``'Fold'`` entry of the form ``'i / n_folds'``.

    Args:
        experiments: Sequence of per-fold experiment objects exposing ``run_experiment``.
        experiment_configs: Passed through to ``experiment_setup``.
        model_names: Base model names; each must be a key of ``tags``.
        tags: Mapping of base model name -> tag dict. It is left unmodified.
        save_models: Forwarded to ``run_experiment``.
        log_results: Forwarded to ``run_experiment``.
    """
    n_folds = len(experiments)
    for fold_number, experiment in enumerate(experiments, start=1):
        fold_names = [f'{model_name}_{fold_number}' for model_name in model_names]
        # Build a fresh tag dict per fold. Writing 'Fold' into the caller's dicts
        # would mutate `tags` and let later folds overwrite the value in any
        # object that kept a reference to an earlier fold's tags.
        fold_tags = {
            fold_name: {**tags[model_name], 'Fold': f'{fold_number} / {n_folds}'}
            for fold_name, model_name in zip(fold_names, model_names)
        }

        fold_models, fold_configs = experiment_setup(experiment_configs, fold_names)
        experiment.run_experiment(fold_models, fold_configs, mlflow_path, fold_tags, save_models=save_models, log_results=log_results)

In [6]:
# Run names of the three baseline variants and the tags attached to each run.
baseline_names = ['br_baseline', 'cc_baseline', 'lp_baseline']
# Each run name maps to its own, separately built tag dict.
baseline_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
        },
    )
    for run_name in baseline_names
)

In [6]:
# Fold count handed to build_cv_experiments as n_splits (see the commented-out build cells).
N_SPLITS = 5

In [8]:
# baseline_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)

In [9]:
# run_cv_experiments(baseline_experiments, baseline_experiment_configs, baseline_names, baseline_tags, save_models=True, log_results=True)

In [7]:
def save_experiments(experiments, filename: str):
    """Pickle ``experiments`` to a file under ``EXPERIMENT_PATH``.

    Args:
        experiments: Any picklable object (in this notebook, the list of fold experiments).
        filename: Target file name; ``'.pkl'`` is appended unless already present.
    """
    if not filename.endswith('.pkl'):
        filename += '.pkl'
    target = EXPERIMENT_PATH / filename
    # Create the output directory on first use instead of failing with FileNotFoundError.
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, 'wb') as f:
        pickle.dump(experiments, f)
        
def load_experiments(name: str):
    """Load a pickled experiments object from ``EXPERIMENT_PATH``.

    Args:
        name: File name, with or without the ``'.pkl'`` extension.

    Returns:
        The unpickled object.
    """
    filename = name if name.endswith('.pkl') else name + '.pkl'
    # Unpickling can run code stored in the file; only use this on trusted files.
    with open(EXPERIMENT_PATH / filename, 'rb') as f:
        return pickle.load(f)

In [11]:
# The save call is kept commented out; the experiments are read back from
# 'baseline_experiments.pkl' under EXPERIMENT_PATH instead.
# save_experiments(baseline_experiments, 'baseline_experiments')
baseline_experiments = load_experiments('baseline_experiments')

In [8]:
def average_cv_metrics(experiments, model_name_root):
    """Average each metric of one model over all cross-validation folds.

    Fold ``i`` (1-based) is looked up as
    ``experiment.results[f'{model_name_root}_{i}'].metrics``.

    Args:
        experiments: Sequence of fold experiments, in fold order.
        model_name_root: Model name without the ``_<fold>`` suffix.

    Returns:
        Dict mapping metric name -> mean over the folds, formatted to three
        decimals. The metric names are taken from the first fold.

    Raises:
        ValueError: If ``experiments`` is empty.
    """
    n_folds = len(experiments)
    if n_folds == 0:
        # Without this guard the function would fail on ``None.items()`` below.
        raise ValueError('average_cv_metrics requires at least one experiment')

    totals = None
    for fold_number, experiment in enumerate(experiments, start=1):
        fold_metrics = experiment.results[f'{model_name_root}_{fold_number}'].metrics
        if totals is None:
            # Copy so the first fold's stored metrics dict is not used as the accumulator.
            totals = fold_metrics.copy()
        else:
            for key in totals:
                totals[key] += fold_metrics[key]

    # Format to three decimals, then parse back to a plain float.
    return {key: float(f'{total / n_folds:.3f}') for key, total in totals.items()}

In [13]:
print(average_cv_metrics(baseline_experiments, 'br_baseline'))

{'weighted_jaccard': 0.177, 'hamming_loss': 0.052, 'precision_micro_avg': 0.717, 'recall_micro_avg': 0.203, 'f1_micro_avg': 0.316, 'precision_macro_avg': 0.406, 'recall_macro_avg': 0.145, 'f1_macro_avg': 0.19, 'precision_weighted_avg': 0.527, 'recall_weighted_avg': 0.203, 'f1_weighted_avg': 0.263, 'precision_samples_avg': 0.255, 'recall_samples_avg': 0.24, 'f1_samples_avg': 0.24}


In [14]:
print(average_cv_metrics(baseline_experiments, 'cc_baseline'))

{'weighted_jaccard': 0.234, 'hamming_loss': 0.056, 'precision_micro_avg': 0.553, 'recall_micro_avg': 0.312, 'f1_micro_avg': 0.399, 'precision_macro_avg': 0.406, 'recall_macro_avg': 0.22, 'f1_macro_avg': 0.25, 'precision_weighted_avg': 0.512, 'recall_weighted_avg': 0.312, 'f1_weighted_avg': 0.344, 'precision_samples_avg': 0.407, 'recall_samples_avg': 0.35, 'f1_samples_avg': 0.364}


In [15]:
print(average_cv_metrics(baseline_experiments, 'lp_baseline'))

{'weighted_jaccard': 0.303, 'hamming_loss': 0.055, 'precision_micro_avg': 0.548, 'recall_micro_avg': 0.412, 'f1_micro_avg': 0.47, 'precision_macro_avg': 0.433, 'recall_macro_avg': 0.316, 'f1_macro_avg': 0.34, 'precision_weighted_avg': 0.509, 'recall_weighted_avg': 0.412, 'f1_weighted_avg': 0.439, 'precision_samples_avg': 0.551, 'recall_samples_avg': 0.466, 'f1_samples_avg': 0.49}


In [16]:
# Run names and tags for the baseline variants whose 'C' tag is '0.1'.
baseline_0_1_C_names = ['br_baseline_0_1_C', 'cc_baseline_0_1_C', 'lp_baseline_0_1_C']
baseline_0_1_C_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'C': '0.1',
        },
    )
    for run_name in baseline_0_1_C_names
)

# Same layout for the variants whose 'C' tag is '10'.
baseline_10_C_names = ['br_baseline_10_C', 'cc_baseline_10_C', 'lp_baseline_10_C']
baseline_10_C_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'C': '10',
        },
    )
    for run_name in baseline_10_C_names
)

In [17]:
# baseline_0_1_C_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)
# 
# baseline_10_C_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)

In [18]:
# run_cv_experiments(baseline_0_1_C_experiments, baseline_0_1_C_experiment_configs, baseline_0_1_C_names, baseline_0_1_C_tags, save_models=True, log_results=True)

In [19]:
# run_cv_experiments(baseline_10_C_experiments, baseline_10_C_experiment_configs, baseline_10_C_names, baseline_10_C_tags, save_models=True, log_results=True)

In [20]:
# The save calls are kept commented out; both experiment lists are read back
# from their pickles under EXPERIMENT_PATH instead.
# save_experiments(baseline_0_1_C_experiments, 'baseline_0_1_C_experiments')
# save_experiments(baseline_10_C_experiments, 'baseline_10_C_experiments')

baseline_0_1_C_experiments = load_experiments('baseline_0_1_C_experiments')
baseline_10_C_experiments = load_experiments('baseline_10_C_experiments')

In [21]:
print(average_cv_metrics(baseline_0_1_C_experiments, 'br_baseline_0_1_C'))

{'weighted_jaccard': 0.104, 'hamming_loss': 0.055, 'precision_micro_avg': 0.737, 'recall_micro_avg': 0.115, 'f1_micro_avg': 0.199, 'precision_macro_avg': 0.319, 'recall_macro_avg': 0.077, 'f1_macro_avg': 0.107, 'precision_weighted_avg': 0.472, 'recall_weighted_avg': 0.115, 'f1_weighted_avg': 0.16, 'precision_samples_avg': 0.148, 'recall_samples_avg': 0.141, 'f1_samples_avg': 0.14}


In [22]:
print(average_cv_metrics(baseline_0_1_C_experiments, 'cc_baseline_0_1_C'))

{'weighted_jaccard': 0.151, 'hamming_loss': 0.054, 'precision_micro_avg': 0.664, 'recall_micro_avg': 0.173, 'f1_micro_avg': 0.274, 'precision_macro_avg': 0.376, 'recall_macro_avg': 0.117, 'f1_macro_avg': 0.155, 'precision_weighted_avg': 0.527, 'recall_weighted_avg': 0.173, 'f1_weighted_avg': 0.231, 'precision_samples_avg': 0.23, 'recall_samples_avg': 0.203, 'f1_samples_avg': 0.21}


In [23]:
print(average_cv_metrics(baseline_0_1_C_experiments, 'lp_baseline_0_1_C'))

{'weighted_jaccard': 0.259, 'hamming_loss': 0.058, 'precision_micro_avg': 0.509, 'recall_micro_avg': 0.375, 'f1_micro_avg': 0.432, 'precision_macro_avg': 0.347, 'recall_macro_avg': 0.259, 'f1_macro_avg': 0.27, 'precision_weighted_avg': 0.445, 'recall_weighted_avg': 0.375, 'f1_weighted_avg': 0.382, 'precision_samples_avg': 0.509, 'recall_samples_avg': 0.422, 'f1_samples_avg': 0.448}


In [24]:
print(average_cv_metrics(baseline_10_C_experiments, 'br_baseline_10_C'))

{'weighted_jaccard': 0.195, 'hamming_loss': 0.051, 'precision_micro_avg': 0.705, 'recall_micro_avg': 0.226, 'f1_micro_avg': 0.342, 'precision_macro_avg': 0.43, 'recall_macro_avg': 0.169, 'f1_macro_avg': 0.216, 'precision_weighted_avg': 0.543, 'recall_weighted_avg': 0.226, 'f1_weighted_avg': 0.287, 'precision_samples_avg': 0.281, 'recall_samples_avg': 0.265, 'f1_samples_avg': 0.264}


In [25]:
print(average_cv_metrics(baseline_10_C_experiments, 'cc_baseline_10_C'))

{'weighted_jaccard': 0.245, 'hamming_loss': 0.063, 'precision_micro_avg': 0.456, 'recall_micro_avg': 0.343, 'f1_micro_avg': 0.391, 'precision_macro_avg': 0.387, 'recall_macro_avg': 0.27, 'f1_macro_avg': 0.266, 'precision_weighted_avg': 0.503, 'recall_weighted_avg': 0.343, 'f1_weighted_avg': 0.362, 'precision_samples_avg': 0.443, 'recall_samples_avg': 0.384, 'f1_samples_avg': 0.397}


In [26]:
print(average_cv_metrics(baseline_10_C_experiments, 'lp_baseline_10_C'))

{'weighted_jaccard': 0.315, 'hamming_loss': 0.054, 'precision_micro_avg': 0.553, 'recall_micro_avg': 0.425, 'f1_micro_avg': 0.481, 'precision_macro_avg': 0.438, 'recall_macro_avg': 0.342, 'f1_macro_avg': 0.368, 'precision_weighted_avg': 0.519, 'recall_weighted_avg': 0.425, 'f1_weighted_avg': 0.455, 'precision_samples_avg': 0.559, 'recall_samples_avg': 0.48, 'f1_samples_avg': 0.501}


In [27]:
# Run names and tags for the baseline variants whose 'C' tag is '100'.
baseline_100_C_names = ['br_baseline_100_C', 'cc_baseline_100_C', 'lp_baseline_100_C']
baseline_100_C_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'C': '100',
        },
    )
    for run_name in baseline_100_C_names
)

In [28]:
# baseline_100_C_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)

In [29]:
# run_cv_experiments(baseline_100_C_experiments, baseline_100_C_experiment_configs, baseline_100_C_names, baseline_100_C_tags, save_models=True, log_results=True)

In [30]:
# The save call is kept commented out; the experiments are read back from
# 'baseline_100_C_experiments.pkl' under EXPERIMENT_PATH instead.
# save_experiments(baseline_100_C_experiments, 'baseline_100_C_experiments')

baseline_100_C_experiments = load_experiments('baseline_100_C_experiments')

In [31]:
print(average_cv_metrics(baseline_100_C_experiments, 'br_baseline_100_C'))

{'weighted_jaccard': 0.197, 'hamming_loss': 0.051, 'precision_micro_avg': 0.699, 'recall_micro_avg': 0.23, 'f1_micro_avg': 0.346, 'precision_macro_avg': 0.451, 'recall_macro_avg': 0.173, 'f1_macro_avg': 0.221, 'precision_weighted_avg': 0.557, 'recall_weighted_avg': 0.23, 'f1_weighted_avg': 0.291, 'precision_samples_avg': 0.285, 'recall_samples_avg': 0.269, 'f1_samples_avg': 0.267}


In [32]:
print(average_cv_metrics(baseline_100_C_experiments, 'cc_baseline_100_C'))

{'weighted_jaccard': 0.247, 'hamming_loss': 0.064, 'precision_micro_avg': 0.448, 'recall_micro_avg': 0.346, 'f1_micro_avg': 0.39, 'precision_macro_avg': 0.384, 'recall_macro_avg': 0.278, 'f1_macro_avg': 0.266, 'precision_weighted_avg': 0.504, 'recall_weighted_avg': 0.346, 'f1_weighted_avg': 0.364, 'precision_samples_avg': 0.445, 'recall_samples_avg': 0.386, 'f1_samples_avg': 0.4}


In [33]:
print(average_cv_metrics(baseline_100_C_experiments, 'lp_baseline_100_C'))

{'weighted_jaccard': 0.318, 'hamming_loss': 0.054, 'precision_micro_avg': 0.55, 'recall_micro_avg': 0.429, 'f1_micro_avg': 0.482, 'precision_macro_avg': 0.44, 'recall_macro_avg': 0.348, 'f1_macro_avg': 0.373, 'precision_weighted_avg': 0.523, 'recall_weighted_avg': 0.429, 'f1_weighted_avg': 0.459, 'precision_samples_avg': 0.558, 'recall_samples_avg': 0.482, 'f1_samples_avg': 0.502}


In [34]:
# Run names and tags for the baseline variants tagged with C '10' and solver 'saga'.
baseline_saga_10_C_names = ['br_baseline_saga_10_C', 'cc_baseline_saga_10_C', 'lp_baseline_saga_10_C']
baseline_saga_10_C_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'C': '10',
            'solver': 'saga',
        },
    )
    for run_name in baseline_saga_10_C_names
)

In [35]:
# baseline_saga_10_C_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)

In [36]:
# run_cv_experiments(baseline_saga_10_C_experiments, baseline_saga_10_C_experiment_configs, baseline_saga_10_C_names, baseline_saga_10_C_tags, save_models=True, log_results=True)

In [9]:
# The save call is kept commented out; the experiments are read back from
# 'baseline_saga_10_C_experiments.pkl' under EXPERIMENT_PATH instead.
# save_experiments(baseline_saga_10_C_experiments, 'baseline_saga_10_C_experiments')

baseline_saga_10_C_experiments = load_experiments('baseline_saga_10_C_experiments')

In [10]:
print(average_cv_metrics(baseline_saga_10_C_experiments, 'br_baseline_saga_10_C'))

{'weighted_jaccard': 0.195, 'hamming_loss': 0.051, 'precision_micro_avg': 0.703, 'recall_micro_avg': 0.227, 'f1_micro_avg': 0.343, 'precision_macro_avg': 0.43, 'recall_macro_avg': 0.168, 'f1_macro_avg': 0.215, 'precision_weighted_avg': 0.552, 'recall_weighted_avg': 0.227, 'f1_weighted_avg': 0.288, 'precision_samples_avg': 0.282, 'recall_samples_avg': 0.266, 'f1_samples_avg': 0.265}


In [11]:
print(average_cv_metrics(baseline_saga_10_C_experiments, 'cc_baseline_saga_10_C'))

{'weighted_jaccard': 0.247, 'hamming_loss': 0.063, 'precision_micro_avg': 0.457, 'recall_micro_avg': 0.344, 'f1_micro_avg': 0.393, 'precision_macro_avg': 0.385, 'recall_macro_avg': 0.27, 'f1_macro_avg': 0.265, 'precision_weighted_avg': 0.506, 'recall_weighted_avg': 0.344, 'f1_weighted_avg': 0.364, 'precision_samples_avg': 0.444, 'recall_samples_avg': 0.385, 'f1_samples_avg': 0.399}


In [12]:
print(average_cv_metrics(baseline_saga_10_C_experiments, 'lp_baseline_saga_10_C'))

{'weighted_jaccard': 0.317, 'hamming_loss': 0.054, 'precision_micro_avg': 0.555, 'recall_micro_avg': 0.426, 'f1_micro_avg': 0.482, 'precision_macro_avg': 0.445, 'recall_macro_avg': 0.342, 'f1_macro_avg': 0.369, 'precision_weighted_avg': 0.523, 'recall_weighted_avg': 0.426, 'f1_weighted_avg': 0.457, 'precision_samples_avg': 0.56, 'recall_samples_avg': 0.48, 'f1_samples_avg': 0.502}


In [36]:
# Run names and tags for the 'linearsvc' baseline variants.
baseline_linearsvc_names = ['br_baseline_linearsvc', 'cc_baseline_linearsvc', 'lp_baseline_linearsvc']
baseline_linearsvc_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
        },
    )
    for run_name in baseline_linearsvc_names
)

# Run names and tags for the 'random_forest' baseline variants.
baseline_random_forest_names = ['br_baseline_random_forest', 'cc_baseline_random_forest', 'lp_baseline_random_forest']
baseline_random_forest_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
        },
    )
    for run_name in baseline_random_forest_names
)

# Run names and tags for the 'gradient_boost' baseline variants.
baseline_gradient_boost_names = ['br_baseline_gradient_boost', 'cc_baseline_gradient_boost', 'lp_baseline_gradient_boost']
baseline_gradient_boost_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
        },
    )
    for run_name in baseline_gradient_boost_names
)

In [44]:
# baseline_linearsvc_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)
# baseline_random_forest_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)
# baseline_gradient_boost_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)

In [43]:
# run_cv_experiments(baseline_linearsvc_experiments, baseline_linearsvc_experiment_configs, baseline_linearsvc_names, baseline_linearsvc_tags, save_models=True, log_results=True)

In [44]:
# run_cv_experiments(baseline_random_forest_experiments, baseline_random_forest_experiment_configs, baseline_random_forest_names, baseline_random_forest_tags, save_models=False, log_results=True)

In [45]:
# run_cv_experiments(baseline_gradient_boost_experiments, baseline_gradient_boost_experiment_configs, baseline_gradient_boost_names, baseline_gradient_boost_tags, save_models=False, log_results=True)

In [43]:
# run_cv_experiments(baseline_gradient_boost_experiments, baseline_gradient_boost_experiment_configs, baseline_gradient_boost_names, baseline_gradient_boost_tags, save_models=False, log_results=True)

In [41]:
# The save calls are kept commented out; the three experiment lists are read
# back from their pickles under EXPERIMENT_PATH instead.
# save_experiments(baseline_linearsvc_experiments, 'baseline_linearsvc_experiments')
# save_experiments(baseline_random_forest_experiments, 'baseline_random_forest_experiments')
# save_experiments(baseline_gradient_boost_experiments, 'baseline_gradient_boost_experiments')

baseline_linearsvc_experiments = load_experiments('baseline_linearsvc_experiments')
baseline_random_forest_experiments = load_experiments('baseline_random_forest_experiments')
baseline_gradient_boost_experiments = load_experiments('baseline_gradient_boost_experiments')

In [47]:
print(average_cv_metrics(baseline_linearsvc_experiments, 'br_baseline_linearsvc'))

{'weighted_jaccard': 0.157, 'hamming_loss': 0.052, 'precision_micro_avg': 0.749, 'recall_micro_avg': 0.179, 'f1_micro_avg': 0.289, 'precision_macro_avg': 0.322, 'recall_macro_avg': 0.124, 'f1_macro_avg': 0.161, 'precision_weighted_avg': 0.466, 'recall_weighted_avg': 0.179, 'f1_weighted_avg': 0.23, 'precision_samples_avg': 0.232, 'recall_samples_avg': 0.215, 'f1_samples_avg': 0.218}


In [48]:
print(average_cv_metrics(baseline_linearsvc_experiments, 'cc_baseline_linearsvc'))

{'weighted_jaccard': 0.209, 'hamming_loss': 0.067, 'precision_micro_avg': 0.408, 'recall_micro_avg': 0.305, 'f1_micro_avg': 0.349, 'precision_macro_avg': 0.301, 'recall_macro_avg': 0.241, 'f1_macro_avg': 0.212, 'precision_weighted_avg': 0.457, 'recall_weighted_avg': 0.305, 'f1_weighted_avg': 0.308, 'precision_samples_avg': 0.406, 'recall_samples_avg': 0.345, 'f1_samples_avg': 0.361}


In [49]:
print(average_cv_metrics(baseline_linearsvc_experiments, 'lp_baseline_linearsvc'))

{'weighted_jaccard': 0.275, 'hamming_loss': 0.057, 'precision_micro_avg': 0.527, 'recall_micro_avg': 0.39, 'f1_micro_avg': 0.449, 'precision_macro_avg': 0.361, 'recall_macro_avg': 0.287, 'f1_macro_avg': 0.29, 'precision_weighted_avg': 0.47, 'recall_weighted_avg': 0.39, 'f1_weighted_avg': 0.4, 'precision_samples_avg': 0.529, 'recall_samples_avg': 0.442, 'f1_samples_avg': 0.468}


In [50]:
print(average_cv_metrics(baseline_random_forest_experiments, 'br_baseline_random_forest'))

{'weighted_jaccard': 0.306, 'hamming_loss': 0.044, 'precision_micro_avg': 0.802, 'recall_micro_avg': 0.339, 'f1_micro_avg': 0.476, 'precision_macro_avg': 0.717, 'recall_macro_avg': 0.241, 'f1_macro_avg': 0.323, 'precision_weighted_avg': 0.781, 'recall_weighted_avg': 0.339, 'f1_weighted_avg': 0.438, 'precision_samples_avg': 0.425, 'recall_samples_avg': 0.379, 'f1_samples_avg': 0.391}


In [51]:
print(average_cv_metrics(baseline_random_forest_experiments, 'cc_baseline_random_forest'))

{'weighted_jaccard': 0.327, 'hamming_loss': 0.045, 'precision_micro_avg': 0.743, 'recall_micro_avg': 0.375, 'f1_micro_avg': 0.499, 'precision_macro_avg': 0.68, 'recall_macro_avg': 0.263, 'f1_macro_avg': 0.34, 'precision_weighted_avg': 0.728, 'recall_weighted_avg': 0.375, 'f1_weighted_avg': 0.465, 'precision_samples_avg': 0.476, 'recall_samples_avg': 0.418, 'f1_samples_avg': 0.434}


In [52]:
print(average_cv_metrics(baseline_random_forest_experiments, 'lp_baseline_random_forest'))

{'weighted_jaccard': 0.375, 'hamming_loss': 0.048, 'precision_micro_avg': 0.614, 'recall_micro_avg': 0.491, 'f1_micro_avg': 0.545, 'precision_macro_avg': 0.559, 'recall_macro_avg': 0.408, 'f1_macro_avg': 0.448, 'precision_weighted_avg': 0.603, 'recall_weighted_avg': 0.491, 'f1_weighted_avg': 0.527, 'precision_samples_avg': 0.613, 'recall_samples_avg': 0.544, 'f1_samples_avg': 0.561}


In [51]:
print(average_cv_metrics(baseline_gradient_boost_experiments, 'br_baseline_gradient_boost'))

{'weighted_jaccard': 0.295, 'hamming_loss': 0.047, 'precision_micro_avg': 0.712, 'recall_micro_avg': 0.341, 'f1_micro_avg': 0.461, 'precision_macro_avg': 0.566, 'recall_macro_avg': 0.268, 'f1_macro_avg': 0.337, 'precision_weighted_avg': 0.661, 'recall_weighted_avg': 0.341, 'f1_weighted_avg': 0.424, 'precision_samples_avg': 0.427, 'recall_samples_avg': 0.389, 'f1_samples_avg': 0.394}


In [52]:
print(average_cv_metrics(baseline_gradient_boost_experiments, 'cc_baseline_gradient_boost'))

{'weighted_jaccard': 0.313, 'hamming_loss': 0.048, 'precision_micro_avg': 0.668, 'recall_micro_avg': 0.374, 'f1_micro_avg': 0.479, 'precision_macro_avg': 0.533, 'recall_macro_avg': 0.287, 'f1_macro_avg': 0.35, 'precision_weighted_avg': 0.627, 'recall_weighted_avg': 0.374, 'f1_weighted_avg': 0.45, 'precision_samples_avg': 0.473, 'recall_samples_avg': 0.423, 'f1_samples_avg': 0.433}


In [53]:
print(average_cv_metrics(baseline_gradient_boost_experiments, 'lp_baseline_gradient_boost'))

{'weighted_jaccard': 0.312, 'hamming_loss': 0.062, 'precision_micro_avg': 0.476, 'recall_micro_avg': 0.447, 'f1_micro_avg': 0.461, 'precision_macro_avg': 0.4, 'recall_macro_avg': 0.365, 'f1_macro_avg': 0.376, 'precision_weighted_avg': 0.476, 'recall_weighted_avg': 0.447, 'f1_weighted_avg': 0.458, 'precision_samples_avg': 0.532, 'recall_samples_avg': 0.492, 'f1_samples_avg': 0.493}


In [9]:
# Random-forest runs tagged with n_estimators '50'.
baseline_rf_50_est_names = ['br_baseline_rf_50_est', 'cc_baseline_rf_50_est', 'lp_baseline_rf_50_est']
baseline_rf_50_est_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'n_estimators': '50',
        },
    )
    for run_name in baseline_rf_50_est_names
)

# Random-forest runs tagged with max_depth '15'.
baseline_rf_15_depth_names = ['br_baseline_rf_15_depth', 'cc_baseline_rf_15_depth', 'lp_baseline_rf_15_depth']
baseline_rf_15_depth_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'max_depth': '15',
        },
    )
    for run_name in baseline_rf_15_depth_names
)

# Random-forest runs tagged with both n_estimators '50' and max_depth '15'.
baseline_rf_50_est_15_depth_names = ['br_baseline_rf_50_est_15_depth', 'cc_baseline_rf_50_est_15_depth', 'lp_baseline_rf_50_est_15_depth']
baseline_rf_50_est_15_depth_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'n_estimators': '50',
            'max_depth': '15',
        },
    )
    for run_name in baseline_rf_50_est_15_depth_names
)

In [13]:
# baseline_rf_50_est_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)
# baseline_rf_15_depth_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)
# baseline_rf_50_est_15_depth_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)

In [14]:
# run_cv_experiments(baseline_rf_50_est_experiments, baseline_rf_50_est_experiment_configs, baseline_rf_50_est_names, baseline_rf_50_est_tags, save_models=False, log_results=True)

In [15]:
# run_cv_experiments(baseline_rf_15_depth_experiments, baseline_rf_15_depth_experiment_configs, baseline_rf_15_depth_names, baseline_rf_15_depth_tags, save_models=False, log_results=True)

In [16]:
# run_cv_experiments(baseline_rf_50_est_15_depth_experiments, baseline_rf_50_est_15_depth_experiment_configs, baseline_rf_50_est_15_depth_names, baseline_rf_50_est_15_depth_tags, save_models=False, log_results=True)

In [14]:
# The save calls are kept commented out; the three experiment lists are read
# back from their pickles under EXPERIMENT_PATH instead.
# save_experiments(baseline_rf_50_est_experiments, 'baseline_rf_50_est_experiments')
# save_experiments(baseline_rf_15_depth_experiments, 'baseline_rf_15_depth_experiments')
# save_experiments(baseline_rf_50_est_15_depth_experiments, 'baseline_rf_50_est_15_depth_experiments')

baseline_rf_50_est_experiments = load_experiments('baseline_rf_50_est_experiments')
baseline_rf_15_depth_experiments = load_experiments('baseline_rf_15_depth_experiments')
baseline_rf_50_est_15_depth_experiments = load_experiments('baseline_rf_50_est_15_depth_experiments')

In [16]:
print(average_cv_metrics(baseline_rf_50_est_experiments, 'br_baseline_rf_50_est'))

{'weighted_jaccard': 0.305, 'hamming_loss': 0.044, 'precision_micro_avg': 0.793, 'recall_micro_avg': 0.338, 'f1_micro_avg': 0.474, 'precision_macro_avg': 0.706, 'recall_macro_avg': 0.241, 'f1_macro_avg': 0.323, 'precision_weighted_avg': 0.77, 'recall_weighted_avg': 0.338, 'f1_weighted_avg': 0.437, 'precision_samples_avg': 0.424, 'recall_samples_avg': 0.378, 'f1_samples_avg': 0.39}


In [17]:
print(average_cv_metrics(baseline_rf_50_est_experiments, 'cc_baseline_rf_50_est'))

{'weighted_jaccard': 0.327, 'hamming_loss': 0.045, 'precision_micro_avg': 0.742, 'recall_micro_avg': 0.375, 'f1_micro_avg': 0.498, 'precision_macro_avg': 0.657, 'recall_macro_avg': 0.26, 'f1_macro_avg': 0.336, 'precision_weighted_avg': 0.716, 'recall_weighted_avg': 0.375, 'f1_weighted_avg': 0.464, 'precision_samples_avg': 0.474, 'recall_samples_avg': 0.417, 'f1_samples_avg': 0.432}


In [18]:
print(average_cv_metrics(baseline_rf_50_est_experiments, 'lp_baseline_rf_50_est'))

{'weighted_jaccard': 0.372, 'hamming_loss': 0.049, 'precision_micro_avg': 0.607, 'recall_micro_avg': 0.487, 'f1_micro_avg': 0.54, 'precision_macro_avg': 0.551, 'recall_macro_avg': 0.399, 'f1_macro_avg': 0.44, 'precision_weighted_avg': 0.595, 'recall_weighted_avg': 0.487, 'f1_weighted_avg': 0.523, 'precision_samples_avg': 0.607, 'recall_samples_avg': 0.54, 'f1_samples_avg': 0.557}


In [19]:
print(average_cv_metrics(baseline_rf_15_depth_experiments, 'br_baseline_rf_15_depth'))

{'weighted_jaccard': 0.296, 'hamming_loss': 0.045, 'precision_micro_avg': 0.795, 'recall_micro_avg': 0.328, 'f1_micro_avg': 0.464, 'precision_macro_avg': 0.674, 'recall_macro_avg': 0.232, 'f1_macro_avg': 0.308, 'precision_weighted_avg': 0.762, 'recall_weighted_avg': 0.328, 'f1_weighted_avg': 0.423, 'precision_samples_avg': 0.419, 'recall_samples_avg': 0.372, 'f1_samples_avg': 0.384}


In [20]:
print(average_cv_metrics(baseline_rf_15_depth_experiments, 'cc_baseline_rf_15_depth'))

{'weighted_jaccard': 0.316, 'hamming_loss': 0.045, 'precision_micro_avg': 0.751, 'recall_micro_avg': 0.36, 'f1_micro_avg': 0.486, 'precision_macro_avg': 0.642, 'recall_macro_avg': 0.249, 'f1_macro_avg': 0.321, 'precision_weighted_avg': 0.714, 'recall_weighted_avg': 0.36, 'f1_weighted_avg': 0.449, 'precision_samples_avg': 0.465, 'recall_samples_avg': 0.405, 'f1_samples_avg': 0.421}


In [21]:
print(average_cv_metrics(baseline_rf_15_depth_experiments, 'lp_baseline_rf_15_depth'))

{'weighted_jaccard': 0.366, 'hamming_loss': 0.049, 'precision_micro_avg': 0.613, 'recall_micro_avg': 0.478, 'f1_micro_avg': 0.537, 'precision_macro_avg': 0.566, 'recall_macro_avg': 0.391, 'f1_macro_avg': 0.432, 'precision_weighted_avg': 0.604, 'recall_weighted_avg': 0.478, 'f1_weighted_avg': 0.516, 'precision_samples_avg': 0.612, 'recall_samples_avg': 0.535, 'f1_samples_avg': 0.556}


In [22]:
print(average_cv_metrics(baseline_rf_50_est_15_depth_experiments, 'br_baseline_rf_50_est_15_depth'))

{'weighted_jaccard': 0.299, 'hamming_loss': 0.045, 'precision_micro_avg': 0.789, 'recall_micro_avg': 0.333, 'f1_micro_avg': 0.468, 'precision_macro_avg': 0.699, 'recall_macro_avg': 0.236, 'f1_macro_avg': 0.315, 'precision_weighted_avg': 0.763, 'recall_weighted_avg': 0.333, 'f1_weighted_avg': 0.428, 'precision_samples_avg': 0.424, 'recall_samples_avg': 0.376, 'f1_samples_avg': 0.388}


In [23]:
print(average_cv_metrics(baseline_rf_50_est_15_depth_experiments, 'cc_baseline_rf_50_est_15_depth'))

{'weighted_jaccard': 0.315, 'hamming_loss': 0.045, 'precision_micro_avg': 0.742, 'recall_micro_avg': 0.36, 'f1_micro_avg': 0.485, 'precision_macro_avg': 0.654, 'recall_macro_avg': 0.253, 'f1_macro_avg': 0.327, 'precision_weighted_avg': 0.712, 'recall_weighted_avg': 0.36, 'f1_weighted_avg': 0.449, 'precision_samples_avg': 0.464, 'recall_samples_avg': 0.406, 'f1_samples_avg': 0.421}


In [24]:
print(average_cv_metrics(baseline_rf_50_est_15_depth_experiments, 'lp_baseline_rf_50_est_15_depth'))

{'weighted_jaccard': 0.364, 'hamming_loss': 0.049, 'precision_micro_avg': 0.607, 'recall_micro_avg': 0.477, 'f1_micro_avg': 0.534, 'precision_macro_avg': 0.534, 'recall_macro_avg': 0.388, 'f1_macro_avg': 0.427, 'precision_weighted_avg': 0.595, 'recall_weighted_avg': 0.477, 'f1_weighted_avg': 0.514, 'precision_samples_avg': 0.606, 'recall_samples_avg': 0.531, 'f1_samples_avg': 0.551}


In [17]:
# Random-forest runs tagged with n_estimators '10' and max_depth '15'.
baseline_rf_10_est_15_depth_names = ['br_baseline_rf_10_est_15_depth', 'cc_baseline_rf_10_est_15_depth', 'lp_baseline_rf_10_est_15_depth']
baseline_rf_10_est_15_depth_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'n_estimators': '10',
            'max_depth': '15',
        },
    )
    for run_name in baseline_rf_10_est_15_depth_names
)

# Random-forest runs tagged with n_estimators '50' and max_depth '10'.
baseline_rf_50_est_10_depth_names = ['br_baseline_rf_50_est_10_depth', 'cc_baseline_rf_50_est_10_depth', 'lp_baseline_rf_50_est_10_depth']
baseline_rf_50_est_10_depth_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'n_estimators': '50',
            'max_depth': '10',
        },
    )
    for run_name in baseline_rf_50_est_10_depth_names
)

# Random-forest runs tagged with n_estimators '10' and max_depth '10'.
baseline_rf_10_est_10_depth_names = ['br_baseline_rf_10_est_10_depth', 'cc_baseline_rf_10_est_10_depth', 'lp_baseline_rf_10_est_10_depth']
baseline_rf_10_est_10_depth_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'n_estimators': '10',
            'max_depth': '10',
        },
    )
    for run_name in baseline_rf_10_est_10_depth_names
)

In [47]:
# baseline_rf_10_est_15_depth_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)
# baseline_rf_50_est_10_depth_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)
# baseline_rf_10_est_10_depth_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)

In [48]:
# run_cv_experiments(baseline_rf_10_est_15_depth_experiments, baseline_rf_10_est_15_depth_experiment_configs, baseline_rf_10_est_15_depth_names, baseline_rf_10_est_15_depth_tags, save_models=False, log_results=True)

In [49]:
# run_cv_experiments(baseline_rf_50_est_10_depth_experiments, baseline_rf_50_est_10_depth_experiment_configs, baseline_rf_50_est_10_depth_names, baseline_rf_50_est_10_depth_tags, save_models=False, log_results=True)

In [50]:
# run_cv_experiments(baseline_rf_10_est_10_depth_experiments, baseline_rf_10_est_10_depth_experiment_configs, baseline_rf_10_est_10_depth_names, baseline_rf_10_est_10_depth_tags, save_models=False, log_results=True)

In [28]:
# The save calls are kept commented out; the three experiment lists are read
# back from their pickles under EXPERIMENT_PATH instead.
# save_experiments(baseline_rf_10_est_15_depth_experiments, 'baseline_rf_10_est_15_depth_experiments')
# save_experiments(baseline_rf_50_est_10_depth_experiments, 'baseline_rf_50_est_10_depth_experiments')
# save_experiments(baseline_rf_10_est_10_depth_experiments, 'baseline_rf_10_est_10_depth_experiments')

baseline_rf_10_est_15_depth_experiments = load_experiments('baseline_rf_10_est_15_depth_experiments')
baseline_rf_50_est_10_depth_experiments = load_experiments('baseline_rf_50_est_10_depth_experiments')
baseline_rf_10_est_10_depth_experiments = load_experiments('baseline_rf_10_est_10_depth_experiments')

In [23]:
print(average_cv_metrics(baseline_rf_10_est_15_depth_experiments, 'br_baseline_rf_10_est_15_depth'))

{'weighted_jaccard': 0.29, 'hamming_loss': 0.047, 'precision_micro_avg': 0.718, 'recall_micro_avg': 0.332, 'f1_micro_avg': 0.454, 'precision_macro_avg': 0.635, 'recall_macro_avg': 0.235, 'f1_macro_avg': 0.313, 'precision_weighted_avg': 0.681, 'recall_weighted_avg': 0.332, 'f1_weighted_avg': 0.423, 'precision_samples_avg': 0.413, 'recall_samples_avg': 0.371, 'f1_samples_avg': 0.379}


In [24]:
# Metrics from average_cv_metrics for the `cc_` run of the rf_10_est_15_depth baseline.
print(average_cv_metrics(baseline_rf_10_est_15_depth_experiments, 'cc_baseline_rf_10_est_15_depth'))

{'weighted_jaccard': 0.304, 'hamming_loss': 0.048, 'precision_micro_avg': 0.685, 'recall_micro_avg': 0.357, 'f1_micro_avg': 0.469, 'precision_macro_avg': 0.633, 'recall_macro_avg': 0.256, 'f1_macro_avg': 0.332, 'precision_weighted_avg': 0.657, 'recall_weighted_avg': 0.357, 'f1_weighted_avg': 0.441, 'precision_samples_avg': 0.45, 'recall_samples_avg': 0.399, 'f1_samples_avg': 0.41}


In [25]:
# Metrics from average_cv_metrics for the `lp_` run of the rf_10_est_15_depth baseline.
# In the recorded output this run has the highest weighted_jaccard of the three
# rf_10_est_15_depth runs (0.339 vs 0.29 / 0.304) but also the highest hamming_loss (0.054).
print(average_cv_metrics(baseline_rf_10_est_15_depth_experiments, 'lp_baseline_rf_10_est_15_depth'))

{'weighted_jaccard': 0.339, 'hamming_loss': 0.054, 'precision_micro_avg': 0.554, 'recall_micro_avg': 0.458, 'f1_micro_avg': 0.501, 'precision_macro_avg': 0.455, 'recall_macro_avg': 0.362, 'f1_macro_avg': 0.392, 'precision_weighted_avg': 0.536, 'recall_weighted_avg': 0.458, 'f1_weighted_avg': 0.487, 'precision_samples_avg': 0.563, 'recall_samples_avg': 0.504, 'f1_samples_avg': 0.517}


In [30]:
# Metrics from average_cv_metrics for the `br_` run of the rf_50_est_10_depth baseline.
print(average_cv_metrics(baseline_rf_50_est_10_depth_experiments, 'br_baseline_rf_50_est_10_depth'))

{'weighted_jaccard': 0.257, 'hamming_loss': 0.046, 'precision_micro_avg': 0.802, 'recall_micro_avg': 0.284, 'f1_micro_avg': 0.419, 'precision_macro_avg': 0.674, 'recall_macro_avg': 0.202, 'f1_macro_avg': 0.266, 'precision_weighted_avg': 0.753, 'recall_weighted_avg': 0.284, 'f1_weighted_avg': 0.366, 'precision_samples_avg': 0.371, 'recall_samples_avg': 0.33, 'f1_samples_avg': 0.341}


In [31]:
# Metrics from average_cv_metrics for the `cc_` run of the rf_50_est_10_depth baseline.
print(average_cv_metrics(baseline_rf_50_est_10_depth_experiments, 'cc_baseline_rf_50_est_10_depth'))

{'weighted_jaccard': 0.26, 'hamming_loss': 0.047, 'precision_micro_avg': 0.787, 'recall_micro_avg': 0.289, 'f1_micro_avg': 0.422, 'precision_macro_avg': 0.641, 'recall_macro_avg': 0.203, 'f1_macro_avg': 0.268, 'precision_weighted_avg': 0.722, 'recall_weighted_avg': 0.289, 'f1_weighted_avg': 0.373, 'precision_samples_avg': 0.385, 'recall_samples_avg': 0.336, 'f1_samples_avg': 0.35}


In [32]:
# Metrics from average_cv_metrics for the `lp_` run of the rf_50_est_10_depth baseline.
print(average_cv_metrics(baseline_rf_50_est_10_depth_experiments, 'lp_baseline_rf_50_est_10_depth'))

{'weighted_jaccard': 0.327, 'hamming_loss': 0.052, 'precision_micro_avg': 0.58, 'recall_micro_avg': 0.437, 'f1_micro_avg': 0.498, 'precision_macro_avg': 0.514, 'recall_macro_avg': 0.331, 'f1_macro_avg': 0.363, 'precision_weighted_avg': 0.574, 'recall_weighted_avg': 0.437, 'f1_weighted_avg': 0.465, 'precision_samples_avg': 0.58, 'recall_samples_avg': 0.497, 'f1_samples_avg': 0.521}


In [33]:
# Metrics from average_cv_metrics for the `br_` run of the rf_10_est_10_depth baseline.
print(average_cv_metrics(baseline_rf_10_est_10_depth_experiments, 'br_baseline_rf_10_est_10_depth'))

{'weighted_jaccard': 0.262, 'hamming_loss': 0.047, 'precision_micro_avg': 0.754, 'recall_micro_avg': 0.296, 'f1_micro_avg': 0.425, 'precision_macro_avg': 0.665, 'recall_macro_avg': 0.216, 'f1_macro_avg': 0.287, 'precision_weighted_avg': 0.714, 'recall_weighted_avg': 0.296, 'f1_weighted_avg': 0.379, 'precision_samples_avg': 0.38, 'recall_samples_avg': 0.341, 'f1_samples_avg': 0.349}


In [34]:
# Metrics from average_cv_metrics for the `cc_` run of the rf_10_est_10_depth baseline.
print(average_cv_metrics(baseline_rf_10_est_10_depth_experiments, 'cc_baseline_rf_10_est_10_depth'))

{'weighted_jaccard': 0.263, 'hamming_loss': 0.048, 'precision_micro_avg': 0.737, 'recall_micro_avg': 0.3, 'f1_micro_avg': 0.426, 'precision_macro_avg': 0.621, 'recall_macro_avg': 0.207, 'f1_macro_avg': 0.271, 'precision_weighted_avg': 0.686, 'recall_weighted_avg': 0.3, 'f1_weighted_avg': 0.381, 'precision_samples_avg': 0.393, 'recall_samples_avg': 0.342, 'f1_samples_avg': 0.356}


In [35]:
# Metrics from average_cv_metrics for the `lp_` run of the rf_10_est_10_depth baseline.
print(average_cv_metrics(baseline_rf_10_est_10_depth_experiments, 'lp_baseline_rf_10_est_10_depth'))

{'weighted_jaccard': 0.315, 'hamming_loss': 0.054, 'precision_micro_avg': 0.552, 'recall_micro_avg': 0.426, 'f1_micro_avg': 0.481, 'precision_macro_avg': 0.472, 'recall_macro_avg': 0.325, 'f1_macro_avg': 0.355, 'precision_weighted_avg': 0.525, 'recall_weighted_avg': 0.426, 'f1_weighted_avg': 0.454, 'precision_samples_avg': 0.559, 'recall_samples_avg': 0.481, 'f1_samples_avg': 0.502}


In [21]:
# Run names and per-run tags for two `gb` baselines: learning_rate 0.05 with 200
# estimators, and learning_rate 0.2 with 50 estimators.
# The names end in `_depth` although no depth is tagged; they are kept verbatim because
# the metric cells further down look runs up by these exact strings.
# NOTE(review): the br_/cc_/lp_ prefixes presumably denote the problem-transformation
# wrapper (binary relevance / classifier chain / label powerset) — confirm against the
# experiment configs.
baseline_gb_0_05_lr_200_est_names = [
    'br_baseline_gb_0_05_lr_200_est_depth',
    'cc_baseline_gb_0_05_lr_200_est_depth',
    'lp_baseline_gb_0_05_lr_200_est_depth',
]
# One freshly built tag dict per run name, so the entries never share state.
baseline_gb_0_05_lr_200_est_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'learning_rate': '0.05',
            'n_estimators': '200',
        },
    )
    for run_name in baseline_gb_0_05_lr_200_est_names
)

baseline_gb_0_2_lr_50_est_names = [
    'br_baseline_gb_0_2_lr_50_est_depth',
    'cc_baseline_gb_0_2_lr_50_est_depth',
    'lp_baseline_gb_0_2_lr_50_est_depth',
]
baseline_gb_0_2_lr_50_est_tags = dict(
    (
        run_name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'learning_rate': '0.2',
            'n_estimators': '50',
        },
    )
    for run_name in baseline_gb_0_2_lr_50_est_names
)

In [9]:
# baseline_gb_0_05_lr_200_est_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)
# baseline_gb_0_2_lr_50_est_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)

In [10]:
# run_cv_experiments(baseline_gb_0_05_lr_200_est_experiments, baseline_gb_0_05_lr_200_est_experiment_configs, baseline_gb_0_05_lr_200_est_names, baseline_gb_0_05_lr_200_est_tags, save_models=False, log_results=True)

In [11]:
# run_cv_experiments(baseline_gb_0_2_lr_50_est_experiments, baseline_gb_0_2_lr_50_est_experiment_configs, baseline_gb_0_2_lr_50_est_names, baseline_gb_0_2_lr_50_est_tags, save_models=False, log_results=True)

In [24]:
# Persistence toggle for the gb 0.05-lr/200-est and 0.2-lr/50-est baselines: the saves are
# commented out, so re-running this cell only reloads results by name and never overwrites them.
# NOTE(review): presumably load_experiments reads what save_experiments wrote under the
# same name — confirm against their definitions earlier in the notebook.
# save_experiments(baseline_gb_0_05_lr_200_est_experiments, 'baseline_gb_0_05_lr_200_est_experiments')
# save_experiments(baseline_gb_0_2_lr_50_est_experiments, 'baseline_gb_0_2_lr_50_est_experiments')

baseline_gb_0_05_lr_200_est_experiments = load_experiments('baseline_gb_0_05_lr_200_est_experiments')
baseline_gb_0_2_lr_50_est_experiments = load_experiments('baseline_gb_0_2_lr_50_est_experiments')

In [20]:
# Metrics from average_cv_metrics for the `br_` run of the gb baseline tagged
# learning_rate 0.05 / n_estimators 200.
print(average_cv_metrics(baseline_gb_0_05_lr_200_est_experiments, 'br_baseline_gb_0_05_lr_200_est_depth'))

{'weighted_jaccard': 0.292, 'hamming_loss': 0.047, 'precision_micro_avg': 0.729, 'recall_micro_avg': 0.336, 'f1_micro_avg': 0.46, 'precision_macro_avg': 0.62, 'recall_macro_avg': 0.264, 'f1_macro_avg': 0.336, 'precision_weighted_avg': 0.685, 'recall_weighted_avg': 0.336, 'f1_weighted_avg': 0.42, 'precision_samples_avg': 0.423, 'recall_samples_avg': 0.386, 'f1_samples_avg': 0.391}


In [15]:
# Metrics from average_cv_metrics for the `cc_` run of the gb baseline tagged
# learning_rate 0.05 / n_estimators 200.
print(average_cv_metrics(baseline_gb_0_05_lr_200_est_experiments, 'cc_baseline_gb_0_05_lr_200_est_depth'))

{'weighted_jaccard': 0.31, 'hamming_loss': 0.048, 'precision_micro_avg': 0.675, 'recall_micro_avg': 0.369, 'f1_micro_avg': 0.477, 'precision_macro_avg': 0.571, 'recall_macro_avg': 0.286, 'f1_macro_avg': 0.353, 'precision_weighted_avg': 0.638, 'recall_weighted_avg': 0.369, 'f1_weighted_avg': 0.445, 'precision_samples_avg': 0.47, 'recall_samples_avg': 0.419, 'f1_samples_avg': 0.43}


In [16]:
# Metrics from average_cv_metrics for the `lp_` run of the gb baseline tagged
# learning_rate 0.05 / n_estimators 200.
print(average_cv_metrics(baseline_gb_0_05_lr_200_est_experiments, 'lp_baseline_gb_0_05_lr_200_est_depth'))

{'weighted_jaccard': 0.33, 'hamming_loss': 0.058, 'precision_micro_avg': 0.507, 'recall_micro_avg': 0.463, 'f1_micro_avg': 0.484, 'precision_macro_avg': 0.432, 'recall_macro_avg': 0.371, 'f1_macro_avg': 0.391, 'precision_weighted_avg': 0.505, 'recall_weighted_avg': 0.463, 'f1_weighted_avg': 0.478, 'precision_samples_avg': 0.558, 'recall_samples_avg': 0.511, 'f1_samples_avg': 0.515}


In [25]:
# Metrics from average_cv_metrics for the `br_` run of the gb baseline tagged
# learning_rate 0.2 / n_estimators 50.
print(average_cv_metrics(baseline_gb_0_2_lr_50_est_experiments, 'br_baseline_gb_0_2_lr_50_est_depth'))

{'weighted_jaccard': 0.3, 'hamming_loss': 0.048, 'precision_micro_avg': 0.691, 'recall_micro_avg': 0.35, 'f1_micro_avg': 0.464, 'precision_macro_avg': 0.527, 'recall_macro_avg': 0.271, 'f1_macro_avg': 0.339, 'precision_weighted_avg': 0.641, 'recall_weighted_avg': 0.35, 'f1_weighted_avg': 0.432, 'precision_samples_avg': 0.43, 'recall_samples_avg': 0.396, 'f1_samples_avg': 0.398}


In [26]:
# Metrics from average_cv_metrics for the `cc_` run of the gb baseline tagged
# learning_rate 0.2 / n_estimators 50.
print(average_cv_metrics(baseline_gb_0_2_lr_50_est_experiments, 'cc_baseline_gb_0_2_lr_50_est_depth'))

{'weighted_jaccard': 0.312, 'hamming_loss': 0.049, 'precision_micro_avg': 0.643, 'recall_micro_avg': 0.377, 'f1_micro_avg': 0.476, 'precision_macro_avg': 0.5, 'recall_macro_avg': 0.288, 'f1_macro_avg': 0.347, 'precision_weighted_avg': 0.607, 'recall_weighted_avg': 0.377, 'f1_weighted_avg': 0.449, 'precision_samples_avg': 0.472, 'recall_samples_avg': 0.426, 'f1_samples_avg': 0.433}


In [27]:
# Metrics from average_cv_metrics for the `lp_` run of the gb baseline tagged
# learning_rate 0.2 / n_estimators 50.
# NOTE(review): in the recorded output this run is far worse than the br_/cc_ runs of the
# same baseline (weighted_jaccard 0.11 vs 0.3 / 0.312, hamming_loss 0.118 vs 0.048 / 0.049).
# Investigate before using it in any comparison.
print(average_cv_metrics(baseline_gb_0_2_lr_50_est_experiments, 'lp_baseline_gb_0_2_lr_50_est_depth'))

{'weighted_jaccard': 0.11, 'hamming_loss': 0.118, 'precision_micro_avg': 0.176, 'recall_micro_avg': 0.226, 'f1_micro_avg': 0.195, 'precision_macro_avg': 0.141, 'recall_macro_avg': 0.163, 'f1_macro_avg': 0.13, 'precision_weighted_avg': 0.189, 'recall_weighted_avg': 0.226, 'f1_weighted_avg': 0.183, 'precision_samples_avg': 0.191, 'recall_samples_avg': 0.227, 'f1_samples_avg': 0.195}


In [12]:
def _baseline_gb_tags(run_names: list, learning_rate: str, n_estimators: str) -> dict:
    """Build the per-run tag mapping shared by the `gb` baselines in this cell.

    The three baselines below differ only in the learning rate they record, so the
    common tag layout is defined once here instead of being pasted per baseline.

    Args:
        run_names: Run names used as the keys of the returned mapping.
        learning_rate: Value stored under 'learning_rate' (kept as a string).
        n_estimators: Value stored under 'n_estimators' (kept as a string).

    Returns:
        A dict mapping every run name to its own newly created tag dict, in the
        order of ``run_names``. An empty ``run_names`` gives an empty dict.
    """
    return {
        run_name: {
            # The literal is rebuilt on every iteration so no two runs share a list.
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'learning_rate': learning_rate,
            'n_estimators': n_estimators,
        }
        for run_name in run_names
    }


# The name strings below are also the keys used to look the runs up in the metric
# cells further down, so they must stay exactly as written.
baseline_gb_0_05_lr_500_est_names = [
    'br_baseline_gb_0_05_lr_500_est',
    'cc_baseline_gb_0_05_lr_500_est',
    'lp_baseline_gb_0_05_lr_500_est',
]
baseline_gb_0_05_lr_500_est_tags = _baseline_gb_tags(
    baseline_gb_0_05_lr_500_est_names, learning_rate='0.05', n_estimators='500'
)

baseline_gb_0_03_lr_500_est_names = [
    'br_baseline_gb_0_03_lr_500_est',
    'cc_baseline_gb_0_03_lr_500_est',
    'lp_baseline_gb_0_03_lr_500_est',
]
baseline_gb_0_03_lr_500_est_tags = _baseline_gb_tags(
    baseline_gb_0_03_lr_500_est_names, learning_rate='0.03', n_estimators='500'
)

baseline_gb_0_01_lr_500_est_names = [
    'br_baseline_gb_0_01_lr_500_est',
    'cc_baseline_gb_0_01_lr_500_est',
    'lp_baseline_gb_0_01_lr_500_est',
]
baseline_gb_0_01_lr_500_est_tags = _baseline_gb_tags(
    baseline_gb_0_01_lr_500_est_names, learning_rate='0.01', n_estimators='500'
)

In [29]:
# baseline_gb_0_05_lr_500_est_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)
# baseline_gb_0_03_lr_500_est_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)
# baseline_gb_0_01_lr_500_est_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)

In [30]:
# run_cv_experiments(baseline_gb_0_05_lr_500_est_experiments, baseline_gb_0_05_lr_500_est_experiment_configs, baseline_gb_0_05_lr_500_est_names, baseline_gb_0_05_lr_500_est_tags, save_models=False, log_results=True)

In [31]:
# run_cv_experiments(baseline_gb_0_03_lr_500_est_experiments, baseline_gb_0_03_lr_500_est_experiment_configs, baseline_gb_0_03_lr_500_est_names, baseline_gb_0_03_lr_500_est_tags, save_models=False, log_results=True)

In [32]:
# run_cv_experiments(baseline_gb_0_01_lr_500_est_experiments, baseline_gb_0_01_lr_500_est_experiment_configs, baseline_gb_0_01_lr_500_est_names, baseline_gb_0_01_lr_500_est_tags, save_models=False, log_results=True)

In [17]:
# Persistence toggle for the three gb 500-estimator baselines: the saves are commented
# out, so re-running this cell only reloads results by name and never overwrites them.
# NOTE(review): presumably load_experiments reads what save_experiments wrote under the
# same name — confirm against their definitions earlier in the notebook.
# save_experiments(baseline_gb_0_05_lr_500_est_experiments, 'baseline_gb_0_05_lr_500_est_experiments')
# save_experiments(baseline_gb_0_03_lr_500_est_experiments, 'baseline_gb_0_03_lr_500_est_experiments')
# save_experiments(baseline_gb_0_01_lr_500_est_experiments, 'baseline_gb_0_01_lr_500_est_experiments')

baseline_gb_0_05_lr_500_est_experiments = load_experiments('baseline_gb_0_05_lr_500_est_experiments')
baseline_gb_0_03_lr_500_est_experiments = load_experiments('baseline_gb_0_03_lr_500_est_experiments')
baseline_gb_0_01_lr_500_est_experiments = load_experiments('baseline_gb_0_01_lr_500_est_experiments')

In [18]:
# Metrics from average_cv_metrics for the `br_` run of the gb baseline tagged
# learning_rate 0.05 / n_estimators 500.
print(average_cv_metrics(baseline_gb_0_05_lr_500_est_experiments, 'br_baseline_gb_0_05_lr_500_est'))

{'weighted_jaccard': 0.316, 'hamming_loss': 0.046, 'precision_micro_avg': 0.704, 'recall_micro_avg': 0.369, 'f1_micro_avg': 0.484, 'precision_macro_avg': 0.576, 'recall_macro_avg': 0.285, 'f1_macro_avg': 0.357, 'precision_weighted_avg': 0.655, 'recall_weighted_avg': 0.369, 'f1_weighted_avg': 0.451, 'precision_samples_avg': 0.454, 'recall_samples_avg': 0.418, 'f1_samples_avg': 0.421}


In [19]:
# Metrics from average_cv_metrics for the `cc_` run of the gb baseline tagged
# learning_rate 0.05 / n_estimators 500.
print(average_cv_metrics(baseline_gb_0_05_lr_500_est_experiments, 'cc_baseline_gb_0_05_lr_500_est'))

{'weighted_jaccard': 0.34, 'hamming_loss': 0.048, 'precision_micro_avg': 0.636, 'recall_micro_avg': 0.422, 'f1_micro_avg': 0.507, 'precision_macro_avg': 0.552, 'recall_macro_avg': 0.334, 'f1_macro_avg': 0.392, 'precision_weighted_avg': 0.616, 'recall_weighted_avg': 0.422, 'f1_weighted_avg': 0.483, 'precision_samples_avg': 0.529, 'recall_samples_avg': 0.472, 'f1_samples_avg': 0.484}


In [20]:
# Metrics from average_cv_metrics for the `lp_` run of the gb baseline tagged
# learning_rate 0.05 / n_estimators 500.
# NOTE(review): in the recorded output this `lp_` run trails the br_/cc_ runs of the same
# baseline (weighted_jaccard 0.288 vs 0.316 / 0.34, hamming_loss 0.08 vs 0.046 / 0.048).
# At learning rates 0.03 and 0.01 the `lp_` run has the highest weighted_jaccard instead.
print(average_cv_metrics(baseline_gb_0_05_lr_500_est_experiments, 'lp_baseline_gb_0_05_lr_500_est'))

{'weighted_jaccard': 0.288, 'hamming_loss': 0.08, 'precision_micro_avg': 0.443, 'recall_micro_avg': 0.436, 'f1_micro_avg': 0.431, 'precision_macro_avg': 0.359, 'recall_macro_avg': 0.345, 'f1_macro_avg': 0.335, 'precision_weighted_avg': 0.43, 'recall_weighted_avg': 0.436, 'f1_weighted_avg': 0.419, 'precision_samples_avg': 0.478, 'recall_samples_avg': 0.471, 'f1_samples_avg': 0.451}


In [23]:
# Metrics from average_cv_metrics for the `br_` run of the gb baseline tagged
# learning_rate 0.03 / n_estimators 500.
print(average_cv_metrics(baseline_gb_0_03_lr_500_est_experiments, 'br_baseline_gb_0_03_lr_500_est'))

{'weighted_jaccard': 0.305, 'hamming_loss': 0.046, 'precision_micro_avg': 0.725, 'recall_micro_avg': 0.352, 'f1_micro_avg': 0.474, 'precision_macro_avg': 0.612, 'recall_macro_avg': 0.275, 'f1_macro_avg': 0.351, 'precision_weighted_avg': 0.674, 'recall_weighted_avg': 0.352, 'f1_weighted_avg': 0.437, 'precision_samples_avg': 0.439, 'recall_samples_avg': 0.4, 'f1_samples_avg': 0.406}


In [24]:
# Metrics from average_cv_metrics for the `cc_` run of the gb baseline tagged
# learning_rate 0.03 / n_estimators 500.
print(average_cv_metrics(baseline_gb_0_03_lr_500_est_experiments, 'cc_baseline_gb_0_03_lr_500_est'))

{'weighted_jaccard': 0.327, 'hamming_loss': 0.048, 'precision_micro_avg': 0.66, 'recall_micro_avg': 0.396, 'f1_micro_avg': 0.495, 'precision_macro_avg': 0.582, 'recall_macro_avg': 0.31, 'f1_macro_avg': 0.378, 'precision_weighted_avg': 0.633, 'recall_weighted_avg': 0.396, 'f1_weighted_avg': 0.467, 'precision_samples_avg': 0.5, 'recall_samples_avg': 0.445, 'f1_samples_avg': 0.457}


In [25]:
# Metrics from average_cv_metrics for the `lp_` run of the gb baseline tagged
# learning_rate 0.03 / n_estimators 500.
# In the recorded output this is the highest weighted_jaccard (0.347) of the nine
# *_lr_500_est runs.
print(average_cv_metrics(baseline_gb_0_03_lr_500_est_experiments, 'lp_baseline_gb_0_03_lr_500_est'))

{'weighted_jaccard': 0.347, 'hamming_loss': 0.055, 'precision_micro_avg': 0.54, 'recall_micro_avg': 0.476, 'f1_micro_avg': 0.506, 'precision_macro_avg': 0.463, 'recall_macro_avg': 0.394, 'f1_macro_avg': 0.42, 'precision_weighted_avg': 0.528, 'recall_weighted_avg': 0.476, 'f1_weighted_avg': 0.498, 'precision_samples_avg': 0.577, 'recall_samples_avg': 0.524, 'f1_samples_avg': 0.532}


In [26]:
# Metrics from average_cv_metrics for the `br_` run of the gb baseline tagged
# learning_rate 0.01 / n_estimators 500.
print(average_cv_metrics(baseline_gb_0_01_lr_500_est_experiments, 'br_baseline_gb_0_01_lr_500_est'))

{'weighted_jaccard': 0.254, 'hamming_loss': 0.047, 'precision_micro_avg': 0.76, 'recall_micro_avg': 0.287, 'f1_micro_avg': 0.417, 'precision_macro_avg': 0.623, 'recall_macro_avg': 0.227, 'f1_macro_avg': 0.297, 'precision_weighted_avg': 0.696, 'recall_weighted_avg': 0.287, 'f1_weighted_avg': 0.368, 'precision_samples_avg': 0.37, 'recall_samples_avg': 0.335, 'f1_samples_avg': 0.341}


In [27]:
# Metrics from average_cv_metrics for the `cc_` run of the gb baseline tagged
# learning_rate 0.01 / n_estimators 500.
print(average_cv_metrics(baseline_gb_0_01_lr_500_est_experiments, 'cc_baseline_gb_0_01_lr_500_est'))

{'weighted_jaccard': 0.267, 'hamming_loss': 0.048, 'precision_micro_avg': 0.727, 'recall_micro_avg': 0.306, 'f1_micro_avg': 0.431, 'precision_macro_avg': 0.604, 'recall_macro_avg': 0.237, 'f1_macro_avg': 0.305, 'precision_weighted_avg': 0.668, 'recall_weighted_avg': 0.306, 'f1_weighted_avg': 0.388, 'precision_samples_avg': 0.399, 'recall_samples_avg': 0.353, 'f1_samples_avg': 0.365}


In [28]:
# Metrics from average_cv_metrics for the `lp_` run of the gb baseline tagged
# learning_rate 0.01 / n_estimators 500.
print(average_cv_metrics(baseline_gb_0_01_lr_500_est_experiments, 'lp_baseline_gb_0_01_lr_500_est'))

{'weighted_jaccard': 0.339, 'hamming_loss': 0.055, 'precision_micro_avg': 0.539, 'recall_micro_avg': 0.465, 'f1_micro_avg': 0.499, 'precision_macro_avg': 0.455, 'recall_macro_avg': 0.386, 'f1_macro_avg': 0.411, 'precision_weighted_avg': 0.527, 'recall_weighted_avg': 0.465, 'f1_weighted_avg': 0.489, 'precision_samples_avg': 0.572, 'recall_samples_avg': 0.514, 'f1_samples_avg': 0.524}


In [9]:
# Run names and tags for the four default-parameter models. The metric cells further down
# look runs up by these exact name strings, so they must stay as written.
# NOTE(review): "non_transform" presumably means these models are run without a
# problem-transformation wrapper — confirm against non_transform_experiment_configs.
non_transform_names = [
    'decision_tree_default',
    'random_forest_default',
    'knn_default',
    'mlknn_default',
]
# One freshly built tag dict per model name, so the entries never share state.
non_transform_tags = dict(
    (
        model_name,
        {'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys', 'All params default']},
    )
    for model_name in non_transform_names
)

In [10]:
# non_transform_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)

Dataset has already been split, proceeding will overwrite split data.
`force_split`=True, Proceeding with new split.


In [11]:
# run_cv_experiments(non_transform_experiments, non_transform_experiment_configs, non_transform_names, non_transform_tags, save_models=False, log_results=True)

Running model: decision_tree_default_1
decision_tree_default_1 complete.
Running model: random_forest_default_1
random_forest_default_1 complete.
Running model: knn_default_1
knn_default_1 complete.
Running model: mlknn_default_1
mlknn_default_1 complete.
Running model: decision_tree_default_2
decision_tree_default_2 complete.
Running model: random_forest_default_2
random_forest_default_2 complete.
Running model: knn_default_2
knn_default_2 complete.
Running model: mlknn_default_2
mlknn_default_2 complete.
Running model: decision_tree_default_3
decision_tree_default_3 complete.
Running model: random_forest_default_3
random_forest_default_3 complete.
Running model: knn_default_3
knn_default_3 complete.
Running model: mlknn_default_3
mlknn_default_3 complete.
Running model: decision_tree_default_4
decision_tree_default_4 complete.
Running model: random_forest_default_4
random_forest_default_4 complete.
Running model: knn_default_4
knn_default_4 complete.
Running model: mlknn_default_4
ml

In [12]:
# Persistence toggle for the default-parameter models: the save is commented out, so
# re-running this cell only reloads results by name and never overwrites them.
# NOTE(review): presumably load_experiments reads what save_experiments wrote under the
# same name — confirm against their definitions earlier in the notebook.
# save_experiments(non_transform_experiments, 'non_transform_experiments')

non_transform_experiments = load_experiments('non_transform_experiments')

In [13]:
# Metrics from average_cv_metrics for the 'decision_tree_default' run (tagged 'All params default').
print(average_cv_metrics(non_transform_experiments, 'decision_tree_default'))

{'weighted_jaccard': 0.28, 'hamming_loss': 0.069, 'precision_micro_avg': 0.418, 'recall_micro_avg': 0.427, 'f1_micro_avg': 0.422, 'precision_macro_avg': 0.343, 'recall_macro_avg': 0.345, 'f1_macro_avg': 0.342, 'precision_weighted_avg': 0.421, 'recall_weighted_avg': 0.427, 'f1_weighted_avg': 0.423, 'precision_samples_avg': 0.451, 'recall_samples_avg': 0.456, 'f1_samples_avg': 0.435}


In [14]:
# Metrics from average_cv_metrics for the 'random_forest_default' run (tagged 'All params default').
# In the recorded output it has the lowest hamming_loss (0.044) and the highest
# precision_micro_avg (0.821) of the four default models, with recall_micro_avg at 0.319.
print(average_cv_metrics(non_transform_experiments, 'random_forest_default'))

{'weighted_jaccard': 0.291, 'hamming_loss': 0.044, 'precision_micro_avg': 0.821, 'recall_micro_avg': 0.319, 'f1_micro_avg': 0.46, 'precision_macro_avg': 0.718, 'recall_macro_avg': 0.215, 'f1_macro_avg': 0.285, 'precision_weighted_avg': 0.801, 'recall_weighted_avg': 0.319, 'f1_weighted_avg': 0.415, 'precision_samples_avg': 0.406, 'recall_samples_avg': 0.36, 'f1_samples_avg': 0.372}


In [15]:
# Metrics from average_cv_metrics for the 'knn_default' run (tagged 'All params default').
print(average_cv_metrics(non_transform_experiments, 'knn_default'))

{'weighted_jaccard': 0.241, 'hamming_loss': 0.055, 'precision_micro_avg': 0.563, 'recall_micro_avg': 0.299, 'f1_micro_avg': 0.39, 'precision_macro_avg': 0.477, 'recall_macro_avg': 0.228, 'f1_macro_avg': 0.289, 'precision_weighted_avg': 0.521, 'recall_weighted_avg': 0.299, 'f1_weighted_avg': 0.368, 'precision_samples_avg': 0.369, 'recall_samples_avg': 0.333, 'f1_samples_avg': 0.338}


In [16]:
# Metrics from average_cv_metrics for the 'mlknn_default' run (tagged 'All params default').
print(average_cv_metrics(non_transform_experiments, 'mlknn_default'))

{'weighted_jaccard': 0.232, 'hamming_loss': 0.054, 'precision_micro_avg': 0.597, 'recall_micro_avg': 0.28, 'f1_micro_avg': 0.381, 'precision_macro_avg': 0.504, 'recall_macro_avg': 0.213, 'f1_macro_avg': 0.275, 'precision_weighted_avg': 0.539, 'recall_weighted_avg': 0.28, 'f1_weighted_avg': 0.353, 'precision_samples_avg': 0.354, 'recall_samples_avg': 0.318, 'f1_samples_avg': 0.324}
