# Model Exploration

In [1]:
import os
import pickle
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from sklearn.preprocessing import StandardScaler

from evaluation.experiment import Experiment
from integrations.databricks.auth import setup_mlflow
from models.config import *
from models.setup import experiment_setup
from utils.dataset import Dataset

# Populate the process environment from a local .env file before any os.getenv() call below.
load_dotenv()

# Project helper from integrations.databricks.auth.
# NOTE(review): presumably configures MLflow tracking from environment variables, which is
# why it runs after load_dotenv() - confirm against the helper's implementation.
setup_mlflow()
# Passed to Experiment.run_experiment by run_cv_experiments; None when MLFLOW_MODEL_EXP is unset.
mlflow_path = os.getenv('MLFLOW_MODEL_EXP')
# Silences every UserWarning for the rest of the session, regardless of which library emits it.
warnings.simplefilter(action='ignore', category=UserWarning)

# Default random_state used when building cross-validation splits.
SEED = 42

In [2]:
# Resolve the project directories from the PROJECT_ROOT environment variable.
_project_root = os.getenv('PROJECT_ROOT')
if _project_root is None:
    # Fail fast with an actionable message instead of the opaque TypeError raised by Path(None).
    raise RuntimeError(
        "Environment variable 'PROJECT_ROOT' is not set; define it in the environment or in the .env file."
    )

DATA_PATH = Path(_project_root) / 'data'
EXPERIMENT_PATH = Path(_project_root) / 'experiments'
TRACK_DATASET_FILE = DATA_PATH / 'track_dataset_nk.pkl'

# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# dataset files that were produced by this project.
with open(TRACK_DATASET_FILE, 'rb') as file:
    track_dataset = pickle.load(file)

In [3]:
def scale_numeric_data(train_array: np.ndarray, test_array: np.ndarray, feature_names: list, scaled_feature_names: list = None) -> tuple:
    """Standardise selected feature columns of a train/test pair.

    The scaler is fitted on the training data only and then applied to both
    splits; every other column is passed through untouched.

    Args:
        train_array: Training samples, one column per entry in ``feature_names``.
        test_array: Test samples with the same column layout.
        feature_names: Column labels shared by both arrays.
        scaled_feature_names: Columns to standardise. Defaults to
            ``['duration_ms', 'tempo', 'loudness']`` when omitted.

    Returns:
        A ``(train, test)`` tuple of NumPy arrays with the selected columns scaled.
    """
    columns = ['duration_ms', 'tempo', 'loudness'] if scaled_feature_names is None else scaled_feature_names

    standardiser = StandardScaler()
    train_frame, test_frame = (
        pd.DataFrame(values, columns=feature_names) for values in (train_array, test_array)
    )

    # Fit on the training split only; the test split reuses the fitted statistics.
    train_frame[columns] = standardiser.fit_transform(train_frame[columns])
    test_frame[columns] = standardiser.transform(test_frame[columns])

    return train_frame.to_numpy(), test_frame.to_numpy()

In [4]:
def build_cv_experiments(n_splits: int, dataset: Dataset, random_state=SEED):
    """Create one ``Experiment`` per cross-validation fold of ``dataset``.

    The dataset is (re)split with ``force_split=True``, each fold's train/test
    features are scaled with ``scale_numeric_data`` and written back into the
    split lists, and the fold's characteristics are attached to its experiment.

    Args:
        n_splits: Number of cross-validation folds.
        dataset: Dataset providing ``split_data``, ``feature_names`` and
            ``get_dataset_characteristics``.
        random_state: Seed forwarded to ``dataset.split_data``.

    Returns:
        List of ``Experiment`` objects, one per fold, in fold order.
    """
    split = dataset.split_data(
        cross_val=True,
        n_splits=n_splits,
        iterative=True,
        random_state=random_state,
        force_split=True,
    )
    # The third and sixth items returned by split_data are not used here.
    train_data, train_labels, _, test_data, test_labels, _ = split

    experiments = []
    for fold in range(n_splits):
        scaled_train, scaled_test = scale_numeric_data(train_data[fold], test_data[fold], dataset.feature_names)
        # Store the scaled arrays back into the split lists, as the fold experiment reads from them.
        train_data[fold] = scaled_train
        test_data[fold] = scaled_test
        characteristics = dataset.get_dataset_characteristics(fold)
        experiments.append(
            Experiment(train_data[fold], train_labels[fold], test_data[fold], test_labels[fold], characteristics)
        )

    return experiments

In [5]:
def run_cv_experiments(experiments, experiment_configs, model_names, tags, save_models, log_results):
    """Run every model configuration on every cross-validation fold.

    For fold ``i`` (1-based) each model name gets the suffix ``_<i>`` and its
    tags gain a ``'Fold'`` entry of the form ``'<i> / <n_folds>'``.

    Args:
        experiments: Per-fold experiment objects exposing ``run_experiment``.
        experiment_configs: Configurations forwarded to ``experiment_setup``.
        model_names: Base model names; must all be keys of ``tags``.
        tags: Mapping of base model name to its tag dict. It is not modified.
        save_models: Forwarded to ``run_experiment``.
        log_results: Forwarded to ``run_experiment``.

    Raises:
        KeyError: If a name in ``model_names`` is missing from ``tags``.
    """
    for i, experiment in enumerate(experiments):
        fold_suffix = f'_{i + 1}'
        fold_label = f'{i+ 1} / {len(experiments)}'
        fold_names = [model_name + fold_suffix for model_name in model_names]
        # Build a fresh tag dict per fold. Reusing tags[model_name] directly would
        # write 'Fold' into the caller's dicts and make all folds share one dict,
        # so a later fold would overwrite the 'Fold' value seen by earlier ones.
        fold_tags = {
            model_name + fold_suffix: {**tags[model_name], 'Fold': fold_label}
            for model_name in model_names
        }

        fold_models, fold_configs = experiment_setup(experiment_configs, fold_names)
        experiment.run_experiment(fold_models, fold_configs, mlflow_path, fold_tags, save_models=save_models, log_results=log_results)

In [6]:
# Base names of the baseline model configurations.
baseline_names = ['br_baseline', 'cc_baseline', 'lp_baseline']
# One independent tag dict per model name (meant to be passed as `tags` to run_cv_experiments).
baseline_tags = dict(
    (
        name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
        },
    )
    for name in baseline_names
)

In [7]:
N_SPLITS = 5

In [8]:
# baseline_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)

In [9]:
# run_cv_experiments(baseline_experiments, baseline_experiment_configs, baseline_names, baseline_tags, save_models=True, log_results=True)

In [10]:
def save_experiments(experiments, filename: str):
    """Pickle ``experiments`` to ``EXPERIMENT_PATH / filename``.

    Args:
        experiments: Picklable object to store.
        filename: Target file name; ``.pkl`` is appended unless already present.
    """
    if not filename.endswith('.pkl'):
        filename += '.pkl'
    target = EXPERIMENT_PATH / filename
    # Create the output directory on first use so a fresh checkout does not
    # fail with FileNotFoundError.
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, 'wb') as f:
        pickle.dump(experiments, f)
        
def load_experiments(name: str):
    """Load a pickled object from ``EXPERIMENT_PATH``.

    Args:
        name: File name with or without the ``.pkl`` suffix.

    Returns:
        The object stored in the pickle file.
    """
    suffix = '.pkl'
    filename = name if name[-4:] == suffix else name + suffix
    # NOTE: unpickling runs code stored in the file; only load files written by this project.
    with open(EXPERIMENT_PATH / filename, 'rb') as f:
        return pickle.load(f)

In [11]:
# save_experiments(baseline_experiments, 'baseline_experiments')
baseline_experiments = load_experiments('baseline_experiments')

In [12]:
def average_cv_metrics(experiments, model_name_root):
    """Average each metric of one model across all cross-validation folds.

    Fold ``i`` (1-based) is looked up as ``experiment.results[f'{model_name_root}_{i}']``
    and its ``metrics`` mapping is accumulated. The metric keys of the first fold
    define the keys of the result.

    Args:
        experiments: Sequence of per-fold experiments, in fold order.
        model_name_root: Model name without the ``_<fold>`` suffix.

    Returns:
        Dict mapping metric name to its mean over the folds, rounded to three decimals.

    Raises:
        ValueError: If ``experiments`` is empty.
        KeyError: If a fold has no result for the model or lacks one of the metrics.
    """
    if len(experiments) == 0:
        # Previously this surfaced as an AttributeError on None further down.
        raise ValueError('average_cv_metrics requires at least one experiment')

    sum_metrics = None
    for fold_number, experiment in enumerate(experiments, start=1):
        fold_metrics = experiment.results[f'{model_name_root}_{fold_number}'].metrics
        if sum_metrics is None:
            sum_metrics = fold_metrics.copy()
        else:
            for key in sum_metrics:
                # Rebind instead of `+=` so that mutable metric values (e.g. arrays)
                # shared with the first fold through the shallow copy are never
                # modified in place.
                sum_metrics[key] = sum_metrics[key] + fold_metrics[key]

    return {key: float(f'{value / len(experiments):.3f}') for key, value in sum_metrics.items()}

In [13]:
print(average_cv_metrics(baseline_experiments, 'br_baseline'))

{'weighted_jaccard': 0.177, 'hamming_loss': 0.052, 'precision_micro_avg': 0.717, 'recall_micro_avg': 0.203, 'f1_micro_avg': 0.316, 'precision_macro_avg': 0.406, 'recall_macro_avg': 0.145, 'f1_macro_avg': 0.19, 'precision_weighted_avg': 0.527, 'recall_weighted_avg': 0.203, 'f1_weighted_avg': 0.263, 'precision_samples_avg': 0.255, 'recall_samples_avg': 0.24, 'f1_samples_avg': 0.24}


In [14]:
print(average_cv_metrics(baseline_experiments, 'cc_baseline'))

{'weighted_jaccard': 0.234, 'hamming_loss': 0.056, 'precision_micro_avg': 0.553, 'recall_micro_avg': 0.312, 'f1_micro_avg': 0.399, 'precision_macro_avg': 0.406, 'recall_macro_avg': 0.22, 'f1_macro_avg': 0.25, 'precision_weighted_avg': 0.512, 'recall_weighted_avg': 0.312, 'f1_weighted_avg': 0.344, 'precision_samples_avg': 0.407, 'recall_samples_avg': 0.35, 'f1_samples_avg': 0.364}


In [15]:
print(average_cv_metrics(baseline_experiments, 'lp_baseline'))

{'weighted_jaccard': 0.303, 'hamming_loss': 0.055, 'precision_micro_avg': 0.548, 'recall_micro_avg': 0.412, 'f1_micro_avg': 0.47, 'precision_macro_avg': 0.433, 'recall_macro_avg': 0.316, 'f1_macro_avg': 0.34, 'precision_weighted_avg': 0.509, 'recall_weighted_avg': 0.412, 'f1_weighted_avg': 0.439, 'precision_samples_avg': 0.551, 'recall_samples_avg': 0.466, 'f1_samples_avg': 0.49}


In [16]:
# Model names and tags for the variant tagged 'C': '0.1'.
baseline_0_1_C_names = ['br_baseline_0_1_C', 'cc_baseline_0_1_C', 'lp_baseline_0_1_C']
baseline_0_1_C_tags = dict(
    (
        name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'C': '0.1',
        },
    )
    for name in baseline_0_1_C_names
)

# Model names and tags for the variant tagged 'C': '10'.
baseline_10_C_names = ['br_baseline_10_C', 'cc_baseline_10_C', 'lp_baseline_10_C']
baseline_10_C_tags = dict(
    (
        name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'C': '10',
        },
    )
    for name in baseline_10_C_names
)

In [17]:
# baseline_0_1_C_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)
# 
# baseline_10_C_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)

In [18]:
# run_cv_experiments(baseline_0_1_C_experiments, baseline_0_1_C_experiment_configs, baseline_0_1_C_names, baseline_0_1_C_tags, save_models=True, log_results=True)

In [19]:
# run_cv_experiments(baseline_10_C_experiments, baseline_10_C_experiment_configs, baseline_10_C_names, baseline_10_C_tags, save_models=True, log_results=True)

In [20]:
# save_experiments(baseline_0_1_C_experiments, 'baseline_0_1_C_experiments')
# save_experiments(baseline_10_C_experiments, 'baseline_10_C_experiments')

baseline_0_1_C_experiments = load_experiments('baseline_0_1_C_experiments')
baseline_10_C_experiments = load_experiments('baseline_10_C_experiments')

In [21]:
print(average_cv_metrics(baseline_0_1_C_experiments, 'br_baseline_0_1_C'))

{'weighted_jaccard': 0.104, 'hamming_loss': 0.055, 'precision_micro_avg': 0.737, 'recall_micro_avg': 0.115, 'f1_micro_avg': 0.199, 'precision_macro_avg': 0.319, 'recall_macro_avg': 0.077, 'f1_macro_avg': 0.107, 'precision_weighted_avg': 0.472, 'recall_weighted_avg': 0.115, 'f1_weighted_avg': 0.16, 'precision_samples_avg': 0.148, 'recall_samples_avg': 0.141, 'f1_samples_avg': 0.14}


In [22]:
print(average_cv_metrics(baseline_0_1_C_experiments, 'cc_baseline_0_1_C'))

{'weighted_jaccard': 0.151, 'hamming_loss': 0.054, 'precision_micro_avg': 0.664, 'recall_micro_avg': 0.173, 'f1_micro_avg': 0.274, 'precision_macro_avg': 0.376, 'recall_macro_avg': 0.117, 'f1_macro_avg': 0.155, 'precision_weighted_avg': 0.527, 'recall_weighted_avg': 0.173, 'f1_weighted_avg': 0.231, 'precision_samples_avg': 0.23, 'recall_samples_avg': 0.203, 'f1_samples_avg': 0.21}


In [23]:
print(average_cv_metrics(baseline_0_1_C_experiments, 'lp_baseline_0_1_C'))

{'weighted_jaccard': 0.259, 'hamming_loss': 0.058, 'precision_micro_avg': 0.509, 'recall_micro_avg': 0.375, 'f1_micro_avg': 0.432, 'precision_macro_avg': 0.347, 'recall_macro_avg': 0.259, 'f1_macro_avg': 0.27, 'precision_weighted_avg': 0.445, 'recall_weighted_avg': 0.375, 'f1_weighted_avg': 0.382, 'precision_samples_avg': 0.509, 'recall_samples_avg': 0.422, 'f1_samples_avg': 0.448}


In [24]:
print(average_cv_metrics(baseline_10_C_experiments, 'br_baseline_10_C'))

{'weighted_jaccard': 0.195, 'hamming_loss': 0.051, 'precision_micro_avg': 0.705, 'recall_micro_avg': 0.226, 'f1_micro_avg': 0.342, 'precision_macro_avg': 0.43, 'recall_macro_avg': 0.169, 'f1_macro_avg': 0.216, 'precision_weighted_avg': 0.543, 'recall_weighted_avg': 0.226, 'f1_weighted_avg': 0.287, 'precision_samples_avg': 0.281, 'recall_samples_avg': 0.265, 'f1_samples_avg': 0.264}


In [25]:
print(average_cv_metrics(baseline_10_C_experiments, 'cc_baseline_10_C'))

{'weighted_jaccard': 0.245, 'hamming_loss': 0.063, 'precision_micro_avg': 0.456, 'recall_micro_avg': 0.343, 'f1_micro_avg': 0.391, 'precision_macro_avg': 0.387, 'recall_macro_avg': 0.27, 'f1_macro_avg': 0.266, 'precision_weighted_avg': 0.503, 'recall_weighted_avg': 0.343, 'f1_weighted_avg': 0.362, 'precision_samples_avg': 0.443, 'recall_samples_avg': 0.384, 'f1_samples_avg': 0.397}


In [26]:
print(average_cv_metrics(baseline_10_C_experiments, 'lp_baseline_10_C'))

{'weighted_jaccard': 0.315, 'hamming_loss': 0.054, 'precision_micro_avg': 0.553, 'recall_micro_avg': 0.425, 'f1_micro_avg': 0.481, 'precision_macro_avg': 0.438, 'recall_macro_avg': 0.342, 'f1_macro_avg': 0.368, 'precision_weighted_avg': 0.519, 'recall_weighted_avg': 0.425, 'f1_weighted_avg': 0.455, 'precision_samples_avg': 0.559, 'recall_samples_avg': 0.48, 'f1_samples_avg': 0.501}


In [27]:
# Model names and tags for the variant tagged 'C': '100'.
baseline_100_C_names = ['br_baseline_100_C', 'cc_baseline_100_C', 'lp_baseline_100_C']
baseline_100_C_tags = dict(
    (
        name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'C': '100',
        },
    )
    for name in baseline_100_C_names
)

In [28]:
# baseline_100_C_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)

In [29]:
# run_cv_experiments(baseline_100_C_experiments, baseline_100_C_experiment_configs, baseline_100_C_names, baseline_100_C_tags, save_models=True, log_results=True)

In [30]:
# save_experiments(baseline_100_C_experiments, 'baseline_100_C_experiments')

baseline_100_C_experiments = load_experiments('baseline_100_C_experiments')

In [31]:
print(average_cv_metrics(baseline_100_C_experiments, 'br_baseline_100_C'))

{'weighted_jaccard': 0.197, 'hamming_loss': 0.051, 'precision_micro_avg': 0.699, 'recall_micro_avg': 0.23, 'f1_micro_avg': 0.346, 'precision_macro_avg': 0.451, 'recall_macro_avg': 0.173, 'f1_macro_avg': 0.221, 'precision_weighted_avg': 0.557, 'recall_weighted_avg': 0.23, 'f1_weighted_avg': 0.291, 'precision_samples_avg': 0.285, 'recall_samples_avg': 0.269, 'f1_samples_avg': 0.267}


In [32]:
print(average_cv_metrics(baseline_100_C_experiments, 'cc_baseline_100_C'))

{'weighted_jaccard': 0.247, 'hamming_loss': 0.064, 'precision_micro_avg': 0.448, 'recall_micro_avg': 0.346, 'f1_micro_avg': 0.39, 'precision_macro_avg': 0.384, 'recall_macro_avg': 0.278, 'f1_macro_avg': 0.266, 'precision_weighted_avg': 0.504, 'recall_weighted_avg': 0.346, 'f1_weighted_avg': 0.364, 'precision_samples_avg': 0.445, 'recall_samples_avg': 0.386, 'f1_samples_avg': 0.4}


In [33]:
print(average_cv_metrics(baseline_100_C_experiments, 'lp_baseline_100_C'))

{'weighted_jaccard': 0.318, 'hamming_loss': 0.054, 'precision_micro_avg': 0.55, 'recall_micro_avg': 0.429, 'f1_micro_avg': 0.482, 'precision_macro_avg': 0.44, 'recall_macro_avg': 0.348, 'f1_macro_avg': 0.373, 'precision_weighted_avg': 0.523, 'recall_weighted_avg': 0.429, 'f1_weighted_avg': 0.459, 'precision_samples_avg': 0.558, 'recall_samples_avg': 0.482, 'f1_samples_avg': 0.502}


In [34]:
# Model names and tags for the variant tagged 'C': '10' with 'solver': 'saga'.
baseline_saga_10_C_names = ['br_baseline_saga_10_C', 'cc_baseline_saga_10_C', 'lp_baseline_saga_10_C']
baseline_saga_10_C_tags = dict(
    (
        name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
            'C': '10',
            'solver': 'saga',
        },
    )
    for name in baseline_saga_10_C_names
)

In [35]:
# baseline_saga_10_C_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)

Dataset has already been split, proceeding will overwrite split data.
`force_split`=True, Proceeding with new split.


In [36]:
# run_cv_experiments(baseline_saga_10_C_experiments, baseline_saga_10_C_experiment_configs, baseline_saga_10_C_names, baseline_saga_10_C_tags, save_models=True, log_results=True)

Running model: br_baseline_saga_10_C_1


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

br_baseline_saga_10_C_1 complete.
Running model: cc_baseline_saga_10_C_1


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

cc_baseline_saga_10_C_1 complete.
Running model: lp_baseline_saga_10_C_1


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

lp_baseline_saga_10_C_1 complete.
Running model: br_baseline_saga_10_C_2


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

br_baseline_saga_10_C_2 complete.
Running model: cc_baseline_saga_10_C_2


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

cc_baseline_saga_10_C_2 complete.
Running model: lp_baseline_saga_10_C_2


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

lp_baseline_saga_10_C_2 complete.
Running model: br_baseline_saga_10_C_3


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

br_baseline_saga_10_C_3 complete.
Running model: cc_baseline_saga_10_C_3


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

cc_baseline_saga_10_C_3 complete.
Running model: lp_baseline_saga_10_C_3


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

lp_baseline_saga_10_C_3 complete.
Running model: br_baseline_saga_10_C_4


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

br_baseline_saga_10_C_4 complete.
Running model: cc_baseline_saga_10_C_4


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

cc_baseline_saga_10_C_4 complete.
Running model: lp_baseline_saga_10_C_4


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

lp_baseline_saga_10_C_4 complete.
Running model: br_baseline_saga_10_C_5


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

br_baseline_saga_10_C_5 complete.
Running model: cc_baseline_saga_10_C_5


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

cc_baseline_saga_10_C_5 complete.
Running model: lp_baseline_saga_10_C_5


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

lp_baseline_saga_10_C_5 complete.


In [37]:
# save_experiments(baseline_saga_10_C_experiments, 'baseline_saga_10_C_experiments')

baseline_saga_10_C_experiments = load_experiments('baseline_saga_10_C_experiments')

In [38]:
print(average_cv_metrics(baseline_saga_10_C_experiments, 'br_baseline_saga_10_C'))

{'weighted_jaccard': 0.195, 'hamming_loss': 0.051, 'precision_micro_avg': 0.703, 'recall_micro_avg': 0.227, 'f1_micro_avg': 0.343, 'precision_macro_avg': 0.43, 'recall_macro_avg': 0.168, 'f1_macro_avg': 0.215, 'precision_weighted_avg': 0.552, 'recall_weighted_avg': 0.227, 'f1_weighted_avg': 0.288, 'precision_samples_avg': 0.282, 'recall_samples_avg': 0.266, 'f1_samples_avg': 0.265}


In [39]:
print(average_cv_metrics(baseline_saga_10_C_experiments, 'cc_baseline_saga_10_C'))

{'weighted_jaccard': 0.247, 'hamming_loss': 0.063, 'precision_micro_avg': 0.457, 'recall_micro_avg': 0.344, 'f1_micro_avg': 0.393, 'precision_macro_avg': 0.385, 'recall_macro_avg': 0.27, 'f1_macro_avg': 0.265, 'precision_weighted_avg': 0.506, 'recall_weighted_avg': 0.344, 'f1_weighted_avg': 0.364, 'precision_samples_avg': 0.444, 'recall_samples_avg': 0.385, 'f1_samples_avg': 0.399}


In [40]:
print(average_cv_metrics(baseline_saga_10_C_experiments, 'lp_baseline_saga_10_C'))

{'weighted_jaccard': 0.317, 'hamming_loss': 0.054, 'precision_micro_avg': 0.555, 'recall_micro_avg': 0.426, 'f1_micro_avg': 0.482, 'precision_macro_avg': 0.445, 'recall_macro_avg': 0.342, 'f1_macro_avg': 0.369, 'precision_weighted_avg': 0.523, 'recall_weighted_avg': 0.426, 'f1_weighted_avg': 0.457, 'precision_samples_avg': 0.56, 'recall_samples_avg': 0.48, 'f1_samples_avg': 0.502}


In [None]:
# Model names and tags for the `*_baseline_linearsvc` configurations.
baseline_linearsvc_names = ['br_baseline_linearsvc', 'cc_baseline_linearsvc', 'lp_baseline_linearsvc']
baseline_linearsvc_tags = dict(
    (
        name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
        },
    )
    for name in baseline_linearsvc_names
)

# Model names and tags for the `*_baseline_random_forest` configurations.
baseline_random_forest_names = ['br_baseline_random_forest', 'cc_baseline_random_forest', 'lp_baseline_random_forest']
baseline_random_forest_tags = dict(
    (
        name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
        },
    )
    for name in baseline_random_forest_names
)

# Model names and tags for the `*_baseline_gradient_boost` configurations.
baseline_gradient_boost_names = ['br_baseline_gradient_boost', 'cc_baseline_gradient_boost', 'lp_baseline_gradient_boost']
baseline_gradient_boost_tags = dict(
    (
        name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
        },
    )
    for name in baseline_gradient_boost_names
)

In [None]:
# Build a separate list of fold experiments for each model family; every call
# uses the same split settings (N_SPLITS folds, SEED as random_state).
baseline_linearsvc_experiments = build_cv_experiments(
    n_splits=N_SPLITS,
    dataset=track_dataset,
    random_state=SEED,
)
baseline_random_forest_experiments = build_cv_experiments(
    n_splits=N_SPLITS,
    dataset=track_dataset,
    random_state=SEED,
)
baseline_gradient_boost_experiments = build_cv_experiments(
    n_splits=N_SPLITS,
    dataset=track_dataset,
    random_state=SEED,
)

In [None]:
# Run the `*_baseline_linearsvc` configurations on every fold.
run_cv_experiments(
    experiments=baseline_linearsvc_experiments,
    experiment_configs=baseline_linearsvc_experiment_configs,
    model_names=baseline_linearsvc_names,
    tags=baseline_linearsvc_tags,
    save_models=True,
    log_results=True,
)

In [None]:
# Run the `*_baseline_random_forest` configurations on every fold.
# The model names must be the random-forest ones: `baseline_random_forest_tags`
# is keyed by them, so passing the saga names fails with KeyError on the tag lookup.
run_cv_experiments(baseline_random_forest_experiments, baseline_random_forest_experiment_configs, baseline_random_forest_names, baseline_random_forest_tags, save_models=True, log_results=True)

In [None]:
# Run the `*_baseline_gradient_boost` configurations on every fold.
run_cv_experiments(
    experiments=baseline_gradient_boost_experiments,
    experiment_configs=baseline_gradient_boost_experiment_configs,
    model_names=baseline_gradient_boost_names,
    tags=baseline_gradient_boost_tags,
    save_models=True,
    log_results=True,
)

In [None]:
# Persist each model family's fold experiments under EXPERIMENT_PATH.
save_experiments(
    experiments=baseline_linearsvc_experiments,
    filename='baseline_linearsvc_experiments',
)
save_experiments(
    experiments=baseline_random_forest_experiments,
    filename='baseline_random_forest_experiments',
)
save_experiments(
    experiments=baseline_gradient_boost_experiments,
    filename='baseline_gradient_boost_experiments',
)

# Uncomment to reload previously saved experiments instead of re-running them.
# baseline_linearsvc_experiments = load_experiments('baseline_linearsvc_experiments')
# baseline_random_forest_experiments = load_experiments('baseline_random_forest_experiments')
# baseline_gradient_boost_experiments = load_experiments('baseline_gradient_boost_experiments')

In [None]:
# Read from the linearsvc experiments (not the saga ones): results are looked up
# as 'br_baseline_linearsvc_<fold>' on the experiments these models were run on.
print(average_cv_metrics(baseline_linearsvc_experiments, 'br_baseline_linearsvc'))

In [None]:
# Read from the linearsvc experiments (not the saga ones): results are looked up
# as 'cc_baseline_linearsvc_<fold>' on the experiments these models were run on.
print(average_cv_metrics(baseline_linearsvc_experiments, 'cc_baseline_linearsvc'))

In [None]:
# Read from the linearsvc experiments (not the saga ones): results are looked up
# as 'lp_baseline_linearsvc_<fold>' on the experiments these models were run on.
print(average_cv_metrics(baseline_linearsvc_experiments, 'lp_baseline_linearsvc'))

In [None]:
# Read from the random-forest experiments (not the saga ones): results are looked up
# as 'br_baseline_random_forest_<fold>' on the experiments these models were run on.
print(average_cv_metrics(baseline_random_forest_experiments, 'br_baseline_random_forest'))

In [None]:
# Read from the random-forest experiments (not the saga ones): results are looked up
# as 'cc_baseline_random_forest_<fold>' on the experiments these models were run on.
print(average_cv_metrics(baseline_random_forest_experiments, 'cc_baseline_random_forest'))

In [None]:
# Read from the random-forest experiments (not the saga ones): results are looked up
# as 'lp_baseline_random_forest_<fold>' on the experiments these models were run on.
print(average_cv_metrics(baseline_random_forest_experiments, 'lp_baseline_random_forest'))

In [None]:
# Read from the gradient-boost experiments (not the saga ones): results are looked up
# as 'br_baseline_gradient_boost_<fold>' on the experiments these models were run on.
print(average_cv_metrics(baseline_gradient_boost_experiments, 'br_baseline_gradient_boost'))

In [None]:
# Read from the gradient-boost experiments (not the saga ones): results are looked up
# as 'cc_baseline_gradient_boost_<fold>' on the experiments these models were run on.
print(average_cv_metrics(baseline_gradient_boost_experiments, 'cc_baseline_gradient_boost'))

In [None]:
# Read from the gradient-boost experiments (not the saga ones): results are looked up
# as 'lp_baseline_gradient_boost_<fold>' on the experiments these models were run on.
print(average_cv_metrics(baseline_gradient_boost_experiments, 'lp_baseline_gradient_boost'))