# Model Exploration

In [1]:
import os
import pickle
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from sklearn.preprocessing import StandardScaler

from evaluation.experiment import Experiment
from integrations.databricks.auth import setup_mlflow
from models.config import baseline_experiment_configs
from models.setup import experiment_setup
from utils.dataset import Dataset

# Load variables from a local .env file into the process environment so the
# os.getenv() lookups used in this notebook can resolve them.
load_dotenv()

# Project helper imported from integrations.databricks.auth; presumably sets up
# MLflow tracking/authentication -- confirm in that module.
setup_mlflow()
# MLflow experiment path handed to experiment.run_experiment() further down.
# os.getenv returns None when MLFLOW_MODEL_EXP is not defined.
mlflow_path = os.getenv('MLFLOW_MODEL_EXP')
# Suppress every UserWarning for the remainder of the session.
warnings.simplefilter(action='ignore', category=UserWarning)

# Default random_state used for the cross-validation split.
SEED = 42

In [2]:
# Resolve the data directory from the PROJECT_ROOT environment variable.
# Fail early with an explicit message when it is missing: Path(None) would
# otherwise raise an opaque TypeError.
if os.getenv('PROJECT_ROOT') is None:
    raise EnvironmentError('PROJECT_ROOT is not set; define it in the environment or in the .env file')

DATA_PATH = Path(os.environ['PROJECT_ROOT']) / 'data'
TRACK_DATASET_FILE = DATA_PATH / 'track_dataset_nk.pkl'

# SECURITY: pickle.load can execute arbitrary code while deserializing. Only
# load this file if it was produced by a trusted source.
with open(TRACK_DATASET_FILE, 'rb') as file:
    track_dataset = pickle.load(file)

In [3]:
def scale_numeric_data(train_array: np.ndarray, test_array: np.ndarray, feature_names: list, scaled_feature_names: list = None) -> tuple:
    """Standardize selected feature columns of a train/test pair.

    The scaler is fitted on the training rows only and then applied to both
    sets, so the test set never contributes to the scaling statistics.

    Args:
        train_array: Training data with one column per entry of ``feature_names``.
        test_array: Test data with the same column layout.
        feature_names: Column names, in order, shared by both arrays.
        scaled_feature_names: Names of the columns to standardize. When
            ``None``, ``['duration_ms', 'tempo', 'loudness']`` is used.

    Returns:
        A ``(train, test)`` tuple of NumPy arrays in which the selected
        columns have been replaced by their standardized values.
    """
    if scaled_feature_names is None:
        columns_to_scale = ['duration_ms', 'tempo', 'loudness']
    else:
        columns_to_scale = scaled_feature_names

    # Wrap the arrays in DataFrames so the columns can be addressed by name.
    train_frame = pd.DataFrame(train_array, columns=feature_names)
    test_frame = pd.DataFrame(test_array, columns=feature_names)

    standardizer = StandardScaler()
    # Fit on the training columns only, then reuse the fitted scaler for test.
    train_frame[columns_to_scale] = standardizer.fit_transform(train_frame[columns_to_scale])
    test_frame[columns_to_scale] = standardizer.transform(test_frame[columns_to_scale])

    return train_frame.to_numpy(), test_frame.to_numpy()

In [4]:
def build_cv_experiments(n_splits: int, dataset: Dataset, random_state=SEED):
    """Split ``dataset`` into cross-validation folds and wrap each in an Experiment.

    Args:
        n_splits: Number of folds requested from ``dataset.split_data`` and
            number of Experiment objects built.
        dataset: Dataset providing ``split_data``, ``feature_names`` and
            ``get_dataset_characteristics``.
        random_state: Seed forwarded to ``dataset.split_data``.

    Returns:
        A list with one Experiment per fold, in fold order.
    """
    # split_data returns six values; the third and sixth are not used here.
    train_data, train_labels, _, test_data, test_labels, _ = dataset.split_data(
        cross_val=True,
        n_splits=n_splits,
        iterative=True,
        random_state=random_state,
        force_split=True,
    )

    def build_fold(fold):
        # The scaled arrays are written back into the per-fold containers
        # before the Experiment is constructed from them.
        train_data[fold], test_data[fold] = scale_numeric_data(train_data[fold], test_data[fold], dataset.feature_names)
        characteristics = dataset.get_dataset_characteristics(fold)
        return Experiment(train_data[fold], train_labels[fold], test_data[fold], test_labels[fold], characteristics)

    return [build_fold(fold) for fold in range(n_splits)]

In [5]:
def run_cv_experiments(experiments, experiment_configs, model_names, tags, save_models, log_results):
    """Run every named model on every cross-validation fold.

    For fold ``i`` (0-based) each model name gets the suffix ``_{i + 1}``, and
    its tags get a ``'Fold'`` entry of the form ``'<fold> / <total folds>'``.

    Args:
        experiments: Sequence of per-fold experiment objects exposing
            ``run_experiment``.
        experiment_configs: Configurations forwarded to ``experiment_setup``.
        model_names: Base model names; each must be a key of ``tags``.
        tags: Mapping of base model name to a dict of tags. It is not
            modified: every fold receives its own copy of each tag dict.
        save_models: Forwarded to ``run_experiment``.
        log_results: Forwarded to ``run_experiment``.
    """
    n_folds = len(experiments)
    for i, experiment in enumerate(experiments):
        fold_suffix = f'_{i + 1}'
        fold_names = [model_name + fold_suffix for model_name in model_names]

        # Build a fresh dict per fold and per model. Reusing tags[model_name]
        # directly would add 'Fold' to the caller's dict and make every fold
        # share one object that ends up holding the last fold label.
        fold_tags = {
            model_name + fold_suffix: {**tags[model_name], 'Fold': f'{i + 1} / {n_folds}'}
            for model_name in model_names
        }

        fold_models, fold_configs = experiment_setup(experiment_configs, fold_names)
        experiment.run_experiment(fold_models, fold_configs, mlflow_path, fold_tags, save_models=save_models, log_results=log_results)

In [6]:
# Base names of the baseline models; fold suffixes are appended at run time.
baseline_names = ['br_baseline', 'cc_baseline', 'lp_baseline']

# One independent tag dict per baseline model (the dict and its 'Comments'
# list are created anew for every name, so no objects are shared).
baseline_tags = dict(
    (
        name,
        {
            'Comments': ['Baseline', 'Time signatures mapped to 4/4', 'No Keys'],
            'Label Threshold': '10',
        },
    )
    for name in baseline_names
)

In [7]:
# Number of cross-validation folds; build_cv_experiments returns one
# Experiment per fold.
N_SPLITS = 5
# force_split=True inside build_cv_experiments re-splits the dataset even if it
# was already split (see the message printed below this cell).
baseline_experiments = build_cv_experiments(n_splits=N_SPLITS, dataset=track_dataset, random_state=SEED)

Dataset has already been split, proceeding will overwrite split data.
`force_split`=True, Proceeding with new split.


In [8]:
# Run all baseline models on every fold, saving the models and logging results.
run_cv_experiments(
    experiments=baseline_experiments,
    experiment_configs=baseline_experiment_configs,
    model_names=baseline_names,
    tags=baseline_tags,
    save_models=True,
    log_results=True,
)

2024/03/29 15:03:47 INFO mlflow.tracking.fluent: Experiment with name '/Users/ishanchowdhur@gmail.com/model_exploration' does not exist. Creating a new experiment.


Running model: br_baseline_1


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

br_baseline_1 complete.
Running model: cc_baseline_1


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

cc_baseline_1 complete.
Running model: lp_baseline_1


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

lp_baseline_1 complete.
Running model: br_baseline_2


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

br_baseline_2 complete.
Running model: cc_baseline_2


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

cc_baseline_2 complete.
Running model: lp_baseline_2


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

lp_baseline_2 complete.
Running model: br_baseline_3


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

br_baseline_3 complete.
Running model: cc_baseline_3


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

cc_baseline_3 complete.
Running model: lp_baseline_3


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

lp_baseline_3 complete.
Running model: br_baseline_4


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

br_baseline_4 complete.
Running model: cc_baseline_4


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

cc_baseline_4 complete.
Running model: lp_baseline_4


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

lp_baseline_4 complete.
Running model: br_baseline_5


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

br_baseline_5 complete.
Running model: cc_baseline_5


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

cc_baseline_5 complete.
Running model: lp_baseline_5


Uploading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]

lp_baseline_5 complete.


In [24]:
def average_cv_metrics(experiments, model_name_root):
    """Average a model's metrics over all cross-validation folds.

    Fold ``i`` (0-based) is expected to hold its results under the key
    ``f'{model_name_root}_{i + 1}'`` in ``experiment.results``, with the
    metric values available as a mapping in ``.metrics``.

    Args:
        experiments: Sequence of per-fold experiments, in fold order.
        model_name_root: Base model name without the fold suffix.

    Returns:
        Dict mapping each metric name of the first fold to its mean over all
        folds, rounded to three decimals and returned as a built-in float.

    Raises:
        ValueError: If ``experiments`` is empty (there is nothing to average).
        KeyError: If a fold has no result for the model, or a later fold lacks
            a metric that the first fold reported.
    """
    n_folds = len(experiments)
    if n_folds == 0:
        raise ValueError('Cannot average metrics: no experiments were provided')

    per_fold_metrics = [
        experiment.results[f'{model_name_root}_{i + 1}'].metrics
        for i, experiment in enumerate(experiments)
    ]

    # The first fold defines which metrics are averaged; values are summed in
    # fold order. Formatting with :.3f and converting back yields a plain
    # Python float rounded to three decimals.
    return {
        key: float(f'{sum(metrics[key] for metrics in per_fold_metrics) / n_folds:.3f}')
        for key in per_fold_metrics[0]
    }

In [26]:
# Fold-averaged metrics (rounded to 3 decimals) for the 'br_baseline' models.
print(average_cv_metrics(baseline_experiments, 'br_baseline'))

{'weighted_jaccard': 0.177, 'hamming_loss': 0.052, 'precision_micro_avg': 0.717, 'recall_micro_avg': 0.203, 'f1_micro_avg': 0.316, 'precision_macro_avg': 0.406, 'recall_macro_avg': 0.145, 'f1_macro_avg': 0.19, 'precision_weighted_avg': 0.527, 'recall_weighted_avg': 0.203, 'f1_weighted_avg': 0.263, 'precision_samples_avg': 0.255, 'recall_samples_avg': 0.24, 'f1_samples_avg': 0.24}


In [27]:
# Fold-averaged metrics (rounded to 3 decimals) for the 'cc_baseline' models.
print(average_cv_metrics(baseline_experiments, 'cc_baseline'))

{'weighted_jaccard': 0.234, 'hamming_loss': 0.056, 'precision_micro_avg': 0.553, 'recall_micro_avg': 0.312, 'f1_micro_avg': 0.399, 'precision_macro_avg': 0.406, 'recall_macro_avg': 0.22, 'f1_macro_avg': 0.25, 'precision_weighted_avg': 0.512, 'recall_weighted_avg': 0.312, 'f1_weighted_avg': 0.344, 'precision_samples_avg': 0.407, 'recall_samples_avg': 0.35, 'f1_samples_avg': 0.364}


In [25]:
# Fold-averaged metrics (rounded to 3 decimals) for the 'lp_baseline' models.
print(average_cv_metrics(baseline_experiments, 'lp_baseline'))

{'weighted_jaccard': 0.303, 'hamming_loss': 0.055, 'precision_micro_avg': 0.548, 'recall_micro_avg': 0.412, 'f1_micro_avg': 0.47, 'precision_macro_avg': 0.433, 'recall_macro_avg': 0.316, 'f1_macro_avg': 0.34, 'precision_weighted_avg': 0.509, 'recall_weighted_avg': 0.412, 'f1_weighted_avg': 0.439, 'precision_samples_avg': 0.551, 'recall_samples_avg': 0.466, 'f1_samples_avg': 0.49}
