## Set up

In [1]:
from os import listdir
import pandas as pd

## Combine performance from multiple models

In [2]:
# name of the combined summary file written into the performance directory
SUMMARY_FILENAME = 'kmeans_summary.csv'

# column order of the combined summary
SUMMARY_COLUMNS = ['vector_name',
                   'hyperparameter_name',
                   'hyperparameter_value',
                   'metric_name',
                   'metric_value']


def parse_kmeans_filename(file):
    """Split a performance file name into its vector and hyperparameter parts.

    The name is split on underscores: the first field is discarded, the last
    field holds the hyperparameter, and everything in between is the vector
    field (so a vector name may itself contain underscores).  As in the
    original cell, the leading 'vector' / 'clusters' labels and the trailing
    '.csv' are stripped by length only, not validated.

    Parameters:
        file: file name such as 'kmeans_vectortfidf_clusters5.csv'.

    Returns:
        Tuple (vector_name, hyperparameter_name, hyperparameter_value); the
        hyperparameter value is returned as a string.

    Raises:
        ValueError: if the name has fewer than three underscore-separated
            fields.
    """
    hyperparameter_name = 'clusters'

    parts = file.split('_')
    if len(parts) < 3:
        raise ValueError(
            'cannot parse performance file name {!r}: expected at least three '
            'underscore-separated fields'.format(file))

    # everything between the first and last field belongs to the vector field
    vector_string = '_'.join(parts[1:-1])
    hyperparameter_string = parts[-1]

    vector_name = vector_string[len('vector'):]
    hyperparameter_value = hyperparameter_string[len(hyperparameter_name):-len('.csv')]

    return vector_name, hyperparameter_name, hyperparameter_value


def summarize_kmeans_performance(performance_directory):
    """Combine the per-model k-means metric files of a directory into one table.

    Every file in the directory whose name contains 'kmeans' is read, except
    an already existing summary file.  Each file must provide 'metric' and
    'value' columns; every row of every file becomes one row of the result.

    Parameters:
        performance_directory: path of the directory holding the metric CSVs.

    Returns:
        pandas DataFrame with the columns listed in SUMMARY_COLUMNS; it has no
        rows when the directory contains no matching files.
    """
    # sort so the row order does not depend on the arbitrary order of listdir
    kmeans_files = sorted(file for file in listdir(performance_directory)
                          if 'kmeans' in file and file != SUMMARY_FILENAME)

    # dictionary of lists tracking metrics across models
    summary_dict = {column: [] for column in SUMMARY_COLUMNS}

    for file in kmeans_files:
        vector_name, hyperparameter_name, hyperparameter_value = parse_kmeans_filename(file)

        metrics_df = pd.read_csv(performance_directory + '/' + file)
        n_metrics = len(metrics_df)

        # take metric names and values row by row, so a repeated metric name
        # keeps its own value instead of the value of its first occurrence
        summary_dict['vector_name'].extend([vector_name] * n_metrics)
        summary_dict['hyperparameter_name'].extend([hyperparameter_name] * n_metrics)
        summary_dict['hyperparameter_value'].extend([hyperparameter_value] * n_metrics)
        summary_dict['metric_name'].extend(metrics_df['metric'].tolist())
        summary_dict['metric_value'].extend(metrics_df['value'].tolist())

    return pd.DataFrame(summary_dict, columns=SUMMARY_COLUMNS)


# directory holding the per-model performance files
performance_directory = '../data/derived/performance'

# runs in a notebook cell or as a script (where __name__ is '__main__'),
# but not when the definitions above are merely imported
if __name__ == '__main__':

    # create summary dataframe across all k-means models
    summary_df = summarize_kmeans_performance(performance_directory)

    # write summary to CSV
    filepath_out = performance_directory + '/' + SUMMARY_FILENAME
    summary_df.to_csv(filepath_out, index=False)