In [None]:
import os
import csv
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Row labels of the ten bootstrap runs, as strings: '0' through '9'.
bootstraps = list(map(str, range(10)))

# Plot the mean AUC +/- standard deviation for each model.
# Inputs: a list of model names, and a pandas DataFrame loaded from the results CSV exported from Google Sheets.

def plot_performance(model_names: list, results_df: pd.DataFrame):
    """Draw a bar chart of each model's mean AUC with std-dev error bars.

    The chart is rendered with ``plt.show()``; nothing is returned.

    Args:
        model_names: Column labels of ``results_df`` to plot, in bar order.
        results_df: Results table whose index contains the rows ``'mean'``
            and ``'std'`` and whose columns include every entry of
            ``model_names``.

    Raises:
        ValueError: If ``model_names`` is empty (there would be nothing to
            plot and the figure width would be zero).
        KeyError: Propagated from ``results_df.loc`` when a model column or
            the ``'mean'``/``'std'`` row is missing.
    """
    if len(model_names) == 0:
        raise ValueError("model_names must contain at least one model")

    # Only the summary rows are plotted; read them straight from the
    # DataFrame passed in rather than re-reading any file from disk.
    means = [float(results_df.loc['mean', model]) for model in model_names]
    stds = [float(results_df.loc['std', model]) for model in model_names]

    plt.rcParams.update({'font.size': 20})
    # Scale the figure width with the number of bars so labels stay legible.
    fig_width = len(model_names) * 3
    fig, ax = plt.subplots(figsize=(fig_width, 8))

    x_pos = np.arange(len(model_names))
    # Target the axes explicitly instead of relying on the implicit
    # "current axes" state.
    sns.barplot(x=x_pos, y=means, yerr=stds, ax=ax)
    ax.set_ylabel('AUC', fontsize=20)
    ax.set_ylim([0.4, 0.9])
    ax.set_xticks(range(len(model_names)))
    ax.set_xticklabels(model_names, fontsize=20)
    ax.set_title('Model Performance')
    ax.yaxis.grid(True)

    # Print each mean just above the lower y-limit (0.4), shifted left so the
    # text sits roughly centred under its bar.
    offset = -0.02 * len(model_names)
    for i, v in enumerate(means):
        ax.text(x_pos[i] + offset, 0.41, f'{v:0.3f}')

    plt.xticks(rotation=90)
    # NOTE(review): these are set after the figure has been built, so they
    # presumably only influence figures created later -- confirm intent.
    sns.set_style("whitegrid")
    sns.set_context("poster")
    plt.tight_layout()
    plt.show()
  
# Location of the results sheet exported as CSV ("~" expands to the home dir).
results_fpath = os.path.expanduser(
    "~/Dropbox/sts_ecg/results/STS ECG models and results - architecture.csv"
)

# The first CSV column supplies the row labels used as the DataFrame index.
results_df = pd.read_csv(results_fpath, index_col=0)

# Plot the selected models, in this left-to-right order.
plot_performance(
    model_names=[
        'metadata-intervals-age-sex',
        'metadata-sex',
        'metadata-age',
        'logistic regression on metadata',
    ],
    results_df=results_df,
)