In [1]:
import pandas as pd
from glob import glob
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
def describe_run(filename):
    """Derive the summary-table labels for one results file from its name.

    Each label is decided by a plain substring check on ``filename``:

    * contains ``'sub'``      -> Model ``'Substitution'``, otherwise ``'Tree'``
    * contains ``'fulltree'`` -> Resolution ``'Full tree'``, otherwise
      ``'Interserotype'``
    * contains ``'nonorm'``   -> normalization column ``'No'``, otherwise
      ``'Yes'``

    Returns a dict keyed by the corresponding summary-table column names.
    """
    return {
        'Model': 'Substitution' if 'sub' in filename else 'Tree',
        'Resolution': 'Full tree' if 'fulltree' in filename else 'Interserotype',
        r'$v_a$ and $p_b$': 'No' if 'nonorm' in filename else 'Yes',
    }


def summarize_run(path):
    """Build one summary row (labels plus mean metrics) for the CSV at ``path``.

    Only the ``rms_error`` and ``r_squared`` columns are averaged. Selecting
    them before calling ``mean()`` keeps any non-numeric column in the file
    from making the reduction raise on pandas versions where ``numeric_only``
    defaults to False.

    Raises ``KeyError`` if either metric column is missing from the file.
    """
    metrics = pd.read_csv(path)[['rms_error', 'r_squared']].mean()
    row = describe_run(path)
    row['RMSE'] = metrics['rms_error']
    row[r'Pearson R^2'] = metrics['r_squared']
    return row


# Sort the matches: glob() returns files in arbitrary order, and the row order
# here becomes the index of the DataFrame built from these records.
files = sorted(glob('*.csv'))

# One record per results file; turned into a DataFrame in the next cell.
model_performance = [summarize_run(f) for f in files]

In [3]:
# Column order of the final table; the first three columns are also the sort keys.
summary_columns = ['Model', 'Resolution', r'$v_a$ and $p_b$', 'RMSE', r'Pearson R^2']

# Records -> DataFrame, fix the column order, sort by the categorical labels,
# and round the metrics to two decimals for presentation.
model_performance = (
    pd.DataFrame(model_performance)
    .reindex(columns=summary_columns)
    .sort_values(summary_columns[:3])
    .round(2)
)

# Render as a LaTeX tabular (the row index is included by default).
# NOTE(review): whether `$`, `_` and `^` in the headers are escaped depends on
# the pandas version/options in use -- confirm the rendered header is as intended.
table = model_performance.to_latex()

In [4]:
# Persist the LaTeX table. The context manager flushes and closes the file
# handle deterministically instead of leaving it to garbage collection.
with open('./model_performance_summary.txt', 'w') as summary_file:
    summary_file.write(table)