# Multi-fidelity Modelling Analysis

In [25]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from ast import literal_eval
from pandas.plotting import parallel_coordinates

In [26]:
df = pd.read_hdf("../exp_data/multifidelity_experiments.h5", index=True)
df = df.reset_index(drop=False)
df = df.map(lambda x: literal_eval(str(x)))
df_mf = df[['index', 'mean_mae', 'mean_mse', 'mean_rmse', 'std_mae', 'std_mse', 'std_rmse']]
from pandas.plotting import parallel_coordinates

In [27]:
df_mf

Unnamed: 0,index,mean_mae,mean_mse,mean_rmse,std_mae,std_mse,std_rmse
0,"(0.1, 5, 50)",2.4725,7.9557,2.7974,0.3275,1.9512,0.3613
1,"(0.1, 5, 150)",3.4598,14.9036,3.8547,0.2213,1.5874,0.2125
2,"(0.1, 5, 200)",3.3547,14.2694,3.7753,0.1207,0.9643,0.1277
3,"(0.1, 5, 400)",3.4083,14.5192,3.8065,0.1760,1.3064,0.1721
4,"(0.1, 10, 50)",2.3819,7.3030,2.7003,0.0973,0.5794,0.1062
...,...,...,...,...,...,...,...
75,"(1.5, 50, 400)",3.4952,15.3384,3.9153,0.1027,0.7373,0.0943
76,"(1.5, 100, 50)",3.1307,12.5598,3.5421,0.0953,0.8132,0.1141
77,"(1.5, 100, 150)",3.4293,14.8624,3.8540,0.0836,0.7336,0.0959
78,"(1.5, 100, 200)",3.4421,15.0227,3.8757,0.0566,0.2762,0.0359


In [28]:
df_mf = df_mf.assign(**dict(zip(['noise', r'% high fidelity', 'num low fidelity'], zip(*df_mf["index"]))))

In [29]:
df_mf = df_mf[['noise', r'% high fidelity', 'num low fidelity','mean_mse', 'std_mse', 'mean_rmse', 'std_rmse', 'mean_mae', 'std_mae']]

In [30]:
df_mf.describe()

Unnamed: 0,noise,% high fidelity,num low fidelity,mean_mse,std_mse,mean_rmse,std_rmse,mean_mae,std_mae
count,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0,80.0
mean,0.775,37.0,200.0,13.381085,0.867302,3.632048,0.12294,3.22969,0.117269
std,0.529509,35.378603,128.279758,2.74241,0.639888,0.409715,0.09249,0.382794,0.087112
min,0.1,5.0,50.0,6.8139,0.1908,2.59,0.0249,2.2665,0.0132
25%,0.4,10.0,125.0,13.3359,0.425475,3.650175,0.0569,3.2585,0.05495
50%,0.75,20.0,175.0,14.58855,0.73545,3.8184,0.09805,3.39485,0.0954
75%,1.125,50.0,250.0,15.071625,1.1113,3.881075,0.1676,3.4637,0.15785
max,1.5,100.0,400.0,16.3569,3.3918,4.0397,0.4416,3.6401,0.427


In [31]:
def shorten_and_round_list(x, metric):
    return f"{np.round(x.loc[f'mean_{metric}'],2)}({np.round(x.loc[f'std_{metric}'], 2)})"

In [32]:
df_final = df_mf[['noise', r'% high fidelity', 'num low fidelity']]
df_final["MAE"] = df_mf.apply(lambda x: shorten_and_round_list(x, "mae"), axis=1)
df_final["MSE"] = df_mf.apply(lambda x: shorten_and_round_list(x, "mse"), axis=1)
df_final["RMSE"] = df_mf.apply(lambda x: shorten_and_round_list(x, "rmse"), axis=1)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [33]:
s = df_final.style.format(precision=1).highlight_min(axis=0, subset=["MAE", "MSE", "RMSE"], props='bfseries: ;').hide(subset=None, level=None, names=False)

In [34]:
with open("../tables/multi_fidelity_results.tex", "w+") as f:
    f.write(s.to_latex(siunitx=True, environment='longtable', hrules=True))

In [41]:
import plotly.express as px
pd.DataFrame.iteritems = pd.DataFrame.items

fig = px.parallel_coordinates(
    df_mf,
    # color=df_mf.index,
    color='mean_mae', 
    dimensions=['noise', r'% high fidelity', 'num low fidelity', 'mean_mae'],                              
    color_continuous_scale=px.colors.diverging.Tealrose, 
    color_continuous_midpoint=2, 
    template='seaborn', 
    range_color=[df_mf.mean_mae.min(), df_mf.mean_mae.max()]
)

fig.write_image("../figs/mf_mean_mae.pdf", format="pdf")
fig.write_image("../figs/mf_mean_mee.png", format="png")
fig.show()

In [44]:
import plotly.express as px
pd.DataFrame.iteritems = pd.DataFrame.items

fig = px.parallel_coordinates(
    df_mf,
    # color=df_mf.index,
    color='mean_mse', 
    dimensions=['noise', r'% high fidelity', 'num low fidelity', 'mean_mse'],                              
    color_continuous_scale=px.colors.diverging.Tealrose, 
    color_continuous_midpoint=2, 
    template='seaborn', 
    range_color=[df_mf.mean_mse.min(), df_mf.mean_mse.max()]
)

fig.write_image("../figs/mf_mean_mse.pdf", format="pdf")
fig.write_image("../figs/mf_mean_mse.png", format="png")
fig.show()

In [45]:
import plotly.express as px
pd.DataFrame.iteritems = pd.DataFrame.items

fig = px.parallel_coordinates(
    df_mf,
    # color=df_mf.index,
    color='mean_rmse', 
    dimensions=['noise', r'% high fidelity', 'num low fidelity', 'mean_rmse'],                              
    color_continuous_scale=px.colors.diverging.Tealrose, 
    color_continuous_midpoint=2, 
    template='seaborn', 
    range_color=[df.mean_rmse.min(), df_mf.mean_rmse.max()]
)

fig.write_image("../figs/mf_mean_rmse.pdf", format="pdf")
fig.write_image("../figs/mf_mean_rmse.png", format="png")
fig.show()