In [30]:
import pandas as pd
import numpy as onp

# Load the exported W&B results table for the hyperparameter ablation runs
df = pd.read_csv(
    filepath_or_buffer="wandb_export_monotonic_actuator_hyperparameter_ablations.csv",
)
df

Unnamed: 0,Name,test_mse.mean,State,Created,Runtime,latent_encoder_depth,latent_encoder_width,poly_degree,pre_processor_layers,ring_emb_dim,...,test_68_ci.mean,test_95_ci.mean,test_99_ci.mean,test_median.mean,train_68_ci.mean,train_95_ci.mean,train_99_ci.mean,train_median.mean,train_mse.mean,test_rmse_in_newtons
0,fine-dragon-50,1.016934,finished,2025-02-07T00:02:19.000Z,227,2,8,1,,24,...,0.906005,2.115124,2.618660,0.590171,0.454300,1.330037,2.271893,0.305540,0.626296,4.485503
1,classic-lion-107,1.004674,finished,2025-02-07T02:20:08.000Z,253,3,16,1,,3,...,0.950567,2.018149,2.421202,0.587077,0.150320,0.481912,0.736487,0.097764,0.222715,4.458382
2,fiery-plant-110,0.992575,finished,2025-02-07T02:26:19.000Z,363,3,64,1,"[8,8]",3,...,0.909015,2.054288,2.494100,0.561363,0.421461,1.093472,1.754753,0.263747,0.520880,4.431455
3,visionary-tree-135,0.857359,finished,2025-02-07T03:35:09.000Z,266,4,16,1,,24,...,0.777185,1.756586,2.204646,0.504042,0.284978,0.817822,1.417046,0.178334,0.389050,4.118566
4,zesty-voice-142,0.925195,finished,2025-02-07T03:52:41.000Z,418,4,64,1,"[8,8]",24,...,0.860013,1.939736,2.363811,0.514379,0.418468,1.068150,1.716396,0.266918,0.512325,4.278401
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427,bumbling-eon-394,1.009468,finished,2025-02-07T14:37:21.000Z,271,4,8,3,,3,...,0.897983,2.092115,2.629394,0.581837,0.158865,0.451824,0.708596,0.101968,0.219328,4.469007
428,youthful-dust-381,1.119915,finished,2025-02-07T14:01:46.000Z,308,3,16,3,"[16,5]",12,...,1.033730,2.274918,2.886618,0.652922,0.138805,0.412820,0.651022,0.088732,0.198965,4.707142
429,dandy-mountain-335,1.218728,finished,2025-02-07T12:08:45.000Z,242,1,32,3,,12,...,1.137662,2.489352,3.096091,0.721056,0.492664,1.084435,2.020043,0.306977,0.579154,4.910415
430,legendary-butterfly-329,1.032172,finished,2025-02-07T11:54:51.000Z,238,1,32,3,,3,...,1.043256,2.047380,2.482769,0.652623,0.418600,1.029734,2.206488,0.269773,0.552347,4.518985


In [3]:
# Columns (metrics and settings) recorded for every hyperparameter combination
df.columns

Index(['Name', 'test_mse.mean', 'State', 'Created', 'Runtime',
       'latent_encoder_depth', 'latent_encoder_width', 'poly_degree',
       'pre_processor_layers', 'ring_emb_dim', 'num_ensemble', 'num_steps',
       'test_68_ci.mean', 'test_95_ci.mean', 'test_99_ci.mean',
       'test_median.mean', 'train_68_ci.mean', 'train_95_ci.mean',
       'train_99_ci.mean', 'train_median.mean', 'train_mse.mean',
       'test_rmse_in_newtons'],
      dtype='object')

The choices for hyperparameters are:
 * latent_encoder_depth : the depth (number of layers) of the base MLP used in the model.
    * Possible choices: 1, 2, 3 or 4.
 * latent_encoder_width : the width (number of neurons per layer) of the base MLP used in the model.
    * Possible choices: 8, 16, 32 or 64.
 * poly_degree : the degree of the polynomial relationship assumed between pressure and force.
    * Possible choices: 1, 2 or 3.
 * ring_emb_dim : the embedding dimension for ring parameters.
    * Possible choices: 3, 12 or 24.
 * pre_processor_layers : the layers used in the ring encoder network. If None, no network was used.
    * Possible choices: None, [8,8] or [16,5].

In total, this results in 4x4x3x3x3=432 possible combinations we've considered. In all cases, we train an ensemble of 32 networks initialized independently.

In [32]:
# Helper that prints a human-readable summary of a single run.
# Conversion factor applied to sqrt(train_mse.mean) to report it in newtons
# (presumably the stored MSE is in squared pounds-force -- TODO confirm).
newtons_per_pound = 4.448222


def print_run_summary(run):
    """Print the error metrics and architecture settings of one run.

    Args:
        run: A row-like object supporting ``run[key]`` lookups (for example
            one row of the results DataFrame) with the keys
            'test_rmse_in_newtons', 'train_mse.mean', 'poly_degree',
            'latent_encoder_depth', 'latent_encoder_width',
            'pre_processor_layers' and 'ring_emb_dim'.

    Returns:
        None. Five lines are written to stdout.
    """
    # Each value is looked up right before it is printed, one line at a time.
    test_rmse = run['test_rmse_in_newtons']
    print(f"Average test RMSE is {test_rmse : .2f}N.")

    train_rmse = onp.sqrt(run['train_mse.mean'])*newtons_per_pound
    print(f"Average train RMSE is {train_rmse : .2f}N.")

    degree = run['poly_degree']
    print(f"This network assumes force is represented as a polynomial of degree {degree} in pressure.")

    depth = run['latent_encoder_depth']
    width = run['latent_encoder_width']
    print(f"Base network has {depth} hidden layers of {width} neurons each.")

    # Only a string value describes ring-encoder MLP layers; anything else
    # (e.g. a missing CSV field) is treated as "no ring-encoder network".
    layers = run['pre_processor_layers']
    if type(layers) is str:
        print(f"Ring encoder projects ring parameter to dimension {run['ring_emb_dim']} and then passed through an MLP with layers {layers} before being fed to base network.")
        return
    print(f"Ring encoder projects ring parameter to dimension {run['ring_emb_dim']} and then directly fed to base network.")


In [37]:
# Rank the hyperparameter combinations from lowest to highest test RMSE (Root Mean Squared Error)
sorted_df = df.sort_values('test_rmse_in_newtons', ascending=True)

sorted_df

Unnamed: 0,Name,test_mse.mean,State,Created,Runtime,latent_encoder_depth,latent_encoder_width,poly_degree,pre_processor_layers,ring_emb_dim,...,test_68_ci.mean,test_95_ci.mean,test_99_ci.mean,test_median.mean,train_68_ci.mean,train_95_ci.mean,train_99_ci.mean,train_median.mean,train_mse.mean,test_rmse_in_newtons
94,spring-rain-82,0.819317,finished,2025-02-07T01:17:37.000Z,315,3,8,1,"[16,5]",24,...,0.790536,1.636451,2.086749,0.497220,0.184208,0.558271,0.897409,0.119551,0.262100,4.026158
336,cool-sound-441,0.823516,finished,2025-02-07T18:03:23.000Z,354,4,64,3,,12,...,0.727296,1.725247,2.183609,0.485298,0.116035,0.378755,0.634907,0.067907,0.181086,4.036462
92,electric-spaceship-161,0.828480,finished,2025-02-07T04:46:35.000Z,355,4,64,1,,12,...,0.738968,1.720348,2.121925,0.494977,0.198888,0.539792,0.932129,0.129528,0.262394,4.048608
142,vivid-hill-89,0.841595,finished,2025-02-07T01:33:17.000Z,270,3,32,1,,24,...,0.746018,1.708808,2.163088,0.498310,0.201069,0.531418,0.823867,0.128520,0.255608,4.080528
36,sage-universe-140,0.842341,finished,2025-02-07T03:47:44.000Z,296,4,32,1,,12,...,0.761374,1.721026,2.118988,0.525291,0.132179,0.483827,0.801092,0.082322,0.220350,4.082336
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10,hearty-serenity-40,1.420541,finished,2025-02-06T23:40:54.000Z,248,1,16,1,"[8,8]",12,...,1.110673,3.110321,4.278088,0.738621,0.593838,2.537765,4.442648,0.369185,1.078154,5.301413
121,logical-forest-33,1.427621,finished,2025-02-06T23:25:54.000Z,220,1,64,1,,24,...,1.169463,3.152013,4.317991,0.739200,0.640523,2.587191,4.538185,0.390064,1.112992,5.314608
128,hearty-bush-31,1.479341,finished,2025-02-06T23:21:11.000Z,282,1,64,1,"[16,5]",24,...,1.340529,3.068322,4.096285,0.900703,0.485091,1.862692,3.514407,0.294247,0.848155,5.410022
70,genial-shape-15,1.499719,finished,2025-02-06T22:50:02.000Z,194,1,8,1,,24,...,1.165224,3.320995,4.453530,0.784931,0.753749,3.166394,5.360965,0.489414,1.327310,5.447154


In [39]:
# Looking at the best 10 hyperparameter combinations based on test RMSE (Root Mean Squared Error)
# Relies on sorted_df being ordered by ascending test_rmse_in_newtons, so that
# positions 0-9 are the ten lowest-error runs, and on print_run_summary being defined.
# NOTE(review): the total "432" is hard-coded in the message -- confirm it matches len(sorted_df).
for i in range(10):
    # i is a 0-based row position; the printed rank is 1-based.
    print(f"Number {i+1} Combination (out of 432):")
    print_run_summary(sorted_df.iloc[i])
    print('--------------------------')

Number 1 Combination (out of 432):
Average test RMSE is  4.03N.
Average train RMSE is  2.28N.
This network assumes force is represented as a polynomial of degree 1 in pressure.
Base network has 3 hidden layers of 8 neurons each.
Ring encoder projects ring parameter to dimension 24 and then passed through an MLP with layers [16,5] before being fed to base network.
--------------------------
Number 2 Combination (out of 432):
Average test RMSE is  4.04N.
Average train RMSE is  1.89N.
This network assumes force is represented as a polynomial of degree 3 in pressure.
Base network has 4 hidden layers of 64 neurons each.
Ring encoder projects ring parameter to dimension 12 and then directly fed to base network.
--------------------------
Number 3 Combination (out of 432):
Average test RMSE is  4.05N.
Average train RMSE is  2.28N.
This network assumes force is represented as a polynomial of degree 1 in pressure.
Base network has 4 hidden layers of 64 neurons each.
Ring encoder projects ring p