In [1]:
import json
import os
import pandas as pd

In [2]:
def get_tag(architecture, method):
    """Build the sweep tag ``"<architecture>_<method>"``.

    The tag is used in this notebook as the name of the sweep sub-directory
    and as the prefix of the exported CSV files.

    Args:
        architecture: Architecture name (any value usable in string formatting).
        method: Training method name (any value usable in string formatting).

    Returns:
        str: Both names joined by a single underscore.
    """
    return "{}_{}".format(architecture, method)

def load_results(base_folder, architecture, method):
    """Collect configuration and metrics of every experiment of one sweep.

    The sweep directory is ``<base_folder>/<tag>`` with ``tag`` obtained from
    ``get_tag(architecture, method)``. Every sub-directory whose name does not
    start with ``"_"`` is treated as one experiment and is expected to hold a
    ``config.json`` and a ``metrics.json``.

    Unusable entries are skipped so that one bad entry does not abort the
    whole load:

    * entries that are not directories are ignored silently;
    * experiments whose config/metrics file is missing or is not valid JSON,
      or whose metrics file is empty, are reported on stdout and skipped.

    Args:
        base_folder: Directory containing one sub-directory per sweep tag.
        architecture: Architecture name, first half of the tag.
        method: Training method name, second half of the tag.

    Returns:
        pandas.DataFrame: One row per loaded experiment with the columns
        ``id``, every key of ``config["model_params"]``, every key of
        ``config["training_params"]`` (when that section exists),
        ``train_rel_l2``, ``val_rel_l2``, ``test_rel_l2``, ``time_spent`` and
        ``seed``. The frame is empty when no experiment could be loaded.

    Raises:
        FileNotFoundError: If the sweep directory itself does not exist.
        KeyError: If a loaded config or non-empty metrics file lacks one of
            the keys read below.
    """
    sweep_dir = os.path.join(base_folder, get_tag(architecture, method))
    records = []

    # Sorted so that the row order does not depend on the file system.
    for experiment_id in sorted(os.listdir(sweep_dir)):
        experiment_dir = os.path.join(sweep_dir, experiment_id)
        # Underscore-prefixed entries are deliberately excluded; plain files
        # next to the experiment directories cannot be experiments either.
        if experiment_id.startswith("_") or not os.path.isdir(experiment_dir):
            continue

        try:
            with open(os.path.join(experiment_dir, "config.json")) as fin:
                config = json.load(fin)
            with open(os.path.join(experiment_dir, "metrics.json")) as fin:
                raw_metrics = json.load(fin)
        except (FileNotFoundError, json.JSONDecodeError) as error:
            # Missing or half-written files are treated like empty metrics:
            # report and move on.
            print(f"Couldn't load experiment {experiment_id}: {error}")
            continue

        if not raw_metrics:
            print(f"Couldn't load experiment {experiment_id}")
            continue

        # Only the first entry of each "values" list is kept.
        # NOTE(review): presumably each of these metrics is logged exactly
        # once per run -- confirm against the training script.
        records.append({
            "id": experiment_id,
            **config["model_params"],
            **config.get("training_params", {}),
            "train_rel_l2": raw_metrics["eval_train_metric"]["values"][0],
            "val_rel_l2": raw_metrics["eval_val_metric"]["values"][0],
            "test_rel_l2": raw_metrics["eval_test_metric"]["values"][0],
            "time_spent": raw_metrics["training_time"]["values"][0],
            "seed": config["seed"],
        })

    return pd.DataFrame(records)

def get_grid_columns(architecture, method):
    """Return the hyperparameter column names that define a sweep's grid.

    The result is the architecture-specific columns followed by the
    method-specific ones. For the ``("fno", "swim")`` combination
    ``"layer_width"`` is appended as an extra column.

    Args:
        architecture: Key into the lookup table below (the notebook uses
            ``"fcn"``, ``"in_fourier"``, ``"deeponet"`` and ``"fno"``).
        method: Key into the same lookup table (the notebook uses ``"adam"``
            and ``"swim"``).

    Returns:
        list[str]: A freshly built list; the lookup tables are not modified.

    Raises:
        KeyError: If either name is not present in the lookup table.
    """
    # deeponet and in_fourier share the same architecture columns.
    width_and_modes = ["layer_width", "n_modes"]
    grid_by_name = {
        "adam": ["learning_rate", "n_layers", "batch_size"],
        "swim": ["regularization_scale", "n_layers"],
        "fno": ["n_hidden_channels", "n_modes"],
        "fcn": ["layer_width"],
        "deeponet": width_and_modes,
        "in_fourier": width_and_modes,
    }

    selected = [*grid_by_name[architecture], *grid_by_name[method]]
    if (architecture, method) == ("fno", "swim"):
        selected.append("layer_width")
    return selected

def aggregate_results(df, grid_columns, metric_columns):
    """Average the metric columns over all rows sharing a grid configuration.

    Args:
        df: DataFrame holding at least ``grid_columns`` and ``metric_columns``.
        grid_columns: Column names used as group keys.
        metric_columns: Column names whose mean is computed per group.

    Returns:
        pandas.DataFrame: One row per distinct combination of the grid columns,
        with the grid columns as regular columns followed by the averaged
        metric columns.
    """
    return (
        df.groupby(grid_columns)[metric_columns]
        .agg("mean")
        .reset_index()
    )

def print_grid(df, grid_columns):
    """Print, per grid column, whether every value occurs equally often.

    A column whose distinct values all appear in the same number of rows is
    reported as a proper grid together with those values; otherwise a warning
    and the per-value row counts are printed.

    Args:
        df: DataFrame with one row per experiment.
        grid_columns: Names of the columns to check.
    """
    for column in grid_columns:
        occurrences = df.groupby(column).size()
        # Anything other than exactly one distinct count means the values are
        # not evenly represented (an empty frame is reported as wrong too).
        if occurrences.nunique() != 1:
            print(f"{column}: something is wrong")
            print(occurrences)
            continue
        print(f"{column}: proper grid for {df[column].unique()}")

In [3]:
# Sweep location, the metric columns averaged per grid configuration, and a
# switch for the detailed per-sweep report.
base_folder = "../experiments"
metric_columns = ["train_rel_l2", "val_rel_l2", "test_rel_l2", "time_spent"]
verbose = True

architectures = ["fcn", "in_fourier", "deeponet", "fno"]
methods = ["adam", "swim"]

# One Series per (architecture, method): the aggregated row with the lowest
# mean validation error.
best_rows = []

for architecture, method in [(a, m) for a in architectures for m in methods]:
    tag = get_tag(architecture, method)
    grid_columns = get_grid_columns(architecture, method)

    # Missing hyperparameters become the placeholder "n/a".
    # NOTE(review): presumably so those rows are kept as a group by the
    # groupby in aggregate_results -- confirm.
    df = (
        load_results(base_folder, architecture, method)
        .fillna("n/a")
        .drop(columns="id")
        .reset_index(drop=True)
    )
    aggregated = aggregate_results(df, grid_columns, metric_columns).assign(
        architecture=architecture,
        method=method,
    )

    print(f"{architecture=}, {method=}")
    print(f"Total: {len(df)} experiments.")
    print(f"Total aggregated: {len(aggregated)} experiments.")
    print(f"Total time: {df['time_spent'].sum() / 3600:.2f} hours.")

    # Export the per-run table and the per-configuration means.
    df.to_csv(f"{tag}_full.csv")
    aggregated.to_csv(f"{tag}_mean.csv")

    # Model selection uses the validation metric only.
    best_aggregated = aggregated.loc[aggregated["val_rel_l2"].idxmin()]
    best_rows.append(best_aggregated)

    if verbose:
        print_grid(df, grid_columns)
        print()
        print(best_aggregated)
        # Individual runs whose grid values all match the best configuration.
        mask = (df[grid_columns] == best_aggregated[grid_columns]).all(axis=1)
        best_idxs = df.index[mask]
        print()
        display(df.loc[best_idxs])
    print()

# Each Series becomes one row of the summary table.
best_df = pd.concat(best_rows, axis=1).T

architecture='fcn', method='adam'
Total: 180 experiments.
Total aggregated: 60 experiments.
Total time: 50.11 hours.
layer_width: proper grid for [4098  512 1024  256 2048]
learning_rate: proper grid for [5.e-04 5.e-05 1.e-04 1.e-05]
n_layers: proper grid for [1 4 2]
batch_size: proper grid for [64]

layer_width            1024
learning_rate       0.00005
n_layers                  2
batch_size               64
train_rel_l2       0.004398
val_rel_l2         0.004516
test_rel_l2         0.00448
time_spent       643.535432
architecture            fcn
method                 adam
Name: 28, dtype: object



Unnamed: 0,architecture,input_dim,layer_width,n_layers,batch_size,learning_rate,n_epochs,patience,print_every,weight_decay,train_rel_l2,val_rel_l2,test_rel_l2,time_spent,seed
5,fcn,256,1024,2,64,5e-05,2000,100,1,0.0,0.004687,0.004801,0.004764,557.902243,537118242
50,fcn,256,1024,2,64,5e-05,2000,100,1,0.0,0.00371,0.003847,0.003822,801.863134,460881177
86,fcn,256,1024,2,64,5e-05,2000,100,1,0.0,0.004797,0.0049,0.004853,570.840918,464913237



architecture='fcn', method='swim'
Total: 135 experiments.
Total aggregated: 45 experiments.
Total time: 0.23 hours.
layer_width: proper grid for [1024 2048 4096  256  512]
regularization_scale: proper grid for [1.e-10 1.e-06 1.e-08]
n_layers: proper grid for [2 1 4]

layer_width                  4096
regularization_scale          0.0
n_layers                        1
train_rel_l2             0.000273
val_rel_l2               0.000888
test_rel_l2              0.000915
time_spent              20.015505
architecture                  fcn
method                       swim
Name: 36, dtype: object



Unnamed: 0,architecture,input_dim,layer_width,n_layers,regularization_scale,train_rel_l2,val_rel_l2,test_rel_l2,time_spent,seed
2,fcn_swim,256,4096,1,1e-10,0.000276,0.000886,0.000917,17.328067,69874027
12,fcn_swim,256,4096,1,1e-10,0.000272,0.000886,0.000907,19.61193,481839053
21,fcn_swim,256,4096,1,1e-10,0.000272,0.000891,0.000922,23.106519,771787435



architecture='in_fourier', method='adam'
Total: 540 experiments.
Total aggregated: 180 experiments.
Total time: 399.15 hours.
layer_width: proper grid for [ 512 2048  256 4098 1024]
n_modes: proper grid for [ 8 32 16]
learning_rate: proper grid for [1.e-05 1.e-04 5.e-04 5.e-05]
n_layers: proper grid for [2 4 1]
batch_size: proper grid for [64]

layer_width             1024
n_modes                   16
learning_rate         0.0001
n_layers                   1
batch_size                64
train_rel_l2        0.002968
val_rel_l2          0.003325
test_rel_l2         0.003297
time_spent       1725.116813
architecture      in_fourier
method                  adam
Name: 90, dtype: object



Unnamed: 0,architecture,input_dim,layer_width,n_layers,n_modes,batch_size,learning_rate,n_epochs,patience,print_every,weight_decay,train_rel_l2,val_rel_l2,test_rel_l2,time_spent,seed
164,in_fourier,256,1024,1,16,64,0.0001,5000,200,1,0.0,0.003231,0.003608,0.003566,1405.434342,912849683
265,in_fourier,256,1024,1,16,64,0.0001,5000,200,1,0.0,0.002932,0.00328,0.003255,1593.957174,256844516
284,in_fourier,256,1024,1,16,64,0.0001,5000,200,1,0.0,0.002742,0.003087,0.00307,2175.958924,770301090



architecture='in_fourier', method='swim'
Total: 405 experiments.
Total aggregated: 135 experiments.
Total time: 0.54 hours.
layer_width: proper grid for [ 256  512 4096 2048 1024]
n_modes: proper grid for [32 16  8]
regularization_scale: proper grid for [1.e-10 1.e-06 1.e-08]
n_layers: proper grid for [1 4 2]

layer_width                   4096
n_modes                         16
regularization_scale           0.0
n_layers                         1
train_rel_l2              0.000447
val_rel_l2                0.001066
test_rel_l2               0.001088
time_spent               15.611909
architecture            in_fourier
method                        swim
Name: 117, dtype: object



Unnamed: 0,architecture,input_dim,layer_width,n_layers,n_modes,regularization_scale,train_rel_l2,val_rel_l2,test_rel_l2,time_spent,seed
302,in_fourier_swim,256,4096,1,16,1e-10,0.000447,0.001061,0.001085,15.379822,879312636
349,in_fourier_swim,256,4096,1,16,1e-10,0.000451,0.001086,0.001106,16.366146,824059326
383,in_fourier_swim,256,4096,1,16,1e-10,0.000443,0.00105,0.001075,15.08976,149249688



architecture='deeponet', method='adam'
Total: 540 experiments.
Total aggregated: 180 experiments.
Total time: 144.72 hours.
layer_width: proper grid for [2048 4098 1024  512  256]
n_modes: proper grid for [ 8 16 32]
learning_rate: proper grid for [5.e-05 5.e-04 1.e-04 1.e-05]
n_layers: proper grid for [4 2 1]
batch_size: proper grid for ['n/a']

layer_width             2048
n_modes                   32
learning_rate        0.00001
n_layers                   4
batch_size               n/a
train_rel_l2         0.00136
val_rel_l2          0.001639
test_rel_l2          0.00162
time_spent       4217.436366
architecture        deeponet
method                  adam
Name: 134, dtype: object



Unnamed: 0,architecture,input_dim,layer_width,n_layers,n_modes,batch_size,learning_rate,n_epochs,patience,weight_decay,train_rel_l2,val_rel_l2,test_rel_l2,time_spent,seed
140,deeponet,256,2048,4,32,,1e-05,90000,4500,0,0.001366,0.001648,0.001624,4254.221551,33050049
155,deeponet,256,2048,4,32,,1e-05,90000,4500,0,0.001369,0.001642,0.001633,4103.091957,804004940
196,deeponet,256,2048,4,32,,1e-05,90000,4500,0,0.001346,0.001628,0.001604,4294.99559,535726003



architecture='deeponet', method='swim'
Total: 405 experiments.
Total aggregated: 135 experiments.
Total time: 1.22 hours.
layer_width: proper grid for [ 256  512 4096 2048 1024]
n_modes: proper grid for [32  8 16]
regularization_scale: proper grid for [1.e-10 1.e-06 1.e-08]
n_layers: proper grid for [1 2 4]

layer_width                  4096
n_modes                        16
regularization_scale          0.0
n_layers                        1
train_rel_l2             0.000535
val_rel_l2               0.000834
test_rel_l2              0.000852
time_spent              20.613144
architecture             deeponet
method                       swim
Name: 117, dtype: object



Unnamed: 0,architecture,input_dim,layer_width,n_layers,n_modes,regularization_scale,train_rel_l2,val_rel_l2,test_rel_l2,time_spent,seed
5,deeponet_swim,256,4096,1,16,1e-10,0.000534,0.000828,0.000851,20.205654,429678827
34,deeponet_swim,256,4096,1,16,1e-10,0.000536,0.000841,0.000852,20.502618,987678834
63,deeponet_swim,256,4096,1,16,1e-10,0.000535,0.000833,0.000853,21.131159,898112117



architecture='fno', method='adam'
Total: 216 experiments.
Total aggregated: 72 experiments.
Total time: 114.10 hours.
n_hidden_channels: proper grid for [32 16]
n_modes: proper grid for [ 8 32 16]
learning_rate: proper grid for [0.0001 0.005  0.0005 0.001 ]
n_layers: proper grid for [1 2 4]
batch_size: proper grid for [64]

n_hidden_channels             16
n_modes                       32
learning_rate              0.005
n_layers                       4
batch_size                    64
train_rel_l2            0.000361
val_rel_l2              0.000379
test_rel_l2             0.000378
time_spent           3119.290006
architecture                 fno
method                      adam
Name: 35, dtype: object



Unnamed: 0,architecture,n_hidden_channels,n_layers,n_modes,batch_size,learning_rate,n_epochs,patience,weight_decay,train_rel_l2,val_rel_l2,test_rel_l2,time_spent,seed
20,fno,16,4,32,64,0.005,2000,100,0.0,0.000348,0.000364,0.00036,3110.757023,168706123
21,fno,16,4,32,64,0.005,2000,100,0.0,0.000361,0.000381,0.000379,3494.247692,246876690
31,fno,16,4,32,64,0.005,2000,100,0.0,0.000375,0.000391,0.000394,2752.865303,660343239



architecture='fno', method='swim'
Total: 810 experiments.
Total aggregated: 270 experiments.
Total time: 51.37 hours.
n_hidden_channels: proper grid for [16 32]
n_modes: proper grid for [16  8 32]
regularization_scale: proper grid for [1.e-06 1.e-10 1.e-08]
n_layers: proper grid for [4 1 2]
layer_width: proper grid for [ 512 4096  256 2048 1024]

n_hidden_channels               32
n_modes                          8
regularization_scale           0.0
n_layers                         1
layer_width                   4096
train_rel_l2              0.000275
val_rel_l2                0.000898
test_rel_l2               0.000935
time_spent              387.424962
architecture                   fno
method                        swim
Name: 139, dtype: object



Unnamed: 0,architecture,layer_width,n_hidden_channels,n_layers,n_modes,regularization_scale,train_rel_l2,val_rel_l2,test_rel_l2,time_spent,seed
4,fno_swim,4096,32,1,8,1e-10,0.000276,0.00091,0.000944,362.902512,997135158
63,fno_swim,4096,32,1,8,1e-10,0.000274,0.000889,0.000919,348.563952,156838376
124,fno_swim,4096,32,1,8,1e-10,0.000275,0.000894,0.000943,450.808422,407566386





In [4]:
# Best configuration of every architecture for the "adam" method.
adam_best = best_df.query("method=='adam'")
adam_best

Unnamed: 0,layer_width,learning_rate,n_layers,batch_size,train_rel_l2,val_rel_l2,test_rel_l2,time_spent,architecture,method,regularization_scale,n_modes,n_hidden_channels
28,1024.0,5e-05,2,64.0,0.004398,0.004516,0.00448,643.535432,fcn,adam,,,
90,1024.0,0.0001,1,64.0,0.002968,0.003325,0.003297,1725.116813,in_fourier,adam,,16.0,
134,2048.0,1e-05,4,,0.00136,0.001639,0.00162,4217.436366,deeponet,adam,,32.0,
35,,0.005,4,64.0,0.000361,0.000379,0.000378,3119.290006,fno,adam,,32.0,16.0


In [5]:
# Best configuration of every architecture for the "swim" method.
swim_best = best_df.query("method=='swim'")
swim_best

Unnamed: 0,layer_width,learning_rate,n_layers,batch_size,train_rel_l2,val_rel_l2,test_rel_l2,time_spent,architecture,method,regularization_scale,n_modes,n_hidden_channels
36,4096,,1,,0.000273,0.000888,0.000915,20.015505,fcn,swim,0.0,,
117,4096,,1,,0.000447,0.001066,0.001088,15.611909,in_fourier,swim,0.0,16.0,
117,4096,,1,,0.000535,0.000834,0.000852,20.613144,deeponet,swim,0.0,16.0,
139,4096,,1,,0.000275,0.000898,0.000935,387.424962,fno,swim,0.0,8.0,32.0
