In [1]:
import numpy as np
import pandas as pd

# Generic Functions

In [5]:
def get_full_mse_df(npy_file):
    """Load a saved MSE array and return it as a per-iteration data frame.

    The array stored in ``npy_file`` is transposed before being wrapped in a
    DataFrame, so each row of the saved array becomes one integer-labelled
    column of the result.  A 1-based ``"iters"`` counter is placed in front
    of those columns.

    Parameters
    ----------
    npy_file : str or path-like
        Location of the ``.npy`` file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``"iters", 0, 1, ...`` with one row per entry along the
        saved array's last axis.
    """
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
    # only point this at files you produced yourself.
    mse_history = np.load(npy_file, allow_pickle=True)
    mse_df = pd.DataFrame(mse_history.T)
    # Row labels start at 0, so the iteration counter is the label plus one.
    mse_df.insert(0, "iters", mse_df.index + 1)
    return mse_df

def get_summary_mse_df(full_df, n_reps=10):
    """Summarise each repetition's validation-MSE curve in a single row.

    Parameters
    ----------
    full_df : pandas.DataFrame
        Frame with one column per repetition, labelled with the integers
        ``0, 1, ...`` (the layout produced by ``get_full_mse_df``), and one
        row per iteration in training order.  Other columns, such as
        ``"iters"``, are ignored.
    n_reps : int or None, default 10
        Number of repetitions to summarise; columns ``0 .. n_reps - 1`` are
        read.  Pass ``None`` to summarise every column other than
        ``"iters"``.

    Returns
    -------
    pandas.DataFrame
        One row per repetition with the columns ``min_val_mse`` (smallest
        MSE), ``min_mse_iters`` (1-based row position of its first
        occurrence), ``final_val_mse`` (MSE in the last row) and
        ``final_mse_iters`` (number of rows).

    Raises
    ------
    KeyError
        If a requested repetition column is absent from ``full_df``.
    ValueError
        If repetitions are requested but ``full_df`` has no rows.
    """
    if n_reps is None:
        rep_cols = [col for col in full_df.columns if col != "iters"]
    else:
        rep_cols = list(range(n_reps))

    # Fail early with a message that names the problem instead of a bare
    # KeyError from deep inside the loop.
    missing = [col for col in rep_cols if col not in full_df.columns]
    if missing:
        raise KeyError(
            f"full_df has no column(s) {missing}; it holds fewer than "
            f"n_reps={n_reps} repetitions"
        )

    n_iters = len(full_df)
    if rep_cols and n_iters == 0:
        raise ValueError("full_df has no rows, so there is no MSE to summarise")

    rows = []
    for col in rep_cols:
        mse = full_df[col].to_numpy(dtype=float)
        # argmin gives the first position of the minimum (0-based).
        best = int(np.argmin(mse))
        rows.append(
            {
                "min_val_mse": mse[best],
                "min_mse_iters": best + 1,
                "final_val_mse": mse[-1],
                "final_mse_iters": n_iters,
            }
        )

    return pd.DataFrame(
        rows,
        columns=["min_val_mse", "min_mse_iters", "final_val_mse", "final_mse_iters"],
    )

# 1. EXP

## 1.1 No noise

We obtain very good results across all seeds.

In [3]:
# Load the validation MSE history for the EXP runs: one row per iteration,
# one integer-labelled column per seed, plus a 1-based "iters" column.
exp_full = get_full_mse_df("rand_reps_exp.npy")
exp_full

Unnamed: 0,iters,0,1,2,3,4,5,6,7,8,9
0,1,4.746557e-01,4.746606e-01,4.746538e-01,4.746460e-01,4.746497e-01,4.746439e-01,4.746561e-01,4.746522e-01,4.746467e-01,4.746547e-01
1,2,4.400572e-01,4.400290e-01,4.400440e-01,4.401067e-01,4.401218e-01,4.401127e-01,4.400697e-01,4.401096e-01,4.401856e-01,4.400700e-01
2,3,3.427650e-01,3.426855e-01,3.428038e-01,3.429307e-01,3.429035e-01,3.429601e-01,3.427519e-01,3.428769e-01,3.430994e-01,3.428497e-01
3,4,1.779996e-01,1.779640e-01,1.780993e-01,1.782105e-01,1.781251e-01,1.782709e-01,1.780350e-01,1.781716e-01,1.783883e-01,1.782191e-01
4,5,2.074710e-02,2.066731e-02,2.079214e-02,2.081237e-02,2.075213e-02,2.084445e-02,2.072191e-02,2.081529e-02,2.084613e-02,2.085380e-02
...,...,...,...,...,...,...,...,...,...,...,...
1995,1996,1.676114e-13,2.782214e-13,6.466612e-11,2.408794e-14,2.812768e-11,4.142476e-13,3.408198e-13,1.026583e-14,7.651774e-11,8.673304e-11
1996,1997,1.676114e-13,2.782214e-13,6.466612e-11,2.408794e-14,2.812768e-11,4.142476e-13,3.408198e-13,1.026583e-14,7.651774e-11,8.673304e-11
1997,1998,1.676114e-13,2.782214e-13,6.466612e-11,2.408794e-14,2.812768e-11,4.142476e-13,3.408198e-13,1.026583e-14,7.651774e-11,8.673304e-11
1998,1999,1.676114e-13,4.382175e-13,6.466612e-11,2.408794e-14,2.812768e-11,4.142476e-13,3.408198e-13,1.026583e-14,7.651774e-11,8.673304e-11


In [4]:
# Per-seed summary of the EXP runs: lowest validation MSE, the iteration at
# which it occurred, and the MSE at the final iteration.
exp_summary = get_summary_mse_df(exp_full)
exp_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,1.004337e-13,649,1.676114e-13,2000
1,2.727069e-13,648,4.382175e-13,2000
2,5.830798e-11,731,6.466612e-11,2000
3,2.408794e-14,1063,2.408794e-14,2000
4,2.308548e-11,859,2.812768e-11,2000
5,2.334651e-13,736,4.142476e-13,2000
6,2.776882e-13,738,3.408198e-13,2000
7,1.026583e-14,949,1.026583e-14,2000
8,7.093053e-11,884,7.651774e-11,2000
9,7.687641e-11,705,8.673304e-11,2000


## 1.2 Noise after tuning

Noise improves the results, lowering the mean validation MSE by an order of magnitude.

In [5]:
# Per-seed summary of validation MSE for the EXP runs with noise.
# Note: this reassigns exp_full and exp_summary, replacing the no-noise
# results loaded in section 1.1.
exp_full = get_full_mse_df("rand_reps_exp_noise_1.npy")
exp_summary = get_summary_mse_df(exp_full)
exp_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,2.586559e-13,833,3.727642e-13,2000
1,3.867184e-14,743,5.821585e-13,2000
2,7.70127e-12,746,9.191179e-12,2000
3,3.67696e-12,825,5.102898e-12,2000
4,4.728251e-13,909,5.293022e-13,2000
5,5.802772e-13,898,6.475366e-13,2000
6,1.655414e-14,542,5.807018e-13,2000
7,3.664431e-13,926,3.695142e-13,2000
8,5.022975e-14,738,4.158357e-13,2000
9,5.734974e-13,704,6.832889e-13,2000


## 1.3 Adam (no noise)

In [7]:
# Per-seed summary of validation MSE for the EXP runs trained with Adam.
# Note: this reassigns exp_full and exp_summary from the earlier sections.
# NOTE(review): n_reps=20 reads seed columns 0-19, yet the saved output of
# this cell lists only rows 0-9 — confirm how many seeds the file holds.
exp_full = get_full_mse_df("rand_reps_exp_adam.npy")
exp_summary = get_summary_mse_df(exp_full, n_reps=20)
exp_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,3.18235e-16,760,1.291466e-14,1000
1,3.18235e-16,758,3.18235e-16,1000
2,3.18235e-16,583,3.18235e-16,1000
3,3.18235e-16,754,1.291466e-14,1000
4,3.18235e-16,843,3.18235e-16,1000
5,3.18235e-16,737,1.294608e-14,1000
6,3.18235e-16,804,3.18235e-16,1000
7,3.18235e-16,719,3.18235e-16,1000
8,3.18235e-16,790,3.18235e-16,1000
9,3.18235e-16,813,3.18235e-16,1000


## 1.4 Adam (noise after tuning)

In [19]:
# Per-seed summary of validation MSE for the EXP runs trained with Adam and
# noise.
# Note: this reassigns exp_full and exp_summary from the earlier sections.
# NOTE(review): n_reps=20 reads seed columns 0-19, yet the saved output of
# this cell lists only rows 0-9 — confirm how many seeds the file holds.
exp_full = get_full_mse_df("rand_reps_exp_adam_noise.npy")
exp_summary = get_summary_mse_df(exp_full, n_reps=20)
exp_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,3.18235e-16,760,4.948368e-14,1000
1,3.18235e-16,800,3.18235e-16,1000
2,3.18235e-16,827,3.18235e-16,1000
3,3.18235e-16,781,1.294608e-14,1000
4,3.18235e-16,711,1.294608e-14,1000
5,3.18235e-16,795,1.294608e-14,1000
6,3.18235e-16,805,1.291466e-14,1000
7,3.18235e-16,736,3.18235e-16,1000
8,3.18235e-16,802,3.18235e-16,1000
9,3.18235e-16,827,3.18235e-16,1000


# 2. SHO

## 2.1 No noise

We obtain generally very good results, but two seeds (6 and 8) fail to converge: their validation MSE stays many orders of magnitude above that of the other seeds.

In [6]:
# Per-seed summary of validation MSE for the SHO runs without noise: lowest
# MSE, the iteration at which it occurred, and the MSE at the final iteration.
sho_full = get_full_mse_df("rand_reps_sho.npy")
sho_summary = get_summary_mse_df(sho_full)
sho_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,8.249775e-11,4029,4.255164e-10,10000
1,4.853613e-11,2880,5.102058e-10,10000
2,5.275599e-11,8734,2.705398e-10,10000
3,2.544008e-11,8978,2.752317e-10,10000
4,3.127805e-12,6542,1.093761e-10,10000
5,3.773394e-11,5244,5.315696e-10,10000
6,0.0384667,1018,0.4819122,10000
7,1.22259e-11,5243,1.070111e-10,10000
8,0.001605145,9918,0.001608532,10000
9,5.634251e-12,8863,5.726357e-12,10000


## 2.2 Noise after tuning

Noise improves stability of results, fixing the two seeds that performed poorly.

In [7]:
# Per-seed summary of validation MSE for the SHO runs with noise.
# Note: this reassigns sho_full and sho_summary, replacing the no-noise
# results loaded in section 2.1.
sho_full = get_full_mse_df("rand_reps_sho_noise_1.npy")
sho_summary = get_summary_mse_df(sho_full)
sho_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,4.84577e-11,3645,3.738518e-10,10000
1,1.262656e-11,5537,3.613082e-11,10000
2,1.265906e-10,4614,1.649324e-10,10000
3,1.639964e-11,2948,1.63422e-10,10000
4,3.147885e-11,3560,3.168681e-10,10000
5,3.890491e-11,2498,1.417493e-10,10000
6,1.745135e-11,4819,3.134744e-11,10000
7,5.686159e-11,2408,2.173837e-10,10000
8,5.640133e-11,5849,5.685734e-11,10000
9,1.125001e-11,5821,2.973742e-11,10000


## 2.3 Adam (no noise)

In [10]:
# Per-seed summary of validation MSE for the SHO runs trained with Adam.
# Note: this reassigns sho_full and sho_summary from the earlier sections.
# NOTE(review): n_reps=20 reads seed columns 0-19, yet the saved output of
# this cell lists only rows 0-9 — confirm how many seeds the file holds.
sho_full = get_full_mse_df("rand_reps_sho_adam.npy")
sho_summary = get_summary_mse_df(sho_full, n_reps=20)
sho_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,6.949882e-13,9640,2.646104e-12,10000
1,1.059259e-12,9841,9.952952e-12,10000
2,2.800808e-12,9212,5.77231e-11,10000
3,1.161548e-12,9237,4.432507e-12,10000
4,8.455382e-13,9955,1.800132e-12,10000
5,1.034651e-12,9301,2.239698e-12,10000
6,1.020633e-12,8834,3.868005e-11,10000
7,1.568049e-12,9681,8.094686e-12,10000
8,6.69556e-13,9917,1.932253e-12,10000
9,6.603821e-13,9552,1.280856e-12,10000


## 2.4 Adam (noise after tuning)

In [18]:
# Per-seed summary of validation MSE for the SHO runs trained with Adam and
# noise.
# Note: this reassigns sho_full and sho_summary from the earlier sections.
# NOTE(review): n_reps=20 reads seed columns 0-19, yet the saved output of
# this cell lists only rows 0-9 — confirm how many seeds the file holds.
sho_full = get_full_mse_df("rand_reps_sho_adam_noise.npy")
sho_summary = get_summary_mse_df(sho_full, n_reps=20)
sho_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,9.539417e-13,9959,5.610926e-12,10000
1,1.102549e-12,9982,5.762891e-12,10000
2,3.191839e-13,9682,1.90726e-12,10000
3,7.990339e-13,9835,1.126042e-11,10000
4,1.721623e-12,8985,1.03239e-11,10000
5,2.79375e-12,9887,2.985531e-11,10000
6,8.44957e-13,9633,2.089088e-12,10000
7,4.482795e-13,9665,1.122717e-11,10000
8,8.3773e-13,9239,1.172766e-11,10000
9,1.158776e-12,9915,4.474249e-12,10000


# 3. NLO

## 3.1 No noise

In [8]:
# Per-seed summary of validation MSE for the NLO runs without noise: lowest
# MSE, the iteration at which it occurred, and the MSE at the final iteration.
nlo_full = get_full_mse_df("rand_reps_nlo.npy")
nlo_summary = get_summary_mse_df(nlo_full)
nlo_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,2.012188e-08,19565,2.487858e-08,20000
1,2.059821e-08,19760,2.794884e-08,20000
2,1.85795e-08,19560,3.264189e-08,20000
3,2.106556e-08,19685,3.357819e-08,20000
4,1.908642e-08,19928,2.902942e-08,20000
5,1.946937e-08,19783,2.553012e-08,20000
6,1.920052e-08,18827,3.193488e-08,20000
7,1.89341e-08,18812,3.297774e-08,20000
8,2.076334e-08,19205,2.788512e-08,20000
9,2.094348e-08,18953,2.380631e-08,20000


## 3.2 Noise after tuning

In [9]:
# Per-seed summary of validation MSE for the NLO runs with noise.
# Note: this reassigns nlo_full and nlo_summary, replacing the no-noise
# results loaded in section 3.1.
nlo_full = get_full_mse_df("rand_reps_nlo_noise_1.npy")
nlo_summary = get_summary_mse_df(nlo_full)
nlo_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,3.06702e-08,19997,4.039383e-08,20000
1,2.506759e-08,19626,4.16063e-08,20000
2,2.539452e-08,19822,4.003948e-08,20000
3,1.66953e-08,19181,2.651417e-08,20000
4,2.02534e-08,19921,2.22347e-08,20000
5,1.822054e-08,19657,2.705967e-08,20000
6,2.251306e-08,19973,3.488466e-08,20000
7,1.463856e-08,19331,2.43131e-08,20000
8,2.677572e-08,19958,3.263035e-08,20000
9,2.839763e-08,19720,4.181555e-08,20000


## 3.3 Adam (no noise)

In [12]:
# Per-seed summary of validation MSE for the NLO runs trained with Adam.
# Note: this reassigns nlo_full and nlo_summary from the earlier sections.
# NOTE(review): n_reps=20 reads seed columns 0-19, yet the saved output of
# this cell lists only rows 0-9 — confirm how many seeds the file holds.
nlo_full = get_full_mse_df("rand_reps_nlo_adam.npy")
nlo_summary = get_summary_mse_df(nlo_full, n_reps=20)
nlo_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,3.032193e-12,8939,3.971227e-12,20000
1,6.134351e-12,7956,1.39206e-11,20000
2,2.730815e-12,8652,8.541823e-12,20000
3,1.463018e-12,9031,2.173762e-12,20000
4,8.591035e-13,9883,1.983389e-12,20000
5,2.966206e-12,8043,1.304809e-11,20000
6,0.03185039,78,1.051502,20000
7,1.897671e-12,9506,2.77965e-12,20000
8,1.559392e-12,10389,1.851765e-12,20000
9,6.431882e-10,8652,7.880649e-10,20000


## 3.4 Adam (noise after tuning)

In [20]:
# Per-seed summary of validation MSE for the NLO runs trained with Adam and
# noise.
# Note: this reassigns nlo_full and nlo_summary from the earlier sections.
# NOTE(review): n_reps=20 reads seed columns 0-19, yet the saved output of
# this cell lists only rows 0-9 — confirm how many seeds the file holds.
nlo_full = get_full_mse_df("rand_reps_nlo_adam_noise.npy")
nlo_summary = get_summary_mse_df(nlo_full, n_reps=20)
nlo_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,1.374536e-12,9531,1.944244e-12,20000
1,3.891232e-12,9292,5.529722e-12,20000
2,4.820108e-12,8506,1.052817e-11,20000
3,3.752197e-12,8699,6.965036e-12,20000
4,8.735825e-12,7488,1.502813e-11,20000
5,2.563437e-12,9606,4.076467e-12,20000
6,2.301169e-11,8412,6.29429e-11,20000
7,9.646977e-12,7923,1.283425e-11,20000
8,4.922947e-12,8420,1.13149e-11,20000
9,0.03548549,41,1.05031,20000


# 4. POS

## 4.1 No noise

We obtain very good and consistent results across all seeds.

In [10]:
# Per-seed summary of validation MSE for the POS runs without noise: lowest
# MSE, the iteration at which it occurred, and the MSE at the final iteration.
pos_full = get_full_mse_df("rand_reps_pos.npy")
pos_summary = get_summary_mse_df(pos_full)
pos_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,7.625418e-12,3984,8.49173e-12,4000
1,5.861788e-12,3969,5.963139e-12,4000
2,8.61701e-12,3981,9.597004e-12,4000
3,1.076432e-11,3937,1.103256e-11,4000
4,1.346913e-11,3976,1.600508e-11,4000
5,1.079694e-11,3950,1.255131e-11,4000
6,2.888076e-12,3981,3.012331e-12,4000
7,5.713636e-12,3874,5.973069e-12,4000
8,1.248126e-11,3997,1.312948e-11,4000
9,4.187312e-11,3983,4.507396e-11,4000


## 4.2 Noise after tuning

In this case, adding noise lowered solution accuracy, although the results are stable.

In [11]:
# Per-seed summary of validation MSE for the POS runs with noise.
# Note: this reassigns pos_full and pos_summary, replacing the no-noise
# results loaded in section 4.1.
pos_full = get_full_mse_df("rand_reps_pos_noise_1.npy")
pos_summary = get_summary_mse_df(pos_full)
pos_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,1.203856e-09,3989,1.215394e-09,4000
1,4.947863e-10,3559,5.00595e-10,4000
2,1.806374e-10,3803,1.912235e-10,4000
3,1.363557e-10,3929,1.440514e-10,4000
4,4.396103e-10,3970,4.471216e-10,4000
5,8.872748e-10,3849,9.099729e-10,4000
6,6.134688e-10,3986,6.166037e-10,4000
7,1.333717e-10,3997,1.34159e-10,4000
8,9.69219e-10,3989,9.913105e-10,4000
9,1.117513e-09,3924,1.127788e-09,4000


## 4.3 Adam (no noise)

In [11]:
# Per-seed summary of validation MSE for the POS runs trained with Adam.
# Note: this reassigns pos_full and pos_summary from the earlier sections.
# NOTE(review): n_reps=20 reads seed columns 0-19, yet the saved output of
# this cell lists only rows 0-9 — confirm how many seeds the file holds.
pos_full = get_full_mse_df("rand_reps_pos_adam.npy")
pos_summary = get_summary_mse_df(pos_full, n_reps=20)
pos_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,1.050902e-12,2568,1.436877e-12,4000
1,1.707731e-12,2425,2.146807e-12,4000
2,7.787284e-13,2475,1.124654e-12,4000
3,1.082065e-12,2309,1.140577e-12,4000
4,9.097692e-13,2395,1.005552e-12,4000
5,5.054767e-13,2373,5.399321e-13,4000
6,1.063744e-12,2232,2.098965e-12,4000
7,8.850637e-13,2531,1.581557e-12,4000
8,8.161408e-13,2292,8.734907e-13,4000
9,1.625556e-12,2420,2.124601e-12,4000


## 4.4 Adam (noise after tuning)

In [17]:
# Per-seed summary of validation MSE for the POS runs trained with Adam and
# noise.
# Note: this reassigns pos_full and pos_summary from the earlier sections.
# NOTE(review): n_reps=20 reads seed columns 0-19, yet the saved output of
# this cell lists only rows 0-9 — confirm how many seeds the file holds.
pos_full = get_full_mse_df("rand_reps_pos_adam_noise.npy")
pos_summary = get_summary_mse_df(pos_full, n_reps=20)
pos_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,7.243365e-13,2428,7.368961e-13,4000
1,9.406646e-13,2429,1.018812e-12,4000
2,3.482194e-12,2627,3.516865e-12,4000
3,8.932084e-13,2729,9.842007e-13,4000
4,1.024856e-12,2149,3.209966e-12,4000
5,1.155631e-12,2511,1.444044e-12,4000
6,6.549532e-13,2323,6.722455e-13,4000
7,4.322165e-13,2472,4.972379e-13,4000
8,6.443785e-13,2437,1.155068e-12,4000
9,1.164365e-12,2327,2.764802e-12,4000


# 5. SIR

## 5.1 No noise (Adam optimizer)

In [21]:
# Per-seed summary of validation MSE for the SIR runs trained with Adam,
# without noise.
# NOTE(review): n_reps=20 reads seed columns 0-19, yet the saved output of
# this cell lists only rows 0-9 — confirm how many seeds the file holds.
sir_full = get_full_mse_df("rand_reps_sir_adam.npy")
sir_summary = get_summary_mse_df(sir_full, n_reps=20)
sir_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,1.165608e-09,26379,1.333045e-09,30000
1,0.001662751,3975,0.3875408,30000
2,0.002692391,29183,0.002790096,30000
3,1.980816e-09,29898,2.0466e-09,30000
4,2.755768e-08,26778,3.059807e-08,30000
5,1.993265e-07,15263,2.452545e-07,30000
6,6.597986e-07,22483,9.111996e-07,30000
7,1.541048e-08,20241,5.142203e-08,30000
8,1.707286e-08,29994,1.725338e-08,30000
9,2.210747e-09,27238,2.774004e-09,30000


## 5.2 Noise after tuning (Adam optimizer)

In [24]:
# Per-seed summary of validation MSE for the SIR runs trained with Adam and
# noise.
# Note: this reassigns sir_full and sir_summary, replacing the no-noise
# results loaded in section 5.1.
# NOTE(review): n_reps=20 reads seed columns 0-19, yet the saved output of
# this cell lists only rows 0-9 — confirm how many seeds the file holds.
sir_full = get_full_mse_df("rand_reps_sir_adam_noise.npy")
sir_summary = get_summary_mse_df(sir_full, n_reps=20)
sir_summary

Unnamed: 0,min_val_mse,min_mse_iters,final_val_mse,final_mse_iters
0,1.458922e-07,26345,4.847688e-07,30000
1,2.953224e-08,29977,3.050655e-08,30000
2,5.172009e-09,29989,5.268113e-09,30000
3,1.055127e-07,29993,1.070018e-07,30000
4,9.305415e-09,29943,9.53778e-09,30000
5,1.542368e-08,29991,1.571501e-08,30000
6,5.377069e-07,29943,5.426709e-07,30000
7,0.0007139307,30000,0.0007139307,30000
8,8.494647e-09,29098,9.117068e-09,30000
9,4.053935e-08,29371,4.183498e-08,30000


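We want to bridge the gap between Adam and SGD so that we can obtain good results using SGD. In PyTorch, the weight update for SGD with momentum is shown below; with Nesterov's momentum (`nesterov=True`), the step direction $v_{t+1}$ in the second line is replaced by $g_{t+1} + p\,v_{t+1}$.

$$
\begin{aligned}
v_{t+1} &= p\,v_t + g_{t+1} \\
w_{t+1} &= w_t - \eta\,v_{t+1}
\end{aligned}
$$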

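where $p$ is the momentum, $\eta$ is the learning rate and $g_{t+1}$ is the gradient. For Adam, the update is

$$
\begin{aligned}
m_{t+1} &= \beta_1 m_t + (1-\beta_1)\,g_{t+1} \\
v_{t+1} &= \beta_2 v_t + (1-\beta_2)\,g_{t+1}^2 \\
\hat{m}_{t+1} &= \frac{m_{t+1}}{1-\beta_1^{t+1}}, \qquad \hat{v}_{t+1} = \frac{v_{t+1}}{1-\beta_2^{t+1}} \\
w_{t+1} &= w_t - \frac{\eta}{\sqrt{\hat{v}_{t+1}} + \epsilon}\,\hat{m}_{t+1}
\end{aligned}
$$

Note that the bias correction is applied only to the copies $\hat{m}$ and $\hat{v}$ used in the step, not to the running averages $m$ and $v$ themselves.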

For both Adam and SGD, we apply a `StepLR` scheduler that decays $\eta$ by a factor of $\gamma$ every `step_size` iterations, i.e. $\eta_{t+1} = \gamma\,\eta_t$ when $t+1$ is a multiple of `step_size`, and $\eta_{t+1} = \eta_t$ otherwise.

Pavlos' suggestions:
- We want the effective learning rate of the two optimizers to look the same!
- Plot the effective learning rate vs iterations for SGD - the bad ones should go down really fast (see picture)
- Sanity check: record the effective learning rate of Adam at fixed iterations and run SGD, setting the effective learning rate of SGD to that of Adam (hack)
- Play around with the equations more - we want to find out how to use the parameters of SGD to mimic the parameters of Adam *exactly*.

1. Still need to get good hypers for RANS
2. How should we demonstrate the increased stability of DEQGAN + noise/early abandonment?
3. Transfer learning on hyperparameters: I have all these results of hyperparameter tuning, can we use them to learn good hyperparameters for the next experiments?
4. Next problems: COO, wave equation, Burgers' equation + ping David