In [22]:
import torch
from scipy.io import loadmat
import numpy as np

def w_v_norms(path, w_keys=('W_0', 'W_1'), v_keys=('V_0', 'V_1')):
    """Compare the summed squared entries of two groups of arrays in a .mat file.

    The file at ``path`` is read with ``scipy.io.loadmat``. For each group of
    keys, the squared entries of every listed array are added up. Note that
    these totals are sums of squares (no square root is taken).

    Args:
        path: Location of the .mat file to load.
        w_keys: Names of the arrays accumulated into the ``'w'`` total.
            Defaults to ``('W_0', 'W_1')``.
        v_keys: Names of the arrays accumulated into the ``'v'`` total.
            Defaults to ``('V_0', 'V_1')``.

    Returns:
        A dict of plain Python floats with the keys
        ``'w'`` (total for ``w_keys``), ``'v'`` (total for ``v_keys``) and
        ``'rel. diff'`` (``|w - v| / w``). When ``w`` is zero the relative
        difference is ``nan`` if ``v`` is zero as well, otherwise ``inf``.

    Raises:
        KeyError: If one of the requested keys is not present in the file.
    """
    mat = loadmat(path)

    def _sum_of_squares(keys):
        # Each partial sum is converted to a Python float before accumulating,
        # so the result does not depend on how the installed NumPy version
        # promotes a Python int combined with a float32 scalar.
        return float(sum(float(np.sum(np.power(mat[key], 2))) for key in keys))

    w_total = _sum_of_squares(w_keys)
    v_total = _sum_of_squares(v_keys)

    gap = abs(w_total - v_total)
    if w_total == 0:
        # Plain float division would raise ZeroDivisionError; keep the values
        # NumPy scalar division produces (0/0 -> nan, x/0 -> inf).
        rel_diff = float('nan') if gap == 0 else float('inf')
    else:
        rel_diff = gap / w_total

    return {'w': w_total, 'v': v_total, 'rel. diff': rel_diff}

# Result files whose names end in E=200000, one per regularization setup.
# The settings quoted in each comment are the ones encoded in the file name.
epochs_200k = [
    # builtin pytorch weight decay, WD=0.001
    "experiments/final/all_terms_more_training/D=4_R=1296_L=16_LR=0.001_WD=0.001_Term=3_Layers=2_Lam=0_E=200000.mat",
    # custom weight decay on all trainable parameters, Lam=0.001 (builtin WD=0)
    "experiments/final/all_terms_more_training/D=4_R=1296_L=16_LR=0.001_WD=0_Term=4_Layers=2_Lam=0.001_E=200000.mat",
    # custom reg term 1 from paper, lambda=0.01 (file name says Lam=0.01)
    "experiments/final/all_terms_more_training/D=4_R=1296_L=16_LR=0.001_WD=0_Term=1_Layers=2_Lam=0.01_E=200000.mat",
    # custom reg term 2 from paper, lambda=0.01 (file name says Lam=0.01)
    "experiments/final/all_terms_more_training/D=4_R=1296_L=16_LR=0.001_WD=0_Term=2_Layers=2_Lam=0.01_E=200000.mat",
]

# Result files whose names end in E=50000, one per regularization setup.
# The settings quoted in each comment are the ones encoded in the file name.
epochs_50k = [
    # builtin pytorch weight decay, WD=0.001
    "experiments/final/all_terms/D=4_R=1296_L=16_LR=0.001_WD=0.001_Term=3_Layers=2_Lam=0_E=50000.mat",
    # custom weight decay on all trainable parameters, Lam=0.001 (builtin WD=0)
    "experiments/final/all_terms/D=4_R=1296_L=16_LR=0.001_WD=0_Term=4_Layers=2_Lam=0.001_E=50000.mat",
    # custom reg term 1 from paper, lambda=0.01 (file name says Lam=0.01)
    "experiments/final/all_terms/D=4_R=1296_L=16_LR=0.001_WD=0_Term=1_Layers=2_Lam=0.01_E=50000.mat",
    # custom reg term 2 from paper, lambda=0.01 (file name says Lam=0.01)
    "experiments/final/all_terms/D=4_R=1296_L=16_LR=0.001_WD=0_Term=2_Layers=2_Lam=0.01_E=50000.mat",
]

In [25]:
# Print the w/v summary for every file listed in epochs_200k.
for path in epochs_200k:
    # The file name goes out first, before the file is loaded.
    print(path)
    # Summary dict and separator, each on its own line.
    print(w_v_norms(path), "========", sep="\n")

experiments/final/all_terms_more_training/D=4_R=1296_L=16_LR=0.001_WD=0.001_Term=3_Layers=2_Lam=0_E=200000.mat
{'w': 14.429049015045166, 'v': 15.239691734313965, 'rel. diff': 0.05618129915724466}
experiments/final/all_terms_more_training/D=4_R=1296_L=16_LR=0.001_WD=0_Term=4_Layers=2_Lam=0.001_E=200000.mat
{'w': 14.359792232513428, 'v': 15.233931541442871, 'rel. diff': 0.060874091684294564}
experiments/final/all_terms_more_training/D=4_R=1296_L=16_LR=0.001_WD=0_Term=1_Layers=2_Lam=0.01_E=200000.mat
{'w': 0.9124084152444993, 'v': 0.8236832364589475, 'rel. diff': 0.0972428325990133}
experiments/final/all_terms_more_training/D=4_R=1296_L=16_LR=0.001_WD=0_Term=2_Layers=2_Lam=0.01_E=200000.mat
{'w': 0.8928995443993699, 'v': 0.7939818986728824, 'rel. diff': 0.1107825021828484}


In [26]:
# Print the w/v summary for every file listed in epochs_50k.
for path in epochs_50k:
    # The file name goes out first, before the file is loaded.
    print(path)
    # Summary dict and separator, each on its own line.
    print(w_v_norms(path), "========", sep="\n")

experiments/final/all_terms/D=4_R=1296_L=16_LR=0.001_WD=0.001_Term=3_Layers=2_Lam=0_E=50000.mat
{'w': 14.52377700805664, 'v': 15.26730728149414, 'rel. diff': 0.05119400229190026}
experiments/final/all_terms/D=4_R=1296_L=16_LR=0.001_WD=0_Term=4_Layers=2_Lam=0.001_E=50000.mat
{'w': 14.438959121704102, 'v': 15.22424030303955, 'rel. diff': 0.05438627360299428}
experiments/final/all_terms/D=4_R=1296_L=16_LR=0.001_WD=0_Term=1_Layers=2_Lam=0.01_E=50000.mat
{'w': 1.8037847115883778, 'v': 1.6314355605158823, 'rel. diff': 0.09554862615546185}
experiments/final/all_terms/D=4_R=1296_L=16_LR=0.001_WD=0_Term=2_Layers=2_Lam=0.01_E=50000.mat
{'w': 1.637637271712265, 'v': 1.446307821007565, 'rel. diff': 0.11683261855945148}
