In [2]:
from validphys.api import API
import sys

# Add the path to the library folder
sys.path.append('./lib')

from utils import XGRID, build_fk_matrix, regularize_matrix
from model import PDFmodel, generate_mse_loss
from gen_dicts import generate_dicts
from plot_utils import plot_eigvals
from validphys.api import API

import numpy as np
import pandas as pd

Using Keras backend


In [3]:
# Fixed random seed; it is passed as `seed=` to PDFmodel further down.
seed = 14132124

In [4]:
# List of DIS datasets handed to the validphys API.
# Entries that are commented out are excluded from the analysis; only the
# active entries contribute to the covariance matrix and FK tables below.
dataset_inputs = [
  #{'dataset': 'NMC_NC_NOTFIXED_DW_EM-F2', 'frac': 0.75, 'variant': 'legacy'},
  {'dataset': 'NMC_NC_NOTFIXED_P_EM-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
  {'dataset': 'SLAC_NC_NOTFIXED_P_DW_EM-F2', 'frac': 0.75, 'variant': 'legacy'},
  #{'dataset': 'SLAC_NC_NOTFIXED_D_DW_EM-F2', 'frac': 0.75, 'variant': 'legacy'},
  {'dataset': 'BCDMS_NC_NOTFIXED_P_DW_EM-F2', 'frac': 0.75, 'variant': 'legacy'},
  #{'dataset': 'BCDMS_NC_NOTFIXED_D_DW_EM-F2', 'frac': 0.75, 'variant': 'legacy'},
  #{'dataset': 'CHORUS_CC_NOTFIXED_PB_DW_NU-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
  #{'dataset': 'CHORUS_CC_NOTFIXED_PB_DW_NB-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
  #{'dataset': 'NUTEV_CC_NOTFIXED_FE_DW_NU-SIGMARED', 'cfac': ['MAS'], 'frac': 0.75, 'variant': 'legacy'},
  #{'dataset': 'NUTEV_CC_NOTFIXED_FE_DW_NB-SIGMARED', 'cfac': ['MAS'], 'frac': 0.75, 'variant': 'legacy'},
  {'dataset': 'HERA_NC_318GEV_EM-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
  {'dataset': 'HERA_NC_225GEV_EP-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
  {'dataset': 'HERA_NC_251GEV_EP-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
  {'dataset': 'HERA_NC_300GEV_EP-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
  #{'dataset': 'HERA_NC_318GEV_EP-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
  #{'dataset': 'HERA_CC_318GEV_EM-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
  #{'dataset': 'HERA_CC_318GEV_EP-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
  #{'dataset': 'HERA_NC_318GEV_EAVG_CHARM-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
  #{'dataset': 'HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
]

# Keyword arguments shared by every validphys API call in this notebook
# (unpacked with ** into API.procs_data and API.groups_covmat_no_table).
common_dict = {
    'dataset_inputs': dataset_inputs,
    'metadata_group': "nnpdf31_process",
    'use_cuts': 'internal',
    'datacuts': {'q2min': 3.49, 'w2min': 12.5},
    'theoryid': 40000000,
    't0pdfset': 'NNPDF40_nnlo_as_01180',
    'use_t0': True,
}

In [5]:
# Retrieve data from NNPDF
groups_data = API.procs_data(**common_dict)
# generate_dicts (local helper from ./lib) returns an object exposing the
# attributes `fk_tables` and `central_data`, which are read below.
tuple_of_dicts = generate_dicts(groups_data)
fk_table_dict = tuple_of_dicts.fk_tables
central_data_dict = tuple_of_dicts.central_data
# Combine the FK tables into the single matrix FK used in all products below
# (details in utils.build_fk_matrix, not visible here).
FK = build_fk_matrix(fk_table_dict)

In [6]:
# Covariance matrix of the selected datasets (a labelled frame: its index and
# columns are reused below)
C = API.groups_covmat_no_table(**common_dict)

# Invert the covariance matrix, keeping the row/column labels of C
C_index, C_col = C.index, C.columns
Cinv = pd.DataFrame(np.linalg.inv(C), index=C_index, columns=C_col)

# Eigendecomposition of the inverse covariance matrix. eigh returns the
# eigenvalues in ascending order; flip them (and the matching eigenvector
# columns) so that the largest eigenvalue comes first.
eigvals_Cinv, R_Y = np.linalg.eigh(Cinv)
if eigvals_Cinv[-1] > eigvals_Cinv[0]:
    eigvals_Cinv, R_Y = eigvals_Cinv[::-1], R_Y[:, ::-1]

# Diagonal matrix holding the eigenvalues of the inverse covariance
D_Y = np.diag(eigvals_Cinv)

LHAPDF 6.5.4 loading /opt/homebrew/Caskroom/miniconda/base/envs/nnpdf/share/LHAPDF/NNPDF40_nnlo_as_01180/NNPDF40_nnlo_as_01180_0000.dat
NNPDF40_nnlo_as_01180 PDF set, member #0, version 1; LHAPDF ID = 331100


In [7]:
# Build the column of central data values, aligned with the index of Cinv.
Y = pd.DataFrame(np.zeros(Cinv.shape[0]), index=Cinv.index)
for exp_name, data in central_data_dict.items():
  # Rows of Y that belong to this dataset (selected on the second index level)
  rows = (slice(None), [exp_name], slice(None))
  n_expected = Y.loc[rows, :].size
  if data.size != n_expected:
    # Fail loudly and say which dataset is inconsistent, instead of a bare error
    raise ValueError(
      f'Size mismatch for dataset {exp_name!r}: central data has {data.size} '
      f'points but the covariance index has {n_expected} rows.'
    )
  Y.loc[rows, :] = data

In [8]:
# Build the network (PDFmodel comes from ./lib/model) on the grid XGRID, with
# two hidden tanh layers of 28 and 20 units and 9 outputs; weights are drawn
# with the RandomNormal initializer using the notebook-wide seed.
nnpdf_model = PDFmodel(input=XGRID,
                       outputs=9,
                       architecture=[28,20],
                       activations=['tanh', 'tanh'],
                       kernel_initializer='RandomNormal',
                       user_ki_args={'mean': 0.0, 'stddev': 1.0},
                       seed=seed,
                       dtype='float64')
# Neural tangent kernel of the freshly constructed (untrained) model
NTK = nnpdf_model.compute_ntk()

# Flatten NTK into a 2-D array: `prod` is the product of all axes from the
# third one onwards and becomes the number of rows of NTK_flat.
prod = 1
oldshape = NTK.shape
for k in oldshape[2:]:
    prod *= k
NTK_flat = np.array(NTK).reshape(prod,-1)

# Compute predictions at initialization
f0 = nnpdf_model.predict(squeeze=True)

Load data from GD training

In [8]:
import pickle
# NOTE(review): pickle.load can execute arbitrary code; only load a
# 'training.pkl' that was produced by a trusted training run.
with open('training.pkl', 'rb') as file:
    results = pickle.load(file)

# Entries 1 and 2 of `results` are read as the per-step predictions and PDFs
# (the layout is presumably fixed by the training script; TODO confirm).
pred_in_time = results[1]
pdfs_in_time = results[2]
# Learning rate (1e-8) used wherever the analytical solution is compared with
# the gradient-descent run.
learning_rate_gd = 0.00000001

# Computing matrices from notes
-------------------------------

In [21]:
# Tolerance passed to regularize_matrix, null_space_eig and the np.allclose
# checks below. The trailing comment keeps the author's alternative choice
# (machine epsilon).
tol = 1.e-3#np.finfo(np.float64).eps

### $M = (FK)^T C_Y^{-1} (FK) = RDR^T$

In [22]:
# M = (FK)^T C_Y^{-1} (FK)
M = FK.T @ Cinv.to_numpy() @ FK
# regularize_matrix (from ./lib/utils, not visible here) returns the processed
# matrix together with a pair (eigenvalues, eigenvectors).
M, (eigvals_M, R) = regularize_matrix(M, tol=tol)

# Construct diagonal matrix
D = np.zeros_like(R)
np.fill_diagonal(D, eigvals_M)

### $\tilde{H} = D^{1/2} R^T \Theta R D^{1/2}$

In [23]:
# Regularize the flattened NTK (called without `tol`, so the helper's default applies)
ntk, (eigvals_ntk, R_ntk) = regularize_matrix(NTK_flat)
# H_tilde = D^{1/2} R^T Theta R D^{1/2}; np.sqrt acts element-wise on the
# diagonal matrix D.
H_tilde = np.sqrt(D) @ R.T @ ntk @ R @ np.sqrt(D)
H_tilde, (eigvals_H_tilde, eigvecs_H_tilde) = regularize_matrix(H_tilde, tol=tol)

# Check if symmetric
print(f'Is symmetric: {np.allclose(H_tilde, H_tilde.T)}')

Is symmetric: True


### $\tilde{H}_{\epsilon} = D_Y^{1/2} R_Y^T (FK) \Theta (FK)^T R_Y D_Y^{1/2}$

In [24]:
# Data-space counterpart of H_tilde: FK Theta FK^T rotated with R_Y (the
# eigenvectors of the inverse covariance) and rescaled by sqrt(D_Y).
H_eps_tilde = np.sqrt(D_Y) @ R_Y.T @ FK @ ntk @ FK.T @ R_Y @ np.sqrt(D_Y)
H_eps_tilde, (eigvals_H_eps_tilde, eigvecs_H_eps_tilde) = regularize_matrix(H_eps_tilde, tol=tol)

# Check if symmetric
print(f'Is symmetric: {np.allclose(H_eps_tilde, H_eps_tilde.T)}')

Is symmetric: True


### $b = \Theta (FK)^T C_Y^{-1} y \hspace{5mm} \textrm{and} \hspace{5mm} \tilde{b} = D^{1/2} R^T b$

In [25]:
# b = Theta (FK)^T C_Y^{-1} y, with y the central data stored in Y
b = ntk @ FK.T @ Cinv.to_numpy(dtype='float64') @ Y.to_numpy('float64')
# b_tilde = D^{1/2} R^T b
b_tilde = np.sqrt(D) @ R.T @ b

# Plots of the eigenvalues

In [None]:
# Eigenvalue spectrum of H_eps_tilde (plot_eigvals is a local helper from ./lib/plot_utils)
fig, axs = plot_eigvals(eigvals_H_eps_tilde, figsize=(10,8), title=r'$H_{\epsilon} = D^{1/2}_Y R^T_Y (FK) \Theta (FK)^T R_Y D^{1/2}_Y$')

In [None]:
# Eigenvalue spectrum of H_tilde
fig, axs = plot_eigvals(eigvals_H_tilde, 
                        figsize=(10,8), 
                        title=r'$\tilde{H}= D^{1/2} R^T \Theta R D^{1/2}$,  $M = RDR^T$')
# The output path is relative to the notebook's working directory
fig.savefig('../../../doc/figs/Htilde_eigvals.pdf')

In [None]:
# Eigenvalue spectrum of the NTK; the title is set afterwards to control its font size
fig, axs = plot_eigvals(eigvals_ntk, figsize=(10,8), title='')
axs.set_title(r'Eigenvalues of $\Theta$', fontsize=20)
# The output path is relative to the notebook's working directory
fig.savefig('../../../doc/figs/ntk_eigvals.pdf')

In [None]:
# Eigenvalue spectrum of M; saving this figure is currently disabled
fig, axs = plot_eigvals(eigvals_M, figsize=(10,8), title=r'Eigenvalues of $M = (FK)^T C_Y^{-1} (FK)$')
#axs.set_title(r'Eigenvalues of $\Theta$', fontsize=20)
#fig.savefig('../../../doc/figs/m_eigvals.pdf')

# Utility functions for the null space

In [26]:
import scipy as sp
def null_space_eig(eigvals, eigvecs, tol=None):
  """Split a set of eigenvectors into null-space and non-null-space parts.

  Parameters
  ----------
  eigvals : array-like, shape (n,)
    Eigenvalues; eigvals[k] belongs to the column eigvecs[:, k].
  eigvecs : ndarray, shape (m, n)
    Eigenvectors stored as columns.
  tol : float, optional
    Eigenvalues that are not strictly greater than `tol` are treated as zero.
    Defaults to max(eigvals) times the machine epsilon of eigvecs.dtype.

  Returns
  -------
  ker : ndarray
    Columns of `eigvecs` whose eigenvalue is <= tol.
  orth : ndarray
    Columns of `eigvecs` whose eigenvalue is > tol.
  """
  eigvals = np.asarray(eigvals)
  if tol is None:
    tol = np.amax(eigvals, initial=0.) * np.finfo(eigvecs.dtype).eps
  # Select columns with a mask rather than by counting and slicing, so the
  # split is also correct when the eigenvalues are not sorted in descending
  # order. For sorted input the result is the same as slicing.
  nonzero = eigvals > tol
  ker = eigvecs[:, ~nonzero]
  orth = eigvecs[:, nonzero]
  return ker, orth
  
def project_matrix(matrix, basis1, basis2):
  """Express `matrix` in terms of two sets of basis vectors.

  Parameters
  ----------
  matrix : ndarray, shape (n1, n2)
  basis1 : ndarray, shape (n1, d1)
    Basis vectors (as columns) for the row space side.
  basis2 : ndarray, shape (n2, d2)
    Basis vectors (as columns) for the column space side.

  Returns
  -------
  ndarray, shape (d1, d2)
    The product basis1.T @ matrix @ basis2.

  Raises
  ------
  ValueError
    If the length of the basis vectors does not match the matrix shape.
  """
  # Check if the bases are compatible with the matrix
  if matrix.shape[0] != basis1.shape[0] or matrix.shape[1] != basis2.shape[0]:
    raise ValueError ('The matrix cannot be projected into the two bases.')

  return basis1.T @ matrix @ basis2

def project_vector(vector, basis):
  """Return the components of `vector` along each column of `basis`.

  Parameters
  ----------
  vector : ndarray, shape (n,)
  basis : ndarray, shape (n, d)
    Basis vectors stored as columns.

  Returns
  -------
  list
    One dot product per basis column, in column order.

  Raises
  ------
  ValueError
    If the vector length differs from the length of the basis vectors.
  """
  if basis.shape[0] != vector.shape[0]:
    # The original message said "matrix", but the object rejected here is a vector
    raise ValueError ('The vector cannot be projected into the basis')

  return [np.dot(vector, basis[:, k]) for k in range(basis.shape[1])]

Determine $\ker(M)$ and its orthogonal complement, then project the matrix $M$ onto the four resulting blocks, as explained in the notes.

We must be careful in this part. Indeed, we must decide the threshold for the smallest distinguishable eigenvalue. Looking at the eigenvalues of the matrix $M$ may help choose this value:

In [27]:
# List the eigenvalues of M (1-based position) to help choose the null-space threshold
for i, val in enumerate(eigvals_M):
  print(f'{i+1} : {val}')

1 : 3124227.6108772354
2 : 1403346.4123894328
3 : 456262.382786244
4 : 340401.94392178574
5 : 102319.29860667067
6 : 93976.81790458383
7 : 74554.88176256222
8 : 69484.39975297164
9 : 51622.76207117809
10 : 44115.20273963305
11 : 26964.98519148161
12 : 24523.92813802907
13 : 21406.35767285671
14 : 18843.31711000429
15 : 15105.818770701035
16 : 14233.550076906415
17 : 9802.403011957407
18 : 7818.862748033741
19 : 6874.863591158355
20 : 6321.435594768886
21 : 4579.068123341692
22 : 3992.435884194028
23 : 3685.986052988151
24 : 3261.3024196549604
25 : 3032.1264836599976
26 : 2546.6484429102384
27 : 2466.190584104022
28 : 2352.9819779930367
29 : 2050.66766920035
30 : 1793.3076905141436
31 : 1779.1202599261612
32 : 1469.258396587751
33 : 1316.0734418302266
34 : 1241.37745256911
35 : 1154.7936746725159
36 : 998.018869304499
37 : 872.984787007436
38 : 742.4398080891076
39 : 650.6481224644775
40 : 638.3808183791555
41 : 608.9775263694085
42 : 490.8503281376732
43 : 463.29925988519204
44 : 409.8

In [28]:
# Split the eigenvectors of M (columns of R) into kernel and orthogonal
# complement, using the notebook-wide tolerance `tol` as the threshold.
ker_M, orth_M = null_space_eig(eigvals_M, R, tol)

In [29]:
# (ambient dimension, number of eigenvalues above `tol`)
orth_M.shape

(450, 149)

In [30]:
# Blocks of M in the split basis: p = orthogonal complement (orth_M), k = kernel (ker_M)
M_pp = project_matrix(M, orth_M, orth_M)
M_kk = project_matrix(M, ker_M, ker_M)
M_pk = project_matrix(M, orth_M, ker_M)
M_kp = project_matrix(M, ker_M, orth_M)

Given that the matrix $M$ is symmetric, only $M_{\bot\bot} \neq 0$, while the other three blocks should vanish. Here too we need to be careful about what we consider a null value: I use the same effective tolerance that was used in the extraction of the null space (@TODO).

In [31]:
# Compare each block with zero, using `tol` as absolute tolerance.
# Only M_pp is expected to be non-zero.
print(f'M_pp ?= 0 : {np.allclose(np.zeros_like(M_pp), M_pp, atol=tol)}')
print(f'M_kk ?= 0 : {np.allclose(np.zeros_like(M_kk), M_kk, atol=tol)}')
print(f'M_kp ?= 0 : {np.allclose(np.zeros_like(M_kp), M_kp, atol=tol)}')
print(f'M_pk ?= 0 : {np.allclose(np.zeros_like(M_pk), M_pk, atol=tol)}')

M_pp ?= 0 : False
M_kk ?= 0 : True
M_kp ?= 0 : True
M_pk ?= 0 : True


In the orthogonal space, the matrix $M$ should be invertible...

In [32]:
# Invert the orth-orth block and check both products against the identity
# (np.allclose with its default tolerances).
M_pp_inv = np.linalg.inv(M_pp)
print(f'M_pp_inv @ M_pp ?= Id: {np.allclose(M_pp_inv @ M_pp, np.eye(M_pp.shape[0]))}')
print(f'M_pp @ M_pp_inv ?= Id: {np.allclose(M_pp @ M_pp_inv, np.eye(M_pp.shape[0]))}')

M_pp_inv @ M_pp ?= Id: True
M_pp @ M_pp_inv ?= Id: True


------------------
## Projection for FK - WIP
The same projection should be applied to the FK tables. Note that $\ker(FK) = \ker(M)$ (see notes), hence we do not need to compute the null space of the FK tables separately. It is interesting, however, that although the two null spaces should coincide, computing $\ker(FK)$ numerically gives something different...

In [33]:
#FK, (s_FK, vh_FK) = regularize_matrix(FK, tol=np.finfo(FK.dtype).eps * np.amax(eigvals_M, initial=0.))
#ker_FK, orth_FK = nullspace(FK, orth_space=True, rcond=np.finfo('float64').eps * np.amax(eigvals_M, initial=0.) / np.amax(s_FK, initial=0.))
#print(f'ker(FK): {ker_FK.shape} != ker(M): {ker_M.shape}')

The two spaces differ even if we set the same relative condition number. The reason is that the function `scipy.linalg.null_space` defines the tolerance as `tol = rcond * max(s)`, where `s` are the singular values of the matrix. The largest singular value of $(FK)$ has a different order of magnitude from that of $M$, and this should be enough to explain the difference we observe. We should obtain the same answer provided we use the same absolute tolerance for both extractions.

--------------

Also $(FK)$ must be projected in the two bases:

In [38]:
#FK, (s_FK, vh_FK) = regularize_matrix(FK, tol=np.finfo(FK.dtype).eps * np.amax(eigvals_M, initial=0.)/ np.amax(s_FK, initial=0.))
# The left basis is the identity, so only the columns of FK are projected:
# FK_p onto the orthogonal complement, FK_k onto the kernel of M.
FK_p = project_matrix(FK, np.eye(FK.shape[0]), orth_M)
FK_k = project_matrix(FK, np.eye(FK.shape[0]), ker_M)
# NTK restricted to the orthogonal complement of ker(M)
ntk_pp = project_matrix(ntk, orth_M, orth_M)

In [39]:
# FK_k is expected to vanish (within `tol`), FK_p is not
print(f'FK_k ?= 0 : {np.allclose(np.zeros_like(FK_k), FK_k, atol=tol)}')
print(f'FK_p ?= 0 : {np.allclose(np.zeros_like(FK_p), FK_p, atol=tol)}')

FK_k ?= 0 : True
FK_p ?= 0 : False


Note that $(FK) \Theta (FK)^T = (FK_{\bot}) \Theta_{\bot\bot} (FK_{\bot})^T$ because $(FK_K) = 0$

In [76]:
# Compare FK Theta FK^T C_Y^{-1} built from the projected blocks (test1) with
# the same product built from the full matrices (test2).
test1 = FK_p @ ntk_pp @ FK_p.T @ Cinv.to_numpy()
test2 = FK @ ntk @ FK.T @ Cinv.to_numpy()
# Cinv is symmetric, so use the symmetric eigenvalue routine: it always
# returns real eigenvalues, whereas the general solver np.linalg.eigvals can
# return complex values with spurious imaginary parts.
eigvals = np.linalg.eigvalsh(Cinv.to_numpy())
# The absolute tolerance is rescaled by the largest eigenvalue of Cinv
np.allclose(test1,test2, atol=tol*eigvals.max())

True

In [77]:
# Absolute tolerance that was effectively used in the comparison of test1 and test2
tol*eigvals.max()

5420.607087270766

In [78]:
# Element-wise difference between the projected and the full product
test1 - test2

array([[ 0.01063744,  0.04715807,  0.01105861, ...,  0.02134893,
         0.04356189, -0.01356432],
       [ 0.0095173 ,  0.03769981,  0.00960757, ...,  0.01288368,
         0.03984187, -0.01822565],
       [ 0.01095587,  0.04454752,  0.0110293 , ...,  0.0165962 ,
         0.04152658, -0.01633752],
       ...,
       [ 0.00809976,  0.03346447,  0.006441  , ...,  0.046501  ,
         0.10311208, -0.04812069],
       [ 0.0285553 ,  0.11427956,  0.02739389, ...,  0.08098131,
         0.12962646, -0.01893501],
       [-0.04895807, -0.21657117, -0.05359029, ..., -0.13770991,
        -0.06761168, -0.10561507]])

In [59]:
# Full (test1) versus projected (test2) product, each right-multiplied by
# C_Y^{-1} FK_p M_pp^{-1} FK_p^T C_Y^{-1}. Here the comparison uses the plain
# `tol` as absolute tolerance, without rescaling.
test1 = FK @ ntk @ FK.T @ Cinv.to_numpy() @ FK_p @ M_pp_inv @ FK_p.T @ Cinv.to_numpy()
test2 = FK_p @ ntk_pp @ FK_p.T @ Cinv.to_numpy() @ FK_p @ M_pp_inv @ FK_p.T @ Cinv.to_numpy()
np.allclose(test1,test2, atol=tol)

False

In [42]:
# Rebuild the four blocks of M directly from the projected FK tables,
# M_ab = FK_a^T C_Y^{-1} FK_b, and compare them with the projected blocks.
M_pp_recons = FK_p.T @ Cinv.to_numpy(dtype='float64') @ FK_p
M_kp_recons = FK_k.T @ Cinv.to_numpy(dtype='float64') @ FK_p
M_pk_recons = FK_p.T @ Cinv.to_numpy(dtype='float64') @ FK_k
M_kk_recons = FK_k.T @ Cinv.to_numpy(dtype='float64') @ FK_k
print(f'M_pp ?= M_pp_recons: {np.allclose(M_pp, M_pp_recons, atol=tol)}')
print(f'M_kp ?= M_kp_recons: {np.allclose(M_kp, M_kp_recons, atol=tol)}')
print(f'M_pk ?= M_pk_recons: {np.allclose(M_pk, M_pk_recons, atol=tol)}')
print(f'M_kk ?= M_kk_recons: {np.allclose(M_kk, M_kk_recons, atol=tol)}')

M_pp ?= M_pp_recons: True
M_kp ?= M_kp_recons: True
M_pk ?= M_pk_recons: True
M_kk ?= M_kk_recons: True


Compute the limiting solution $f_{\infty} = (M_{\bot\bot})^{-1} (FK_{\bot})^{T} C_Y^{-1} y$ and $\tilde{\epsilon}_{\infty}$.

In [43]:
# f_inf = M_pp^{-1} FK_p^T C_Y^{-1} y, with y the first (only) column of Y
f_inf = M_pp_inv @ FK_p.T @ Cinv.to_numpy(dtype='float64') @ Y.to_numpy(dtype='float64')[:,0]
# Residual between the data and the predictions FK_p f_inf
eps_inf = Y.to_numpy(dtype='float64')[:,0] - FK_p @ f_inf
# Residual rotated with R_Y^T and rescaled by sqrt(D_Y)
eps_tilde_inf = np.sqrt(D_Y) @ R_Y.T @ eps_inf

In [44]:
# Same construction as H_eps_tilde, but built from the projected blocks FK_p
# and ntk_pp; the regularization step is disabled for this matrix.
H_eps_tilde_p = np.sqrt(D_Y) @ R_Y.T @ FK_p @ ntk_pp @ FK_p.T @ R_Y @ np.sqrt(D_Y)
#H_eps_tilde_p, (eigvals_H_eps_tilde_p, eigvecs_H_eps_tilde_p) = regularize_matrix(H_eps_tilde_p)

# Check if symmetric
print(f'Is symmetric: {np.allclose(H_eps_tilde_p, H_eps_tilde_p.T)}')

Is symmetric: True


In [45]:
# Apply the full and the projected operator to the limiting residual
res = H_eps_tilde @ eps_tilde_inf
res_p = H_eps_tilde_p @ eps_tilde_inf

In [47]:
# Norm of the projected product (the recorded output is about 2.2e-05)
np.linalg.norm(res_p)

2.2058259581892878e-05

# Evolution of the data

In [None]:
from functools import lru_cache

# Construct dataframe for predictions
# (same construction of Y as in the earlier data-loading cell)
Y = pd.DataFrame(np.zeros(Cinv.shape[0]), index=Cinv.index)
for exp_name, data in central_data_dict.items():
  if data.size == Y.loc[(slice(None), [exp_name], slice(None)), :].size:
    Y.loc[(slice(None), [exp_name], slice(None)), :] = data
  else:
    raise ValueError
  
# NOTE(review): `L`, `Linv`, `eigvecs` and `eigvals_reg` are not defined in
# any visible cell of this notebook; this cell fails on a fresh kernel unless
# they are created elsewhere. TODO confirm where they come from.
# Residual at initialization: data minus FK applied to the initial network output
eps_0 = Y.to_numpy()[:,0] - FK @ f0.flatten()
Ly = (L @ Y).to_numpy()[:,0]
L_eps0 = L @ eps_0

# Components of L eps_0 along each column of `eigvecs`
L_eps0_tilde = [np.dot(L_eps0, eigvecs[:,k]) for k in range(eigvecs.shape[1])]
# Time-independent part of each mode; preds_t multiplies it by the exponential decay
pre_computed_coefficients = [Linv @ eigvecs[:,k] * L_eps0_tilde[k] for k in range(eigvals_reg.size)] 

@lru_cache(maxsize=None)
def preds_t(t, learning_rate = 0.00001, eig_range=None):
  """Analytical predictions at training time `t`.

  Each of the first `eig_range` pre-computed mode coefficients is damped by
  exp(-eigenvalue * learning_rate * t); the damped modes are summed and the
  sum is subtracted from the data frame Y.

  Parameters
  ----------
  t : training time (step).
  learning_rate : learning rate entering the exponent.
  eig_range : number of leading modes to include; all of them when None.

  Returns
  -------
  DataFrame indexed like Y. Results are memoized per argument combination.
  """
  n_modes = eigvals_reg.size if eig_range is None else eig_range

  damped_modes = []
  for k in range(n_modes):
    decay = np.exp(-eigvals_reg[k] * learning_rate* t)
    damped_modes.append(pre_computed_coefficients[k] * decay)

  correction = pd.DataFrame(np.sum(damped_modes, axis=0), index=Y.index)
  return Y - correction

In [None]:
# Datasets shown in the 2x2 grid. The keys are looked up in the 'dataset' level
# of Y, so they must be names that are active in `dataset_inputs`: the SLAC and
# BCDMS entries there are the proton '..._P_DW_EM-F2' sets.
# NOTE(review): pred_in_time is assumed to be keyed by the same dataset names;
# confirm against the contents of training.pkl.
# NOTE(review): `plt` is not imported in any visible cell; confirm that
# `import matplotlib.pyplot as plt` has been executed before this cell.
experiments = ['NMC_NC_NOTFIXED_P_EM-SIGMARED', 'SLAC_NC_NOTFIXED_P_DW_EM-F2', 'BCDMS_NC_NOTFIXED_P_DW_EM-F2', 'HERA_NC_318GEV_EM-SIGMARED']
exp_titles = ['NMC', 'SLAC NC P', 'BCDMS NC P', 'HERA NC 318GEV']
y_labels = [r'$\sigma$', r'$F_2$', r'$F_2$', r'$\sigma$']
t = 0.
fig_pred, axes_pred = plt.subplots(2, 2, figsize=(25, 25))  # Adjust figsize for desired plot size
preds = preds_t(t, learning_rate=learning_rate_gd)

# Artists kept so that the animation callback can update them in place
scat_gf = []
scat_gd = []
text = []
for i, ax in enumerate(axes_pred.flat):
    y = Y.xs(level='dataset', key=experiments[i]).to_numpy()
    p = preds.xs(level='dataset', key=experiments[i]).to_numpy()
    trained_pred = pred_in_time[0][experiments[i]]
    ax.scatter(np.arange(y.size), y, color='green', label='Central data', marker='o', s=100, alpha=0.4)
    gf = ax.scatter(np.arange(y.size), p, color='orange', label='Analytical solution', marker='^', s=100)
    gd = ax.scatter(np.arange(y.size), trained_pred, color='red', label='Gradient descent', marker='v', s=100)
    scat_gf.append(gf)
    scat_gd.append(gd)
    #ax.set_xlabel(r'$x$')
    ax.set_ylabel(y_labels[i], fontsize=20)
    #ax.set_xscale('log')
    ax.set_title(exp_titles[i], x=0.8,fontsize=20, fontweight='bold')
    ax.legend(fontsize=20)
    text_t = ax.text(0.05, 1.01, f't = {t}, learning rate = {learning_rate_gd}', fontsize=20, transform=ax.transAxes)
    text.append(text_t)


plt.tight_layout()
#fig.savefig('data_evolution.pdf')

In [None]:
# Datasets shown in the 2x2 grid. The keys are looked up in the 'dataset' level
# of Y, so they must be names that are active in `dataset_inputs`: the SLAC and
# BCDMS entries there are the proton '..._P_DW_EM-F2' sets.
# NOTE(review): pred_in_time is assumed to be keyed by the same dataset names;
# confirm against the contents of training.pkl.
# NOTE(review): `plt` is not imported in any visible cell; confirm that
# `import matplotlib.pyplot as plt` has been executed before this cell.
experiments = ['NMC_NC_NOTFIXED_P_EM-SIGMARED', 'SLAC_NC_NOTFIXED_P_DW_EM-F2', 'BCDMS_NC_NOTFIXED_P_DW_EM-F2', 'HERA_NC_318GEV_EM-SIGMARED']
exp_titles = ['NMC', 'SLAC NC P', 'BCDMS NC P', 'HERA NC 318GEV']
y_labels = [r'$\sigma$', r'$F_2$', r'$F_2$', r'$\sigma$']
t = 0.
fig_eps, axes_eps = plt.subplots(2, 2, figsize=(25, 25))  # Adjust figsize for desired plot size
preds = preds_t(t, learning_rate=learning_rate_gd)

# Artists kept so that the animation callback can update them in place
scat_gf_eps = []
scat_gd_eps = []
text_eps = []
for i, ax in enumerate(axes_eps.flat):
    y = Y.xs(level='dataset', key=experiments[i]).to_numpy()
    # Residuals (data minus prediction) for the analytical and the trained solution
    p = y - preds.xs(level='dataset', key=experiments[i]).to_numpy()
    trained_pred = y[:,0] - pred_in_time[int(t)][experiments[i]].numpy()
    ax.scatter(np.arange(y.size), y, color='green', label='Central data', marker='o', s=100, alpha=0.4)
    gf = ax.scatter(np.arange(y.size), p, color='orange', label='Analytical solution', marker='^', s=100)
    gd = ax.scatter(np.arange(y.size), trained_pred, color='red', label='Gradient descent', marker='v', s=100)
    scat_gf_eps.append(gf)
    scat_gd_eps.append(gd)
    #ax.set_xlabel(r'$x$')
    ax.set_ylabel(r'$\epsilon$', fontsize=20)
    #ax.set_xscale('log')
    ax.set_title(exp_titles[i], x=0.8,fontsize=20, fontweight='bold')
    ax.legend(fontsize=20)
    text_t = ax.text(0.05, 1.01, f't = {t}, learning rate = {learning_rate_gd}', fontsize=20, transform=ax.transAxes)
    text_eps.append(text_t)


plt.tight_layout()
#fig.savefig('data_evolution.pdf')

In [None]:
def compute_loss_analytical(t, eig_range=None):
  """Loss of the analytical solution at training time `t`.

  For every dataset in the index of Y, evaluates 0.5 * r^T Cinv_d r, where r
  is the data minus the analytical prediction and Cinv_d is the dataset's
  diagonal block of the inverse covariance. Returns the sum over datasets
  divided by the total number of data points.

  `eig_range` is forwarded to preds_t (number of modes kept in the sum).
  """
  preds = preds_t(t, learning_rate=learning_rate_gd, eig_range=eig_range)
  total, n_points = 0, 0
  for name in Y.index.get_level_values('dataset').unique():
    target = Y.xs(level='dataset', key=name).to_numpy()
    # Diagonal block of Cinv: select the dataset on the rows, then on the columns
    cinv_block = Cinv.xs(level="dataset", key=name).T.xs(level="dataset", key=name).to_numpy()
    predicted = preds.xs(level='dataset', key=name).to_numpy()
    residual = target[:,0] - predicted[:,0]
    total += 0.5 * residual.T @ cinv_block @ residual
    n_points += cinv_block.shape[0]
  return float(total) / n_points

def compute_loss_gd(t):
  """Loss of the gradient-descent predictions stored for step `t`.

  For every dataset in pred_in_time[int(t)], evaluates 0.5 * R^T Cinv_d R in
  float32, where R is the central data minus the stored prediction and Cinv_d
  is the dataset's diagonal block of the inverse covariance. Returns the sum
  over datasets divided by the total number of data points.

  NOTE(review): `tf` is not imported in any visible cell; confirm that
  TensorFlow is imported as `tf` before this function is called.
  """
  preds = pred_in_time[int(t)]
  loss = 0
  ndata = 0
  for exp, pred in preds.items():
    y = Y.xs(level='dataset', key=exp).to_numpy()
    # Diagonal block of Cinv: select the dataset on the rows, then on the columns
    Cinv_exp = tf.convert_to_tensor(Cinv.xs(level="dataset", key=exp).T.xs(level="dataset", key=exp).to_numpy(), name=f'Cinv_{exp}', dtype='float32')
    R = tf.convert_to_tensor(y[:,0] - pred, name=f'residue_{exp}', dtype='float32')
    Cinv_R = tf.linalg.matvec(Cinv_exp, R)
    loss += 0.5 * tf.reduce_sum(tf.multiply(R, Cinv_R))
    ndata += Cinv_exp.shape[0]
  return float(loss) / ndata


In [None]:
# Sample every 2nd step below 1000 and every 1000th step from 1000 onwards
time_steps_high = np.arange(1000,len(pred_in_time),1000)
time_steps_low = np.arange(0,1000,2)
time_steps = np.concatenate([time_steps_low, time_steps_high])
# The analytical loss keeps only the first 100 modes (eig_range=100)
aloss = [compute_loss_analytical(t, eig_range=100) for t in time_steps]
gd_loss = [compute_loss_gd(t) for t in time_steps]

In [None]:
# Analytical versus gradient-descent loss as a function of the training step
fig_loss, ax_loss = plt.subplots(figsize=(10, 7))  # Adjust figsize for desired plot size

ax_loss.scatter(time_steps, aloss, label='Analytical solution')
ax_loss.scatter(time_steps, gd_loss, label='Gradient descent')
ax_loss.set_xlabel(r'$t$')
ax_loss.set_ylabel(r'Loss function', fontsize=20)
# symlog keeps t = 0 on the axis while compressing the large-t range
ax_loss.set_xscale('symlog')
ax_loss.set_title('MSE in function of training time', x=0.5, fontsize=20, fontweight='bold')
ax_loss.legend(fontsize=20)
#text_t = ax.text(0.05, 1.01, f't = {t}, learning rate = {learning_rate_gd}', fontsize=20, transform=ax.transAxes)
#text.append(text_t)


plt.tight_layout()
# Written to the notebook's working directory
fig_loss.savefig('Loss_function_time.pdf')

In [None]:
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
import matplotlib
# Set a very large embed limit so that embedded animations are not truncated
matplotlib.rcParams['animation.embed_limit'] = 2**128

# Animation function
# Update function for predicitons
def update_preds(t):
    """Animation callback for the prediction figure.

    Moves the analytical and gradient-descent markers of every panel to their
    values at training step `t`, refreshes the panel caption, and returns the
    list of modified artists.
    """
    analytical = preds_t(t, learning_rate=learning_rate_gd)
    for idx, (gf_artist, gd_artist, caption) in enumerate(zip(scat_gf, scat_gd, text)):
        name = experiments[idx]
        y = Y.xs(level='dataset', key=name).to_numpy()
        gf_values = analytical.xs(level='dataset', key=name).to_numpy()
        gd_values = pred_in_time[int(t)][name]
        # Scatter offsets are (x, y) pairs: point position on x, prediction on y
        gf_points = np.hstack(( np.arange(y.size)[:, np.newaxis] , gf_values))
        gd_points = np.hstack(( np.arange(y.size)[:, np.newaxis] , gd_values[:,np.newaxis]))
        gf_artist.set_offsets(gf_points)
        gd_artist.set_offsets(gd_points)
        caption.set_text(f't = {t}, learning rate = {learning_rate_gd}')
    return scat_gf + scat_gd + text

# Update function for epsilon
def update_eps(t):
    """Animation callback for the epsilon (residual) figure.

    Moves the analytical and gradient-descent markers of every panel to the
    residuals (central data minus prediction) at training step `t`, refreshes
    the panel caption, and returns the artists of the epsilon figure.
    """
    preds = preds_t(t, learning_rate=learning_rate_gd)
    for i, (gf, gd, text_t) in enumerate(zip(scat_gf_eps, scat_gd_eps, text_eps)):
        y = Y.xs(level='dataset', key=experiments[i]).to_numpy()
        x_positions = np.arange(y.size)[:, np.newaxis]
        # Plot residuals, matching what the epsilon figure shows in its first
        # frame; previously the raw predictions were plotted here.
        eps_gf = y - preds.xs(level='dataset', key=experiments[i]).to_numpy()
        # np.asarray accepts both NumPy arrays and eager tensors
        eps_gd = y[:,0] - np.asarray(pred_in_time[int(t)][experiments[i]])
        gf.set_offsets(np.hstack((x_positions, eps_gf)))
        gd.set_offsets(np.hstack((x_positions, eps_gd[:, np.newaxis])))
        text_t.set_text(f't = {t}, learning rate = {learning_rate_gd}')
    # With blit=True the callback must return the artists it modified, i.e.
    # those of the epsilon figure rather than those of the prediction figure.
    return scat_gf_eps + scat_gd_eps + text_eps

In [None]:
# One frame every 1000 training steps; the callbacks return the artists they
# modify, as required by blit=True.
ani_pred = FuncAnimation(fig_pred, update_preds, frames=np.arange(0, len(pred_in_time), 1000), interval=10, blit=True, cache_frame_data=False)
ani_eps = FuncAnimation(fig_eps, update_eps, frames=np.arange(0, len(pred_in_time), 1000), interval=10, blit=True, cache_frame_data=False)

# Save the animation in the background
# (uses the 'ffmpeg' writer; files are written to the working directory)
ani_pred.save('prediction_evolution.mp4', writer='ffmpeg', fps=20)
ani_eps.save('epsilon_evolution.mp4', writer='ffmpeg', fps=20)