In [1]:
import sys

# Make the project-local modules (utils, model, gen_dicts) importable.
# NOTE: '../lib' is a relative path, so it is resolved against the current
# working directory of the kernel; the notebook must be run from its own folder.
sys.path.append('../lib')

from utils import XGRID
from model import PDFmodel, generate_mse_loss
from gen_dicts import generate_dicts
from validphys.api import API

import numpy as np
import pandas as pd

Using Keras backend


In [2]:
# Instantiate the PDF network that will be trained below.
# - input: the shared x-grid imported from utils
# - architecture/activations: presumably two hidden layers (25 and 28 units),
#   both with tanh activation -- confirm against PDFmodel in the model module
# - kernel initializer: RandomNormal with mean 0 and stddev 1
# - seed: fixed so that the initial weights are reproducible
pdf = PDFmodel(
    input=XGRID,
    outputs=9,
    architecture=[25, 28],
    activations=['tanh', 'tanh'],
    kernel_initializer='RandomNormal',
    user_ki_args={'mean': 0.0, 'stddev': 1.0},
    seed=1,
)

In [3]:
# DIS datasets passed to validphys. Only the uncommented entries are active;
# the commented-out entries are kept as toggles for enabling further datasets.
dataset_inputs = [
    #{'dataset': 'NMC_NC_NOTFIXED_DW_EM-F2', 'frac': 0.75, 'variant': 'legacy'},
    {'dataset': 'NMC_NC_NOTFIXED_P_EM-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
    {'dataset': 'SLAC_NC_NOTFIXED_P_DW_EM-F2', 'frac': 0.75, 'variant': 'legacy'},
    {'dataset': 'SLAC_NC_NOTFIXED_D_DW_EM-F2', 'frac': 0.75, 'variant': 'legacy'},
    {'dataset': 'BCDMS_NC_NOTFIXED_P_DW_EM-F2', 'frac': 0.75, 'variant': 'legacy'},
    #{'dataset': 'BCDMS_NC_NOTFIXED_D_DW_EM-F2', 'frac': 0.75, 'variant': 'legacy'},
    #{'dataset': 'CHORUS_CC_NOTFIXED_PB_DW_NU-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
    #{'dataset': 'CHORUS_CC_NOTFIXED_PB_DW_NB-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
    #{'dataset': 'NUTEV_CC_NOTFIXED_FE_DW_NU-SIGMARED', 'cfac': ['MAS'], 'frac': 0.75, 'variant': 'legacy'},
    #{'dataset': 'NUTEV_CC_NOTFIXED_FE_DW_NB-SIGMARED', 'cfac': ['MAS'], 'frac': 0.75, 'variant': 'legacy'},
    #{'dataset': 'HERA_NC_318GEV_EM-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
    #{'dataset': 'HERA_NC_225GEV_EP-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
    #{'dataset': 'HERA_NC_251GEV_EP-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
    #{'dataset': 'HERA_NC_300GEV_EP-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
    #{'dataset': 'HERA_NC_318GEV_EP-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
    #{'dataset': 'HERA_CC_318GEV_EM-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
    #{'dataset': 'HERA_CC_318GEV_EP-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
    #{'dataset': 'HERA_NC_318GEV_EAVG_CHARM-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
    #{'dataset': 'HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED', 'frac': 0.75, 'variant': 'legacy'},
]

# Keyword arguments shared by every validphys API call in this notebook.
# The same dataset list object is referenced here, not copied.
common_dict = {
    'dataset_inputs': dataset_inputs,
    'metadata_group': "nnpdf31_process",
    'use_cuts': 'internal',
    'datacuts': {'q2min': 3.49, 'w2min': 12.5},
    'theoryid': 40000000,
    't0pdfset': 'NNPDF40_nnlo_as_01180',
    'use_t0': True,
}

In [4]:
# Fetch the data through the validphys API using the shared settings
# (presumably grouped by process, given metadata_group -- confirm).
groups_data = API.procs_data(**common_dict)

# Convert the validphys objects into the containers used for training and
# pull out the two pieces needed later: the FK tables and the central data.
tuple_of_dicts = generate_dicts(groups_data)
fk_table_dict, central_data_dict = (
    tuple_of_dicts.fk_tables,
    tuple_of_dicts.central_data,
)

In [5]:
# Covariance matrices from validphys for the selected datasets.
# NOTE(review): C_sys is not referenced by any cell visible here; it is kept
# because later cells may rely on it.
C_sys = API.dataset_inputs_t0_covmat_from_systematics(**common_dict)
C = API.groups_covmat_no_table(**common_dict)

# Keep the labels of C so the inverse can be addressed the same way.
C_index, C_col = C.index, C.columns

# Invert the covariance matrix and re-attach the original row/column labels.
Cinv = pd.DataFrame(np.linalg.inv(C), index=C_index, columns=C_col)

LHAPDF 6.5.4 loading /opt/homebrew/Caskroom/miniconda/base/envs/nnpdf/share/LHAPDF/NNPDF40_nnlo_as_01180/NNPDF40_nnlo_as_01180_0000.dat
NNPDF40_nnlo_as_01180 PDF set, member #0, version 1; LHAPDF ID = 331100


In [6]:
# Build the loss callable from the labelled inverse covariance matrix.
# (Presumably a chi2-style loss weighted by Cinv -- confirm in the model module.)
mse_loss = generate_mse_loss(Cinv)

In [7]:
# Train the network on the central data with the covariance-based loss.
# Two objects are returned:
#   model_time -- indexable collection whose items expose `.model`
#                 (presumably one snapshot per logged step -- confirm)
#   steps      -- list of the step numbers that were recorded
# With logging enabled the loss is printed periodically during training.
model_time, steps = pdf.train_network_gd(
    data=central_data_dict,
    FK_dict=fk_table_dict,
    loss_func=mse_loss,
    learning_rate=1e-7,
    tol=1e-4,
    logging=True,
    callback=True,
)

------------------------
Step 0, Loss: 12250.6171875, Loss/Ndat: 20.281456953642383, Rel. loss: 0.0
------------------------
Step 100, Loss: 7079.9404296875, Loss/Ndat: 11.72019867549669, Rel. loss: 0.001643464551307261
------------------------
Step 200, Loss: 6069.31201171875, Loss/Ndat: 10.048013245033113, Rel. loss: 0.0014365552924573421
------------------------
Step 300, Loss: 5309.70263671875, Loss/Ndat: 8.789735099337749, Rel. loss: 0.001238914206624031
------------------------
Step 400, Loss: 4733.52392578125, Loss/Ndat: 7.836092715231788, Rel. loss: 0.0010602247202768922
------------------------
Step 500, Loss: 4291.703125, Loss/Ndat: 7.104304635761589, Rel. loss: 0.0009032261441461742
------------------------
Step 600, Loss: 3948.98779296875, Loss/Ndat: 6.5364238410596025, Rel. loss: 0.000767506833653897
------------------------
Step 700, Loss: 3680.000732421875, Loss/Ndat: 6.0927152317880795, Rel. loss: 0.000648343178909272
------------------------
Step 800, Loss: 3466.388427

In [8]:
# Display the step numbers recorded during training
# (multiples of 100 in the run captured below).
steps

[0,
 100,
 200,
 300,
 400,
 500,
 600,
 700,
 800,
 900,
 1000,
 1100,
 1200,
 1300,
 1400,
 1500,
 1600,
 1700,
 1800,
 1900,
 2000,
 2100,
 2200]

In [9]:
# First weight array of the first stored model (presumably the snapshot at
# step 0 -- confirm); shown for comparison with a later snapshot.
model_time[0].model.get_weights()[0]

array([[ 0.1266503 , -0.49301657,  0.6311907 , -0.78884655,  0.5683089 ,
        -1.5569937 , -1.4428717 ,  0.24223132, -2.2694967 ,  1.022747  ,
         0.5021713 , -3.8967683 ,  1.1691167 ,  0.1790239 ,  0.26565596,
        -0.3088571 ,  1.8476253 , -0.98034793,  1.9315791 , -1.6846664 ,
         1.0996997 , -0.73108566,  0.29095498, -0.9118021 , -1.0787293 ]],
      dtype=float32)

In [10]:
# Same weight array for the fourth stored model (presumably the snapshot at
# step 300 -- confirm), to see how far the weights moved during training.
model_time[3].model.get_weights()[0]

array([[ 0.09980545, -0.5122845 ,  0.65964717, -0.8187674 ,  0.56427646,
        -1.5603572 , -1.4334155 ,  0.28030187, -2.2742856 ,  1.0333611 ,
         0.50575095, -3.8933487 ,  1.1727369 ,  0.17485273,  0.26311854,
        -0.36162862,  1.8467122 , -0.97337204,  1.9243493 , -1.6897615 ,
         1.085749  , -0.7213313 ,  0.28556693, -0.90641505, -1.0773374 ]],
      dtype=float32)