In [1]:
from validphys.api import API
import sys

# Add the path to the library folder
sys.path.append('../lib')

from utils import XGRID, build_fk_matrix, regularize_matrix
from model import PDFmodel, generate_mse_loss
from gen_dicts import generate_dicts
from plot_utils import plot_eigvals
from validphys.api import API

from validphys.pineparser import pineappl_reader
from n3fit.layers.observable import compute_float_mask
from n3fit.backends import operations as op
from n3fit.layers import DIS

from collections import defaultdict, namedtuple
import numpy as np
import tensorflow as tf

Using Keras backend


In [7]:
# DIS datasets used in this notebook, as (dataset name, extra options) pairs.
# Every entry gets the same 'frac' and 'variant' values; the two NuTeV sets
# additionally request the 'MAS' c-factor.
# 'NMC_NC_NOTFIXED_DW_EM-F2' is currently disabled and therefore not listed.
_dis_datasets = [
  ('NMC_NC_NOTFIXED_P_EM-SIGMARED', {}),
  ('SLAC_NC_NOTFIXED_P_DW_EM-F2', {}),
  ('SLAC_NC_NOTFIXED_D_DW_EM-F2', {}),
  ('BCDMS_NC_NOTFIXED_P_DW_EM-F2', {}),
  ('BCDMS_NC_NOTFIXED_D_DW_EM-F2', {}),
  ('CHORUS_CC_NOTFIXED_PB_DW_NU-SIGMARED', {}),
  ('CHORUS_CC_NOTFIXED_PB_DW_NB-SIGMARED', {}),
  ('NUTEV_CC_NOTFIXED_FE_DW_NU-SIGMARED', {'cfac': ['MAS']}),
  ('NUTEV_CC_NOTFIXED_FE_DW_NB-SIGMARED', {'cfac': ['MAS']}),
  ('HERA_NC_318GEV_EM-SIGMARED', {}),
  ('HERA_NC_225GEV_EP-SIGMARED', {}),
  ('HERA_NC_251GEV_EP-SIGMARED', {}),
  ('HERA_NC_300GEV_EP-SIGMARED', {}),
  ('HERA_NC_318GEV_EP-SIGMARED', {}),
  ('HERA_CC_318GEV_EM-SIGMARED', {}),
  ('HERA_CC_318GEV_EP-SIGMARED', {}),
  ('HERA_NC_318GEV_EAVG_CHARM-SIGMARED', {}),
  ('HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED', {}),
]

# Expand the table into the list of per-dataset dictionaries. The extra
# options are spliced in right after the name so the key order of every
# entry is 'dataset', [extras], 'frac', 'variant'.
dataset_inputs = [
  {'dataset': name, **extra, 'frac': 0.75, 'variant': 'legacy'}
  for name, extra in _dis_datasets
]

# Keyword arguments that are unpacked into the validphys API call.
common_dict = {
    'dataset_inputs': dataset_inputs,
    'metadata_group': "nnpdf31_process",
    'use_cuts': 'internal',
    'datacuts': {'q2min': 3.49, 'w2min': 12.5},
    'theoryid': 40000000,
    't0pdfset': 'NNPDF40_nnlo_as_01180',
    'use_t0': True,
}

In [8]:
# Query validphys with the settings collected in common_dict. The result is
# iterated further down as a sequence of groups, each exposing a `.datasets`
# collection of dataset specifications.
groups_data = API.procs_data(**common_dict)

In [12]:
# Per-dataset containers, keyed by dataset name.
# NOTE(review): fk_table_dict and central_data_dict are created here but are
# never filled in this cell — confirm whether other cells populate them.
fk_table_dict = defaultdict(list)
central_data_dict = {}
padded_fk_dict = defaultdict(list)
xgrid_masks_dict = defaultdict(list)

# Running total of data points over all datasets, taken from
# load_commondata().ndata of each dataset.
total_ndata_wc = 0
for group_proc in groups_data:
  for exp_set in group_proc.datasets:

    dataset_name = exp_set.name
    dataset_size = exp_set.load_commondata().ndata
    total_ndata_wc += dataset_size

    # Collect FKSpecs and cuts
    fkspecs = exp_set.fkspecs
    cuts = exp_set.cuts

    # Read FKData and FK table in numpy version.
    # NOTE(review): only the first FK spec is read; a dataset with several
    # FK tables would have the others ignored — confirm this never happens
    # for the datasets listed in dataset_inputs.
    fk_data = pineappl_reader(fkspecs[0]).with_cuts(cuts)
    fk_table = fk_data.get_np_fktable()

    # xgrid for this dataset
    xgrid = fk_data.xgrid

    # Number of leading XGRID points that are missing from this dataset's
    # grid (the small-x points, going by the check below). Computed once from
    # XGRID.size so that the check and the mask always agree.
    offset = XGRID.size - xgrid.size
    if offset < 0:
      # Without this guard the negative offset would silently wrap around
      # when indexing and only fail later, inside the einsum.
      raise ValueError(
        f"xgrid of {dataset_name} has {xgrid.size} points, "
        f"more than the {XGRID.size} points of XGRID."
      )

    # Check that XGRID is just a small-x extension of xgrid, i.e. that the
    # tail of XGRID coincides with xgrid. Best-effort: a mismatch is reported
    # once per dataset and processing continues.
    if not np.allclose(xgrid, XGRID[offset:]):
      print(f"XGRID is not an extension for {dataset_name}.")

    # Load DIS object for padding the FK table
    dis = DIS(
      [fk_data],
      [fk_table],
      dataset_name,
      None,
      exp_set.op,
      n_replicas=1,
      name=f"dat_{dataset_name}"
    )

    # Mask associated with the first (only) FK table of the layer, cast to
    # float64 before being handed to pad_fk.
    mask = tf.cast(dis.masks[0], dtype=tf.float64)
    # Despite its name, padded_fk_table is the layer's FK table *without*
    # pad_fk applied; it is the one contracted with the x-grid mask below.
    # The pad_fk result is only stored in padded_fk_dict.
    # NOTE(review): the original comment described the padding as
    # (N, x, 9) -> (N, x, 14) — confirm against DIS.pad_fk.
    padded_fk_table = dis.fktables[0]
    padded_fk_dict[dataset_name] = dis.pad_fk(dis.fktables[0], mask)

    # Boolean mask over XGRID selecting the points that belong to this
    # dataset's xgrid (the last xgrid.size entries).
    xgrid_mask = np.zeros(XGRID.size, dtype=bool)
    xgrid_mask[offset:] = True
    # Turn the boolean mask into a float64 tensor and use it to embed the
    # x axis of the FK table into the full XGRID: 'nFx' -> 'nXF'.
    xgrid_mask = tf.cast(compute_float_mask(xgrid_mask), dtype=tf.float64)
    # NOTE(review): paddedx_fk_table is overwritten at every iteration and is
    # not stored in any dictionary — confirm whether it should be kept.
    paddedx_fk_table = op.einsum('Xx, nFx -> nXF', xgrid_mask, padded_fk_table)
    xgrid_masks_dict[dataset_name] = xgrid_mask
    
    

In [13]:
# Display the x-grid mask left over from the loop above, i.e. the one built
# for the last dataset that was processed.
xgrid_mask

<tf.Tensor: shape=(50, 32), dtype=float64, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])>