In [1]:
# Preamble (system)
import sys
import os

# Guard: refuse to run on interpreters older than Python 3.
if sys.version_info.major < 3:
  raise AssertionError('Please run this notebook with Python 3.')

In [2]:
# Preamble (EMTF)
import numpy as np

from emtf_algos import *
from emtf_logger import get_logger
from emtf_colormap import get_colormap

# Preamble (notebook)
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

import collections
import functools
import itertools
import toolz

In [3]:
# Preamble (ML)
np.random.seed(2027)  # seed NumPy's global random state for reproducibility

import tensorflow as tf
tf.random.set_seed(2027)  # seed TensorFlow's global random seed with the same value

import numba
from numba import njit, vectorize
import dask
import dask.array as da

# emtf_nnet is a hard requirement of this notebook. When it cannot be
# imported, re-raise with a message that says where to get it. Only
# ImportError is intercepted (not a bare `except:`), so KeyboardInterrupt and
# unrelated errors raised while importing the package surface unchanged, and
# the original error is kept as the cause for debugging.
try:
  import emtf_nnet
except ImportError as err:
  raise ImportError('This notebook requires emtf_nnet. It can be found at '
                    'https://github.com/jiafulow/emtf-nnet') from err

# Create the notebook logger and record the software environment, so that the
# outputs below can be traced back to the versions that produced them.
logger = get_logger()
logger.info('Using cmssw      : {}'.format(os.environ.get('CMSSW_VERSION', 'n/a')))  # 'n/a' when CMSSW_VERSION is not set
logger.info('Using python     : {}'.format(sys.version.replace('\n', '')))  # newlines removed to keep one log line
logger.info('Using numpy      : {}'.format(np.__version__))
logger.info('Using matplotlib : {}'.format(matplotlib.__version__))
logger.info('Using tensorflow : {}'.format(tf.__version__))
logger.info('Using keras      : {}'.format(tf.keras.__version__))
logger.info('.. list devices  : {}'.format(tf.config.list_physical_devices()))  # devices visible to TensorFlow
logger.info('Using numba      : {}'.format(numba.__version__))
logger.info('Using dask       : {}'.format(dask.__version__))
logger.info('Using emtf-nnet  : {}'.format(emtf_nnet.__version__))

[INFO    ] Using cmssw      : CMSSW_10_6_3
[INFO    ] Using python     : 3.6.10 |Anaconda, Inc.| (default, Mar 25 2020, 23:51:54) [GCC 7.3.0]
[INFO    ] Using numpy      : 1.19.5
[INFO    ] Using matplotlib : 3.3.2
[INFO    ] Using tensorflow : 2.4.1
[INFO    ] Using keras      : 2.4.0
[INFO    ] .. list devices  : [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]
[INFO    ] Using numba      : 0.51.2
[INFO    ] Using dask       : 2021.01.1
[INFO    ] Using emtf-nnet  : 0.0.1


In [4]:
# Settings
# Everything in this cell is a module-level global read by the cells below.

# zone: (0,1,2) -> eta=(1.98..2.5, 1.55..1.98, 1.2..1.55)
zone = 0
#zone = 1
#zone = 2

# timezone: (0,1,2) -> BX=(0,-1,-2)
timezone = 0

# NOTE(review): maxevents is only logged in the cells visible here;
# -1 presumably means "no limit" -- confirm against the code that consumes it.
#maxevents = 10
maxevents = -1

# Number of workers: used for the dask scheduler below and passed to
# model.predict() in the prediction cells.
#workers = 1
workers = 8

# Input files
# load_patterns() rewrites the 'zone<N>' part of patterns_fname to load the
# pattern file of every zone, so the name must keep that substring.
patterns_fname = 'patterns_zone%i.npz' % zone
zone_images_fname = 'zone_images_zone%i.h5' % zone

# Scheduler
dask.config.set(scheduler='threads', num_workers=workers)

# Styling
plt.style.use('tdrstyle.mplstyle')
cm = get_colormap()

# Echo the chosen settings into the notebook output.
logger.info('Processing zone {} timezone {}'.format(zone, timezone))
logger.info('.. maxevents        : {}'.format(maxevents))
logger.info('.. workers          : {}'.format(workers))

[INFO    ] Processing zone 0 timezone 0
[INFO    ] .. maxevents        : -1
[INFO    ] .. workers          : 8


### Load data

In [5]:
import h5py
# Open h5py.File handles. load_zone_hits() appends every file it opens to this
# list, and a later cell closes all entries once the data has been computed.
file_handles = []


class PatternHelper:
  """Rearranges arrays loaded from the pattern files for use in the NN.

  The helper keeps no state; every method takes an array and returns the
  rearranged result.
  """

  def get_reshaped_patterns(self, patterns):
    """Selects the prompt patterns and reorders them by straightness.

    Args:
      patterns: array indexed first by pattern group, then by pattern.

    Returns:
      Entry 3 of the leading axis with its 7 patterns reordered; the original
      code documents the result shape as (7, 8, 3).
    """
    prompt_index = 3  # prompt patterns only
    straightness_order = [3, 2, 4, 1, 5, 0, 6]  # ordered by straightness
    prompt_patterns = patterns[prompt_index]
    return prompt_patterns[straightness_order]

  def get_reshaped_patt_filters(self, patt_filters):
    """Selects the prompt filters, reorders them and reverses the axis order.

    Args:
      patt_filters: array indexed first by pattern group, then by pattern;
        the selected and reordered part must be 4-dimensional.

    Returns:
      The reordered filters with axes permuted to (3, 2, 1, 0), i.e. kernel
      layout HWCD; the original code documents the shape as (1, 111, 8, 7).
    """
    prompt_index = 3  # prompt patterns only
    straightness_order = [3, 2, 4, 1, 5, 0, 6]  # ordered by straightness
    ordered_filters = patt_filters[prompt_index][straightness_order]
    return np.transpose(ordered_filters, axes=(3, 2, 1, 0))

  def get_reshaped_patt_brightness(self, patt_brightness):
    """Scales the brightness values down from 8-bit to 6-bit.

    Args:
      patt_brightness: integer array of 8-bit values.

    Returns:
      The values floor-divided by 4; the original code documents the shape as
      (256,).

    Raises:
      AssertionError: if the largest scaled value is not 63.
    """
    six_bit_brightness = patt_brightness // 4
    assert six_bit_brightness.max() == 63
    return six_bit_brightness


def load_patterns():
  """Loads the pattern arrays of every zone and stacks them per zone.

  The file name for each zone is derived from the global `patterns_fname` by
  replacing its 'zone<N>' tag (built from the global `zone`) with the zone
  number being loaded, for every zone in range(num_emtf_zones).

  Returns:
    Tuple (patterns, patt_filters, patt_brightness) of numpy arrays; the
    leading axis of each array indexes the zone.
  """
  reshaper = PatternHelper()

  per_zone_patterns = []
  per_zone_filters = []
  per_zone_brightness = []
  for z in range(num_emtf_zones):
    # Swap the zone tag of the configured file name for the current zone.
    fname = patterns_fname.replace('zone%i' % zone, 'zone%i' % z)
    logger.info('Loading from {}'.format(fname))
    with np.load(fname) as npz:
      per_zone_patterns.append(
          reshaper.get_reshaped_patterns(npz['patterns']))
      per_zone_filters.append(
          reshaper.get_reshaped_patt_filters(npz['boxes_act']))
      per_zone_brightness.append(
          reshaper.get_reshaped_patt_brightness(npz['hitmap_quality_ranks']))

  # Turn the per-zone lists into arrays with the zone as the leading axis.
  stacked = tuple(
      np.asarray(per_zone)
      for per_zone in (per_zone_patterns, per_zone_filters, per_zone_brightness))
  logger.info('patterns: {} patt_filters: {} patt_brightness: {}'.format(
      *[arr.shape for arr in stacked]))
  return stacked


def load_zone_hits(fname):
  """Opens an HDF5 file and wraps its zone datasets as dask arrays.

  The file is opened read-only and its handle is appended to the global
  `file_handles` list. It is not closed here, since the returned dask arrays
  are built on the datasets of the open file; the caller closes the handles.

  Args:
    fname: name of the HDF5 file to open.

  Returns:
    Tuple (zone_part, (zone_hits_values, zone_hits_row_splits),
    (zone_simhits_values, zone_simhits_row_splits)) of dask arrays.
  """
  logger.info('Loading from {}'.format(fname))
  h5file = h5py.File(fname, 'r')
  file_handles.append(h5file)

  def as_dask(key):
    # Wrap one dataset of the open file as a dask array.
    return da.from_array(h5file[key])

  def logged_shape(values, row_splits):
    # (len(row_splits) - 1, None) followed by the trailing dims of `values`.
    return (row_splits.shape[0] - 1, None) + values.shape[1:]

  zone_part = as_dask('zone_part')
  hits = (as_dask('zone_hits_values'), as_dask('zone_hits_row_splits'))
  simhits = (as_dask('zone_simhits_values'), as_dask('zone_simhits_row_splits'))
  logger.info('zone_part: {} zone_hits: {} zone_simhits: {}'.format(
      zone_part.shape, logged_shape(*hits), logged_shape(*simhits)))
  return (zone_part, hits, simhits)

In [6]:
# Load patterns
patterns, patt_filters, patt_brightness = load_patterns()

# Bundle the pattern arrays into a PatternBank and persist it.
pattern_bank = emtf_nnet.keras.utils.PatternBank(
    patterns=patterns, patt_filters=patt_filters, patt_brightness=patt_brightness)
emtf_nnet.keras.utils.save_pattern_bank(pattern_bank)  # write to file

# Load zone_hits
# load_zone_hits() registers the HDF5 file it opens in `file_handles`. The
# try/finally makes sure those handles are closed even when loading or
# computing raises, so a failed run does not leave the file open.
try:
  zone_part, zone_hits, zone_simhits = load_zone_hits(zone_images_fname)

  # NOTE(review): zone_simhits is not computed here, so it stays a dask array
  # built on a file that is closed below; presumably it cannot be computed
  # afterwards -- add it to da.compute() if it is needed later.
  zone_part, zone_hits = da.compute(zone_part, zone_hits)  # lazy no more
finally:
  # Close files
  for file_handle in file_handles:
    file_handle.close()

[INFO    ] Loading from patterns_zone0.npz
[INFO    ] Loading from patterns_zone1.npz
[INFO    ] Loading from patterns_zone2.npz
[INFO    ] patterns: (3, 7, 8, 3) patt_filters: (3, 1, 111, 8, 7) patt_brightness: (3, 256)
[INFO    ] Loading from zone_images_zone0.h5
[INFO    ] zone_part: (652591, 9) zone_hits: (652591, None, 18) zone_simhits: (652591, None, 18)


### Create inputs

In [7]:
# Configure
from emtf_nnet.architecture import endless_v3

tf.config.optimizer.set_jit(True)  # turn on TensorFlow's JIT (XLA) compilation
#tf.config.threading.set_inter_op_parallelism_threads(32)
#tf.config.threading.set_intra_op_parallelism_threads(32)

# NOTE(review): the file name below is not derived from the earlier
# save_pattern_bank(pattern_bank) call, which is given no path; presumably
# that call writes 'pattern_bank.json' by default -- confirm.
loaded_pattern_bank = emtf_nnet.keras.utils.load_pattern_bank('pattern_bank.json')  #FIXME: currently hardcoded
endless_v3.set_pattern_bank(loaded_pattern_bank)

# Create the endless_v3 configuration and register it with the module.
config = endless_v3.configure()
endless_v3.set_config(config)

In [8]:
# Create inputs
batch_size = 1024

# Take the first batch of each generator as test input. Failures are
# re-raised as ValueError, as before, but `except Exception` (rather than a
# bare `except:`) lets KeyboardInterrupt/SystemExit pass through, and
# `from err` keeps the original error attached as the cause.
datagen_sparse = endless_v3.get_datagen_sparse(zone_hits, batch_size=batch_size)
try:
  x_test_sparse = datagen_sparse[0]
except Exception as err:
  raise ValueError('Fail to get data from datagen_sparse.') from err

datagen = endless_v3.get_datagen(zone_hits, batch_size=batch_size)
try:
  x_test = datagen[0]
except Exception as err:
  raise ValueError('Fail to get data from datagen.') from err

# Sanity checks: the sparse batch is a list with one entry per event, the
# dense batch is a 4-D array with one row per event.
assert isinstance(x_test_sparse, list) and len(x_test_sparse) == batch_size
assert isinstance(x_test, np.ndarray) and len(x_test) == batch_size and x_test.ndim == 4

In [9]:
# Debug
# Array-to-string formatter shared by the debug printouts: integers are
# rendered with a fixed '% 4i' format and separated by ', '.
my_array2string = functools.partial(
    np.array2string,
    separator=', ',
    formatter={'int': lambda value: '% 4i' % value},
    max_line_width=100,
    threshold=1000,
)

# Print the sparse inputs of a few sample events.
for event_index in (0, 2, 5):
  print(my_array2string(x_test_sparse[event_index]))

[[   2,    0,  2548,    5,   18,   17,    6,    6,    0,    4,    4,    0,    0,    0,    1],
 [  19,    0,  2684,    2,   16,   16,    6,    6,    0,    4,    4,    1,    0,    0,    1],
 [  28,    0,  2819,   15,   17,   16,    5,    5,    0,    4,    4,    0,    0,    0,    1],
 [  28,    1,  2728,    0,   16,   17,    5,    5,    0,    4,    4,    0,    0,    0,    1],
 [  37,    0,  2736,    0,   16,   16,    5,    5,    0,    4,    4,    0,    0,    0,    1],
 [  55,    0,  2505,    0,   18,   18,    2,    2,    0,    4,    4,    1,    0,    0,    1],
 [  73,    0,  2675,    0,   19,   19,    2,    2,    0,    4,    4,    1,    0,    0,    1],
 [  82,    0,  2888,    0,   17,   17,    1,    1,    1,    4,    4,    0,    0,    0,    1],
 [  82,    1,  2714,    0,   17,   17,    5,    5,    1,    4,    4,    0,    0,    0,    1],
 [  91,    0,  2737,    0,   17,   17,    1,    1,    0,    4,    4,    0,    0,    0,    1],
 [ 109,    0,  2479,   15,   17,   17,    6,    6,    0,    

In [10]:
# Debug
def isvalid(x):
  """Returns a boolean mask that is True where x[..., -1] (the valid flag) is non-zero."""
  return x[..., -1] != 0

print(x_test.shape)
# Print only the valid rows of a few sample events.
for event_index in (0, 2, 5):
  event = x_test[event_index]
  print(my_array2string(event[isvalid(event)]))

(1024, 115, 2, 13)
[[ 2548,    5,   18,   17,    6,    6,    0,    4,    4,    0,    0,    0,    1],
 [ 2684,    2,   16,   16,    6,    6,    0,    4,    4,    1,    0,    0,    1],
 [ 2819,   15,   17,   16,    5,    5,    0,    4,    4,    0,    0,    0,    1],
 [ 2728,    0,   16,   17,    5,    5,    0,    4,    4,    0,    0,    0,    1],
 [ 2736,    0,   16,   16,    5,    5,    0,    4,    4,    0,    0,    0,    1],
 [ 2505,    0,   18,   18,    2,    2,    0,    4,    4,    1,    0,    0,    1],
 [ 2675,    0,   19,   19,    2,    2,    0,    4,    4,    1,    0,    0,    1],
 [ 2888,    0,   17,   17,    1,    1,    1,    4,    4,    0,    0,    0,    1],
 [ 2714,    0,   17,   17,    5,    5,    1,    4,    4,    0,    0,    0,    1],
 [ 2737,    0,   17,   17,    1,    1,    0,    4,    4,    0,    0,    0,    1],
 [ 2479,   15,   17,   17,    6,    6,    0,    4,    4,    1,    0,    0,    1]]
[[ 4643,    0,   12,   12,    4,    4,    0,    4,    4,    1,    0,    0,    1

### Create model

In [11]:
# Create model
# Build the model through endless_v3.create_model() and print its layer summary.
model = endless_v3.create_model()
model.summary()

Model: "endless_v3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inputs (InputLayer)             [(None, 115, 2, 13)] 0                                            
__________________________________________________________________________________________________
zoning_0 (Zoning)               (None, 8, 288, 1)    0           inputs[0][0]                     
__________________________________________________________________________________________________
zoning_1 (Zoning)               (None, 8, 288, 1)    0           inputs[0][0]                     
__________________________________________________________________________________________________
zoning_2 (Zoning)               (None, 8, 288, 1)    0           inputs[0][0]                     
_________________________________________________________________________________________

In [12]:
# Make predictions
outputs = model.predict(x_test, workers=workers, use_multiprocessing=False)  # now wait...
# Log shape and dtype of every output array. A model with several outputs may
# hand them back as a tuple or as a list, so both are treated as a sequence;
# a single array is logged directly.
if isinstance(outputs, (tuple, list)):
  for output in outputs:
    logger.info('outputs: {} dtype: {}'.format(output.shape, output.dtype))
else:
  logger.info('outputs: {} dtype: {}'.format(outputs.shape, outputs.dtype))



[INFO    ] outputs: (1024, 4, 40) dtype: int32
[INFO    ] outputs: (1024, 4, 12) dtype: int32
[INFO    ] outputs: (1024, 4, 1) dtype: bool


In [13]:
# Debug: dump the raw prediction outputs (numpy abbreviates large arrays with '...').
print(outputs)

(array([[[  -116, 999999,     20, ...,     16,     63,      0],
        [999999, 999999, 999999, ..., 999999, 999999, 999999],
        [999999, 999999, 999999, ..., 999999, 999999, 999999],
        [999999, 999999, 999999, ..., 999999, 999999, 999999]],

       [[  -251, 999999,     -3, ...,      7,     61,      0],
        [999999, 999999, 999999, ..., 999999, 999999, 999999],
        [999999, 999999, 999999, ..., 999999, 999999, 999999],
        [999999, 999999, 999999, ..., 999999, 999999, 999999]],

       [[   123, 999999,      8, ...,     11,     63,      0],
        [999999, 999999, 999999, ..., 999999, 999999, 999999],
        [999999, 999999, 999999, ..., 999999, 999999, 999999],
        [999999, 999999, 999999, ..., 999999, 999999, 999999]],

       ...,

       [[  -186, 999999, 999999, ...,     18,     62,      0],
        [999999, 999999, 999999, ..., 999999, 999999, 999999],
        [999999, 999999, 999999, ..., 999999, 999999, 999999],
        [999999, 999999, 999999, ..

In [14]:
# Debug: for a few sample events, print entry 0 along the second axis of the
# first and of the second output array.
for event_index in (0, 2, 5):
  print(outputs[0][event_index, 0])
  print(outputs[1][event_index, 0])

[  -116 999999     20     64     72 999999 999999     50     73   -159
     11   -185      1 999999      0      0      0 999999 999999      1
      1      2      3      1      5 999999      2      0      0     15
      6 999999      6      5      5      6    -80     16     63      0]
[  4 230  30  53  74 230 230 125 146 168 182 190]
[   123 999999      8    -17    -35 999999 999999    -37    -37 999999
     27    141      1 999999      0      0      0 999999 999999     -1
      0 999999      1      2      0 999999      0      1     -2     -6
      4 999999      5      5      6      6   1776     11     63      0]
[ 10 230  32  54  76 230 230 126 148 230 184 198]
[   381 999999      8    -41 999999 999999 999999    -46     -9    417
 999999    461      3 999999      1      0 999999 999999 999999      1
      0      4 999999      4    -10 999999     -3      1 999999    -28
      5 999999      4      5 999999      6    240     14     63      0]
[  7 230  30  52 230 230 230 124 146 172 230 

### Evaluate model

In [15]:
%%time

outputs = model.predict(datagen, workers=workers, use_multiprocessing=False)  # now wait...
if isinstance(outputs, tuple):
  for i in range(len(outputs)):
    logger.info('outputs: {} dtype: {}'.format(outputs[i].shape, outputs[i].dtype))
else:
  logger.info('outputs: {} dtype: {}'.format(outputs.shape, outputs.dtype))

[INFO    ] outputs: (652591, 4, 40) dtype: int32
[INFO    ] outputs: (652591, 4, 12) dtype: int32
[INFO    ] outputs: (652591, 4, 1) dtype: bool


CPU times: user 35min 11s, sys: 5min 29s, total: 40min 41s
Wall time: 7min 1s
