In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from emtf_algos import *

In [2]:
import sys
import os
# Select the Keras backend through the environment. This assignment is made
# before the `import keras` statement below.
os.environ['KERAS_BACKEND'] = 'tensorflow'

# Seed NumPy's global random number generator.
np.random.seed(2027)

from collections import OrderedDict

import tensorflow as tf
import keras
from keras import backend as K
import matplotlib as mpl
import matplotlib.pyplot as plt

# Seed TensorFlow with the same value used for NumPy above.
tf.set_random_seed(2027)

# Create the notebook-wide logger, then report the software environment
# (CMSSW release if set, interpreter, and library versions).
logger = get_logger()
logger.info('Using cmssw      : {0}'.format(os.environ.get('CMSSW_VERSION', 'n/a')))
logger.info('Using python     : {0}'.format(''.join(sys.version.split('\n'))))
logger.info('Using numpy      : {0}'.format(np.__version__))
logger.info('Using tensorflow : {0}'.format(tf.__version__))
logger.info('Using keras      : {0}'.format(keras.__version__))
logger.info('.. list devices  : {0}'.format(K.get_session().list_devices()))
logger.info('Using matplotlib : {0}'.format(mpl.__version__))

%matplotlib inline

Using TensorFlow backend.
[INFO    ] Using cmssw      : CMSSW_10_6_3
[INFO    ] Using python     : 2.7.14+ (default, Apr 19 2019, 15:48:06) [GCC 7.4.1 20190129]
[INFO    ] Using numpy      : 1.16.2
[INFO    ] Using tensorflow : 1.6.0
[INFO    ] Using keras      : 2.2.4
[INFO    ] .. list devices  : [_DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 268435456)]
[INFO    ] Using matplotlib : 2.2.4


In [3]:
# Settings

# Zone to process: compared against the 'part_zone' column to select events
# and passed to find_emtf_zo_layer() when the layer lookup table is built.
zone = 0

# Maximum number of events. In the visible cells this value is only logged.
# NOTE(review): -1 presumably means "no limit" -- confirm where it is consumed.
#maxevents = 200000
maxevents = -1

# Input files (NumPy .npz archives).
# `signal` is the archive read by load_signal(); the other two are not loaded
# by any active code in the visible cells.
signal = 'signal_add.20200716.npz'
signal_displ = 'signal_displ_add.20200716.npz'
bkgnd = 'bkgnd_add.20200716.npz'

# Input data columns
# Ordered mapping from particle column name to its column index.
part_metadata = OrderedDict(
    (column_name, column_index)
    for column_index, column_name in enumerate((
        'part_invpt', 'part_eta', 'part_phi',
        'part_vx', 'part_vy', 'part_vz',
        'part_d0', 'part_sector', 'part_zone',
    ))
)
#print(part_metadata)

# Ordered mapping from hit column name to its column index.
hits_metadata = OrderedDict(
    (column_name, column_index)
    for column_index, column_name in enumerate((
        'emtf_layer', 'ri_layer', 'zones', 'timezones',
        'emtf_chamber', 'emtf_segment', 'detlayer', 'bx',
        'emtf_phi', 'emtf_bend', 'emtf_theta', 'emtf_theta_alt',
        'emtf_qual', 'emtf_time', 'fr', 'rsvd',
    ))
)
#print(hits_metadata)

# Image format: each image has shape (num_rows, num_cols, num_channels).
num_channels = 1
num_cols = 300
num_rows = 8
image_format = (num_rows, num_cols, num_channels)

# Parameter count. In the visible cells this value is only logged.
# NOTE(review): presumably the number of quantities to be predicted -- confirm.
num_params = 5

# q/pT and d0 binning (currently disabled)
#invpt_bins = np.array([-0.5, -0.3, -0.1, 0.1, 0.3, 0.5])
#dzero_bins = np.array([-150., -90., -30., 30., 90., 150.])

# Record the active configuration in the log.
logger.info('Processing zone {0}'.format(zone))
logger.info('.. maxevents    : {0}'.format(maxevents))
logger.info('.. image_format : {0}'.format(image_format))
logger.info('.. num_params   : {0}'.format(num_params))

[INFO    ] Processing zone 0
[INFO    ] .. maxevents    : -1
[INFO    ] .. image_format : (8, 300, 1)
[INFO    ] .. num_params   : 5


### Load data

In [4]:
def load_signal(f):
  """Load the signal sample stored in the ``.npz`` archive ``f``.

  Args:
    f: file name (or file-like object) accepted by ``np.load``.

  Returns:
    A tuple ``(out_part, out_hits, out_simhits)``: the ``out_part`` array as
    stored, and two ``RaggedTensorValue`` objects built from the
    ``*_values`` / ``*_row_splits`` array pairs for hits and sim-hits.
  """
  logger.info('Loading from {0}'.format(f))
  with np.load(f) as archive:
    particles = archive['out_part']
    hits = RaggedTensorValue(archive['out_hits_values'],
                             archive['out_hits_row_splits'])
    simhits = RaggedTensorValue(archive['out_simhits_values'],
                                archive['out_simhits_row_splits'])
    # Log the shapes of everything that was read.
    shapes = (particles.shape, hits.shape, simhits.shape)
    logger.info('out_part: {0} out_hits: {1} out_simhits: {2}'.format(*shapes))
  return particles, hits, simhits

def load_bkgnd(f):
  """Load the background sample stored in the ``.npz`` archive ``f``.

  Args:
    f: file name (or file-like object) accepted by ``np.load``.

  Returns:
    A tuple ``(out_aux, out_hits)``: the ``out_aux`` array as stored, and a
    ``RaggedTensorValue`` built from ``out_hits_values`` and
    ``out_hits_row_splits``.
  """
  logger.info('Loading from {0}'.format(f))
  with np.load(f) as archive:
    aux = archive['out_aux']
    hits = RaggedTensorValue(archive['out_hits_values'],
                             archive['out_hits_row_splits'])
    # Log the shapes of everything that was read.
    shapes = (aux.shape, hits.shape)
    logger.info('out_aux: {0} out_hits: {1}'.format(*shapes))
  return aux, hits

In [5]:
# Load the signal sample: particle array plus ragged hits and sim-hits.
part_data, hits_data, simhits_data = load_signal(signal)

# Background loading is currently disabled, so `bkg_aux_data` and
# `bkg_hits_data` are not defined by this cell.
#bkg_aux_data, bkg_hits_data = load_bkgnd(bkgnd)

[INFO    ] Loading from signal_add.20200716.npz
[INFO    ] out_part: (2000000, 9) out_hits: (2000000, None, 16) out_simhits: (2000000, None, 16)


In [6]:
# Debug: print the particle row, hits and sim-hits of the first 5 loaded events.
# The print options only apply inside the `with` block.
with np.printoptions(linewidth=100, threshold=1000):
  for i in range(5):
    print('Event {0}'.format(i))
    print(part_data[i])
    print(hits_data[i])
    print(simhits_data[i])

Event 0
[-2.5611117e-01  2.1183386e+00 -6.1635029e-01  6.7432784e-04  9.9125458e-04 -4.6484246e+00
 -1.1986600e-03  5.0000000e+00  0.0000000e+00]
[[   0    0    4    3    2    0    0    0 2548    5   18   17   -6    0    0  -99]
 [   0    0    4    3    2    1    0    0 2548    5   17   18   -6    0    0  -99]
 [   2    3    4    3   19    0    0    0 2684    2   16   16    6    0    1  -99]
 [   3    5    4    3   28    0    0    0 2819   15   17   16   -5    0    0  -99]
 [   3    5    4    3   28    1    0    0 2728    0   16   17   -5    0    0  -99]
 [   4    7    4    3   37    0    0    0 2736    0   16   16   -5    0    0  -99]
 [   9    9    4    2   55    0    0    0 2505    0   18   18    0    0    1  -99]
 [  10   12    4    2   73    0    0    0 2675    0   19   19    0    0    1  -99]
 [   7   14    4    2   82    0    0    0 2888    0   17   17    0    1    0  -99]
 [   7   14    4    2   82    1    0    0 2714    0   17   17    0    1    0  -99]
 [   8   16    4    2   

In [7]:
# Debug: print the first 10 background events when the background sample has
# been loaded. The background arrays are optional (their loading may be
# commented out), so a missing `bkg_aux_data` / `bkg_hits_data` is reported and
# skipped. Only NameError is caught: a bare `except` would also hide genuine
# failures (and, on Python 2, KeyboardInterrupt).
try:
  with np.printoptions(linewidth=100, threshold=1000):
    for i in range(10):
      print('Event {0}'.format(i))
      print(bkg_aux_data[i])
      print(bkg_hits_data[i])
except NameError as err:
  print('Skipping background debug printout: {0}'.format(err))

Event 0


### Zone selection

In [8]:
# Take the 'part_zone' column of every event, cast to int32.
part_zone = part_data[:, part_metadata['part_zone']].astype(np.int32)

# Boolean mask over events: True where the event's zone equals the configured `zone`.
zone_mask = (part_zone == zone)

In [9]:
# Apply the event mask to the dense particle array and to both ragged hit
# collections. NOTE(review): ragged_row_boolean_mask presumably keeps the rows
# where the mask is True -- confirm against its definition.
zone_part_data = part_data[zone_mask]
zone_hits_data = ragged_row_boolean_mask(hits_data, zone_mask)
zone_simhits_data = ragged_row_boolean_mask(simhits_data, zone_mask)

# Log the shapes after selection.
logger.info('zone_part_data: {0} zone_hits_data: {1} zone_simhits_data: {2}'.format(zone_part_data.shape, zone_hits_data.shape, zone_simhits_data.shape))

[INFO    ] zone_part_data: (802402, 9) zone_hits_data: (802402, None, 16) zone_simhits_data: (802402, None, 16)


In [10]:
# Debug: print the particle row, hits and sim-hits of the first 5 events that
# passed the zone selection. The print options only apply inside the `with` block.
with np.printoptions(linewidth=100, threshold=1000):
  for i in range(5):
    print('Event {0}'.format(i))
    print(zone_part_data[i])
    print(zone_hits_data[i])
    print(zone_simhits_data[i])

Event 0
[-2.5611117e-01  2.1183386e+00 -6.1635029e-01  6.7432784e-04  9.9125458e-04 -4.6484246e+00
 -1.1986600e-03  5.0000000e+00  0.0000000e+00]
[[   0    0    4    3    2    0    0    0 2548    5   18   17   -6    0    0  -99]
 [   0    0    4    3    2    1    0    0 2548    5   17   18   -6    0    0  -99]
 [   2    3    4    3   19    0    0    0 2684    2   16   16    6    0    1  -99]
 [   3    5    4    3   28    0    0    0 2819   15   17   16   -5    0    0  -99]
 [   3    5    4    3   28    1    0    0 2728    0   16   17   -5    0    0  -99]
 [   4    7    4    3   37    0    0    0 2736    0   16   16   -5    0    0  -99]
 [   9    9    4    2   55    0    0    0 2505    0   18   18    0    0    1  -99]
 [  10   12    4    2   73    0    0    0 2675    0   19   19    0    0    1  -99]
 [   7   14    4    2   82    0    0    0 2888    0   17   17    0    1    0  -99]
 [   7   14    4    2   82    1    0    0 2714    0   17   17    0    1    0  -99]
 [   8   16    4    2   

### Build images

In [11]:
from numba import njit

# Column indices of the hit fields, looked up once from hits_metadata.
hits_col_ri_layer = hits_metadata['ri_layer']
hits_col_zones = hits_metadata['zones']
hits_col_timezones = hits_metadata['timezones']
hits_col_emtf_phi = hits_metadata['emtf_phi']

# int32 lookup table: entry k holds find_emtf_zo_layer(k, zone) for k = 0..18.
# NOTE(review): 19 is presumably the number of distinct ri_layer values -- confirm.
zo_layer_lut = np.array([find_emtf_zo_layer(ri_layer, zone) for ri_layer in range(19)], dtype=np.int32)

@njit
def build_images_fn():
  """Placeholder for the numba-compiled image builder; currently does nothing."""
  return None

def build_images(data):
  """Allocate the boolean image array for the zone-selected events.

  The per-event filling is not implemented yet, so every image is returned
  all-False.

  Args:
    data: per-event hit collection intended to be drawn into the images.
      It is not read yet.

  Returns:
    Boolean array of shape ``(zone_part_data.shape[0],) + image_format``.
  """
  # NOTE(review): the number of images comes from the module-level
  # `zone_part_data`, not from `data` -- confirm that this is intended.
  # The builtin `bool` replaces the `np.bool` alias, which was deprecated in
  # NumPy 1.20 and removed in 1.24; both name the same dtype on older releases.
  images = np.zeros((zone_part_data.shape[0],) + image_format, dtype=bool)

  # TODO: fill images[i] from data[i]; the loop body is still a stub.
  for i in range(images.shape[0]):
    pass

  return images

# Build the image array, passing the zone-selected sim-hits as input.
images = build_images(zone_simhits_data)


