In [1]:
import numpy as np
import os
import pandas as pd

In [2]:
from network_models import train_neural_network, inference_step

Using TensorFlow backend.


In [3]:
from network_models import TZnet_regression_cosz

In [4]:
from generators import data_generator, metadata_generator, get_n_iterations, get_class_weights

In [5]:
# Relative directory from which the Xy_train / Xy_test / Xy_val / Xy_indx
# file paths below are built.
train_test_dir = os.path.join("cosz")


def _numbered_paths(directory, template, count=100):
    """Return ``count`` paths: ``template`` filled with 1..count, joined to ``directory``."""
    return [os.path.join(directory, template.format(k)) for k in range(1, count + 1)]


fnames_train = _numbered_paths(train_test_dir, "Xy_train{}_sel5_doms.npz")
fnames_test = _numbered_paths(train_test_dir, "Xy_test{}_sel5_doms.npz")
fnames_val = _numbered_paths(train_test_dir, "Xy_val{}_sel5_doms.npz")
index_filelist = _numbered_paths(train_test_dir, "Xy_indx{}_sel5_doms.npz")

# Directory from which the numu / nue "multi_data" file paths are built.
dir_xy = "/data/km3net/Xy_multi_data_files"
# One (numu path, nue path) tuple per file number.
xy_filelist = list(zip(_numbered_paths(dir_xy, "Xy_numu_{}_multi_data.npz"),
                       _numbered_paths(dir_xy, "Xy_nue_{}_multi_data.npz")))

# Key list passed to metadata_generator in the inference cell.
metadata_keylist = ["E", "dirx", "diry", "dirz", "posx", "posy", "posz", "dist"]

## Training Step

In [6]:
# Number of files taken from each split, and the mini-batch size.
n_files = 100
batch_size = 64


def _count_and_report(file_list):
    """Call get_n_iterations on the first ``n_files`` entries, print and return the pair."""
    n_steps, n_evts = get_n_iterations(file_list[:n_files], batch_size=batch_size)
    print(n_steps, n_evts)
    return n_steps, n_evts


# Same order as before: training, validation, test.
steps_per_epoch, n_events = _count_and_report(fnames_train)
validation_steps, n_evts_val = _count_and_report(fnames_val)
prediction_steps, n_evts_test = _count_and_report(fnames_test)

(2588, 165610)
(648, 41451)
(810, 51818)


In [12]:
from keras.utils import to_categorical
from keras import backend as K

def process_cosz(y):
    """Turn a continuous target array into two-class one-hot labels by sign.

    Entries strictly greater than 0 become class 1; all other entries become
    class 0.

    Parameters
    ----------
    y : array-like
        Target values. The input is NOT modified (the previous version
        overwrote the caller's array in place).

    Returns
    -------
    The result of ``to_categorical`` with ``num_classes=2``, so the class axis
    always has two entries even when a batch contains only one of the classes.
    """
    # Build the 0/1 labels in a new array instead of assigning into ``y``.
    labels = (np.asarray(y) > 0).astype(int)
    # Fix the class count explicitly: inferring it from the data would give a
    # single column for a batch whose labels are all 0.
    return to_categorical(labels, num_classes=2)

def get_TZ_only(X):
    """Sum ``X`` over axes 2 and 3 and insert a singleton channel axis.

    The new axis goes to position 1 when the Keras backend reports
    ``"channels_first"``; otherwise it is appended as the last axis.
    """
    summed = np.sum(X, axis=(2, 3))
    channel_axis = 1 if K.image_data_format() == "channels_first" else -1
    return np.expand_dims(summed, axis=channel_axis)

In [8]:
# Generator over the first n_files training files.
# fdata=get_TZ_only and an identity ftarget are handed to data_generator;
# presumably they are applied to each batch's inputs and targets respectively
# -- confirm in generators.py.
training_generator = data_generator(fnames_train[:n_files], batch_size=batch_size, 
                                    fdata=get_TZ_only, ftarget=lambda y: y)

In [9]:
# Generator over the first n_files validation files, configured with the same
# batch size, fdata and (identity) ftarget as the training generator.
validation_generator = data_generator(fnames_val[:n_files], batch_size=batch_size,
                                     fdata=get_TZ_only, ftarget=lambda y: y)

In [None]:
# Build a new TZnet_regression_cosz model; it is passed to train_neural_network next.
# NOTE(review): this cell carries no execution count (In [None]) while the
# surrounding cells do -- re-run the notebook top to bottom to confirm the
# saved outputs correspond to this code.
model = TZnet_regression_cosz()

In [20]:
# Train `model` using the training/validation generators and their step counts.
# NOTE(review): the saved log stops at "Epoch 30/100"; presumably an
# early-stopping callback inside train_neural_network ended the run -- confirm.
# NOTE(review): log_suffix presumably ends up in the name of the weights file
# loaded in the inference section -- verify against network_models.
train_neural_network(model, training_generator, steps_per_epoch, validation_generator, validation_steps,
                     batch_size=batch_size, log_suffix="regression_cosz")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100


<keras.callbacks.History at 0x7f5fb97f2b10>

## Inference Step

In [10]:
# Re-create the network and restore its weights from disk, so inference does
# not depend on the training cell having run in this kernel session.
# NOTE(review): the weights path is hard-coded; presumably this file was
# written by train_neural_network above -- confirm before relying on it.
model = TZnet_regression_cosz()
model.load_weights('./model/tz_net_regression_64_100_regression_cosz.hdf5')  # weights for the TZnet_regression_cosz model built above
# Print the layer-by-layer summary as a sanity check of the restored model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
tz_input (InputLayer)        (None, 75, 18, 1)         0         
_________________________________________________________________
tz_block1_conv1 (Conv2D)     (None, 75, 18, 32)        4640      
_________________________________________________________________
tz_block1_conv2 (Conv2D)     (None, 75, 18, 32)        147488    
_________________________________________________________________
tz_block1_pool (AveragePooli (None, 38, 9, 32)         0         
_________________________________________________________________
tz_block2_conv1 (Conv2D)     (None, 38, 9, 64)         294976    
_________________________________________________________________
tz_block2_conv2 (Conv2D)     (None, 38, 9, 64)         589888    
_________________________________________________________________
tz_block2_pool (AveragePooli (None, 19, 5, 64)         0         
__________

In [26]:
from tqdm import tqdm

In [27]:
# Run the loaded model over the test files batch by batch, collecting the true
# targets, the predictions and the metadata frame of every batch.
# After this cell:
#   y_true, y_pred : 1-D arrays with one entry per test event
#   metadata       : the concatenated metadata batches (None if there were no steps)

# Use the shared `batch_size` (previously a literal 64 here) so the step count
# always matches the generator created below.
predict_steps, n_test_events = get_n_iterations(fnames_test[:n_files], batch_size=batch_size)
print(predict_steps, n_test_events)

metadata_gen = metadata_generator(index_filelist, xy_filelist, metadata_keylist)
data_gen = data_generator(fnames_test[:n_files], batch_size=batch_size,
                          fdata=get_TZ_only, ftarget=lambda y: y)

# Collect per-batch pieces in lists and join them once after the loop;
# concatenating the DataFrame inside the loop re-copies all previous rows on
# every iteration.
y_true_batches = []
y_pred_batches = []
metadata_batches = []

for _ in tqdm(range(predict_steps)):
    ZT_batch, y_batch_true = next(data_gen)
    metadata_batches.append(next(metadata_gen))
    y_batch_pred = model.predict_on_batch(ZT_batch)
    # Flatten both sides so targets and predictions line up element by element.
    y_true_batches.append(np.ravel(y_batch_true))
    y_pred_batches.append(np.ravel(y_batch_pred))

metadata = pd.concat(metadata_batches) if metadata_batches else None

# Concatenate the list of 1-D batches directly. Wrapping it in np.asarray first
# builds a ragged object array whenever the last batch is shorter, which newer
# NumPy versions reject.
y_true = np.concatenate(y_true_batches) if y_true_batches else np.array([])
y_pred = np.concatenate(y_pred_batches) if y_pred_batches else np.array([])


  0%|          | 0/810 [00:00<?, ?it/s]

(810, 51818)


100%|██████████| 810/810 [02:26<00:00,  5.53it/s]


In [28]:
from sklearn.metrics import mean_squared_error, r2_score

# Mean squared error between true and predicted targets over all test events.
mean_squared_error(y_true, y_pred)

0.3267446726003822

In [29]:
# Coefficient of determination over all test events.
# NOTE(review): the saved value is slightly negative, i.e. the predictions fit
# worse than a constant predictor equal to mean(y_true) -- this deserves a
# sentence of interpretation next to the result.
r2_score(y_true, y_pred)

-0.020188022699421548

In [31]:
# Positions of the test events whose true target is strictly positive,
# and the true targets at those positions.
index_pos = np.flatnonzero(y_true > 0)
y_true_pos = y_true[index_pos]

In [34]:
# Predictions at the same positions as y_true_pos (true target > 0).
y_pred_pos = y_pred[index_pos]

In [37]:
# Mean squared error restricted to events whose true target is > 0.
mean_squared_error(y_true_pos, y_pred_pos)

0.21205295561802565

In [38]:
# Complementary subset: events whose true target is <= 0.
index_neg = np.flatnonzero(y_true <= 0)
y_true_neg = y_true[index_neg]
y_pred_neg = y_pred[index_neg]

# Cell output: mean squared error restricted to that subset.
mean_squared_error(y_true_neg, y_pred_neg)

0.46285880430943993