In [1]:
import csv
import keras

import keras.backend as K
import math
import matplotlib.pyplot as plt
import numpy as np


import tensorflow as tf

import time
import pandas as pd
import seaborn as sns

from keras import optimizers, metrics
from keras.layers import Dense, LayerNormalization, BatchNormalization, Dropout, GaussianNoise
from keras.models import load_model
from tools.constants import column_labels_mass_reco
from tools.constants import column_labels_particle_assignment as column_names

In [2]:
""" Loss function. """
# source: https://stackoverflow.com/questions/46009619/keras-weighted-binary-crossentropy
def create_weighted_binary_crossentropy(ones_weights,zeros_weights):
    """Build a binary cross-entropy loss with separate weights for the two label values.

    Args:
        ones_weights: factor applied to the cross-entropy where ``y_true`` is 1.
        zeros_weights: factor applied to the cross-entropy where ``y_true`` is 0.

    Returns:
        A function ``weighted_binary_crossentropy(y_true, y_pred)`` that returns
        the mean of the element-wise weighted cross-entropy.
    """

    def weighted_binary_crossentropy(y_true, y_pred):
        # Per-element weight: `ones_weights` scaled by y_true plus
        # `zeros_weights` scaled by (1 - y_true).
        positive_part = y_true * ones_weights
        negative_part = (1. - y_true) * zeros_weights
        element_weights = positive_part + negative_part

        # Scale the plain cross-entropy and average over all elements.
        return K.mean(element_weights * K.binary_crossentropy(y_true, y_pred))

    return weighted_binary_crossentropy

In [3]:
""" Metrics """
# source: https://neptune.ai/blog/implementing-the-macro-f1-score-in-keras
def custom_f1(y_true, y_pred):
    """F1 score computed from rounded labels and rounded predictions.

    All counts are summed over every element of the tensors passed in.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted scores; clipped to [0, 1] and rounded before counting.

    Returns:
        ``2 * precision * recall / (precision + recall)``, with ``K.epsilon()``
        added to each denominator to avoid division by zero.
    """
    eps = K.epsilon()

    # Counts after clipping to [0, 1] and rounding.
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))

    precision = true_positives / (predicted_positives+eps)
    recall = true_positives / (actual_positives+eps)

    harmonic_ratio = (precision*recall)/(precision+recall+eps)
    return 2*harmonic_ratio

# source: https://stackoverflow.com/questions/39895742/matthews-correlation-coefficient-with-keras
def matthews_correlation(y_true, y_pred):
    """Matthews correlation coefficient from rounded labels and predictions.

    Both inputs are clipped to [0, 1] and rounded, the four confusion-matrix
    counts are summed over every element, and the result is
    ``(tp*tn - fp*fn) / sqrt((tp+fp)(tp+fn)(tn+fp)(tn+fn))`` with
    ``K.epsilon()`` added to the denominator.
    """
    # Hard 0/1 decisions and their complements.
    predicted_positive = K.round(K.clip(y_pred, 0, 1))
    predicted_negative = 1 - predicted_positive
    actual_positive = K.round(K.clip(y_true, 0, 1))
    actual_negative = 1 - actual_positive

    # Confusion-matrix counts.
    tp = K.sum(actual_positive * predicted_positive)
    tn = K.sum(actual_negative * predicted_negative)
    fp = K.sum(actual_negative * predicted_positive)
    fn = K.sum(actual_positive * predicted_negative)

    covariance = (tp * tn - fp * fn)
    scale = K.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))

    # Epsilon keeps the ratio finite when any marginal count is zero.
    return covariance / (scale + K.epsilon())

# Metric set: built-in Keras precision / recall / ROC-AUC / PR-AUC objects,
# followed by the two custom metric functions defined earlier in this cell.
# NOTE(review): METRICS is not referenced by any of the visible cells.
METRICS = [
      keras.metrics.Precision(name='precision'),
      keras.metrics.Recall(name='recall'),
      keras.metrics.AUC(name='auc'),
      keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
      matthews_correlation,
      custom_f1,
]

2022-01-04 13:40:41.605733: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-01-04 13:40:41.606458: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-01-04 13:40:41.610432: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [4]:
# Load the trained model for inference.
#
# compile=False skips restoring the training configuration (loss, optimizer),
# which model.predict() does not need. The earlier call registered the factory
# `create_weighted_binary_crossentropy` under the loss name
# 'weighted_binary_crossentropy'. The factory takes (ones_weights, zeros_weights)
# and returns a function, so it is not a valid (y_true, y_pred) loss.
#
# To evaluate or continue training, compile the model explicitly with
# `create_weighted_binary_crossentropy(ones_weights, zeros_weights)` using the
# weights from training.
model = load_model(
    "models/particle_assignment_model_real",
    custom_objects={
        'custom_f1': custom_f1,
        'matthews_correlation': matthews_correlation,
    },
    compile=False,
)

In [9]:
""" Same as for narrow selection, but this time the wide selection ttW and tt is used. """

# One output row per distinct id (column 0 of the input files): the candidate
# row whose predicted scores have the largest product, followed by those scores.
data = []

# The scaler parameters do not depend on the input file, so read them once.
# Row 0 is subtracted from the features and row 1 divides them (see X_test below).
with open("scaler_params/reduced_variables_scaler.csv") as f:
    scaler_params = np.loadtxt(f, delimiter=",")
    scaler = scaler_params

# Placeholders that this cell never reads or updates. They are kept only
# because later cells (not visible here) might still reference the names.
test_ones_diff = 0
test_samples_count = 0
all_best_ys = []
all_best_possible_ys = []
y_pred_best = []
y_true_best = []

for file_name in ["mc15","mc16","mc17","mc18"]:
    with open("data/{}.csv".format(file_name)) as f:
        # ndmin=2 keeps X two-dimensional even if the file holds a single row.
        X = np.loadtxt(f, delimiter=",", ndmin=2)

    # Model inputs: the columns between the leading id column and the last five.
    X_test = X[:,1:-5]
    X_test = (X_test-scaler[0])/scaler[1]

    # Five-column block carried along in X_y_preds; only its width is used below.
    y_test = X[:,0:5]

    # Columns written to the output: everything except the fifth-from-last column.
    X_to_file = np.concatenate((X[:,:-5],X[:,-4:]), axis = 1)

    preds = model.predict(X_test)

    # Layout per row: [id, X_to_file columns..., y_test columns..., preds...]
    X_y_preds = np.concatenate((X_to_file, y_test, preds),axis=1)

    ids = np.unique(X_to_file[:,0])

    X_to_file = X_to_file[:,1:]
    print(len(ids))

    n_x_cols = X_to_file.shape[1]
    preds_start = n_x_cols + y_test.shape[1]

    for group_id in ids:
        # All rows sharing this id, with the id column dropped.
        X_y_preds_all_combinations = X_y_preds[X_y_preds[:,0] == group_id, 1:]
        X_all_combinations = X_y_preds_all_combinations[:,:n_x_cols]
        preds_all_combinations = X_y_preds_all_combinations[:,preds_start:]

        # Score each row by the product of its predicted values and keep the best.
        product = np.prod(preds_all_combinations, axis=1)
        best_index = np.argmax(product)

        best_pred = preds_all_combinations[best_index]
        best_X = X_all_combinations[best_index]

        data.append(best_X.tolist() + best_pred.tolist())

# newline="" is what the csv module expects; it avoids blank lines on Windows.
with open("data/mass_reco/mass_reco_input_real.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(column_labels_mass_reco)
    writer.writerows(data)

3
25
30
43


In [10]:
# `preds` is whatever the file loop in the previous cell left behind, i.e. the
# model output for the last file name in its list ("mc18") only.
print(preds)

[[4.52838659e-01 4.87233967e-01 4.47138816e-01 1.26180053e-03
  1.29216909e-03]
 [4.75252390e-01 4.92118597e-01 4.74965006e-01 9.10000920e-01
  9.02994514e-01]
 [4.86979544e-01 4.88328308e-01 4.80403930e-01 1.15406334e-01
  1.25073999e-01]
 [4.83831912e-01 4.99599189e-01 4.59491134e-01 9.08676624e-01
  9.20357108e-01]
 [4.75016594e-01 4.70331937e-01 4.54976708e-01 3.49087000e-01
  3.72567534e-01]
 [4.52528358e-01 4.47334558e-01 4.18643653e-01 5.89453280e-01
  5.88518560e-01]
 [4.89044756e-01 4.89638209e-01 5.07106423e-01 3.34259599e-01
  3.28761220e-01]
 [5.01587212e-01 4.99398798e-01 5.18763304e-01 3.71883392e-01
  3.67517591e-01]
 [4.97052342e-01 4.97303665e-01 4.86732513e-01 5.65248847e-01
  5.68861783e-01]
 [5.11283040e-01 5.07234752e-01 5.05150020e-01 5.23464501e-01
  5.23365557e-01]
 [4.49685365e-01 4.63443875e-01 4.97273952e-01 2.48768926e-03
  2.78952718e-03]
 [4.81352091e-01 4.82251346e-01 4.38651383e-01 9.81048346e-01
  9.83474612e-01]
 [4.95548159e-01 4.98418450e-01 4.550641