In [None]:
# Mount Google Drive at /content/drive; the archives and .npy files loaded by
# the cells below are read from paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import tensorflow as tf


DLL_COLUMNS = ['RichDLLe', 'RichDLLk', 'RichDLLmu', 'RichDLLp', 'RichDLLbt']
PARTICLE = 'pion'

!unzip -qq '/content/drive/MyDrive/cern/data/results/30x30/dp_0.01/2024-oct-04/pion_sample_30x30.zip'
y_sample = np.load('/content/results/pion_y_real.npy')
x_sample = np.load('/content/results/pion_x_real.npy')
t_generated = np.load('/content/results/t_generated.npy')

In [None]:
# Disabled alternative: load the arrays directly from the article_results folder
# on Drive instead of the unzipped sample archive. Kept commented out for reference.
# output_dir = '/content/drive/MyDrive/cern/data/results/30x30/article_results/'

# x_sample = np.load(output_dir + f'{PARTICLE}_x_real.npy')
# x_sample_orig = np.load(output_dir + f'{PARTICLE}_x_real_orig.npy')
# y_sample = np.load(output_dir + f'{PARTICLE}_y_real.npy')
# y_sample_orig = np.load(output_dir + f'{PARTICLE}_y_real_orig.npy')
# t_generated = np.load(output_dir + f'{PARTICLE}_t_generated.npy')

# mcd_all_uncertainties = np.load(output_dir + f'{PARTICLE}_mcd_uncertainty_dp_0.1.npy')
# fd_uncertainty_normalized = np.load(output_dir + f'{PARTICLE}_fd_uncertainty_layer_8.npy')

In [None]:
from scipy.spatial.distance import jensenshannon
from scipy.stats import linregress, kstest


def estimate_distances(y_real, y_generated, uncertainty_scores, uncertainty_type = None, bin_type = 'linear',
                                                 particle_index = 0, metric='JS', n_rows = 2, n_cols = 5, dll_columns=DLL_COLUMNS):
  """Measure the real-vs-generated distance inside each uncertainty bin.

  Samples are grouped into ``n_rows * n_cols`` bins according to their
  uncertainty score, and within every bin the distribution of the real
  values is compared with the distribution of the generated values for the
  column selected by ``particle_index``.

  Args:
    y_real: 2-D array of real values; column ``particle_index`` is used.
    y_generated: 2-D array of generated values, indexed the same way.
    uncertainty_scores: Per-sample uncertainty scores. When
      ``uncertainty_type == 'MCD'`` this must be 2-D and column
      ``particle_index`` is selected; otherwise it is used as given
      (presumably one score per sample -- confirm against the caller).
    uncertainty_type: ``'MCD'`` selects a per-column score; any other value
      leaves ``uncertainty_scores`` untouched.
    bin_type: ``'linear'`` for equal-width bins between the minimum and
      maximum score; any other value uses quantile (equal-population) bins.
    particle_index: Column of ``y_real`` / ``y_generated`` to compare.
    metric: ``'JS'`` for the Jensen-Shannon distance between 25-bin density
      histograms; any other value uses the two-sample KS statistic.
    n_rows, n_cols: Their product is the number of uncertainty bins.
    dll_columns: Unused; kept so existing callers keep working.

  Returns:
    ``(bin_edges, distances)`` where ``bin_edges`` holds ``n_bins + 1`` edges
    and ``distances`` is a list with one value per bin. A bin that contains
    no samples contributes ``nan``.
  """
  n_bins = n_rows * n_cols

  targets = np.array(y_real[:, particle_index])
  predictions = np.array(y_generated[:, particle_index])
  uncertainty_scores = np.array(uncertainty_scores)

  if uncertainty_type == 'MCD':
    uncertainty_scores = uncertainty_scores[:, particle_index]

  if bin_type == 'linear':
    bin_edges = np.linspace(uncertainty_scores.min(), uncertainty_scores.max(), n_bins + 1)
  else: # Quantiles
    bin_edges = np.quantile(uncertainty_scores, np.linspace(0, 1, n_bins + 1))

  # Digitize against the interior edges only: this yields bin ids 0..n_bins-1
  # and keeps the samples equal to the maximum score inside the last bin
  # (digitizing against all edges would push them into an extra overflow bin).
  bin_indices = np.digitize(uncertainty_scores, bin_edges[1:-1])

  distances = []

  for bin_id in range(n_bins):
    in_bin = bin_indices == bin_id
    bin_targets = targets[in_bin]
    bin_predictions = predictions[in_bin]

    # Empty bins can occur with linear binning or with repeated quantile
    # edges; there is nothing to compare, so record nan instead of crashing.
    if bin_targets.size == 0:
      distances.append(np.nan)
      continue

    if metric == 'JS':
      # Both histograms share one range so their bins are directly comparable.
      hist_range = (min(bin_targets.min(), bin_predictions.min()),
                    max(bin_targets.max(), bin_predictions.max()))
      targets_hist = np.histogram(
          bin_targets, bins=25, range=hist_range, density=True)[0]
      predictions_hist = np.histogram(
          bin_predictions, bins=25, range=hist_range, density=True)[0]
      dist = jensenshannon(predictions_hist, targets_hist)
    else:
      dist = kstest(bin_predictions, bin_targets).statistic

    distances.append(dist)

  return bin_edges, distances


def estimate_correlation(all_bin_ranges, all_distances, dll_columns=DLL_COLUMNS):
  """Correlate bin centres with per-bin distances, one coefficient per series.

  Args:
    all_bin_ranges: Sequence of bin-edge arrays, one per series; each holds
      one more entry than the matching distance list.
    all_distances: Sequence of per-bin distance lists, aligned with
      ``all_bin_ranges``.
    dll_columns: Unused; kept so existing callers keep working.

  Returns:
    A list with the ``rvalue`` of a linear regression of the distances on
    the bin centres, one entry per supplied series.

  Raises:
    ValueError: If the two sequences do not have the same length.
  """
  if len(all_bin_ranges) != len(all_distances):
    raise ValueError(
        'all_bin_ranges and all_distances must contain the same number of series')

  correlation_coefficient = []
  # Iterate over whatever was supplied instead of assuming exactly five series.
  for bin_edges, distances in zip(all_bin_ranges, all_distances):
    # The centre of each bin is the mean of its two edges.
    bin_centers = np.mean([bin_edges[1:], bin_edges[:-1]], 0)
    regress = linregress(bin_centers, distances)
    correlation_coefficient.append(regress.rvalue)

  return correlation_coefficient

In [None]:
import pandas as pd

def calculate_stats(all_correlations, columns):
    """Print the correlation table followed by per-column mean and std rows.

    Args:
        all_correlations: Row-wise data, one row of coefficients per repetition.
        columns: Column labels for the table.

    Returns:
        None. The table, with extra ``'Mean'`` and ``'Std'`` rows, is printed.
    """
    table = pd.DataFrame(all_correlations, columns=columns)

    # Both summaries are computed before either row is appended, so the
    # 'Mean' row never contributes to the standard deviation.
    column_mean, column_std = table.mean(axis=0), table.std(axis=0)
    for label, summary in (('Mean', column_mean), ('Std', column_std)):
        table.loc[label] = summary

    print(table)

In [None]:
def calculate_correlations(metric, uncertainty_type, uncertainty_data, N = 30):
    """Compute per-column correlation coefficients for ``N`` repetitions.

    For each of the first ``N`` entries of ``uncertainty_data`` the distances
    are estimated with quantile bins for column indices 0..4, then turned
    into correlation coefficients. Reads the notebook-level ``y_sample`` and
    ``t_generated`` arrays.

    Args:
        metric: Distance metric name forwarded to ``estimate_distances``.
        uncertainty_type: Uncertainty kind forwarded to ``estimate_distances``.
        uncertainty_data: Indexable collection with at least ``N`` entries of
            uncertainty scores.
        N: Number of repetitions to process.

    Returns:
        A list of ``N`` lists of correlation coefficients.
    """
    def _correlations_for(repetition_scores):
        # One (bin_edges, distances) pair per column index.
        per_column = [
            estimate_distances(
                y_sample, t_generated, repetition_scores,
                uncertainty_type=uncertainty_type, bin_type='quantiles',
                particle_index=column, metric=metric
            )
            for column in range(5)
        ]
        edges_per_column = [pair[0] for pair in per_column]
        distances_per_column = [pair[1] for pair in per_column]
        return estimate_correlation(edges_per_column, distances_per_column)

    return [_correlations_for(uncertainty_data[repetition]) for repetition in range(N)]


# FD

## Load data

### LAYER 1

In [None]:
# NOTE(review): the section header says "LAYER 1" but the path points at layer_2 -- confirm the mapping.
# Each FD loading cell overwrites fd_uncertainty_normalized, so the last one executed decides the results.
# Named fd_results_dir rather than `dir` so the builtin dir() is not shadowed.
fd_results_dir = '/content/drive/MyDrive/cern/data/results/30x30/dp_0.01/2024-nov-04/layer_2/'
fd_uncertainty_normalized = np.load(fd_results_dir + f'{PARTICLE}_fd_uncertainty.npy')

### LAYER 3

In [None]:
# NOTE(review): the section header says "LAYER 3" but the path points at layer_8 -- confirm the mapping.
# Each FD loading cell overwrites fd_uncertainty_normalized, so the last one executed decides the results.
# Named fd_results_dir rather than `dir` so the builtin dir() is not shadowed.
fd_results_dir = '/content/drive/MyDrive/cern/data/results/30x30/dp_0.01/2024-nov-04/layer_8/'
fd_uncertainty_normalized = np.load(fd_results_dir + f'{PARTICLE}_fd_uncertainty.npy')

### LAYER 5

In [None]:
# NOTE(review): the section header says "LAYER 5" but the path points at layer_14 -- confirm the mapping.
# Each FD loading cell overwrites fd_uncertainty_normalized, so the last one executed decides the results.
# Named fd_results_dir rather than `dir` so the builtin dir() is not shadowed.
fd_results_dir = '/content/drive/MyDrive/cern/data/results/30x30/dp_0.01/2024-nov-04/layer_14/'
fd_uncertainty_normalized = np.load(fd_results_dir + f'{PARTICLE}_fd_uncertainty.npy')

## Feature Densities with JS

In [None]:
# Correlate FD uncertainty bins with the per-bin JS distance (one row per repetition),
# using whichever fd_uncertainty_normalized array was loaded last, then print the
# table with its Mean/Std summary rows.
all_correlations = calculate_correlations('JS', 'FD', fd_uncertainty_normalized)
calculate_stats(all_correlations, DLL_COLUMNS)

      RichDLLe  RichDLLk  RichDLLmu  RichDLLp  RichDLLbt
0     0.988396  0.966197   0.972063  0.920595   0.768379
1     0.989796  0.967776   0.968580  0.927795   0.808414
2     0.990678  0.974766   0.969802  0.906766   0.821514
3     0.988707  0.970598   0.971211  0.909221   0.851280
4     0.991147  0.974445   0.980492  0.932459   0.927631
5     0.990946  0.976468   0.978448  0.934330   0.804656
6     0.989244  0.968711   0.976016  0.917928   0.804315
7     0.991505  0.971764   0.977016  0.925046   0.780437
8     0.988052  0.975684   0.981953  0.928195   0.791981
9     0.987991  0.973822   0.972081  0.932017   0.813672
10    0.986004  0.969187   0.972439  0.940282   0.833704
11    0.988156  0.971939   0.971341  0.907400   0.762467
12    0.987106  0.975337   0.975296  0.908177   0.807271
13    0.987739  0.975786   0.968455  0.918111   0.884675
14    0.988333  0.963906   0.967193  0.937142   0.844586
15    0.991983  0.975955   0.978617  0.915907   0.880294
16    0.988886  0.969793   0.97

## Feature Densities with KS

In [None]:
# Same as the JS run, but the per-bin distance is the two-sample KS statistic.
# Overwrites all_correlations from the previous run.
all_correlations = calculate_correlations('KS', 'FD', fd_uncertainty_normalized)
calculate_stats(all_correlations, DLL_COLUMNS)

      RichDLLe  RichDLLk  RichDLLmu  RichDLLp  RichDLLbt
0     0.937474  0.971874   0.899581  0.945033   0.795220
1     0.932370  0.972926   0.892362  0.946502   0.770138
2     0.941086  0.976361   0.892466  0.955092   0.845413
3     0.935351  0.975132   0.898662  0.948513   0.804707
4     0.931539  0.973553   0.901787  0.954589   0.825879
5     0.933629  0.976351   0.899757  0.945310   0.745675
6     0.950056  0.974153   0.912154  0.946999   0.795216
7     0.929069  0.975687   0.891324  0.949441   0.786553
8     0.936226  0.975586   0.903162  0.950674   0.800482
9     0.940208  0.975497   0.908687  0.952567   0.785891
10    0.944780  0.970470   0.906288  0.945871   0.750775
11    0.938372  0.973499   0.904857  0.949620   0.840753
12    0.941949  0.973706   0.906070  0.948416   0.849281
13    0.939079  0.973379   0.898531  0.946652   0.792047
14    0.940305  0.973977   0.906638  0.947282   0.804320
15    0.936220  0.975127   0.906935  0.951764   0.802737
16    0.941956  0.973905   0.91

# MCD

## Load data

### DROPOUT 0.05

In [None]:
# Each MCD loading cell overwrites mcd_all_uncertainties, so the last one executed decides the results.
# Named mcd_results_dir rather than `dir` so the builtin dir() is not shadowed.
mcd_results_dir = '/content/drive/MyDrive/cern/data/results/30x30/dp_0.05/2024-oct-20/'

mcd_all_uncertainties = np.load(mcd_results_dir + f'{PARTICLE}_mcd_uncertainty.npy')

### DROPOUT 0.01

In [None]:
!unzip -qq '/content/drive/MyDrive/cern/data/results/30x30/dp_0.01/2024-oct-04/pion_uncertainty_30x30_reps.zip'
mcd_all_uncertainties  = np.load('/content/' + f'{PARTICLE}_mcd_uncertainty_30_reps.npy')

replace pion_mcd_uncertainty_30_reps.npy? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


### DROPOUT 0.1

In [None]:
# Each MCD loading cell overwrites mcd_all_uncertainties, so the last one executed decides the results.
# Named mcd_results_dir rather than `dir` so the builtin dir() is not shadowed.
mcd_results_dir = '/content/drive/MyDrive/cern/data/results/30x30/dp_0.1/2024-oct-18/'

mcd_all_uncertainties = np.load(mcd_results_dir + f'{PARTICLE}_mcd_uncertainty.npy')

## MCD with JS

In [None]:
# Correlate MCD uncertainty bins with the per-bin JS distance (one row per repetition),
# using whichever mcd_all_uncertainties array was loaded last, then print the
# table with its Mean/Std summary rows.
all_correlations = calculate_correlations('JS', 'MCD', mcd_all_uncertainties)
calculate_stats(all_correlations, DLL_COLUMNS)

      RichDLLe  RichDLLk  RichDLLmu  RichDLLp  RichDLLbt
0     0.931712  0.892428   0.927162  0.981013   0.752428
1     0.932801  0.913138   0.942275  0.985214   0.747845
2     0.915555  0.909380   0.965178  0.971393   0.769795
3     0.923141  0.897347   0.977387  0.991940   0.742450
4     0.922550  0.913731   0.960804  0.968792   0.809786
5     0.908788  0.906154   0.946164  0.971405   0.776485
6     0.926276  0.917019   0.972417  0.986013   0.809021
7     0.930464  0.901918   0.980704  0.984468   0.736220
8     0.926601  0.899143   0.927712  0.976093   0.775351
9     0.932433  0.907352   0.971200  0.986352   0.768836
10    0.930012  0.897800   0.954017  0.978606   0.804160
11    0.933869  0.906648   0.933330  0.987977   0.795520
12    0.948315  0.905112   0.848564  0.987393   0.743769
13    0.952713  0.906916   0.927898  0.976741   0.862072
14    0.943223  0.911184   0.976087  0.992929   0.723222
15    0.932740  0.910928   0.898221  0.984948   0.801496
16    0.949656  0.903927   0.97

## MCD with KS

In [None]:
# Same as the JS run, but the per-bin distance is the two-sample KS statistic.
# Overwrites all_correlations from the previous run.
all_correlations = calculate_correlations('KS', 'MCD', mcd_all_uncertainties)
calculate_stats(all_correlations, DLL_COLUMNS)

      RichDLLe  RichDLLk  RichDLLmu  RichDLLp  RichDLLbt
0     0.900017  0.894222   0.897238  0.967978   0.616516
1     0.897940  0.917895   0.990227  0.978043   0.658763
2     0.878441  0.907950   0.983142  0.956923   0.631530
3     0.884357  0.901341   0.950946  0.979883   0.606102
4     0.895306  0.915214   0.995029  0.948828   0.681325
5     0.843513  0.907032   0.988540  0.956222   0.641202
6     0.878221  0.922935   0.970313  0.976142   0.672499
7     0.870327  0.908059   0.985194  0.972894   0.638253
8     0.829179  0.903771   0.969962  0.963994   0.664239
9     0.842508  0.907928   0.994250  0.973793   0.667476
10    0.893678  0.898061   0.901402  0.960095   0.657557
11    0.842354  0.914866   0.900312  0.978566   0.640179
12    0.938817  0.911381   0.888018  0.973989   0.605006
13    0.922875  0.908213   0.990911  0.959045   0.767105
14    0.862864  0.915870   0.967733  0.983107   0.597293
15    0.890918  0.913653   0.942021  0.970654   0.667908
16    0.929453  0.906547   0.94