In [1]:
import numpy as np

from test_bench import get_checkpoint_name, load_particle_datasets, subsample_dataset
from test_bench.model import MonteCarloDropoutModel


In [3]:
IN_COLAB = True

try:
  import google.colab
  # Using Google Drive
  from google.colab import drive
  drive.mount('/content/drive')
  # your path to the data
  !ls '/content/drive/MyDrive/data/rich'
  
  !git clone https://gitlab.com/lambda-hse/lhcb-rich-gan-uncertainty.git
  !mv lhcb-rich-gan-uncertainty/experiments .
  !mv lhcb-rich-gan-uncertainty/src .
  !rm -r lhcb-rich-gan-uncertainty/
  !rm -r sample_data/
  !pip install tensorflow-addons
  
  # Dataset download and extraction
  !unzip -qq drive/MyDrive/cern/data/rich.zip
  
  # Model checkpoint download and extraction
  !unzip -qq drive/MyDrive/cern/data/checkpoints_dropout_0.01.zip  
  
except:
  IN_COLAB = False  
  
print(f'IN_COLAB: {IN_COLAB}')

IN_COLAB: False


# Test Bench for the Monte Carlo Dropout and Feature Density methods

1. Select sample data
2. Create a model
3. Generate targets with the generator in single-model inference mode
4. Estimate Monte Carlo Dropout (MCD) uncertainty
5. Estimate Feature Densities (FD) uncertainty

In [4]:
# Parameters
PARTICLE = 'pion'                       # particle whose datasets and model are used
CHECKPOINT_DP = 0.01                    # dropout rate of the checkpoint to load
DROPOUT_TYPE = 'bernoulli_structured'   # used to build the checkpoint name
CHECKPOINT_BASE = 'checkpoints/'        # prefix of the checkpoint directory
DATA_DIR = 'rich/'                      # directory passed to the dataset loader
SUB_SAMPLE_PERCENT = 0.1                # value passed to subsample_dataset

# MCD parameters
MCD_ENSEMBLE_SIZE = 300                 # ensemble size passed to the MCD evaluator

# FD parameters
# In Colab the embeddings directory sits under the mounted Drive folder;
# otherwise it is relative to the working directory.
embeddings_dir = ('drive/MyDrive/Colab Notebooks/' if IN_COLAB else '') + 'embeddings/'


# Data loading and sample selection

In [5]:
# Load the datasets for PARTICLE from DATA_DIR. Later cells index the result
# with the keys 'feats_val' and 'targets_val'.
dataset = load_particle_datasets(PARTICLE, DATA_DIR)

Reading and concatenating datasets:
	rich\pion2_+_down_2016_.csv
	rich\pion2_+_up_2016_.csv
	rich\pion2_-_down_2016_.csv
	rich\pion2_-_up_2016_.csv
	rich\pion_+_down_2016_.csv
	rich\pion_+_up_2016_.csv
	rich\pion_-_down_2016_.csv
	rich\pion_-_up_2016_.csv
splitting to train/val/test
fitting the scaler
scaler train sample size: 2000000
scaler n_quantiles: 100000, time = 1.3087596893310547
scaling train set
scaling test set
converting dtype to <class 'numpy.float32'>
feats_train shape	(948325, 3)
targets_train shape	(948325, 5)
feats_val shape  	(527302, 3)
targets_val shape	(527302, 5)



In [6]:
# Take a sub-sample of the validation features and targets; the remaining
# cells run on this sample rather than on the full validation set.
x_sample, y_sample = subsample_dataset(
    dataset['feats_val'],
    dataset['targets_val'],
    SUB_SAMPLE_PERCENT,
)
# Display both shapes as a quick sanity check.
x_sample.shape, y_sample.shape

(TensorShape([52730, 3]), TensorShape([52730, 5]))

# Model creation

In [7]:
# Build the Monte Carlo Dropout model for PARTICLE. The checkpoint directory
# is the base path followed by the name derived from the particle, dropout
# rate and dropout type.
model = MonteCarloDropoutModel(
    PARTICLE,
    checkpoint_dir=CHECKPOINT_BASE + get_checkpoint_name(PARTICLE, CHECKPOINT_DP, DROPOUT_TYPE),
    dropout_rate=CHECKPOINT_DP,
    debug=True,
)
# The generator is what the prediction cell below calls directly.
generator = model.get_generator()

Generating model for pion with a dropout rate of 0.01
Layer 0
Layer 1
Layer 2
Layer 3
Layer 4

Generator:

Model: "virtual_ensemble_model"
________________________________________________________________________________________________
 Layer (type)                              Output Shape                          Param #        
 Inputs (InputLayer)                       [(None, 3)]                           0              
                                                                                                
 NoiseInjection (NoiseInjection)           (None, 67)                            0              
                                                                                                
 Layer_0/Dense (Dense)                     (None, 128)                           8704           
                                                                                                
 Layer_0/LeakyReLU (LeakyReLU)             (None, 128)                           0   

## Single model prediction

In [8]:
# Switch the generator to single-model inference mode before predicting
# (presumably this yields one prediction per input rather than an ensemble --
# TODO confirm against the generator implementation).
generator.single_model_inference_mode()
# Generate targets for the sampled features and display them.
t_generated = generator.predict(x_sample)
t_generated



array([[-2.2360675 , -1.1847739 , -1.5161496 , -1.1183429 , -1.2257295 ],
       [-0.04207586, -1.2648206 , -0.12901922, -0.61363685, -0.67083454],
       [-0.9352725 , -2.4061308 , -0.4834558 , -2.185705  , -2.367094  ],
       ...,
       [-0.21783379, -1.2257509 , -0.4460397 , -0.93520015, -1.0148213 ],
       [ 1.431718  , -0.8172986 ,  1.2122009 , -1.0378337 , -0.83755225],
       [-2.1402342 , -0.8638425 , -2.4000866 , -0.78204876, -0.84564716]],
      dtype=float32)

## Monte Carlo Dropout method

In [9]:
from mcd.MCDEvaluator import evaluate_model

# Estimate the MCD uncertainty of the sample with an ensemble of
# MCD_ENSEMBLE_SIZE predictions; the second returned value is not used here.
mcd_uncertainty, _ = evaluate_model(
    model,
    x_sample,
    MCD_ENSEMBLE_SIZE,
)
mcd_uncertainty


Generating ensemble(300) predictions


100%|██████████| 300/300 [00:04<00:00, 61.74it/s]


<tf.Tensor: shape=(52730, 5), dtype=float32, numpy=
array([[0.40873483, 0.3998819 , 0.61187154, 0.4298904 , 0.48295096],
       [0.37222233, 0.47420976, 0.51377887, 0.57635206, 0.69689095],
       [0.48894337, 0.44581538, 0.5583058 , 0.55381644, 0.7149202 ],
       ...,
       [0.33421418, 0.5330626 , 0.57754517, 0.5674262 , 0.6533295 ],
       [0.19450414, 0.3283254 , 0.14817482, 0.4794714 , 0.8294769 ],
       [0.1972451 , 0.278714  , 0.27302375, 0.2856447 , 0.29262617]],
      dtype=float32)>

In [10]:
# Sanity check: show the shape of the MCD uncertainty tensor.
mcd_uncertainty.shape

TensorShape([52730, 5])

## Feature Densities method

### Uncertainty estimation

In [None]:
from feature_densities.feature_density_evaluator import evaluate_model as fd_evaluate_model

# Load the stored training-set embeddings; they are passed to the evaluator
# as the known embeddings.
train_embeddings = np.load(f'{embeddings_dir}{PARTICLE}_train_embeddings.npy')
print(train_embeddings.shape)

# Feature Densities uncertainty with the 'integration' likelihood method;
# the second returned value is not used here.
fd_uncertainty, _ = fd_evaluate_model(
    model,
    x_sample,
    known_embeddings=train_embeddings,
    likelihood_method='integration',
)

print('Feature Densities using INTEGRATION uncertainty score for x_sample:')
fd_uncertainty

(948325, 128)
Generating an embeddings model
Fitting KDE functions to known embeddings
Calculating sample´s embeddings
Estimating sample´s feature densities
Calculating likelihoods with integration method


  0%|          | 0/128 [00:00<?, ?it/s]

In [None]:
# Same evaluation with the 'normalized' likelihood method. This rebinds
# fd_uncertainty, replacing the value assigned by the 'integration' run.
fd_uncertainty, _ = fd_evaluate_model(
    model,
    x_sample,
    known_embeddings=train_embeddings,
    likelihood_method='normalized',
)

print('Feature Densities using NORMALIZED uncertainty score for x_sample:')
fd_uncertainty

### Generation of FD embeddings

In [None]:
# Disabled on purpose: uncomment this cell and the ones below only when the
# stored embeddings files need to be regenerated.
# from feature_densities.feature_density_evaluator import create_embeddings_model
# embeddings_model = create_embeddings_model(model)

In [None]:
# train_embeddings, train_predictions = embeddings_model.predict(dataset['feats_train'])

In [None]:
# test_embeddings, test_predictions = embeddings_model.predict(dataset['feats_val'])

In [None]:
# Disabled on purpose: recreates the embeddings directory and saves the arrays.
# NOTE(review): the shell commands act on the relative `embeddings` directory,
# while np.save writes to `embeddings_dir`, which is prefixed with the Drive
# path when IN_COLAB is true -- confirm both refer to the same location
# before re-enabling.
# !rm -r embeddings
# !mkdir embeddings

# np.save(embeddings_dir + f'{PARTICLE}_train_embeddings.npy', train_embeddings)
# np.save(embeddings_dir + f'{PARTICLE}_train_predictions.npy', train_predictions)
# np.save(embeddings_dir + f'{PARTICLE}_test_embeddings.npy', test_embeddings)
# np.save(embeddings_dir + f'{PARTICLE}_test_predictions.npy', test_predictions)