In [49]:
import os

import numpy as np
from keras.models import Model

from test_bench import get_checkpoint_name, load_particle_datasets, subsample_dataset
from test_bench.model import MonteCarloDropoutModel


In [None]:
IN_COLAB = True

try:
  import google.colab
  # Using Google Drive
  from google.colab import drive
  drive.mount('/content/drive')
  # your path to the data
  !ls '/content/drive/MyDrive/data/rich'
  
  !git clone https://gitlab.com/lambda-hse/lhcb-rich-gan-uncertainty.git
  !mv lhcb-rich-gan-uncertainty/experiments .
  !mv lhcb-rich-gan-uncertainty/src .
  !rm -r lhcb-rich-gan-uncertainty/
  !rm -r sample_data/
  !pip install tensorflow-addons
  
  # Dataset download and extraction
  !unzip -qq drive/MyDrive/cern/data/rich.zip
  
  # Model checkpoint download and extraction
  !unzip -qq drive/MyDrive/cern/data/checkpoints_dropout_0.01.zip  
  
except:
  IN_COLAB = False  
  
print(f'IN_COLAB: {IN_COLAB}')

# Test Bench for the Monte Carlo Dropout and Feature Density methods

1. Select sample data
2. Create a model
3. Generate a single target in single-model inference mode
4. Estimate MCD uncertainty
5. Estimate FD uncertainty

In [51]:
# Parameters
PARTICLE = 'pion'                      # particle type: selects the dataset and the model
CHECKPOINT_DP = 0.01                   # dropout rate of the checkpoint to load
DROPOUT_TYPE = 'bernoulli_structured'  # used when building the checkpoint name
CHECKPOINT_BASE = 'checkpoints/'       # prefix prepended to the checkpoint name
DATA_DIR = 'rich/'                     # directory passed to the dataset loader
SUB_SAMPLE_PERCENT = 0.1               # passed to subsample_dataset (presumably a fraction -- TODO confirm)

# MCD parameters
MCD_ENSEMBLE_SIZE = 300                # passed to the MCD evaluation

# FD parameters
# Embeddings are stored under 'embeddings/'; on Colab that folder lives inside
# the mounted Drive. The trailing slash is required because file names are
# appended to this string directly.
colab_prefix = 'drive/MyDrive/Colab Notebooks/' if IN_COLAB else ''
embeddings_dir = colab_prefix + 'embeddings/'


# Load data and Sample selection

In [None]:
# Load the datasets for PARTICLE from DATA_DIR. The result is indexed later in
# the notebook by the keys 'feats_train', 'feats_val' and 'targets_val'.
dataset = load_particle_datasets(PARTICLE, DATA_DIR)

In [None]:
# Draw a sample of the validation features and targets
val_features = dataset['feats_val']
val_targets = dataset['targets_val']
x_sample, y_sample = subsample_dataset(val_features, val_targets, SUB_SAMPLE_PERCENT)

# Show the shapes of the drawn sample
(x_sample.shape, y_sample.shape)

# Model creation

In [None]:
# Checkpoint location: base directory plus the name derived from the particle,
# dropout rate and dropout type.
checkpoint_dir = CHECKPOINT_BASE + get_checkpoint_name(PARTICLE, CHECKPOINT_DP, DROPOUT_TYPE)

# Build the Monte Carlo Dropout model wrapper and obtain its generator.
model = MonteCarloDropoutModel(
    PARTICLE,
    dropout_rate=CHECKPOINT_DP,
    checkpoint_dir=checkpoint_dir,
    debug=True,
)
generator = model.get_generator()

## Single model prediction

In [None]:
# Switch the generator to single-model inference mode before predicting
# (presumably this disables the stochastic dropout used for MCD -- TODO confirm).
generator.single_model_inference_mode()
# Predict targets for the sub-sampled features and display them.
t_generated = generator.predict(x_sample)
t_generated

## MCD

In [None]:
from mcd.MCDEvaluator import evaluate_model

# evaluate_model yields two values; only the first is kept, as `mcd_uncertainty`.
# The second is bound to a named throwaway instead of `_`: assigning to `_`
# shadows IPython's last-output variable, which then stops being updated.
mcd_uncertainty, _mcd_unused = evaluate_model(model, x_sample, MCD_ENSEMBLE_SIZE)
mcd_uncertainty


In [None]:
# Display the shape of `mcd_uncertainty`.
mcd_uncertainty.shape

## FD

### Generation of FD embeddings

In [45]:
# Position (0-based) in `generator.layers` of the layer whose output is used
# as the embedding.
EMBEDDING_LAYER = 14

# 1. Set the model in inference mode
generator.single_model_inference_mode()

# 2. Select the tensors to expose: the generator input and the output of the
#    chosen intermediate layer
input_layer = generator.input
output_layer = generator.layers[EMBEDDING_LAYER].output

# 3. Build a model that returns both the intermediate embedding and the
#    generator's final output for the same input
embeddings_model = Model(input_layer, [output_layer, generator.output])
print('Embeddings model created')
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit an extra "None" line.
embeddings_model.summary()
print('-'*50)

# Optional manual reset of the local embeddings directory:
# !rm -r embeddings
# !mkdir embeddings


Embeddings model created
Model: "model_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Inputs (InputLayer)         [(None, 3)]               0         
                                                                 
 NoiseInjection (NoiseInject  (None, 67)               0         
 ion)                                                            
                                                                 
 Layer_0/Dense (Dense)       (None, 128)               8704      
                                                                 
 Layer_0/LeakyReLU (LeakyReL  (None, 128)              0         
 U)                                                              
                                                                 
 Layer_0/DropoutTrain (Dropo  (None, 128)              0         
 utTrain)                                                        
                                 

In [46]:
# embeddings_model has two outputs: the output of layer EMBEDDING_LAYER and the
# generator's final output. Compute both for the training features.
train_embeddings, train_predictions = embeddings_model.predict(dataset['feats_train'])



In [47]:
# Same two outputs (intermediate-layer output and generator output), computed
# for the validation features; stored under the "test" names.
test_embeddings, test_predictions = embeddings_model.predict(dataset['feats_val'])



In [52]:
# np.save does not create missing parent directories, so make sure the target
# directory exists before writing.
os.makedirs(embeddings_dir, exist_ok=True)

# Persist embeddings and predictions as '<PARTICLE>_<suffix>.npy' files.
arrays_to_save = {
    'train_embeddings': train_embeddings,
    'train_predictions': train_predictions,
    'test_embeddings': test_embeddings,
    'test_predictions': test_predictions,
}
for suffix, array in arrays_to_save.items():
    np.save(embeddings_dir + f'{PARTICLE}_{suffix}.npy', array)

### FD Uncertainty estimation

In [53]:
# To reuse previously saved predictions instead of recomputing them, load them
# from disk:
# train_predictions = np.load(embeddings_dir + f'{PARTICLE}_train_predictions.npy')
# test_predictions = np.load(embeddings_dir +  f'{PARTICLE}_test_predictions.npy')

# Report the shapes of the train and test predictions.
for label, predictions in (
    ('Training data shape:', train_predictions),
    ('Test data shape:', test_predictions),
):
    print(label, predictions.shape)

Training data shape: (948325, 5)
Test data shape: (527302, 5)
