# Calculate validation points for testing other models

We do this by generating 100 maximin Latin-hypercube sample (LHS) points with a different fixed seed.

In [1]:
# Import computational environment

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize

#Our expensive physics simulation
import run_simple_energy_loss

import subprocess
import pandas as pd

from joblib import Parallel, delayed

In [2]:
# Short alias for the simulation entry point called on every validation point
simulation = run_simple_energy_loss.run_simulation

In [3]:
# Observable grid, prior ranges and reference parameter values

# p_T grid, unpacked into np.linspace (start, stop, num) when labelling results
# and forwarded to the simulation. More observables are computed than needed
# to avoid boundary instabilities.
pT_range = (1, 10, 10)

# p_T observables we will use for the analysis
pT_obs = [3, 5, 7]

# (lower, upper) bound of the prior for each model parameter
prior_ranges = dict(
    alpha_s=(0.1, 0.4),
    exponent_inel=(-2, 2),
    exponent_el=(-2, 2),
    scale_inel=(0.1, 0.4),
    scale_el=(0.1, 0.4),
)

# NOTE(review): presumably the parameter values behind the reference data;
# not used by the cells in this notebook -- confirm against the other notebooks.
true_values = dict(
    alpha_s=0.2,
    exponent_inel=-1,
    exponent_el=1,
    scale_inel=0.3,
    scale_el=0.3,
)

In [4]:
def generate_lhs(npoints, ndim, seed):
    """
    Generate a maximin Latin-hypercube sample (LHS) with the given number of
    points, dimensions, and random seed.

    The design is computed by piping a short script to an external ``R``
    process (which must have the ``lhs`` package installed) and parsing the
    whitespace-separated table that R writes to stdout.

    Reproduced from js-sims-bayes

    Parameters
    ----------
    npoints : int
        Number of design points (rows of the returned array).
    ndim : int
        Number of dimensions (columns of the returned array).
    seed : int
        Value passed to R's ``set.seed`` so the design is reproducible.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(npoints, ndim)`` holding the design exactly as
        written by ``maximinLHS``.

    Raises
    ------
    FileNotFoundError
        If the ``R`` executable cannot be found.
    subprocess.CalledProcessError
        If R exits with a non-zero status (e.g. the ``lhs`` package is missing).
    ValueError
        If R's output cannot be parsed into a numeric ``(npoints, ndim)`` table.
    """
    # Coerce once so the log line, the R script and the shape check below all
    # use exactly the same integers.
    npoints, ndim, seed = int(npoints), int(ndim), int(seed)

    print(
        'generating maximin LHS: '
        'npoints = %d, ndim = %d, seed = %d'\
        % (npoints, ndim, seed)
    )

    print('generating using R')
    proc = subprocess.run(
        ['R', '--slave'],
        input="""
        library('lhs')
        set.seed({})
        write.table(maximinLHS({}, {}), col.names=FALSE, row.names=FALSE)
        """.format(seed, npoints, ndim).encode(),
        stdout=subprocess.PIPE,
        check=True
    )

    # Skip blank lines: an empty row would make the table ragged and trigger
    # an obscure numpy conversion error instead of a meaningful message.
    rows = [line.split() for line in proc.stdout.splitlines() if line.strip()]

    try:
        lhs = np.array(rows, dtype=float)
    except ValueError as exc:
        raise ValueError(
            'could not parse the output of R as a numeric table'
        ) from exc

    # Fail loudly rather than hand back a design of the wrong size.
    if lhs.shape != (npoints, ndim):
        raise ValueError(
            'unexpected LHS shape from R: got {}, expected {}'.format(
                lhs.shape, (npoints, ndim)
            )
        )

    return lhs

In [5]:
# Generate validation points

# Column order of the design. It must stay identical to the order of
# `param_keys`, which labels these columns when the dataframe is built.
validation_param_order = ('alpha_s', 'exponent_inel', 'exponent_el', 'scale_inel', 'scale_el')

ndim = len(validation_param_order)
npoints = 100
validation_seed = 123456789  # fixed seed for this validation design
validation_lhspoints = generate_lhs(npoints, ndim, validation_seed)

# Rescale every LHS column onto its prior range in one broadcast operation:
# point = lhs * (upper - lower) + lower
prior_lower = np.array([prior_ranges[key][0] for key in validation_param_order], dtype=float)
prior_upper = np.array([prior_ranges[key][1] for key in validation_param_order], dtype=float)
validation_points = validation_lhspoints * (prior_upper - prior_lower) + prior_lower

assert validation_points.shape == (npoints, ndim), "unexpected validation design shape"


generating maximin LHS: npoints = 100, ndim = 5, seed = 123456789
generating using R


In [6]:
# Run the simulation once per validation point, spread over `cores` joblib jobs.
# Each point is handed over as a single-row 2-D array, followed by the pT grid.

cores = 4
validation_runs = Parallel(n_jobs=cores)(
    delayed(simulation)(point[np.newaxis, :], *pT_range)
    for point in validation_points
)

In [7]:
# Stack the per-point outputs into one row per validation point and one
# column per pT value. The shape is derived from `npoints` and the pT grid
# size instead of hard-coded numbers, so changing either keeps this cell valid.
validation_runs = np.array(validation_runs).reshape(npoints, pT_range[2])

Note: Because the likelihood function depends on the data, which can in principle be changed in the notebooks, I do not calculate the likelihood at the validation points. However, this is simple enough to do later.

In [8]:
# Dataframe column labels: the model parameters first, then one label per
# pT value on the np.linspace grid described by pT_range.
param_keys = ['alpha_s', 'exponent_inel', 'exponent_el', 'scale_inel', 'scale_el']
result_keys = list(map('pT {} GeV'.format, map(str, np.linspace(*pT_range))))

In [9]:
# Assemble the parameter values and the simulation outputs into one dataframe

validation_df = pd.DataFrame(validation_points, columns=param_keys)
# Append one column of results per pT label, in grid order
for column, key in enumerate(result_keys):
    validation_df[key] = validation_runs[:, column]

In [10]:
# Show the assembled dataframe as this cell's output
validation_df

Unnamed: 0,alpha_s,exponent_inel,exponent_el,scale_inel,scale_el,pT 1.0 GeV,pT 2.0 GeV,pT 3.0 GeV,pT 4.0 GeV,pT 5.0 GeV,pT 6.0 GeV,pT 7.0 GeV,pT 8.0 GeV,pT 9.0 GeV,pT 10.0 GeV
0,0.139880,1.233532,0.023331,0.277208,0.310282,0.923065,0.944544,0.765945,0.703582,0.673583,0.657323,0.648015,0.642235,0.636743,0.628520
1,0.332736,-0.131905,0.751403,0.212024,0.279861,0.688620,0.791497,0.175990,0.092273,0.064660,0.052266,0.045716,0.041578,0.037848,0.033352
2,0.370226,1.457209,-0.285213,0.201070,0.190545,0.568233,0.665193,0.114913,0.051152,0.031581,0.023231,0.018968,0.016290,0.013855,0.011014
3,0.259227,-0.766722,-1.405136,0.164316,0.136741,0.803727,0.875761,0.366005,0.254727,0.210319,0.188285,0.176045,0.168278,0.161129,0.151751
4,0.276825,-0.279757,-0.899501,0.178222,0.316869,0.651347,0.718871,0.258172,0.168719,0.135002,0.118950,0.110332,0.104940,0.099839,0.092970
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.201197,1.801333,-1.960022,0.103309,0.119967,0.924937,1.025017,0.357303,0.229135,0.180459,0.156740,0.143530,0.135051,0.127515,0.118390
96,0.311341,-1.580115,-1.818737,0.126657,0.353471,0.372903,0.411475,0.142879,0.089903,0.070185,0.061221,0.056788,0.054183,0.051345,0.046778
97,0.313361,-1.011546,-1.767668,0.333143,0.105985,0.801333,0.952992,0.137939,0.054563,0.029489,0.018788,0.013284,0.009824,0.006918,0.003938
98,0.109305,-1.936426,0.438479,0.141795,0.102183,0.944221,0.958236,0.839198,0.795408,0.773913,0.762177,0.755474,0.751362,0.747423,0.741318


In [11]:
# Save the validation set to disk. The row index is written too (pandas'
# default), so it appears as a leading column without a header label.
validation_df.to_csv('validation.csv', index=True)