In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from deepjets import learning, models, preprocessing, utils
import seaborn

Using Theano backend.




In [None]:
# Build the MaxOut model. 25 * 25 = 625 equals the row length of the flattened
# 25x25 images written to 'datasets/w_test.h5' later in this notebook.
# NOTE(review): presumably this is the network's input size — confirm against
# models.get_maxout.
flat_image_size = 25 * 25
model = models.get_maxout(flat_image_size)

In [None]:
# Run configuration read by the dataset-preparation and training cells below.

# Input image files (signal / background) and the name stems for the outputs.
sig_file, bkd_file = "images/w_images.h5", "images/qcd_images.h5"
dataset_name, model_name = "datasets/test", "models/test"

# Passed as both n_sig and n_bkd to utils.prepare_datasets.
# NOTE(review): -1 presumably means "use every available image" — confirm.
n_images = -1
n_folds = 1

# Handed to utils.prepare_datasets as test_frac and val_frac respectively.
test_frac = val_frac = 0.1

In [None]:
# Prepare the dataset files from the signal and background image files using the
# configuration defined above. The shuffle seed is fixed at 1.
prepare_options = dict(
    n_sig=n_images,
    n_bkd=n_images,
    test_frac=test_frac,
    val_frac=val_frac,
    n_folds=n_folds,
    auxvars=['weights'],
    shuffle=True,
    shuffle_seed=1,
)
h5_files = utils.prepare_datasets(sig_file, bkd_file, dataset_name, **prepare_options)

In [None]:
# Train the model on the 'train' entry returned by utils.prepare_datasets.
# NOTE(review): model_name is presumably where the trained model gets saved — confirm.
train_dataset = h5_files['train']
learning.train_model(model, train_dataset, model_name)

In [None]:
# Signal/background distribution plot for the saved model 'models/maxout_w_default'.
# NOTE: 'datasets/w_test.h5' is (re)written by a later cell of this notebook (the one
# opening it with mode 'w'), so on a fresh checkout that cell has to run first.
# NOTE(review): 'models/maxout_w_default' is not produced by this notebook (training
# above saves under model_name = 'models/test') — confirm it exists.
maxout_default = models.load_model('models/maxout_w_default')
utils.plot_sig_bkd_dists(maxout_default, 'datasets/w_test.h5')

In [None]:
# Signal/background distribution plot for the saved model 'models/maxout_w_default_2'.
# NOTE: 'datasets/w_test.h5' is (re)written by a later cell of this notebook (the one
# opening it with mode 'w'), so on a fresh checkout that cell has to run first.
maxout_default_2 = models.load_model('models/maxout_w_default_2')
utils.plot_sig_bkd_dists(maxout_default_2, 'datasets/w_test.h5')

In [None]:
# Evaluate both saved MaxOut models on the same test file. ROC plotting is switched
# off here; the 'ROC_curve' entries of the results are plotted together in a later cell.
test_dataset = 'datasets/w_test.h5'

first_model = models.load_model('models/maxout_w_default')
roc = learning.test_model(first_model, test_dataset, show_ROC_curve=False)

second_model = models.load_model('models/maxout_w_default_2')
roc_2 = learning.test_model(second_model, test_dataset, show_ROC_curve=False)

In [None]:
# Overlay the ROC curves of the two MaxOut models; labels follow the curve order.
maxout_curves = [roc['ROC_curve'], roc_2['ROC_curve']]
maxout_labels = ['MaxOut 162k images', 'MaxOut 450k images']
utils.plot_roc_curves(maxout_curves, maxout_labels)

In [None]:
import h5py
import numpy as np

# Rebuild the test set: preprocess every event of the signal ('images/w_test.h5') and
# background ('images/qcd_test.h5') files into a 25x25 image, then write the flattened
# images, one-hot labels and three auxiliary variables to 'datasets/w_test.h5'.
edges = preprocessing.pixel_edges(jet_size=1.0, subjet_size_fraction=0.5, pix_size=(0.1, 0.1))


def _load_test_sample(path, pixel_edges, width=25):
    """Preprocess every event stored in ``path`` and read its auxiliary variables.

    Parameters
    ----------
    path : str
        HDF5 file providing 'subjets', 'trimmed_constituents', 'shrinkage',
        'trimmed_jet' (with 'pT' and 'mass'), 'tau_1' and 'tau_2'.
    pixel_edges
        Value returned by ``preprocessing.pixel_edges``; forwarded unchanged to
        ``preprocessing.preprocess``.
    width : int
        Forwarded as ``out_width``; each image is stored in a (width, width) slot.

    Returns
    -------
    tuple
        ``(images, pT, mass, tau21)`` where ``images`` has shape
        (n_events, width, width) and dtype double, and ``tau21`` is the
        element-wise ratio tau_2 / tau_1.
    """
    with h5py.File(path, 'r') as h5file:
        # Look the datasets up once instead of once per event.
        subjets = h5file['subjets']
        constituents = h5file['trimmed_constituents']
        shrinkage = h5file['shrinkage']

        nevents = len(subjets)
        images = np.empty((nevents, width, width), dtype=np.double)
        # range() rather than xrange(): xrange exists only on Python 2.
        for i in range(nevents):
            images[i] = preprocessing.preprocess(
                subjets[i], constituents[i], pixel_edges, zoom=1. / shrinkage[i],
                normalize=True, out_width=width)

        pT = h5file['trimmed_jet']['pT'][:]
        mass = h5file['trimmed_jet']['mass'][:]
        # NOTE(review): an event with tau_1 == 0 yields inf/nan here — confirm such
        # events cannot occur, or filter them downstream.
        tau21 = h5file['tau_2'][:] / h5file['tau_1'][:]
    return images, pT, mass, tau21


images_s, pT_s, mass_s, tau21_s = _load_test_sample('images/w_test.h5', edges)
images_b, pT_b, mass_b, tau21_b = _load_test_sample('images/qcd_test.h5', edges)

n_sig = len(images_s)
n_bkd = len(images_b)
# Signal rows first, then background; every image is flattened to one row.
images = np.concatenate((images_s, images_b))
images = images.reshape(-1, images.shape[1] * images.shape[2])
# True classes, one-hot: [1, 0] for signal rows, [0, 1] for background rows.
classes = np.concatenate([np.repeat([[1, 0]], n_sig, axis=0),
                          np.repeat([[0, 1]], n_bkd, axis=0)])
# Mode 'w' truncates any existing file, so re-running this cell rebuilds it from scratch.
with h5py.File('datasets/w_test.h5', 'w') as h5file:
    h5file.create_dataset('X_test', data=images)
    h5file.create_dataset('Y_test', data=classes)
    h5file.create_dataset('pT', data=np.concatenate((pT_s, pT_b)))
    h5file.create_dataset('mass', data=np.concatenate((mass_s, mass_b)))
    h5file.create_dataset('tau_21', data=np.concatenate((tau21_s, tau21_b)))

In [None]:
# ROC curves built from single auxiliary variables stored in the test file.
aux_dataset = 'datasets/w_test.h5'
roc_pT = utils.auxvar_roc_curve(aux_dataset, 'pT')
roc_mass = utils.auxvar_roc_curve(aux_dataset, 'mass')
roc_tau_21 = utils.auxvar_roc_curve(aux_dataset, 'tau_21')

In [None]:
# Compare the second MaxOut model against the single-variable ROC curves;
# labels follow the curve order.
comparison_curves = [roc_2['ROC_curve'], roc_pT, roc_mass, roc_tau_21]
comparison_labels = ['MaxOut', r'$p_T$', 'mass', r'$\tau_{21}$']
utils.plot_roc_curves(comparison_curves, comparison_labels)

In [None]:
import matplotlib.pyplot as plt
# Histogram of tau_21 for signal and background events of the test set, after
# rescaling the variable to the [0, 1] range.
with h5py.File('datasets/w_test.h5', 'r') as h5file:
    Y_test = h5file['Y_test'][:]
    var = h5file['tau_21'][:]

# tau_21 is stored as a ratio, so it may contain inf/nan. A single such entry would
# turn min()/max() — and with them the whole rescaled array — into nan, so those
# entries are left out of both the rescaling and the plot.
finite = np.isfinite(var)
if not finite.any():
    raise ValueError("no finite tau_21 values in datasets/w_test.h5")

# Rescale out of place (the array read from the file is not modified).
var = var - var[finite].min()
span = var[finite].max()
if span > 0:  # all-equal values would otherwise divide by zero
    var = var / span

# Column 0 of the one-hot labels is 1 for signal rows and 0 for background rows.
var_s = var[finite & (Y_test[:, 0] == 1)]
var_b = var[finite & (Y_test[:, 0] == 0)]

fig, ax = plt.subplots(figsize=(6, 5))
bins = np.linspace(0, 1, 50)
# density=True replaces normed=True, which newer matplotlib releases no longer accept.
hist_style = dict(bins=bins, histtype='stepfilled', density=True, alpha=0.5)
ax.hist(var_s, color='b', label='signal', **hist_style)
ax.hist(var_b, color='r', label='background', **hist_style)
ax.set_xlabel("var", fontsize=16)
ax.set_ylabel("frequency", fontsize=16)
ax.tick_params(axis='both', which='major', labelsize=12)
ax.legend(fontsize=16, loc=2)
# plt.show() rather than fig.show(): under the inline backend fig.show() only emits
# a "non-GUI backend" warning.
plt.show()