In [234]:
%matplotlib inline

"""This version of the file is just for cooking up new ideas. gan_skip.ipynb is the master file."""

# Name of this notebook, kept by hand because IPython has no __file__.
# GAN.__init__ copies the path named here into progress/<tag>/ for bookkeeping.
# NOTE(review): there is no extension here -- confirm a file with exactly this
# name exists next to the notebook, otherwise that copy cannot succeed.
filename="gan_skip_2"

import os
# Expose only CUDA device index 1 to this process.
# NOTE(review): the previous comment said this line had been commented out to run
# on CPU inside a non-GPU singularity container, but the assignment is active.
# Comment it out (or set it to "") when running CPU-only.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
# Must be set before keras is imported so that the TensorFlow backend is selected.
os.environ["KERAS_BACKEND"] = "tensorflow"
import tensorflow as tf
# Seed TensorFlow's graph-level random number generator for reproducibility.
tf.set_random_seed(42)
# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# NOTE(review): `session` is not handed to Keras in the cells reviewed (no
# K.set_session call is visible) -- confirm allow_growth actually takes effect.
session = tf.Session(config=config)
print "import tensorflow"
           
import keras.backend.tensorflow_backend as K

import keras
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Dropout, Flatten, BatchNormalization, Activation
from keras.layers import Conv2D, MaxPooling2D, LeakyReLU, Lambda
from keras.layers import Input, merge, Concatenate, concatenate, Add
from keras.losses import binary_crossentropy
from keras.utils import plot_model
print "import keras"

import numpy as np
#from tqdm import tqdm
import functools
import time
import pickle
import shutil
import sys

import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

print "import matplotlib"

from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics
from scipy.stats import binned_statistic_2d

print "import sklearn"

# Seed NumPy's global random number generator for reproducibility.
np.random.seed(42)
# Single-slot memo used by the `onetime` decorator:
# cov_hash is the hash of the most recent call's arguments,
# cov_ans is the result that was returned for that call.
cov_hash = None
cov_ans = None


import tensorflow
import keras
import matplotlib
import sklearn


In [235]:
import hashlib
from scipy.stats import ks_2samp

def onetime(func):
    """Decorator that remembers the result of the most recent call only.

    If the decorated function is called again with the same arguments as the
    immediately preceding call, the stored result is returned without calling
    ``func``; any other arguments trigger a fresh computation that replaces
    the stored result.

    The arguments are fingerprinted by *content*: NumPy arrays contribute
    their shape, dtype and raw bytes. (Hashing ``str(args)`` is not enough,
    because the string form of a large array is abbreviated with ``...`` and
    two different arrays can then look identical.) Lists, tuples and dicts are
    walked recursively; every other object contributes its ``repr``.

    Each decorated function gets its own one-entry cache. For backward
    compatibility the module-level names ``cov_hash`` and ``cov_ans`` are
    still updated with the latest key and result.
    """
    last_call = {"key": None, "value": None}

    def _as_bytes(text):
        """Return ``text`` as bytes (str is already bytes on Python 2)."""
        return text if isinstance(text, bytes) else text.encode("utf-8")

    def _feed(hasher, obj):
        """Feed a content-based fingerprint of ``obj`` into ``hasher``."""
        if isinstance(obj, np.ndarray):
            hasher.update(_as_bytes("ndarray%r%s" % (obj.shape, obj.dtype)))
            hasher.update(obj.tobytes())
        elif isinstance(obj, (list, tuple)):
            hasher.update(_as_bytes("%s%d" % (type(obj).__name__, len(obj))))
            for item in obj:
                _feed(hasher, item)
        elif isinstance(obj, dict):
            hasher.update(_as_bytes("dict%d" % len(obj)))
            # sort by repr so the fingerprint does not depend on dict ordering
            for key in sorted(obj, key=repr):
                _feed(hasher, key)
                _feed(hasher, obj[key])
        else:
            hasher.update(_as_bytes(repr(obj)))

    @functools.wraps(func)
    def decorated(*args, **kwargs):
        global cov_ans
        global cov_hash
        hasher = hashlib.md5()
        _feed(hasher, args)
        _feed(hasher, kwargs)
        new_hash = hasher.hexdigest()
        if new_hash != last_call["key"]:
            #print("computing")
            # the key is stored only after func succeeds, so a failed call is retried
            last_call["value"] = func(*args, **kwargs)
            last_call["key"] = new_hash
        # mirror into the legacy module-level slots
        cov_ans = last_call["value"]
        cov_hash = new_hash
        return last_call["value"]
    return decorated
    

@onetime
def covariance_metrics(real_data, predictions):
    """Relative differences between real and generated covariances and means.

    real_data   -- matrix with one real event per row
    predictions -- matrix with one generated event per row

    Returns ``(cov_diff, avg_diff)``:
      cov_diff -- element-wise ``|cov_pred - cov_real| / |cov_real|`` of the
                  column covariance matrices
      avg_diff -- element-wise ``|avg_pred - avg_real| / |avg_real|`` of the
                  column means, dividing by 1 where the real mean is exactly 0

    The result is memoised for the most recent pair of inputs by ``onetime``.
    """
    cov_pred = np.cov(predictions.T)
    avg_pred = predictions.mean(axis=0)
    cov_real = np.cov(real_data.T)
    avg_real = real_data.mean(axis=0)

    #cov_diff = np.abs((cov_pred - cov_real)/np.sqrt(np.abs(np.outer(avg_real, avg_pred))))
    # NOTE(review): entries of cov_real that are exactly 0 give inf/nan here;
    # confirm whether they should be guarded like the means below.
    cov_diff = np.abs((cov_pred - cov_real)/cov_real)

    # Build the denominator as a separate array. Patching zeros in place on an
    # alias of avg_real would also change the numerator (avg_pred - avg_real).
    avg_denominator = np.where(avg_real == 0, 1, avg_real)
    avg_diff = np.abs((avg_pred - avg_real)/avg_denominator)

    return cov_diff, avg_diff


def get_score(real_data, predictions, weight_cov = (1/361.), weight_avg = (1/19.)):
    """Collapse the covariance metrics into one scalar score.

    The score is ``weight_cov * sum(cov_diff) + weight_avg * sum(avg_diff)``
    with both arrays taken from ``covariance_metrics(real_data, predictions)``.
    # presumably the defaults are 1/19**2 and 1/19 for 19 output columns -- TODO confirm
    """
    cov_diff, avg_diff = covariance_metrics(real_data, predictions)
    covariance_term = weight_cov*np.sum(cov_diff)
    average_term = weight_avg*np.sum(avg_diff)
    return covariance_term+average_term

def getKS(real_data, predictions):
    """Two-sample Kolmogorov-Smirnov test on the invariant mass.

    Both inputs are passed to ``Minv`` in the cartesian, no-py2 layout and the
    two resulting mass samples are compared with ``scipy.stats.ks_2samp``,
    whose result is returned unchanged.
    """
    mass_real = Minv(real_data,ptetaphi=False,nopy2=True)
    mass_pred = Minv(predictions,ptetaphi=False,nopy2=True)
    return ks_2samp(mass_real, mass_pred)
#a = np.matrix([[1,2,3],[4,5,6],[7,8,9]])
#ap = np.matrix([[2,2,4],[4,5,6],[7,8,9]])
#b = np.matrix([[-1,-2,-3],[-4,-5,-6],[-7,-8,-9]])


#print(get_score(a,ap))
#print(covariance_metrics(a,b))
#print(covariance_metrics(a,b))

In [236]:
def Minv(cols,ptetaphi=False,nopy2=False):
    """
    Invariant mass of a two-object system, one value per row of `cols`.

    Column layout (cartesian): e1 px1 py1 pz1 e2 px2 py2 pz2.
    if `ptetaphi` is True, the 8 input columns are taken to be cylindrical
    (e pt eta phi per object) and are converted to cartesian first
    if `nopy2` is True, the py2 column is absent (7 columns: pz2 sits in
    column 6) and py2 is treated as zero

    Returns sqrt(E^2 - px^2 - py^2 - pz^2) of the summed four-vectors.
    """
    if ptetaphi:
        cols = ptetaphi_to_cartesian(cols)

    e_total = cols[:,0]+cols[:,4]
    px_total = cols[:,1]+cols[:,5]
    if nopy2:
        py_total = cols[:,2]
        pz_total = cols[:,3]+cols[:,6]
    else:
        py_total = cols[:,2]+cols[:,6]
        pz_total = cols[:,3]+cols[:,7]

    mass_squared = e_total**2 - px_total**2 - py_total**2 - pz_total**2
    return np.sqrt(mass_squared)

def cartesian_to_ptetaphi(eight_cartesian_cols):
    """
    Convert two four-vectors per row from cartesian to cylindrical form.

    Input columns:  e px py pz e px py pz
    Output columns: e pt eta phi e pt eta phi
    where pt = sqrt(px^2+py^2), phi = arctan2(py,px) and
    eta = arctanh(pz/|p|). The energy columns are passed through.
    """
    converted = []
    # the two objects occupy columns 0-3 and 4-7
    for first in (0, 4):
        energy = eight_cartesian_cols[:,first]
        px = eight_cartesian_cols[:,first+1]
        py = eight_cartesian_cols[:,first+2]
        pz = eight_cartesian_cols[:,first+3]
        momentum = np.sqrt(px**2+py**2+pz**2)
        pt = np.sqrt(px**2+py**2)
        eta = np.arctanh(pz/momentum)
        phi = np.arctan2(py,px)
        converted += [energy, pt, eta, phi]
    return np.c_[tuple(converted)]

def ptetaphi_to_cartesian(eight_eptetaphi_cols):
    """
    Convert two four-vectors per row from cylindrical to cartesian form.

    Input columns:  e pt eta phi e pt eta phi
    Output columns: e px py pz e px py pz
    The absolute value of pt is used, so a negative pt is treated as
    its magnitude. The energy columns are passed through.
    """
    converted = []
    # the two objects occupy columns 0-3 and 4-7
    for first in (0, 4):
        energy = eight_eptetaphi_cols[:,first]
        pt_mag = np.abs(eight_eptetaphi_cols[:,first+1])
        eta = eight_eptetaphi_cols[:,first+2]
        phi = eight_eptetaphi_cols[:,first+3]
        px = pt_mag*np.cos(phi)
        py = pt_mag*np.sin(phi)
        # polar angle theta = 2*arctan(exp(-eta)); pz = pt/tan(theta)
        pz = pt_mag/np.tan(2.0*np.arctan(np.exp(-1.*eta)))
        converted += [energy, px, py, pz]
    return np.c_[tuple(converted)]

def get_dphi(px1,py1,px2,py2):
    """
    Azimuthal angle difference phi1 - phi2, wrapped into [-pi, pi].

    Each phi is arctan2(py, px). Inputs are expected to be arrays, since the
    wrap-around is applied with boolean-mask assignment.
    """
    delta = np.arctan2(py1,px1)-np.arctan2(py2,px2)
    above = delta>np.pi
    delta[above] -= 2*np.pi
    below = delta<-np.pi
    delta[below] += 2*np.pi
    return delta

def get_rotated_pxpy(px1,py1,px2,py2):
    """
    Rotate both leptons in the transverse plane so that phi2 becomes 0.

    Returns (px1', py1', px2', py2') where py2' is an array of zeros with
    one entry per input event.
    """
    angle1 = np.arctan2(py1,px1)
    angle2 = np.arctan2(py2,px2)
    length1 = np.sqrt(px1**2+py1**2)
    length2 = np.sqrt(px2**2+py2**2)
    relative = angle1-angle2
    rotated_px1 = length1*np.cos(relative)
    rotated_py1 = length1*np.sin(relative)
    # lepton 2 ends up on the x axis: its rotated angle is phi2-phi2
    rotated_px2 = length2*np.cos(angle2-angle2)
    rotated_py2 = np.zeros(len(length2))
    return rotated_px1,rotated_py1,rotated_px2,rotated_py2
    
def cartesian_zerophi2(coords,ptetaphi=False):
    """
    Rotate both leptons so that phi2 is 0 and drop the now-redundant column.

    `coords` holds 8 cartesian columns (e px py pz e px py pz); 7 columns
    are returned. phi1 is replaced by phi1 - phi2 wrapped into [-pi, pi].
    if `ptetaphi` is True, the result is cylindrical (phi2 column removed),
    otherwise it is cartesian (py2 column removed)
    """
    cylindrical = cartesian_to_ptetaphi(coords)
    delta_phi = cylindrical[:,3]-cylindrical[:,7]
    delta_phi[delta_phi>np.pi] -= 2*np.pi
    delta_phi[delta_phi<-np.pi] += 2*np.pi
    cylindrical[:,3] = delta_phi
    cylindrical[:,7] = 0.
    if not ptetaphi:
        return np.delete(ptetaphi_to_cartesian(cylindrical), [6], axis=1)
    return np.delete(cylindrical, [7], axis=1)

In [237]:
def invmass_from_8cartesian(x):
    """
    Keras-backend invariant mass of the two four-vectors in columns 0-7 of `x`
    (e px py pz e px py pz). Width-1 slices are used so the result keeps a
    trailing column axis.
    """
    e_sum = x[:,0:1]+x[:,4:5]
    px_sum = x[:,1:2]+x[:,5:6]
    py_sum = x[:,2:3]+x[:,6:7]
    pz_sum = x[:,3:4]+x[:,7:8]
    return K.sqrt(e_sum**2-px_sum**2-py_sum**2-pz_sum**2)

def invmass_from_8cartesian_nopy2(x):
    """
    Keras-backend invariant mass for the 7-column layout without py2
    (e px py pz e px pz in columns 0-6 of `x`); py2 is treated as zero.
    Width-1 slices are used so the result keeps a trailing column axis.
    """
    e_sum = x[:,0:1]+x[:,4:5]
    px_sum = x[:,1:2]+x[:,5:6]
    py_sum = x[:,2:3]
    pz_sum = x[:,3:4]+x[:,6:7]
    return K.sqrt(e_sum**2-px_sum**2-py_sum**2-pz_sum**2)

def get_first_N(x,N=19):
    """Return the first `N` columns of the 2-D input `x` (all rows kept)."""
    leading_columns = x[:, :N]
    return leading_columns

def add_invmass_from_8cartesian(x):
    """Append the invariant mass from `invmass_from_8cartesian` as an extra trailing column of `x`."""
    mass_column = invmass_from_8cartesian(x)
    return K.concatenate([x,mass_column])


def fix_outputs(x):
    """
    Take nominal delphes format of 19 columns and fix some columns.

    Only the two charge columns (8 and 9) are changed: they are replaced by
    their sign. Every other column is passed through unchanged.
    """
    # columns 0-6: epxpypz for lep1,lep2 (no py2); column 7: nvtx
    kinematics_and_nvtx = x[:,0:8]
    # columns 8-9: q1 q2, snapped to their sign
    charges = K.sign(x[:,8:10])
    # columns 10-11: iso1 iso2; 12-13: met, metphi; 14-18: jet pts
    remainder = x[:,10:19]
    return K.concatenate([kinematics_and_nvtx, charges, remainder])

def custom_loss(c, loss_type = "force_mll"):
    """
    Build a Keras loss that adds invariant-mass penalties to binary cross-entropy.

    The returned function expects predictions with two columns: column 0 is
    the discriminator score and column 1 is the invariant mass. Only column 0
    of the targets is used.

    loss_type "force_mll":     bce + c*mean(|mll - 91.2|)
    loss_type "force_z_width": the above plus c*|std(mll) - 7.67|

    Raises ValueError for any other `loss_type`.
    """
    if loss_type not in ("force_mll", "force_z_width"):
        raise ValueError("Can not make loss function of type %s" % loss_type)

    penalize_width = (loss_type == "force_z_width")

    def loss_func(y_true, y_pred_mll):
        labels = y_true[:,0]
        scores = y_pred_mll[:,0]
        mll_pred = y_pred_mll[:,1]

        # pull the mean of the mass distribution towards 91.2
        mll_loss = K.mean(K.abs(mll_pred - 91.2))
        total = binary_crossentropy(labels, scores) + c*mll_loss

        if penalize_width:
            # additionally pull the spread of the mass distribution towards 7.67
            mll_sigma_loss = K.abs(K.std(mll_pred) - 7.67)
            total = total + c*mll_sigma_loss
        return total

    return loss_func

In [238]:
def METPhiMap(metphis):
    """Maps the number line onto the periodic interval [-pi,pi]."""
    # floor(|phi|/pi) counts the whole multiples of pi between phi and 0.
    # Half of that, rounded up, is the number of full 2pi turns to remove:
    # none for |phi| < pi, one for pi <= |phi| < 3pi, two for 3pi <= |phi| < 5pi, ...
    # The sign factor subtracts the turns for positive values and adds them
    # for negative values.
    half_turns = np.floor(np.abs(metphis)/(np.pi))
    full_turns = np.ceil(half_turns/2)
    return metphis - np.sign(metphis)*full_turns*2*np.pi

def make_plots(preds,reals,title="",fname="",show_pred=True,wspace=0.1,hspace=0.3,tightlayout=True,visible=False):
    """
    Draw a 5x5 grid of normalised step histograms comparing real and generated events.

    preds       -- 2-D array of generated events, indexed by column position
    reals       -- real events, indexed by field name (e.g. reals["mll"])
    title       -- title placed on the first panel
    fname       -- if non-empty, the figure is saved to this path
    show_pred   -- if False only the real distributions are drawn
    wspace/hspace -- spacing handed to fig.subplots_adjust
    tightlayout -- call plt.tight_layout() before saving
    visible     -- if False the figure is closed at the end
    """
    nrows, ncols = 5,5
    fig, axs = plt.subplots(nrows,ncols,figsize=(16,13))
    fig.subplots_adjust(wspace=wspace,hspace=hspace)

    # one entry per panel: [name, (range low, range high, number of bins)]
    info = [
        ["mll",(60,120,50)],
        ["lep1_e",(0,250,50)],
        ["lep1_px",(-100,100,50)],
        ["lep1_py",(-100,100,50)],
        ["lep1_pz",(-200,200,50)],
        ["lep2_e",(0,250,50)],
        ["lep2_px",(-100,100,50)],
        ["lep2_pz",(-200,200,50)],
        ["dphi",(-4,4,50)],
        ["nvtxs",(0,50,350)],
        ["met",(0,150,50)],
        ["metphi",(-6,6,50)],
        ["lep1_charge",(-7,7,30)],
        ["lep2_charge",(-7,7,30)],
        ["lep1_iso",(0,2.0,30)],
        ["lep2_iso",(0,2.0,30)],
        ["jet_pt1",(0,100,50)],
        ["jet_pt2",(0,100,50)],
        ["jet_pt3",(0,100,50)],
        ["jet_pt4",(0,100,50)],
        ["jet_pt5",(0,100,50)],
        ["njets",(0,7,7)],
    ]

    # column of `preds` holding each quantity that is read straight from a field of `reals`
    pred_column = {
        "lep1_e": 0, "lep1_px": 1, "lep1_py": 2, "lep1_pz": 3,
        "lep2_e": 4, "lep2_px": 5, "lep2_pz": 6,
        "nvtxs": 7,
        "lep1_charge": 8, "lep2_charge": 9,
        "lep1_iso": 10, "lep2_iso": 11,
        "met": 12, "metphi": 13,
        "jet_pt1": 14, "jet_pt2": 15, "jet_pt3": 16, "jet_pt4": 17, "jet_pt5": 18,
    }
    jet_names = ["jet_pt1", "jet_pt2", "jet_pt3", "jet_pt4", "jet_pt5"]

    # hide the y axis on every panel, including the ones left empty
    for ax in axs.flat:
        ax.get_yaxis().set_visible(False)

    for ic,(cname,crange) in enumerate(info):
        if cname == "mll":
            real = reals["mll"]
            pred = Minv(preds,ptetaphi=False,nopy2=True)
        elif cname == "dphi":
            # py2 is zero by construction on both sides
            real = get_dphi(reals["lep1_px"], reals["lep1_py"], reals["lep2_px"], np.zeros(len(reals)))
            pred = get_dphi(preds[:,1], preds[:,2], preds[:,5], np.zeros(len(preds)))
        elif cname == "njets":
            # number of jets with pt above 10
            real = sum(1*(reals[name] > 10) for name in jet_names)
            pred = sum(1*(preds[:,pred_column[name]] > 10) for name in jet_names)
        else:
            real = reals[cname]
            pred = preds[:,pred_column[cname]]
            if cname == "nvtxs":
                pred = np.round(pred)
            elif cname == "metphi":
                pred = METPhiMap(pred)

        panel = axs[ic // ncols, ic % ncols]
        low_high, nbins = crange[:2], crange[-1]
        panel.hist(real, range=low_high,bins=nbins, histtype="step", lw=1.5,density=True)
        if show_pred:
            panel.hist(pred, range=low_high,bins=nbins, histtype="step", lw=1.5,density=True)
        panel.set_xlabel("{}".format(cname))
        panel.get_yaxis().set_visible(False)
    #     panel.set_yscale("log", nonposy='clip')

    first_panel = axs[0,0]
    first_panel.legend(["True","Pred"], loc='upper right')
    first_panel.set_title(title)
    if tightlayout:
        plt.tight_layout()
    if fname:
        fig.savefig(fname)
    if not visible:
        plt.close(fig)

"""def make_plots(preds,reals,title="",fname=""):
    nrows, ncols = 5,5
    fig, axs = plt.subplots(nrows,ncols,figsize=(16,13))
    fig.subplots_adjust(wspace=0.1,hspace=0.3)


    #print(preds)
    info = [
        ["mll",(60,120,50)],
        ["lep1_e",(0,250,50)],
        ["lep1_px",(-100,100,50)],
        ["lep1_py",(-100,100,50)],
        ["lep1_pz",(-200,200,50)],
        ["lep2_e",(0,250,50)],
        ["lep2_px",(-100,100,50)],
        ["lep2_pz",(-200,200,50)],
        ["dphi",(-4,4,50)],
        ["nvtxs",(0,50,350)],
        ["met",(0,150,50)],
        ["metphi",(-6,6,50)],
        ["lep1_charge",(-7,7,30)],
        ["lep2_charge",(-7,7,30)],
        ["lep1_iso",(0,0.2,30)],
        ["lep2_iso",(0,0.2,30)],
        ["genjet_pt1",(0,100,50)],
        ["genjet_pt2",(0,100,50)],
        ["genjet_pt3",(0,100,50)],
        ["genjet_pt4",(0,100,50)],
        ["genjet_pt5",(0,100,50)],

    ]
    for ic,(cname,crange) in enumerate(info):
        if cname == "mll":
            real = reals["mll"]
            pred = Minv(preds,ptetaphi=False,nopy2=True)
        elif cname == "lep1_e": real, pred = reals[cname], preds[:,0]
        elif cname == "lep1_pz": real, pred = reals[cname], preds[:,3]
        elif cname == "lep2_e": real, pred = reals[cname], preds[:,4]
        elif cname == "lep2_pz": real, pred = reals[cname], preds[:,6]
        elif cname == "lep1_px": 
            real = get_rotated_pxpy(reals["lep1_px"], reals["lep1_py"], reals["lep2_px"], reals["lep2_py"])[0]
            pred = preds[:,1]
        elif cname == "lep1_py":
            real = get_rotated_pxpy(reals["lep1_px"], reals["lep1_py"], reals["lep2_px"], reals["lep2_py"])[1]
            pred = preds[:,2]
        elif cname == "lep2_px":
            real = get_rotated_pxpy(reals["lep1_px"], reals["lep1_py"], reals["lep2_px"], reals["lep2_py"])[2]
            pred = preds[:,5]
        elif cname == "dphi":
            real = get_dphi(reals["lep1_px"], reals["lep1_py"], reals["lep2_px"], reals["lep2_py"])
            pred = get_dphi(preds[:,1], preds[:,2], preds[:,5], np.zeros(len(preds)))
        elif cname == "nvtxs": real, pred = reals[cname], np.round(preds[:,7])
        elif cname == "lep1_charge": real, pred = reals[cname], preds[:,8]
        elif cname == "lep2_charge": real, pred = reals[cname], preds[:,9]
        elif cname == "lep1_iso": real, pred = reals[cname], preds[:,10]
        elif cname == "lep2_iso": real, pred = reals[cname], preds[:,11]
        elif cname == "met": real, pred = reals[cname], preds[:,12]
        elif cname == "metphi": real, pred = reals[cname], METPhiMap(preds[:,13])
        elif cname == "genjet_pt1": real, pred = reals[cname], preds[:,14]
        elif cname == "genjet_pt2": real, pred = reals[cname], preds[:,15]
        elif cname == "genjet_pt3": real, pred = reals[cname], preds[:,16]
        elif cname == "genjet_pt4": real, pred = reals[cname], preds[:,17]
        elif cname == "genjet_pt5": real, pred = reals[cname], preds[:,18]
        idx = ic // ncols, ic % ncols
        bins_real = axs[idx].hist(real, range=crange[:2],bins=crange[-1], histtype="step", lw=2,density=True)
        bins_pred = axs[idx].hist(pred, range=crange[:2],bins=crange[-1], histtype="step", lw=2,density=True)
        axs[idx].set_xlabel("{}".format(cname))
        axs[idx].get_yaxis().set_visible(False)
    #     axs[idx].set_yscale("log", nonposy='clip')
    _ = axs[0,0].legend(["True","Pred"], loc='upper right')
    _ = axs[0,0].set_title(title)
    plt.tight_layout()
    if fname:
        fig.savefig(fname)"""


'def make_plots(preds,reals,title="",fname=""):\n    nrows, ncols = 5,5\n    fig, axs = plt.subplots(nrows,ncols,figsize=(16,13))\n    fig.subplots_adjust(wspace=0.1,hspace=0.3)\n\n\n    #print(preds)\n    info = [\n        ["mll",(60,120,50)],\n        ["lep1_e",(0,250,50)],\n        ["lep1_px",(-100,100,50)],\n        ["lep1_py",(-100,100,50)],\n        ["lep1_pz",(-200,200,50)],\n        ["lep2_e",(0,250,50)],\n        ["lep2_px",(-100,100,50)],\n        ["lep2_pz",(-200,200,50)],\n        ["dphi",(-4,4,50)],\n        ["nvtxs",(0,50,350)],\n        ["met",(0,150,50)],\n        ["metphi",(-6,6,50)],\n        ["lep1_charge",(-7,7,30)],\n        ["lep2_charge",(-7,7,30)],\n        ["lep1_iso",(0,0.2,30)],\n        ["lep2_iso",(0,0.2,30)],\n        ["genjet_pt1",(0,100,50)],\n        ["genjet_pt2",(0,100,50)],\n        ["genjet_pt3",(0,100,50)],\n        ["genjet_pt4",(0,100,50)],\n        ["genjet_pt5",(0,100,50)],\n\n    ]\n    for ic,(cname,crange) in enumerate(info):\n        if cna

In [239]:
class GAN(object):
    """
    Generative adversarial network wrapper.

    Construction stores the run configuration, creates the bookkeeping
    directory ``progress/<tag>/``, and builds and compiles three Keras models:
    the discriminator, the generator, and a combined model in which the
    generator feeds a non-trainable discriminator.

    ``build_discriminator`` and ``build_generator`` are not defined in this
    class body; they must be available on the class that is instantiated.
    """

    def __init__(self, **kwargs):
        """
        Configure the GAN from keyword arguments and build the models.

        Every setting read below is required: a missing key raises KeyError.
        The full set of keyword arguments is kept in ``self.args``.
        """

        self.args = dict(kwargs)

        # --- run configuration -------------------------------------------
        self.tag = kwargs["tag"]
        self.input_file = str(kwargs["input_file"])
        self.noise_shape = (int(kwargs["noise_size"]),)
        self.output_shape = (int(kwargs["output_size"]),)
        self.noise_type = int(kwargs["noise_type"])
        self.ntest_samples = int(kwargs["ntest_samples"])
        self.nepochs_dump_pred_metrics = int(kwargs["nepochs_dump_pred_metrics"])
        self.nepochs_dump_models = int(kwargs["nepochs_dump_models"])
        self.nepochs_dump_plots = int(kwargs["nepochs_dump_plots"])
        self.nepochs_max = int(kwargs["nepochs_max"])
        self.batch_size = int(kwargs["batch_size"])
        self.do_concatenate_disc = kwargs["do_concatenate_disc"]
        self.do_concatenate_gen = kwargs["do_concatenate_gen"]
        self.do_batch_normalization_disc = kwargs["do_batch_normalization_disc"]
        self.do_batch_normalization_gen = kwargs["do_batch_normalization_gen"]
        self.do_soft_labels = kwargs["do_soft_labels"]
        self.do_noisy_labels = kwargs["do_noisy_labels"]
        self.do_tanh_gen = kwargs["do_tanh_gen"]
        self.nepochs_decay_noisy_labels = int(kwargs["nepochs_decay_noisy_labels"])
        self.use_ptetaphi_additionally = kwargs["use_ptetaphi_additionally"]
        self.optimizer_gen = kwargs["optimizer_gen"]
        self.optimizer_disc = kwargs["optimizer_disc"]
        self.depth_disc = kwargs["depth_disc"]
        self.width_disc = kwargs["width_disc"]
        self.depth_gen = kwargs["depth_gen"]
        self.width_gen = kwargs["width_gen"]
        self.beefy_generator = kwargs["beefy_generator"]
        self.beefy_discriminator = kwargs["beefy_discriminator"]
        self.add_invmass_disc = kwargs["add_invmass_disc"]
        self.fix_delphes_outputs = kwargs["fix_delphes_outputs"]
        self.use_delphes = kwargs["use_delphes"]
        self.use_mll_loss = kwargs["use_mll_loss"]
        self.loss_mll_weight = kwargs["loss_mll_weight"]
        self.do_skip_connection = kwargs["do_skip_connection"]
        self.terminate_early = kwargs["terminate_early"]
        self.loss_type = kwargs["loss_type"]
        # the 8 extra cylindrical columns widen the output
        if self.use_ptetaphi_additionally: self.output_shape = (self.output_shape[0]+8,)

        print(self.__dict__)

        # --- bookkeeping directory ---------------------------------------
        # Done with os/shutil rather than a shell command line, so a tag
        # containing spaces or shell metacharacters cannot break or alter
        # the command.
        progress_dir = os.path.join("progress", str(self.tag))
        if not os.path.isdir(progress_dir):
            os.makedirs(progress_dir)
        # Best effort, as before: a failed copy is reported but does not
        # abort construction.
        try:
            shutil.copy(filename, progress_dir)
        except (IOError, OSError) as err:
            print("Could not copy {} to {}: {}".format(filename, progress_dir, err))

        # --- feature scaler ----------------------------------------------
        # Any scaler_type other than minmax/robust/standard leaves the scaler
        # as None.
        self.scaler_type = kwargs["scaler_type"]
        self.scaler = None
        scaler_name = self.scaler_type.lower()
        if scaler_name == "minmax":
            self.scaler = MinMaxScaler(feature_range=(-1.,1.))
        elif scaler_name == "robust":
            self.scaler = RobustScaler()
        elif scaler_name == "standard":
            self.scaler = StandardScaler()

        self.data = None
        self.data_ref = None
        self.d_epochinfo = {}
        self.X_train = None

        # optimizer = Adam(0.0002, 0.5)
        optimizer_d = self.optimizer_disc
        # optimizer_d = "sgd"
        optimizer_g = self.optimizer_gen
        # optimizer_g = "adam"

        # --- loss ----------------------------------------------------------
        if self.use_mll_loss:
            loss = custom_loss(c=self.loss_mll_weight, loss_type=self.loss_type)
        else:
            loss = "binary_crossentropy"

        self.loss=loss

        # Build and compile the discriminator
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss=loss,
            optimizer=optimizer_d,
            metrics=['accuracy'])

        # Build and compile the generator
        self.generator = self.build_generator()
        self.generator.compile(loss=loss, optimizer=optimizer_g)

        # The generator takes noise as input and produces generated events
        z = Input(shape=self.noise_shape)
        img = self.generator(z)

        # For the combined model we will only train the generator
        # (set after the discriminator itself has been compiled above)
        self.discriminator.trainable = False

        # The discriminator takes generated events as input and determines validity
        valid = self.discriminator(img)

        # The combined model (stacked generator and discriminator) takes
        # noise as input => generates events => determines validity
        self.combined = Model(z, valid)
        self.combined.compile(loss=loss, optimizer=optimizer_g)


In [240]:
class GAN(GAN):

    def build_generator(self):
        """
        Build the (uncompiled) generator model.

        Layout: a Dense(64) head, a body of Dense+LeakyReLU layers, and a
        Dense tail with ``self.output_shape[0]`` units.

        The body is chosen as follows:
          * ``depth_gen`` > 0 and ``width_gen`` > 0: ``depth_gen`` layers whose
            width starts at ``width_gen`` and halves at each level
          * otherwise, if ``beefy_generator``: widths 128,256,512,256,128
          * otherwise: widths 128,128,128,64,32

        Optional extras: batch normalisation and an [x*x, x] concatenation in
        the head, a skip connection adding the first ``output_shape[0]`` input
        columns to the tail, and either a tanh activation or ``fix_outputs``
        on the result.

        Raises ValueError if a halved layer would be narrower than the output.
        """

        inputs = Input(shape=self.noise_shape)

        ## Head
        x = Dense(64)(inputs)
        if self.do_batch_normalization_gen:
            x = BatchNormalization()(x)
        x = LeakyReLU(alpha=0.2)(x)
        if self.do_concatenate_gen:
            x = Lambda(lambda x: K.concatenate([x*x,x]))(x)
            x = LeakyReLU(alpha=0.2)(x)

        ## Main Body
        if self.depth_gen > 0 and self.width_gen > 0:
            for level in range(0,self.depth_gen):
                # floor division keeps the layer width an integer
                size=self.width_gen//(2**level)
                if(size<self.output_shape[0]):
                    raise ValueError("The layer size %d would be smaller than the output size, make sure you have a wide enough network to deal with %s layers" % (size, self.depth_gen))
                x = Dense(size)(x) #Triangle with width halved at each level
                x = LeakyReLU(alpha=0.2)(x)
        elif self.beefy_generator:
            for size in [128,256,512,256,128]:
                x = Dense(size)(x)
                x = LeakyReLU(alpha=0.2)(x)
        else:
            for size in [128,128,128,64,32]:
                x = Dense(size)(x)
                x = LeakyReLU(alpha=0.2)(x)

        ## Tail
        x = Dense(self.output_shape[0])(x)

        if self.do_skip_connection:
            # get the non-noise part of the input, and add it to the tail
            y = Lambda(get_first_N, arguments={'N': self.output_shape[0]})(inputs)
            x = Add()([x,y])

        if self.do_tanh_gen:
            x = Activation("tanh")(x)
        elif self.fix_delphes_outputs:
            x = Lambda(fix_outputs,
                input_shape=self.output_shape,
                output_shape=self.output_shape
                )(x)

        model = Model(inputs=inputs, outputs=[x])

        # single-argument call form prints the same text on Python 2 and 3
        print("Generator params: {}".format(model.count_params()))

        return model

In [241]:
class GAN(GAN):

    def build_discriminator(self):
        """
        Build the (uncompiled) discriminator model.

        Layout: a Dense(128) head, a body of Dense+LeakyReLU layers, and a
        single sigmoid output unit.

        The body is chosen as follows:
          * ``depth_disc`` > 0 and ``width_disc`` > 0: ``depth_disc`` layers
            whose width starts at ``width_disc`` and halves at each level
          * otherwise, if ``beefy_discriminator``: widths 128,256,256,128,64,32,16,8
          * otherwise: five layers of 128 followed by 64,32,16,8

        When ``use_mll_loss`` is set the model output has two columns: the
        sigmoid score and the invariant mass computed from the inputs by
        ``invmass_from_8cartesian_nopy2``.
        """

        inputs = Input(self.output_shape)
        # invariant mass of the input event; only used as an output with use_mll_loss
        mll = Lambda(invmass_from_8cartesian_nopy2)(inputs)
        x = Dense(128)(inputs)
        if self.do_batch_normalization_disc:
            x = BatchNormalization()(x)
        x = LeakyReLU(alpha=0.2)(x)
        if self.do_concatenate_disc:
            x = Lambda(lambda x: K.concatenate([x*x,x]))(x)
            x = LeakyReLU(alpha=0.2)(x)

        ## Main Body
        if self.depth_disc > 0 and self.width_disc > 0:
            for level in range(0,self.depth_disc):
                # floor division keeps the layer width an integer
                x = Dense(self.width_disc//(2**level))(x) #Triangle with width halved at each level
                x = LeakyReLU(alpha=0.2)(x)
        # the discriminator's own flag decides its size (this used to test
        # beefy_generator, leaving beefy_discriminator without any effect)
        elif self.beefy_discriminator:
            for size in [128,256,256,128,64,32,16,8]:
                x = Dense(size)(x)
                x = LeakyReLU(alpha=0.2)(x)

        else:
            for size in [128]*5 + [64,32,16,8]:
                x = Dense(size)(x)
                x = LeakyReLU(alpha=0.2)(x)

        ## Tail
        out = Dense(1,activation='sigmoid')(x)

        if self.use_mll_loss:
            model = Model(inputs=inputs, outputs=concatenate([out,mll]))
        else:
            model = Model(inputs=inputs, outputs=out)
        # single-argument call form prints the same text on Python 2 and 3
        print("Discriminator params: {}".format(model.count_params()))

        return model
        


In [242]:
class GAN(GAN):

    def load_data(self):
        """
        Load ``self.input_file`` with ``np.load`` into ``self.data`` and keep
        only the entries whose "genmll" field is above 50.

        Does nothing if ``self.data`` has already been loaded.
        """
        if self.data is None:
            # the file is read the same way whether or not use_delphes is set
            self.data = np.load(self.input_file)
            keep = self.data["genmll"] > 50.
            self.data = self.data[keep]

In [243]:
class GAN(GAN):

    def _mixed_noise(self, nrows, ncols):
        """
        Return an (nrows, ncols) array mixing three noise distributions.

        About half of the columns are standard normal, and the rest are split
        between uniform on [-1, 1) and exponential with scale 1. The three
        column counts always add up to ``ncols``.
        """
        ngaus = ncols // 2
        nflat = (ncols - ngaus) // 2
        nexpo = ncols - nflat - ngaus
        # draw order (normal, uniform, exponential) is kept fixed so seeded
        # runs stay reproducible
        noise_gaus = np.random.normal( 0, 1, (nrows, ngaus))
        noise_flat = np.random.uniform(-1, 1, (nrows, nflat))
        noise_expo = np.random.exponential( 1,    (nrows, nexpo))
        return np.c_[ noise_gaus,noise_flat,noise_expo ]

    def get_noise(self, amount=1024):
        """
        Draw generator input noise.

        Returns ``(noise_half, noise_full)`` with ``amount//2`` and ``amount``
        rows respectively, each ``self.noise_shape[0]`` columns wide.

        noise_type 1 -- standard normal noise
        noise_type 2 -- mix of normal, uniform and exponential columns
        noise_type 3 -- truth conditioned: randomly picked rows of
                        ``self.X_train`` followed by mixed noise filling the
                        remaining columns

        Raises ValueError for any other ``noise_type``.
        """
        # nominal
        if self.noise_type == 1:
            noise_half = np.random.normal(0, 1, (amount//2, self.noise_shape[0]))
            noise_full = np.random.normal(0, 1, (amount, self.noise_shape[0]))
            return noise_half, noise_full

        # types 2 and 3 draw both batches in one go and slice them apart
        nrows = amount//2+amount

        if self.noise_type == 2: # random soup
            noise = self._mixed_noise(nrows, self.noise_shape[0])

        elif self.noise_type == 3: # truth conditioned
            npurenoise = self.noise_shape[0]-self.X_train.shape[1]
            purenoise = self._mixed_noise(nrows, npurenoise)
            truenoise = self.X_train[np.random.randint(0, self.X_train.shape[0], nrows)]
            noise = np.c_[ truenoise,purenoise ]

        else:
            raise ValueError("Unknown noise_type %r (expected 1, 2 or 3)" % (self.noise_type,))

        noise_half = noise[:amount//2]
        noise_full = noise[-amount:]
        return noise_half, noise_full



In [244]:
class GAN(GAN):
    # Notebook idiom: GAN is re-declared as a subclass of the GAN defined in earlier
    # cells, so this cell only contributes train().

    def train(self):
        """Run the adversarial training loop.

        Loads the data, builds ``self.X_train`` (optionally scaled with ``self.scaler``),
        then for up to ``self.nepochs_max`` iterations:

        * trains the discriminator on half a batch of real rows and half a batch of
          generated rows,
        * trains the generator through ``self.combined`` on a full batch of noise,
        * every ``nepochs_dump_pred_metrics`` iterations appends mass/covariance
          metrics to ``self.d_epochinfo`` and pickles it to ``progress/<tag>/history.pkl``,
        * every ``nepochs_dump_plots`` iterations writes comparison plots,
        * every ``nepochs_dump_models`` iterations saves both networks.

        If ``self.terminate_early`` is set, training stops once either loss has stayed
        unchanged (absolute change below 1e-4) for more than 1000 consecutive iterations.

        Returns nothing; results are stored on ``self`` and under ``progress/<tag>/``.
        """

        self.load_data()

        if self.use_delphes:
            lepcoords = np.c_[
                self.data["lep1_e"],
                self.data["lep1_px"],
                self.data["lep1_py"],
                self.data["lep1_pz"],
                self.data["lep2_e"],
                self.data["lep2_px"],
#                 self.data["lep2_py"],
                self.data["lep2_pz"],
            ]
#             lepcoords_dphi = cartesian_zerophi2(lepcoords)

            # Smear the integer vertex count and round it back to an integer value.
            nvtx_smeared = np.round(np.random.normal(self.data["nvtxs"],0.5))
            self.X_train = np.c_[
#                 lepcoords_dphi, # 7 columns
                lepcoords, # 7 columns
                nvtx_smeared, # 1 column
                self.data["lep1_charge"], self.data["lep2_charge"],
                self.data["lep1_iso"], self.data["lep2_iso"],
                self.data["met"], self.data["metphi"],
                self.data["jet_pt1"],
                self.data["jet_pt2"],
                self.data["jet_pt3"],
                self.data["jet_pt4"],
                self.data["jet_pt5"],
            ].astype(np.float32)
        else:
            self.X_train = self.data[:,range(1,1+8)]
            if self.use_ptetaphi_additionally:
                self.X_train = np.c_[self.X_train, cartesian_to_ptetaphi(self.X_train)]

        # # NOTE. StandardScaler should be fit on training set
        # # and applied the same to train and test, otherwise we
        # # introduce a bias
        if self.scaler:
            self.scaler.fit(self.X_train)
            self.X_train = self.scaler.transform(self.X_train).astype(np.float32)
            # Binary mode + context manager: pickle data is bytes and the handle must be closed.
            with open("progress/{}/scaler.pkl".format(self.tag),'wb') as fh:
                pickle.dump(self.scaler, fh)

        # make an alias to save typing
        X_train = self.X_train

        half_batch = int(self.batch_size / 2)

        loss_tol = 0.0001  # absolute change below which a loss counts as "unchanged"
        prev_gen_loss = -1
        prev_disc_loss = -1
        n_loss_same_gen = 0  # consecutive iterations with generator loss unchanged (within loss_tol)
        n_loss_same_disc = 0  # consecutive iterations with discriminator loss unchanged (within loss_tol)
        old_info = -1, -1  # (mean, std) of the last generated-mass evaluation, shown in the progress line
        for epoch in range(self.nepochs_max):

            if self.terminate_early:
                if n_loss_same_gen > 1000 or n_loss_same_disc > 1000:
                    print("BREAKING because disc/gen loss has remained the same for {}/{} epochs!".format(n_loss_same_disc,n_loss_same_gen))
                    break

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random half batch of real rows
            idx = np.random.randint(0, X_train.shape[0], half_batch)
            imgs = X_train[idx]

            noise_half, noise_full = self.get_noise(self.batch_size)

            # Generate a half batch of fake rows
            gen_imgs = self.generator.predict(noise_half)

            # Discriminator targets: 1 for real, 0 for generated
            ones = np.ones((half_batch, 1))
            zeros = np.zeros((half_batch, 1))

            if self.do_soft_labels:
                ones *= 0.9

            if self.do_noisy_labels:
                # float() forces true division: under Python 2, int/int floors, which
                # would turn this exponential decay into a step function.
                frac = 0.3*np.exp(-float(epoch)/self.nepochs_decay_noisy_labels)
                if frac > 0.005:
                    # Flip a random fraction of the labels (indices drawn with replacement).
                    ones[np.random.randint(0, len(ones), int(frac*len(ones)))] = 0
                    zeros[np.random.randint(0, len(zeros), int(frac*len(zeros)))] = 1

            d_loss_real = self.discriminator.train_on_batch(imgs, ones)
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs, zeros)
            # Element-wise average of the real/fake results; indexed below as [0]=loss, [1]=accuracy.
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------
            # The generator wants the discriminator to label the generated samples
            # as valid (ones)
            valid_y = np.array([1] * self.batch_size)

            # Train the generator
            g_loss = self.combined.train_on_batch(noise_full, valid_y)

            # Count consecutive iterations with an unchanged loss. abs() is required:
            # a signed difference would also count every *decrease* as "unchanged".
            if abs(g_loss - prev_gen_loss) < loss_tol: n_loss_same_gen += 1
            else: n_loss_same_gen = 0
            prev_gen_loss = g_loss

            if abs(d_loss[0] - prev_disc_loss) < loss_tol: n_loss_same_disc += 1
            else: n_loss_same_disc = 0
            prev_disc_loss = d_loss[0]

            # Progress line, rewritten in place via the leading carriage return
            sys.stdout.write("\r{} [D loss: {}, acc.: {:.2f}%] [G loss: {}] [mll={:.3f}+-{:.3f}]".format(epoch, d_loss[0], 100.0*d_loss[1], g_loss, old_info[0], old_info[1]))

            if epoch % self.nepochs_dump_pred_metrics == 0 and epoch > 0:

                _, noise_test = self.get_noise(self.ntest_samples)

                sys.stdout.write("\n") # break up the stream of text

                gen_imgs = self.generator.predict(noise_test)

                if self.scaler:
                    gen_imgs = self.scaler.inverse_transform(gen_imgs)

                masses = Minv(gen_imgs,nopy2=True)
                masses = masses[np.isfinite(masses)]
                old_info = masses.mean(), masses.std()

                # NOTE(review): when a scaler is active, X_train is in scaled space while
                # gen_imgs has just been inverse-transformed -- confirm this comparison is intended.
                cov_diff, avg_diff = covariance_metrics(X_train, gen_imgs)

                # Create the history lists on first use ...
                if "epoch" not in self.d_epochinfo:
                    history_keys = (
                        "epoch", "d_acc", "d_loss", "g_loss", "mass_mu", "mass_sig", "time",
                        "avg_cov_diff", "max_cov_diff", "std_dev_cov_diff",
                        "avg_mean_diff", "max_mean_diff", "std_dev_mean_diff",
                    )
                    for key in history_keys:
                        self.d_epochinfo[key] = []
                    self.d_epochinfo["args"] = self.args

                # ... and always record this evaluation (the first one used to be dropped
                # because the appends lived in the else-branch of the initialisation).
                self.d_epochinfo["epoch"].append(epoch)
                self.d_epochinfo["d_acc"].append(100*d_loss[1])
                self.d_epochinfo["d_loss"].append(d_loss[0])
                self.d_epochinfo["g_loss"].append(g_loss)
                self.d_epochinfo["mass_mu"].append(masses.mean())
                self.d_epochinfo["mass_sig"].append(masses.std())
                self.d_epochinfo["time"].append(time.time())
                self.d_epochinfo["avg_cov_diff"].append(cov_diff.mean())
                self.d_epochinfo["max_cov_diff"].append(cov_diff.max())
                self.d_epochinfo["std_dev_cov_diff"].append(cov_diff.std())
                self.d_epochinfo["avg_mean_diff"].append(avg_diff.mean())
                self.d_epochinfo["max_mean_diff"].append(avg_diff.max())
                self.d_epochinfo["std_dev_mean_diff"].append(avg_diff.std())

                with open("progress/{}/history.pkl".format(self.tag),'wb') as fh:
                    pickle.dump(self.d_epochinfo, fh)

            if epoch % self.nepochs_dump_plots == 0 and epoch > 0:
                _, noise = self.get_noise(self.ntest_samples)
                # Use this instance's generator, not whatever the notebook-global `gan` points to.
                preds = self.generator.predict(noise)
                reals = self.data[:15000]
                _ = make_plots(preds,reals,title="{}: epoch {}".format(self.tag,epoch),
                               fname="progress/{}/plots_{:06d}.png".format(self.tag,epoch))

            if epoch % self.nepochs_dump_models == 0 and epoch > 0:
                self.discriminator.save("progress/{}/disc_{}.weights".format(self.tag,epoch))
                self.generator.save("progress/{}/gen_{}.weights".format(self.tag,epoch))

                # NOTE(review): gen_imgs is the large test sample only when this epoch also
                # triggered the metrics dump above; otherwise it is the half batch generated
                # for the discriminator step. X_train is in scaled space if a scaler is
                # active -- confirm both are what these diagnostics should see.
                print("Stats Score: %f" %  get_score(X_train, gen_imgs))
                print(ks_2samp(Minv(X_train,ptetaphi=False,nopy2=True), Minv(gen_imgs,ptetaphi=False,nopy2=True)))

In [246]:
# Configuration for one GAN run, built in layers: defaults first, then two
# params.update() overrides (later values win), then a provenance tag.
# The dict is passed as keyword arguments to GAN(**params); presumably each key is
# stored as an attribute of the same name by GAN.__init__ (not shown in this cell).

# defaults
params = {
        "input_file": "data_xyz.npy",
        "output_size": 8,
        "noise_size": 8,
        "noise_type": 1,
        "ntest_samples": 10000,
        "nepochs_dump_pred_metrics": 250,
        "nepochs_dump_plots": 500,
        "nepochs_dump_models": 5000,
        "nepochs_max": 100001,
        "batch_size": 200,
        "do_concatenate_disc": False,
        "do_concatenate_gen": False,
        "do_batch_normalization_disc": False,
        "do_batch_normalization_gen": False,
        "do_soft_labels": False,
        "do_noisy_labels": False,
        "do_tanh_gen": False,
        "nepochs_decay_noisy_labels": 3000,
        "use_ptetaphi_additionally": False,
        "scaler_type": "",
        "optimizer_disc": "adadelta",
        "optimizer_gen": "adadelta",
        "beefy_generator": False,
        "beefy_discriminator": False,
        "depth_gen": 0,
        "width_gen": 0,
        "depth_disc": 0,
        "width_disc": 0,
        "add_invmass_disc": False,
        "fix_delphes_outputs": True,
        "use_delphes": False,
        "use_mll_loss": False,
        "loss_mll_weight": 0.0001,
        "do_skip_connection": False,
        "terminate_early": True,
        "loss_type": "force_mll"
        }

# for delphes:
# (override the defaults above to train on the delphes sample with 19 output columns)
params.update({
    "use_delphes": True,
    #"fix_delphes_outputs": True,
    "fix_delphes_outputs": False,
    "do_soft_labels": False,
    "do_noisy_labels": False,
    "nepochs_decay_noisy_labels": 2000,
    "input_file": "/home/users/bhashemi/Projects/GIT/DY-GAN/delphes/total_Zmumu_13TeV_PU20_v2.npa",
    "output_size": 19,
})
# settings specific to this run (these override both layers above)
params.update({
    "noise_type": 1,
    "noise_size": 19, # NOTE(review): the old remark "19 for the true events and 8 more for noise" (i.e. 27) looks like it was written for truth-conditioned noise (noise_type 3); with noise_type 1 this is just 19 noise inputs -- confirm
    "use_mll_loss": True,
    "loss_mll_weight": 0.01,
    "nepochs_max": 100001,
    "batch_size": 512,
    "do_skip_connection": False,
    "terminate_early": False,
    "nepochs_dump_models": 500,
    "beefy_generator": True,
    "beefy_discriminator": True,
    #"loss_type": "force_mll",
    "loss_type": "force_z_width",
    #"depth_gen": 8,
    #"width_gen": 10000
    #"nepochs_dump_plots": 1,
    #"nepochs_dump_models": 1,
    #"nepochs_max": 10,
    
})

# change tag for provenance
# (the tag names the progress/<tag>/ directory that training writes into)
# params["tag"] = "v1_512_bgbd_nomll"
params["tag"] = "v2_batch512_bgbd_mllANDwidth_NonTC_newdata_mllfix"

# Echo the final, merged configuration so it is recorded in the cell output.
print params
gan = GAN(**params)

#plot_model(model, to_file='progress/%s/model.png' % params["tag"], show_shapes=True, show_layer_names=True)
# Print the layer-by-layer architecture of both networks.
gan.discriminator.summary()
gan.generator.summary()

{'width_disc': 0, 'ntest_samples': 10000, 'optimizer_disc': 'adadelta', 'output_size': 19, 'terminate_early': False, 'do_batch_normalization_disc': False, 'use_delphes': True, 'nepochs_dump_plots': 500, 'use_ptetaphi_additionally': False, 'do_noisy_labels': False, 'tag': 'v2_batch512_bgbd_mllANDwidth_NonTC_newdata_mllfix', 'nepochs_dump_pred_metrics': 250, 'do_batch_normalization_gen': False, 'add_invmass_disc': False, 'width_gen': 0, 'fix_delphes_outputs': False, 'loss_type': 'force_z_width', 'nepochs_dump_models': 500, 'input_file': '/home/users/bhashemi/Projects/GIT/DY-GAN/delphes/total_Zmumu_13TeV_PU20_v2.npa', 'noise_type': 1, 'scaler_type': '', 'batch_size': 512, 'do_concatenate_disc': False, 'do_soft_labels': False, 'depth_gen': 0, 'noise_size': 19, 'loss_mll_weight': 0.01, 'nepochs_max': 100001, 'beefy_discriminator': True, 'depth_disc': 0, 'do_tanh_gen': False, 'do_skip_connection': False, 'use_mll_loss': True, 'beefy_generator': True, 'nepochs_decay_noisy_labels': 2000, 'opti

In [None]:
# Run the training loop (long-running). Progress is streamed to stdout and the
# history/plots/model files are written under progress/<tag>/.
gan.train()


250 [D loss: 0.291139632463, acc.: 0.00%] [G loss: 5.59598112106] [mll=-1.000+--1.000]]
500 [D loss: 0.471023768187, acc.: 13.09%] [G loss: 4.49681091309] [mll=57.928+-9.440]




Stats Score: 1650.143745
Ks_2sampResult(statistic=0.8630215218041601, pvalue=0.0)
750 [D loss: 0.380550742149, acc.: 1.17%] [G loss: 3.90091371536] [mll=58.557+-15.874]
1000 [D loss: 0.638082683086, acc.: 0.00%] [G loss: 3.11517500877] [mll=76.493+-13.457]
Stats Score: 2298.838124
Ks_2sampResult(statistic=0.7075617121816807, pvalue=0.0)
1250 [D loss: 0.390109419823, acc.: 0.00%] [G loss: 2.35288405418] [mll=100.652+-7.433]]
1500 [D loss: 0.317288219929, acc.: 0.00%] [G loss: 2.18847203255] [mll=95.448+-8.088]
Stats Score: 6898.866151
Ks_2sampResult(statistic=0.8208325171517304, pvalue=0.0)
1750 [D loss: 0.756548643112, acc.: 0.00%] [G loss: 2.10182547569] [mll=113.001+-14.478]
2000 [D loss: 0.266668528318, acc.: 0.00%] [G loss: 2.08871102333] [mll=83.059+-10.640]
Stats Score: 4900.874002
Ks_2sampResult(statistic=0.5525932217528449, pvalue=0.0)
2250 [D loss: 0.759331524372, acc.: 0.00%] [G loss: 1.35732913017] [mll=84.142+-6.562]
2500 [D loss: 0.687050163746, acc.: 0.00%] [G loss: 1.138

16250 [D loss: 0.171397149563, acc.: 0.00%] [G loss: 3.89292907715] [mll=86.508+-6.922]
16500 [D loss: 0.203816205263, acc.: 0.20%] [G loss: 4.15406036377] [mll=94.473+-9.994]


In [None]:
# plot masses: width (mass_sig) and mean (mass_mu) of the generated invariant-mass
# distribution, as recorded in gan.d_epochinfo at each metrics dump.
fig, ax = plt.subplots()
epochs_logged = gan.d_epochinfo["epoch"]  # epoch number of every recorded entry
for key in ("mass_sig", "mass_mu"):
    ax.plot(epochs_logged, gan.d_epochinfo[key], label=key)
ax.set_xlabel("epoch")
ax.set_ylabel("generated invariant mass")
ax.set_title("Generated mass mean/width vs. epoch")
_ = ax.legend()

In [None]:
# plot losses: discriminator and generator loss as recorded in gan.d_epochinfo
# at each metrics dump.
fig, ax = plt.subplots()
epochs_logged = gan.d_epochinfo["epoch"]  # epoch number of every recorded entry
for key in ("d_loss", "g_loss"):
    ax.plot(epochs_logged, gan.d_epochinfo[key], label=key)
ax.set_xlabel("epoch")
ax.set_ylabel("loss")
ax.set_title("Discriminator/generator loss vs. epoch")
_ = ax.legend()

In [None]:
# get noise, predict from it, and plot stuff. easy.

# You must load the config of the model you want into the gan first by running the block of code with the
# proper config settings or the loss function will be messed up here.

#tag = gan.tag
tag = "v2_512_bgbd_mllANDwidth_NonTC"
gan.load_data()
epoch=52000

# Build the checkpoint path once so the printed path is exactly the one that gets loaded.
weights_path = "progress/%s/gen_%d.weights" % (tag,epoch)
print(weights_path)
model = load_model(weights_path, custom_objects={'loss_func': custom_loss(c="", loss_type=gan.loss_type)})
_, noise = gan.get_noise(50000)
# print noise
# print noise.shape
# print noise
preds = model.predict(noise)

# Compare the generator output with the first 19 noise columns.
# NOTE(review): this is only a truth-vs-prediction comparison when the noise is
# truth conditioned (noise_type 3), where the leading columns are real events -- confirm.
print((preds-noise[:,0:19]).mean(axis=0))
cov_pred = np.cov(preds.T)
cov_real = np.cov(noise[:,0:19].T)
cov_diff = (cov_pred - cov_real)
print(cov_real[2,2], cov_pred[2,2], cov_diff[2,2])
print(cov_diff.shape)
print(cov_real)
print(cov_pred)
print(cov_diff)
# make_plots(noise,gan.data[:5000],title="epoch {}".format(3000))
# Title the plots with the checkpoint actually loaded (was hard-coded to 3000).
make_plots(preds,gan.data[:5000],title="epoch {}".format(epoch))

In [None]:
mfake = Minv(preds,nopy2=True)
mreal = Minv(noise[:,0:19],nopy2=True)
mreal = mreal[np.isfinite(mreal)]
mfake = mfake[np.isfinite(mfake)]
print mreal.mean(), mreal.std()
print mfake.mean(), mfake.std()
#print (mreal-mfake)[:100]
print(np.mean(noise[:,0]))
#_ = plt.hist(mreal-mfake,bins=np.linspace(-50,50,100))

_ = plt.hist((preds-noise[:,0:19])[:,18],bins=30)

In [None]:
## I need to add a step that computes the covariance matrix elements between the variables, but for some reason I'm
## struggling to understand how to write the function.
import itertools

# Toy sample for cross-checking a hand-written covariance: 5000 draws from a
# 2D Gaussian with zero mean.
mean = [0, 0]
cov = [[4, 2],
       [2, 5]]  # non-zero off-diagonal terms: x and y are correlated (this is NOT a diagonal covariance)

samples = np.random.multivariate_normal(mean, cov, size=5000)
x, y = samples.T
#z = np.random.multivariate_normal(mean, cov, 5000).T

#print(x,y)

def get_covariance(row1, row2, ddof=0):
    """Covariance between two equally shaped samples.

    Computes sum((row1 - mean(row1)) * (row2 - mean(row2))) / (N - ddof).

    Parameters
    ----------
    row1, row2 : array-like
        Samples of the two variables; any sequence accepted by ``np.asarray``.
        Both must have the same shape.
    ddof : int, optional
        Delta degrees of freedom. The default 0 gives the population covariance
        (divide by N, the original behaviour); ``ddof=1`` gives the unbiased
        estimate (divide by N-1), which is what ``np.cov`` returns by default.

    Returns
    -------
    float
        The covariance of the two samples.

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    row1 = np.asarray(row1)
    row2 = np.asarray(row2)
    if row1.shape != row2.shape:
        raise ValueError("row1 and row2 must have the same shape, got {} and {}".format(row1.shape, row2.shape))

    centered_product = (row1 - np.mean(row1)) * (row2 - np.mean(row2))
    if ddof == 0:
        return np.mean(centered_product)
    # float() keeps this a true division under Python 2 as well.
    return np.sum(centered_product) / float(centered_product.size - ddof)

# Cross-check the hand-written covariance against numpy's 2x2 covariance matrix.
# get_covariance(x, y) averages the centered product (divides by N), whereas np.cov
# normalizes by N-1 by default, so the off-diagonal element agrees only up to a
# factor N/(N-1).
print(get_covariance(x,y))
np.cov(x,y)

In [None]:
# Normalized MET distribution of the input data in two pileup regimes,
# selected on the number of vertices (nvtxs < 18 vs. nvtxs > 28).
bins = np.linspace(0,100,50)
real_met = gan.data["met"]
real_nvtxs = gan.data["nvtxs"]
for label, mask in (("low PU", real_nvtxs < 18), ("high PU", real_nvtxs > 28)):
    _ = plt.hist(real_met[mask],bins=bins,histtype="step", density=True, label=label)
plt.legend()

In [None]:
# Same pileup split for the generated events. Assuming preds follows the delphes
# X_train column layout built in GAN.train, column 7 is the (smeared) vertex
# count and column 12 is met.
bins = np.linspace(0,100,50)
pred_met = preds[:,12]
pred_nvtxs = preds[:,7]
for label, mask in (("low PU", pred_nvtxs < 18), ("high PU", pred_nvtxs > 28)):
    _ = plt.hist(pred_met[mask],bins=bins,histtype="step", density=True, label=label)
plt.legend()