In [1]:
import ROOT
from rootpy.tree import Tree, TreeModel, FloatCol, IntCol
from rootpy.io import root_open
from ROOT import gROOT, TCanvas, TF1, TFile, TTree, gRandom, TH1F

from ROOT import RooRealVar, RooFormulaVar, RooVoigtian, RooChebychev, RooArgList, \
                 RooArgSet, RooAddPdf, RooDataSet, RooCategory, RooSimultaneous, \
                 RooBreitWigner, RooCBShape, RooFFTConvPdf, RooGaussian,RooExponential, \
                 RooBinning, kRed, kBlue, kDotted,TString,RooAbsData, RooPlot, TCut, RooAbsData

In [2]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

import os, sys, time, random

from ROOT import TTree, TFile

# from root_numpy import root2array, rec2array, array2root

import pandas as pd
import numpy as np
import scipy 
import root_pandas as rp
import root_numpy as ry 

import pandas.core.common as com
from pandas.core.index import Index
from pandas.tools import plotting
from pandas.tools.plotting import scatter_matrix

from tqdm import tqdm_notebook

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import Imputer, StandardScaler
from sklearn.externals import joblib
from sklearn.metrics import classification_report, roc_curve, auc, roc_auc_score

sys.path.append('/home/chasenberg/repos/')
sys.path.append('/home/chasenberg/repos/dopy')
from dopy import * 
from dopy.dolearn.sklearn_utils import plot_roc_curve, plot_classifier_output, plot_correlations
from dopy.dolearn.sklearn_utils import plot_feature_importances, plot_classifier_output, classify_unseen_data
#from dopy.sklearn_utils import plot_bdt_vars
from dopy.doplot.plotting import Plotter, Plot
from dopy.doanalysis.df_utils import add_min_max, add_eta 



### State what should be done

In [18]:
create_sanity_tuple = False
create_bestPV_tuple = False
create_randomSel_tuple = True
create_l0veto_tuple = False
do_sanity_massfit = False
do_bestPV_massfit = True 
do_randomPV_massfit = True 

### Define simple massmodel

In [5]:
def massfit(data_dir,file,info_file):
    import ROOT
    from ROOT import RooFit
    #Initialize variables
    B0_M = RooRealVar("B0_FitDaughtersConst_M", "B0_M", 5280, "MeV")
    upper_limit_mass = 5450
    lower_limit_mass = 5220
    mass = RooRealVar("B0_FitDaughtersConst_M", "Mass(J/psi K_{S})", 5280,lower_limit_mass, upper_limit_mass, "MeV")
    # Construct signal from two Gaussian pdf's with different means and sigmas 
    mean = RooRealVar("mean", "mean", 5280,  5270,5290)
    sigma_1 = RooRealVar("sigma_1", "sigma_1", 10, 0, 30)
    sigma_2 = RooRealVar("sigma_2", "sigma_2", 13, 0, 30)
    sig1frac = RooRealVar("sig1frac","fraction of component 1 in signal",0.5,0.,1.)

    signal_1 = RooGaussian("signal_1", "signal_1", mass, mean, sigma_1)
    signal_2 = RooGaussian("signal_2", "signal_2", mass, mean, sigma_2)
    signal = RooAddPdf("signal","signal",signal_1, signal_2,sig1frac)#,RooArgList(mean,sigma_1, sigma_2)) 
    # Construct background pdfd
    lambda_1= RooRealVar("lambda","lambda",0.0,-0.2,0.0)
    background = RooExponential("background","background",mass,lambda_1)
    # Construct composite pdf
    nsig = RooRealVar("nsig", "nsig", 557050,0,6000000)#, 0, 100000)
    nbkg = RooRealVar("nbkg", "nbkg", 7079500, 0, 8000000)
    model = RooAddPdf("model", "model", RooArgList(signal, background), RooArgList(nsig, nbkg))
    #Create dataset and fit
    data = ROOT.TFile(data_dir+file)
    tree_data = data.Get('Bd2JpsiKS') 
    tree_data.GetEntries()
    print('---------------------')
    print('Entries found in tree:')
    print(tree_data.GetEntries())
    print('---------------------')
    ntupleVarSet =  RooArgSet(mass)
    dataset = RooDataSet('data','data',tree_data,ntupleVarSet)
    #Fit model to dataset
    model.fitTo(dataset, RooFit.NumCPU(6),
            RooFit.Minimizer('Minuit','minimize'),
            RooFit.Hesse(True),
            RooFit.Optimize(0),
            RooFit.PrintEvalErrors(0),
            RooFit.Save(True),
            RooFit.Extended(),
            RooFit.Verbose(True)) 
    sigyields  = str(nsig.getValV())
    bkgyields = str(nbkg.getValV())
    size = str(tree_data.GetEntries())
    file_sanity = open(data_dir+info_file,'w')  
    file_sanity.write("----------------------------------------")
    file_sanity.write('The fit results are:\n') 
    file_sanity.write('Signal yields:'+sigyields+'\n')
    file_sanity.write('Background yields:'+bkgyields+'\n')
    file_sanity.write('The file has'+size+'entries\n')
    file_sanity.write('----------------------------------------')
    # Prepare frame
    print('Signal yields:'+sigyields)
    print('Background yields:'+bkgyields)
    %matplotlib inline  
    import ROOT
    frame = mass.frame(ROOT.RooFit.Bins(25))
    dataset.plotOn(frame, ROOT.RooFit.Name("data1"))
    #model.plotOn( frame , ROOT.RooFit.Components("signal"), ROOT.RooFit.LineStyle(kDotted), ROOT.RooFit.LineColor(kRed))
    model.plotOn( frame , ROOT.RooFit.Components("background"), ROOT.RooFit.LineStyle(kDotted), ROOT.RooFit.LineColor(kBlue))
    model.plotOn(frame, ROOT.RooFit.Name("fit"))
    ### Add additional informations to the plot
    text_size = 0.035
    # Create TLegend
    legend = ROOT.TLegend(0.7, 0.75, 0.9, 0.9, '')
    legend.AddEntry(frame.findObject('data1'), "Data points", "p");
    legend.AddEntry(frame.findObject('fit'), 'Fit', 'l')
    legend.SetTextSize(text_size)
    # Plot pulls
    #can, _ = plot_pulls('test', frame, legend=legend)#, latex=latex)
    #can.SaveAs("/home/chasenberg/repos/b2cc_sin2beta/notebooks/mass_fit/plots/B0_Mass_bdtcut.pdf")
    #can  # To display plot in notebooks

### All branches that will be considered in the analysis

In [6]:
variables = [
'B0_M',
'B0_TAGDECISION_OS',
'B0_TAGOMEGA_OS',    
'B0_TAU',
'B0_TAUERR',
'B0_FitDaughtersConst_M',
'B0_FitDaughtersConst_chi2',
'B0_FitDaughtersConst_IPCHI2',
'B0_FitDaughtersConst_J_psi_1S_IP',   
'B0_FitDaughtersConst_KS0_P1_PT',
'B0_FitDaughtersConst_KS0_P0_PT',  
'B0_FitDaughtersConst_KS0_decayLength',
'B0_FitDaughtersConst_KS0_IP',
'B0_FitDaughtersConst_KS0_P0_IPCHI2', 
'B0_FitDaughtersConst_J_psi_1S_IPCHI2',
'idxPV',
'piplus_TRACK_Type',
'B0_FitPVConst_status',
'B0_FitDaughtersConst_KS0_P1_PT', 
'B0_FitDaughtersConst_KS0_P0_PT',
'B0_FitDaughtersConst_KS0_P0_IPCHI2', 
'B0_FitDaughtersConst_KS0_P1_IPCHI2',
'B0_FitDaughtersConst_J_psi_1S_P0_PT', 
'B0_FitDaughtersConst_J_psi_1S_P1_PT',
'B0_FitPVConst_MinIPCHI2anyPV',
'B0_FitPVConst_KS0_tau',
'B0_FitPVConst_KS0_tauErr',
'B0_FitPVConst_IPCHI2',
'B0_FitPVConst_PV_X_flat',
'B0_FitPVConst_PV_XVAR_flat',
'B0_FitPVConst_PV_Y_flat',
'B0_FitPVConst_PV_YVAR_flat',
'B0_FitPVConst_PV_Z_flat',
'B0_FitPVConst_PV_ZVAR_flat',
'eventNumber',
'runNumber'
]

### Directories and files

In [7]:
#Names of the produced tuples
data_dir_2015_2016 = '/fhgfs/users/chasenberg/data/2015_2016_merged/jpsimumuks/'
data_sanity_cuts = 'Bd2JpsiKS_sanity.root'
data_bestPV = 'Bd2JpsiKS_bestPV.root'
data_random = 'Bd2JpsiKS_random.root'

In [8]:
sanity_cuts = 'B0_FitDaughtersConst_status==0&B0_FitPVConst_status==0&(B0_L0MuonDecision_Dec==1|B0_L0DiMuonDecision_Dec==1|B0_L0MuonHighDecision_Dec==1)&(B0_Hlt1DiMuonHighMassDecision_Dec==1|B0_Hlt1TrackMVADecision_Dec==1)&(B0_Hlt1DiMuonHighMassDecision_Dec==1|B0_Hlt1TrackMVADecision_Dec==1)&(B0_Hlt2DiMuonJPsiDecision_Dec==1|B0_Hlt2DiMuonDetachedJPsiDecision_Dec==1)'
tree_data ='Bd2JpsiKs'

In [8]:
if create_sanity_tuple==True:
    #directories and files for 2015
    data_file_2015 = 'Bd2JpsimumuKS_data_2015_flat.root'
    data_dir_2015 = '/fhgfs/users/chasenberg/data/2015/jpsimumuks/'
    data_2015 = data_dir_2015 + data_file_2015
    #read from ROOT-file
    df_2015 = rp.read_root(data_2015,key=tree_data, columns=variables,where=sanity_cuts, flatten=True)
    df_2015 = df_2015.replace([np.inf, -np.inf], np.nan)
    df_2015 = df_2015.dropna()
    #directories and files for 2015
    data_file_2016 = 'Bd2JpsimumuKS_data_2016_flat.root'
    data_dir_2016 = '/fhgfs/users/chasenberg/data/2016/jpsimumuks/'
    data_2016 = data_dir_2016 + data_file_2016
    #read from ROOT-file
    df_2016 = rp.read_root(data_2016,key=tree_data, columns=variables,where=sanity_cuts, flatten=True)
    df_2016 = df_2016.replace([np.inf, -np.inf], np.nan)
    df_2016 = df_2016.dropna()
    df_merged = pd.concat([df_2015,df_2016])
    data_dir_2015_2016 = '/fhgfs/users/chasenberg/data/2015_2016_merged/jpsimumuks/'
    file_name = 'info.txt'
    file = open(data_dir_2015_2016+file_name,'w')  
    file.write("----------------------------------------")
    file.write('The file'+data_sanity_cuts+'has got the following cuts:\n') 
    file.write(sanity_cuts)
    file.write('No other selection has been applied yet.\n')
    size = str(df_merged.shape[0])
    file.write('The file has'+size+'entries\n')
    file.write('----------------------------------------')
    #Calculate random variable for random selection
    np.random.seed(42)
    df_merged['idxRandom'] = np.random.choice(2**30, df_merged.shape[0])  
    df_merged['idxEventNumber'] = df_merged['eventNumber'] 
    df_merged['idxRunNumber'] = df_merged['runNumber'] 
    df_merged.to_root(data_dir_2015_2016+data_sanity_cuts,key='Bd2JpsiKS')

### Fit the reconstructed B0 mass after applying sanity cuts

In [9]:
if do_sanity_massfit==True:
    import ROOT
    from ROOT import RooFit
        #Initialize variables
    B0_M = RooRealVar("B0_FitDaughtersConst_M", "B0_M", 5280, "MeV")
    upper_limit_mass = 5450
    lower_limit_mass = 5220
    mass = RooRealVar("B0_FitDaughtersConst_M", "Mass(J/psi K_{S})", 5280,lower_limit_mass, upper_limit_mass, "MeV")
    # Construct signal from two Gaussian pdf's with different means and sigmas 
    mean = RooRealVar("mean", "mean", 5280,  5260,5300)
    sigma_1 = RooRealVar("sigma_1", "sigma_1", 10, 0, 30)
    sigma_2 = RooRealVar("sigma_2", "sigma_2", 13, 0, 30)
    sig1frac = RooRealVar("sig1frac","fraction of component 1 in signal",0.5,0.,1.)

    signal_1 = RooGaussian("signal_1", "signal_1", mass, mean, sigma_1)
    signal_2 = RooGaussian("signal_2", "signal_2", mass, mean, sigma_2)
    signal = RooAddPdf("signal","signal",signal_1, signal_2,sig1frac)#,RooArgList(mean,sigma_1, sigma_2)) 
    # Construct background pdfd
    lambda_1= RooRealVar("lambda","lambda",0.0,-0.2,0.0)
    background = RooExponential("background","background",mass,lambda_1)
    # Construct composite pdf
    nsig = RooRealVar("nsig", "nsig", 557050,0,6000000)#, 0, 100000)
    nbkg = RooRealVar("nbkg", "nbkg", 7079500, 0, 8000000)
    model = RooAddPdf("model", "model", RooArgList(signal, background), RooArgList(nsig, nbkg))
    #Create dataset and fit
    data = ROOT.TFile(data_dir_2015_2016+data_sanity_cuts)
    tree_data = data.Get('Bd2JpsiKS') 
    tree_data.GetEntries()
    print('---------------------')
    print('Entries found in tree:')
    print(tree_data.GetEntries())
    print('---------------------')
    ntupleVarSet =  RooArgSet(mass)
    dataset = RooDataSet('data','data',tree_data,ntupleVarSet)
    #Fit model to dataset
    model.fitTo(dataset, RooFit.NumCPU(6),
        RooFit.Minimizer('Minuit','minimize'),
        RooFit.Hesse(True),
        RooFit.Optimize(0),
        RooFit.PrintEvalErrors(0),
        RooFit.Save(True),
        RooFit.Extended(),
        RooFit.Verbose(True)) 
    sigyields  = str(nsig.getValV())
    bkgyields = str(nbkg.getValV())
    size = str(tree_data.GetEntries())
    file_name = 'fit_results_sanity.txt'
    file_sanity = open(data_dir_2015_2016+file_name,'w')  
    #file_sanity.write("----------------------------------------")
    file_sanity.write('The fit results are:\n') 
    file_sanity.write('Signal yields: '+sigyields+'\n')
    file_sanity.write('Background yields: '+bkgyields+'\n')
    file_sanity.write('The file has '+size+' entries\n')
    #file_sanity.write('----------------------------------------')
    # Prepare frame
    print('Signal yields:'+sigyields)
    print('Background yields:'+bkgyields)
    %matplotlib inline  
    import ROOT
    frame = mass.frame(ROOT.RooFit.Bins(25))
    dataset.plotOn(frame, ROOT.RooFit.Name("data1"))
    #model.plotOn( frame , ROOT.RooFit.Components("signal"), ROOT.RooFit.LineStyle(kDotted), ROOT.RooFit.LineColor(kRed))
    model.plotOn( frame , ROOT.RooFit.Components("background"), ROOT.RooFit.LineStyle(kDotted), ROOT.RooFit.LineColor(kBlue))
    model.plotOn(frame, ROOT.RooFit.Name("fit"))
    ### Add additional informations to the plot
    text_size = 0.035
    # Create TLegend
    legend = ROOT.TLegend(0.7, 0.75, 0.9, 0.9, '')
    legend.AddEntry(frame.findObject('data1'), "Data points", "p");
    legend.AddEntry(frame.findObject('fit'), 'Fit', 'l')
    legend.SetTextSize(text_size)
    # Plot pulls
    #can, _ = plot_pulls('test', frame, legend=legend)#, latex=latex)
    #can.SaveAs("/home/chasenberg/repos/b2cc_sin2beta/notebooks/mass_fit/plots/B0_Mass_bdtcut.pdf")
    #can  # To display plot in notebooks

---------------------
Entries found in tree:
12840451
---------------------
Signal yields:556887.7491554942
Background yields:7079696.841523397


## Compare best PV Selection to Random Selection

In [10]:
if create_bestPV_tuple==True:
    df_bestPV = df_merged.query('idxPV==0')
    df_bestPV.to_root(data_dir_2015_2016+data_bestPV,key='Bd2JpsiKS')

### Fit the reconstructed B0 mass after applying idxPV cut

In [9]:
if do_bestPV_massfit==True:
    info_file = 'fitresult_bestPV.txt'
    massfit(data_dir_2015_2016,data_bestPV,info_file)

---------------------
Entries found in tree:
5175057
---------------------
Signal yields:251451.07562274017
Background yields:2838718.4372890815


### Create tuple with random selected events

In [27]:
import os, subprocess
if create_randomSel_tuple==True:
    print('INFO: Call CandidateSelectionGrimReaper', flush=True)
    my_env = os.environ.copy()
    my_env['PATH'] = '/usr/local/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin:/home/chasenberg/bin'
    my_env['LD_LIBRARY_PATH'] = '/usr/local/lib'
    my_env['script'] = '/home/chasenberg/repos/b2cc_sin2beta_run2/notebooks/selection'
    print("Start SingleCutGrimReaper!")
    my_command = 'source /lhcbsoft/LHCbSoftwareSetup.sh &&' \
             'lb-run DaVinci/v41r2 $BASH -c "' \
             'echo $PATH && ' \
             'source /doosoft/InstallDooSoftware/LoadDooSoftware && SingleCutGrimReaper /fhgfs/users/chasenberg/data/2015_2016_merged/jpsimumuks/Bd2JpsiKS_random.root Bd2JpsiKS /fhgfs/users/chasenberg/data/2015_2016_merged/jpsimumuks/Bd2JpsiKS_random_minIPCUT.root Bd2JpsiKS "B0_FitPVConst_MinIPCHI2anyPV>6""'                     
    subprocess.Popen([my_command], env=my_env, shell=True)
   # subprocess.call(('bash /home/chasenberg/repos/b2cc_sin2beta_run2/notebooks/selection/random_sel.sh'),shell=True)                      
    print("Wait for GrimReaper")
    time.sleep(120.0)    # pause 5.5 seconds
    print("GrimReaper hopefully finished")
    my_command = 'source /lhcbsoft/LHCbSoftwareSetup.sh &&' \
             'lb-run DaVinci/v41r2 $BASH -c "' \
             'echo $PATH && ' \
             'source /doosoft/InstallDooSoftware/LoadDooSoftware && CandidateSelectionGrimReaper /fhgfs/users/chasenberg/data/2015_2016_merged/jpsimumuks/Bd2JpsiKS_sanity.root Bd2JpsiKS /fhgfs/users/chasenberg/data/2015_2016_merged/jpsimumuks/Bd2JpsiKS_random.root Bd2JpsiKS "idxRandom""'                     
    subprocess.Popen([my_command], env=my_env, shell=True)
    print("Wait for GrimReaper")
    time.sleep(120.0)    # pause 5.5 seconds
    print("GrimReaper hopefully finished")

INFO: Call CandidateSelectionGrimReaper
Start SingleCutGrimReaper!
Wait for GrimReaper
GrimReaper hopefully finished
Wait for GrimReaper
GrimReaper hopefully finished


### Fit the reconstructed B0 mass after applying random selection

In [28]:
if do_randomPV_massfit==True:
    info_file = 'fitresult_randomPV.txt'
    massfit(data_dir_2015_2016,data_random,info_file)

---------------------
Entries found in tree:
2526479
---------------------
Signal yields:139955.00780188807
Background yields:1375378.9780159793


# Apply L0 veto

### Run VariablesGrimReaper to calculate L0 veto mass

In [None]:
if create_l0veto_tuple==True:
    data_veto = Bd2JpsiKS_veto.root
    print('INFO: Call CandidateSelectionGrimReaper', flush=True)
    my_env = os.environ.copy()
    my_env['PATH'] = '/usr/local/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin:/home/chasenberg/bin'
    my_env['LD_LIBRARY_PATH'] = '/usr/local/lib'
    my_env['script'] = '/home/chasenberg/repos/b2cc_sin2beta_run2/notebooks/selection'
    my_command = 'source /lhcbsoft/LHCbSoftwareSetup.sh &&' \
             'lb-run DaVinci/v41r2 $BASH -c "' \
             'echo $PATH && ' \
             'source /doosoft/InstallDooSoftware/LoadDooSoftware && CandidateSelectionGrimReaper /fhgfs/users/chasenberg/mc/2015_2016_merged/jpsimumuks/Bd2JpsiKS_sanity.root Bd2JpsiKS /fhgfs/users/chasenberg/mc/2015_2016_merged/jpsimumuks/Bd2JpsiKS_random.root Bd2JpsiKS "idxRunNumber""'                     
    subprocess.Popen([my_command], env=my_env, shell=True)
   # subprocess.call(('bash /home/chasenberg/repos/b2cc_sin2beta_run2/notebooks/selection/random_sel.sh'),shell=True)                      
    print("Wait for GrimReaper")
    time.sleep(40.0)    # pause 5.5 seconds
    print("GrimReaper hopefully finished")

In [None]:
l0_veto = '((abs(1115.683-varLambda0MassHypo_ppluspiminus)>10)|piplus_ProbNNp<0.01)&((abs(1115.683-varLambda0MassHypo_pminuspiplus)>10)|(piminus_ProbNNp<0.01))'