# Training of classifier with TMVA

## Import necessary libraries

In [1]:
import ROOT

Welcome to JupyROOT 6.15/02


In [2]:
from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

In [3]:
from ROOT import TMVA
%jsroot on

### Define training variables

In [4]:
# Names of the training variables; each one is registered with the
# TMVA data loader further down in the notebook.
InputVar = [
    "max_eta",
    "lep_Pt_1",
    "Mll01",
    "minDeltaR_LJ_0",
    "minDeltaR_LJ_1",
    "MET_RefFinal_et",
    "nJets_OR_T",
    "nJets_OR_T_MV2c10_70",
    "lep_flavour",
]

### Create instance of TMVA factory
See TMVA/macros/TMVAClassification.C for more factory options.

Example of an option string for the factory:
`"!V:ROC:!Correlations:!Silent:Color:!DrawProgressBar:AnalysisType=Classification"`



In [5]:
# Fetch the TMVA::Tools instance; the returned object is shown as the cell output.
TMVA.Tools.Instance()

<ROOT.TMVA::Tools object at 0x7f854c5343c0>

In [6]:
# ROOT file handed to the TMVA factory below; opened in "RECREATE" mode.
outputFile = TFile.Open("TMVAOutputCV.root", "RECREATE");

In [7]:
# Option string for the factory, kept in a named variable for readability.
factory_options = "!V:ROC:!Correlations:!Silent:Color:!DrawProgressBar:AnalysisType=Classification"

# The factory is attached to the output file opened in the previous cell.
factory = TMVA.Factory("TMVAClassification", outputFile, factory_options)

In [8]:
# Data loader named "dataset"; the training variables and input trees are added to it below.
dataloader = TMVA.DataLoader("dataset")

Define the input variables that shall be used for the classifier training

In [9]:
# Number of training variables (kept as a notebook-level name).
InputVarSize = len(InputVar)

# Register every training variable with the data loader using type code 'F'.
for var_name in InputVar:
    dataloader.AddVariable(var_name, 'F')

Load the signal and background files that will be registered with the data loader.

In [10]:
# Chain of "nominal" trees for the ttH sample; it is registered as the signal tree below.
signal = ROOT.TChain("nominal")
# Last expression of the cell, so the value returned by Add() is displayed as output.
signal.Add("../../Files/skimmed/ttH.root")


1

In [12]:
# Print every branch object of the signal chain, one per line.
signal_branches = signal.GetListOfBranches()
for signal_branch in signal_branches:
    print(signal_branch)

Name: DEtall01 Title: DEtall01/F
Name: lep_flavour Title: lep_flavour/I
Name: max_eta Title: max_eta/F
Name: minDeltaR_LJ_0 Title: minDeltaR_LJ_0/F
Name: minDeltaR_LJ_1 Title: minDeltaR_LJ_1/F
Name: Meff Title: Meff/F
Name: DRlj00 Title: DRlj00/F
Name: min_DRl0b Title: min_DRl0b/F
Name: min_DRlj_new Title: min_DRlj_new/F
Name: DPhij0MET Title: DPhij0MET/F
Name: lead_BjetPt Title: lead_BjetPt/F
Name: lead_jet_sumBEff Title: lead_jet_sumBEff/F
Name: sublead_jet_sumBEff Title: sublead_jet_sumBEff/F
Name: scale_nom Title: scale_nom/F
Name: pileupEventWeight_090 Title: pileupEventWeight_090/D
Name: MV2c10_70_EventWeight Title: MV2c10_70_EventWeight/D
Name: JVT_EventWeight Title: JVT_EventWeight/D
Name: SherpaNJetWeight Title: SherpaNJetWeight/D
Name: EventNumber Title: EventNumber/l
Name: RunYear Title: RunYear/I
Name: MET_RefFinal_et Title: MET_RefFinal_et/F
Name: lep_Pt_0 Title: lep_Pt_0/F
Name: lep_Eta_0 Title: lep_Eta_0/F
Name: lep_Phi_0 Title: lep_Phi_0/F
Name: lep_Pt_1 Title: lep_Pt_1

In [14]:
# Chain of "nominal" trees for the ttW sample; it is registered as the background tree below.
background = ROOT.TChain("nominal")
background.Add("../../Files/skimmed/ttW.root")

# Last expression of the cell: the branch-list object is displayed as output.
background.GetListOfBranches()

<ROOT.TObjArray object ("TObjArray") at 0x7f854c7cb378>

In [15]:
# Global weights passed along when the signal and background trees are registered;
# both samples get the same weight of 1.0.
signalWeight = backgroundWeight = 1.0

Register the signal and background trees with the data loader.

In [17]:
# Hand both chains to the data loader, each with its global weight:
# the ttH chain as signal and the ttW chain as background.
dataloader.AddSignalTree    ( signal,     signalWeight     )
dataloader.AddBackgroundTree( background, backgroundWeight )

DataSetInfo              : [dataset] : Added class "Signal"
                         : Add Tree nominal of type Signal with 69902 events
                         : Add Tree nominal of type Background with 86754 events


In [18]:
# Apply additional cuts on the signal and background sample. 
mycutSig = TCut( "" ) 
mycutBkg = TCut( "" ) 
# <- keep empty as samples were specifically prepared during slimming: 
# https://github.com/grevtsovkirill/tthml_perf/blob/master/skim/GN2_light/ugly_2lss_sel.cpp#L3810


In [19]:
# Options for splitting the registered trees into training and test samples.
split_options = "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"

# Apply the (empty) signal/background cuts and set up the train/test split.
dataloader.PrepareTrainingAndTestTree(mycutSig, mycutBkg, split_options)

In [None]:
# Book a boosted decision tree with gradient boosting under the name "BDTG".
#
# ttH default options, for reference:
#   "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:
#    UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2"
# NOTE: the options booked below use MinNodeSize=2.5%, not the 1.5% quoted above.
#
# This is a Python cell, so members are reached with '.' (factory.BookMethod,
# TMVA.Types.kBDT); the C++ forms 'factory->BookMethod' and 'TMVA::Types::kBDT'
# are a SyntaxError here.
# The trailing ';' keeps the notebook from displaying the returned object.
factory.BookMethod(
    dataloader,
    TMVA.Types.kBDT,
    "BDTG",
    "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2",
);
