# Training of classifier with TMVA

## Import necessary libraries

In [1]:
import ROOT

Welcome to JupyROOT 6.15/02


In [2]:
from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut, TCanvas, TGraph

In [3]:
from ROOT import TMVA
%jsroot on

### Define training variables

In [4]:
# Names of the tree branches used as classifier inputs.
# The order of this list is the order in which the variables are
# declared to the TMVA dataloader.
InputVar = [
    "max_eta",
    "lep_Pt_1",
    "Mll01",
    "minDeltaR_LJ_0",
    "minDeltaR_LJ_1",
    "MET_RefFinal_et",
    "nJets_OR_T",
    "nJets_OR_T_MV2c10_70",
    "lep_flavour",
]

### Create instance of TMVA factory
See TMVA/macros/TMVAClassification.C for more factory options.

Example of an option string for the factory:
`"!V:ROC:!Correlations:!Silent:Color:!DrawProgressBar:AnalysisType=Classification"`



In [5]:
# Make sure the TMVA::Tools singleton exists before the factory and dataloader are created.
TMVA.Tools.Instance()

<ROOT.TMVA::Tools object at 0x7f871ad2af70>

In [6]:
# Create (or overwrite, because of "RECREATE") the ROOT file that is handed to
# the TMVA factory as its output file.
outputFile = TFile.Open("TMVAOutputCV.root", "RECREATE")

# TFile.Open does not raise on failure: it returns a null pointer (falsy in
# PyROOT) or a zombie file. Fail here with a clear message instead of letting
# the factory crash later on an unusable file.
if not outputFile or outputFile.IsZombie():
    raise IOError("Could not open TMVAOutputCV.root for writing")

In [7]:
# Configuration string for the factory; options are separated by ':' and a
# leading '!' negates a boolean option.
factoryOptions = "!V:ROC:!Correlations:!Silent:Color:!DrawProgressBar:AnalysisType=Classification"

# The factory books, trains, tests and evaluates the methods; it is given the
# job name "TMVAClassification" and the output file opened above.
factory = TMVA.Factory("TMVAClassification", outputFile, factoryOptions)

In [8]:
# The dataloader receives the input variables and the signal/background trees.
# Its name ("dataset") is also used as the name of the output folder that
# holds the relevant plots.
dataloader = TMVA.DataLoader("dataset")

Define the input variables that shall be used for the classifier training

In [9]:
# Number of training variables (informational only; the loop below does not need it).
InputVarSize = len(InputVar)

# Declare every training variable to the dataloader; 'F' declares it as a float.
# NOTE(review): all variables are declared as 'F', including the nJets_* entries
# whose names suggest integer counts - confirm this is intended.
for varName in InputVar:
    dataloader.AddVariable(varName, 'F')

Load the signal (ttH) and background (ttW) files that will be used for the training

In [10]:
# Signal sample: chain the "nominal" tree from the skimmed ttH file.
signal = ROOT.TChain("nominal")
signal.Add("../../Files/skimmed/ttH.root")

1

In [None]:
# Inspect the branches available in the signal chain, e.g. to check that
# every name listed in InputVar is actually present.
for branch in signal.GetListOfBranches():
    print(branch)

In [11]:
# Background sample: chain the "nominal" tree from the skimmed ttW file.
background = ROOT.TChain("nominal")
background.Add("../../Files/skimmed/ttW.root")
# Uncomment to inspect the available branches:
#background.GetListOfBranches()

1

In [12]:
# Global per-sample weights handed to the dataloader when the trees are
# registered; both samples enter with unit weight.
signalWeight = backgroundWeight = 1.0

Register the signal and background trees with the dataloader

In [13]:
# Attach both chains to the dataloader, each with its global sample weight.
# TMVA labels them as the classes "Signal" and "Background" (see the log below).
dataloader.AddSignalTree    ( signal,     signalWeight     )
dataloader.AddBackgroundTree( background, backgroundWeight )

DataSetInfo              : [dataset] : Added class "Signal"
                         : Add Tree nominal of type Signal with 69902 events
DataSetInfo              : [dataset] : Added class "Background"
                         : Add Tree nominal of type Background with 86754 events


In [14]:
# Apply additional cuts on the signal and background sample. 
# Additional selection cuts applied to the signal and background samples.
# Both are intentionally left empty: the samples were already prepared with
# the final selection during slimming, see
# https://github.com/grevtsovkirill/tthml_perf/blob/master/skim/GN2_light/ugly_2lss_sel.cpp#L3810
mycutSig = TCut( "" ) 
mycutBkg = TCut( "" ) 


In [15]:
# Split the registered events into a training and a test sample.
#   nTrain_Signal=0 / nTrain_Background=0 : no explicit training-sample size is requested;
#       the split reported in the log further down (34951/34951 signal,
#       43377/43377 background) shows each class being divided in half
#   SplitMode=Random   : events are assigned to the two samples at random
#   NormMode=NumEvents : normalisation mode for the class weights (see the TMVA Users Guide)
#   !V                 : verbose output switched off
dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

Method definitions: see page 22 of the TMVA Users Guide, https://root.cern.ch/root/htmldoc/guides/tmva/TMVAUsersGuide.pdf

In [16]:
# Book a gradient-boosted decision tree ("BDT"); the option string is taken from the ROOT
# tutorial (https://github.com/root-project/root/blob/master/tutorials/tmva/TMVAClassification.C#L504)
factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDT",
                   "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" )

# For comparison, the ttH default below is identical except for MinNodeSize
# (1.5% there, 2.5% in the call above):
# Method_Opt = "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:
# UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2";

<ROOT.TMVA::MethodBDT object ("BDT") at 0x7f8717648000>

Factory                  : Booking method: [1mBDT[0m
                         : 
                         : the option NegWeightTreatment=InverseBoostNegWeights does not exist for BoostType=Grad
                         : --> change to new default NegWeightTreatment=Pray
DataSetFactory           : [dataset] : Number of events in input trees
                         : 
                         : 
                         : Number of training and testing events
                         : ---------------------------------------------------------------------------
                         : Signal     -- training events            : 34951
                         : Signal     -- testing events             : 34951
                         : Signal     -- training and testing events: 69902
                         : Background -- training events            : 43377
                         : Background -- testing events             : 43377
                         : Background -- training a

In [17]:
# Book a support vector machine ("SVM") as a second classifier for comparison.
# VarTransform=Norm makes TMVA create a "Norm" transformation of the inputs
# (see the log below).
factory.BookMethod( dataloader, TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )

<ROOT.TMVA::MethodSVM object ("SVM") at 0x7f8718c1aa00>

Factory                  : Booking method: [1mSVM[0m
                         : 
SVM                      : [dataset] : Create Transformation "Norm" with events from all classes.
                         : 
                         : Transformation, Variable selection : 
                         : Input : variable 'max_eta' <---> Output : variable 'max_eta'
                         : Input : variable 'lep_Pt_1' <---> Output : variable 'lep_Pt_1'
                         : Input : variable 'Mll01' <---> Output : variable 'Mll01'
                         : Input : variable 'minDeltaR_LJ_0' <---> Output : variable 'minDeltaR_LJ_0'
                         : Input : variable 'minDeltaR_LJ_1' <---> Output : variable 'minDeltaR_LJ_1'
                         : Input : variable 'MET_RefFinal_et' <---> Output : variable 'MET_RefFinal_et'
                         : Input : variable 'nJets_OR_T' <---> Output : variable 'nJets_OR_T'
                         : Input : variable 'nJets_OR_T_MV2c10

In [None]:
# Disabled: RuleFit takes too long to train
#factory.BookMethod( dataloader, TMVA.Types.kRuleFit, "RuleFit",
 #                          "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )


In [None]:
   # Train all booked MVA methods on the training sample
factory.TrainAllMethods()

In [None]:
    # Apply the trained MVA methods to the test sample
factory.TestAllMethods()
    

In [None]:
    # Evaluate and compare the performance of the MVA methods
factory.EvaluateAllMethods()
    


In [None]:
    # Save the output by closing the ROOT file handed to the factory.
    # NOTE(review): the ROC cells below still query the factory after the file
    # has been closed - confirm this works as intended.
outputFile.Close()

In [None]:
# Ask the factory for the ROC curves of this dataloader and display them.
# With only the dataloader as argument the call is expected to return a canvas
# covering all booked methods (see the TMVA::Factory reference).
c2=factory.GetROCCurve(dataloader)
c2.Draw()

In [None]:
# Area under the ROC curve of the "BDT" method (shown as the cell output).
factory.GetROCIntegral(dataloader,"BDT")

In [None]:
# ROC curve of the "BDT" method alone; the returned object is only shown as
# the cell output and is not stored in a variable.
factory.GetROCCurve (dataloader,"BDT")

In [None]:
# Canvas for drawing a single graph: name 'c1', the given title, top-left
# corner at (200, 10) and a size of 700 x 500 pixels.
c1= TCanvas( 'c1', 'A Simple Graph Example', 200, 10, 700, 500 )

In [None]:
# Draw the ROC curve of the "BDT" method on the canvas c1.
# `gr` was never assigned anywhere in the notebook, so on a fresh
# "Restart & Run All" this cell failed with a NameError; fetch the graph
# from the factory explicitly.
gr = factory.GetROCCurve(dataloader, "BDT")

c1.cd()           # make c1 the current pad so the graph is drawn on it
gr.Draw('ACP')    # A: draw axes, C: smooth curve, P: point markers
c1.Update()       # paint once so that the frame object exists

# Cosmetics of the plot frame.
frame = c1.GetFrame()
frame.SetFillColor(21)
frame.SetBorderSize(12)

# Flag the canvas as changed and repaint it with the new frame settings.
c1.Modified()
c1.Update()
c1.Draw()         # JupyROOT only renders a canvas when Draw() is called on it