# Higgs Example 

## Load modules

In [1]:
import os
import ROOT
from ROOT import TMVA
# Select the Keras backend through the environment before any Keras import
# happens (the Keras cells further down are currently commented out).
os.environ["KERAS_BACKEND"] = "tensorflow"

Welcome to JupyROOT 6.12/06


## I/O

In [2]:
# Instantiate the TMVA tools singleton and initialise the Python method base
# (PyMethodBase) before anything is booked.
TMVA.Tools.Instance()
TMVA.PyMethodBase.PyInitialize()

# ROOT file that is handed to the factory as its output target
# (RECREATE overwrites any file of the same name).
outputFile = ROOT.TFile.Open("Higgs_ClassificationOutput_BDT.root", "RECREATE")

# Factory configured for a classification analysis; the option string is
# split over two literals purely for readability.
factory = TMVA.Factory(
    "TMVA_Higgs_Classification",
    outputFile,
    "!V:ROC:!Silent:Color:!DrawProgressBar:"
    "AnalysisType=Classification"
)

## Load Data and Features

In [3]:
loader = ROOT.TMVA.DataLoader("dataset")

# Register the input features one by one, in this order.
# Commented-out entries are features that are deliberately not registered.
for feature in (
    "m_jj",
    "m_jjj",
    "m_lv",
    "m_jlv",
    "m_bb",
    "m_wbb",
    "m_wwbb",
    "missing_energy_magnitude",
    "jet1_btag",
    "jet2_btag",
    # "jet3_btag",
    # "jet4_btag",
    # "lepton_pT",
    # "missing_energy_phi",
):
    loader.AddVariable(feature)

## Define Inputs

In [4]:
# Location of the ROOT file holding the signal and background trees.
inputFileName = "data/Higgs_data.root"
#inputFileName = "data/prepare_higgs/higgs-dataset.root"

inputFile = ROOT.TFile.Open( inputFileName )
# TFile.Open does not raise on failure: it hands back a null (or zombie) file.
# Fail here with a readable message instead of a null-pointer error later on.
if not inputFile or inputFile.IsZombie():
    raise IOError("Could not open input file '{}'".format(inputFileName))

signalTree     = inputFile.Get("sig_tree")
backgroundTree = inputFile.Get("bkg_tree")
#signalTree     = inputFile.Get("TreeS")
#backgroundTree = inputFile.Get("TreeB")

# Get() returns a null object when the key is absent; check both trees before
# they are handed to the loader.
for treeName, tree in (("sig_tree", signalTree), ("bkg_tree", backgroundTree)):
    if not tree:
        raise RuntimeError(
            "Tree '{}' not found in '{}'".format(treeName, inputFileName))

# Global per-tree event weights (no reweighting of signal vs. background).
signalWeight     = 1.0
backgroundWeight = 1.0

loader.AddSignalTree    ( signalTree,     signalWeight     )
loader.AddBackgroundTree( backgroundTree, backgroundWeight )

# Empty cuts: no pre-selection is applied to either class.
mycuts = ROOT.TCut("")
mycutb = ROOT.TCut("")

# Request 9000 training events per class; the remaining events of each tree
# are used for testing (see the DataSetFactory summary in the log).
loader.PrepareTrainingAndTestTree( mycuts, mycutb,
#                                  "nTrain_Signal=9000:nTrain_Background=9000:SplitMode=Random:"
                                   "nTrain_Signal=9000:nTrain_Background=9000:SplitMode=Block:"
                                   "NormMode=NumEvents:!V" )

DataSetInfo              : [dataset] : Added class "Signal"
                         : Add Tree sig_tree of type Signal with 10000 events
DataSetInfo              : [dataset] : Added class "Background"
                         : Add Tree bkg_tree of type Background with 10000 events


## Book Algorithms (Boosted Decision Trees, Random Forest)

In [5]:
## TMVA Boosted Decision Trees
# The option string is broken into thematic pieces (forest shape, boosting,
# bagging, node splitting); adjacent literals concatenate to the same string.
factory.BookMethod(
    loader,
    ROOT.TMVA.Types.kBDT,
    "BDT",
    "!V:NTrees=200:MinNodeSize=5%:MaxDepth=7"
    ":BoostType=AdaBoost:AdaBoostBeta=0.5"
    ":UseBaggedBoost:BaggedSampleFraction=0.5"
    ":SeparationType=GiniIndex:nCuts=20"
)

## Scikit-learn Gradient-Boosted Trees (disabled)
#factory.BookMethod(loader, ROOT.TMVA.Types.kPyGTB, "PyGTB","H:!V:VarTransform=G:NEstimators=400:LearningRate=0.1:"
#                                                  "MaxDepth=3")
## Scikit-learn Random Forest (disabled)
#factory.BookMethod(loader, ROOT.TMVA.Types.kPyRandomForest, "PyRandomForest","!V:VarTransform=G:NEstimators=400:"
#                           "Criterion=gini:MaxFeatures=auto:MaxDepth=6:MinSamplesLeaf=3:MinWeightFractionLeaf=0:"
#                            "Bootstrap=kTRUE" )
## Scikit-learn AdaBoosted Trees (disabled)
#factory.BookMethod(loader, ROOT.TMVA.Types.kPyAdaBoost, "PyAdaBoost","!V:VarTransform=G:NEstimators=400" )


<ROOT.TMVA::MethodBDT object ("BDT") at 0x71ba0f0>

Factory                  : Booking method: [1mBDT[0m
                         : 
DataSetFactory           : [dataset] : Number of events in input trees
                         : 
                         : 
                         : Number of training and testing events
                         : ---------------------------------------------------------------------------
                         : Signal     -- training events            : 9000
                         : Signal     -- testing events             : 1000
                         : Signal     -- training and testing events: 10000
                         : Background -- training events            : 9000
                         : Background -- testing events             : 1000
                         : Background -- training and testing events: 10000
                         : 
DataSetInfo              : Correlation matrix (Signal):
                         : -----------------------------------------------------------

## Book Algorithms (Neural Networks)

In [6]:
#from keras.models import Sequential
#from keras.optimizers import Adam, SGD
#from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Reshape

In [7]:
# Define model
# NOTE: input_dim must equal the number of variables added to the loader (10 at present, not 7).
#model = Sequential()
#model.add(Dense(64, kernel_initializer='glorot_normal', activation='tanh', input_dim=7))
#model.add(Dense(64, kernel_initializer='glorot_normal', activation='tanh'))
#model.add(Dense(64, kernel_initializer='glorot_normal', activation='tanh'))
#model.add(Dense(2, kernel_initializer='glorot_uniform', activation='softmax'))
#model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['categorical_accuracy',])
#model.save('model_dense.h5')
#model.summary()

In [8]:
#factory.BookMethod(loader, ROOT.TMVA.Types.kPyKeras, 'Keras_Dense',
#                   'H:!V:VarTransform=G:FilenameModel=./model_dense.h5:'+\
#                   'NumEpochs=30:BatchSize=32:TriesEarlyStopping=10')

## Train Methods

In [9]:
# Train every method that has been booked on the factory.
# The trailing ';' keeps Jupyter from echoing any return value of the call.
factory.TrainAllMethods();

Factory                  : [1mTrain all methods[0m
Factory                  : [dataset] : Create Transformation "I" with events from all classes.
                         : 
                         : Transformation, Variable selection : 
                         : Input : variable 'm_jj' <---> Output : variable 'm_jj'
                         : Input : variable 'm_jjj' <---> Output : variable 'm_jjj'
                         : Input : variable 'm_lv' <---> Output : variable 'm_lv'
                         : Input : variable 'm_jlv' <---> Output : variable 'm_jlv'
                         : Input : variable 'm_bb' <---> Output : variable 'm_bb'
                         : Input : variable 'm_wbb' <---> Output : variable 'm_wbb'
                         : Input : variable 'm_wwbb' <---> Output : variable 'm_wwbb'
                         : Input : variable 'missing_energy_magnitude' <---> Output : variable 'missing_energy_magnitude'
                         : Input : variable 'jet1_bta

## Test Methods

In [10]:
# Apply every trained method to the testing sample (the log below reports the
# evaluation of BDT on the 2000 test events).
factory.TestAllMethods();   

Factory                  : [1mTest all methods[0m
Factory                  : Test method: BDT for Classification performance
                         : 
BDT                      : [dataset] : Evaluation of BDT on testing sample (2000 events)
                         : Elapsed time for evaluation of 2000 events: 0.0387 sec       


## Evaluate Methods

In [11]:
# Evaluate every booked classifier; per the log this loops over the test
# events and fills histograms with the classifier response.
factory.EvaluateAllMethods();

Factory                  : [1mEvaluate all methods[0m
Factory                  : Evaluate classifier: BDT
                         : 
BDT                      : [dataset] : Loop over test events and fill histograms with classifier response...
                         : 
TFHandler_BDT            :                 Variable                        Mean                        RMS                [        Min                        Max ]
                         : -------------------------------------------------------------------------------------------------------------------------------------------
                         :                     m_jj:                    1.0406                   0.71664   [                   0.15013                    17.681 ]
                         :                    m_jjj:                    1.0353                   0.39949   [                   0.48774                    8.8785 ]
                         :                     m_lv:                  

## Plot ROC Curve

In [12]:
# Turn on JSROOT rendering for ROOT graphics drawn in the notebook
# (presumably gives an interactive canvas -- confirm against the JupyROOT docs).
%jsroot on
# Ask the factory for the ROC curve of the methods attached to `loader`
# and draw the returned object in the cell output.
c1 = factory.GetROCCurve(loader);
c1.Draw();