In [373]:
# Liveness check: confirms the kernel is executing cells.
print("Kernel is Alive!")

Kernel is Alive!


In [374]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.tree import DecisionTreeClassifier

from root_numpy import root2array, tree2array

import itertools  

from hep_ml.speedup import LookupClassifier

### Create data

In [375]:
# Four corner points of a square, held as a float32 structured array
# with one named field per feature.
data_dict = np.array(
    [(1, 1), (1, 9), (9, 1), (9, 9)],
    dtype=[('x', '<f4'), ('y', '<f4')],
)
# Column order follows the field order of the structured dtype.
data = pd.DataFrame(data_dict, columns=list(data_dict.dtype.names))
data.head()

Unnamed: 0,x,y
0,1,1
1,1,9
2,9,1
3,9,9


In [376]:
# Attach the class labels: the two x == 1 rows are class 0, the two x == 9 rows are class 1.
data["Label"] = [0, 0, 1, 1]
target = data["Label"]
# Training inputs are every column except the label, widened to float64.
features = data.drop(["Label"], axis=1).astype(np.float64)
data.head()

Unnamed: 0,x,y,Label
0,1,1,0
1,1,9,0
2,9,1,1
3,9,9,1


In [377]:
# Number of bins the lookup classifier uses per feature axis.
n_bins = 2

# Depth-1 decision tree (a single split) using the entropy criterion.
base_classifier = DecisionTreeClassifier(criterion='entropy', max_depth=1)

# Wrap the tree in a binned lookup table; the trained tree itself is not kept.
classifier = LookupClassifier(
    base_estimator=base_classifier,
    n_bins=n_bins,
    keep_trained_estimator=False,
)
classifier.fit(features, target)

LookupClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=1,
            max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            random_state=None, splitter='best'),
         keep_trained_estimator=False, max_cells=500000000, n_bins=2)

## Save the BBDT lookup table to a ROOT file

In [378]:
from root_pandas import to_root

# Take the grid dimensions from the fitted objects instead of re-declaring them,
# so this cell cannot drift from the classifier it exports.
# NOTE(review): assumes the classifier was built with an integer n_bins.
n_bins = classifier.n_bins
n_features = features.shape[1]

# Every combination of per-feature bin indices, one row per lookup cell.
# itertools.product varies its LAST position fastest; each tuple is reversed so
# that the first column varies fastest instead: (0,0), (1,0), (0,1), (1,1), ...
# The previous meshgrid/reshape expression only produced the full grid when
# n_bins == n_features == 2.
all_bins_indices = np.array(
    [combination[::-1] for combination in itertools.product(range(n_bins), repeat=n_features)]
)
all_bins_indices_df = pd.DataFrame(all_bins_indices)

lookup_indices = classifier.convert_bins_to_lookup_index(all_bins_indices_df)
# Keep column 1 of the lookup table for every cell.
# NOTE(review): presumably the class-1 probability -- confirm against hep_ml.
predictions = np.array(classifier._lookup_table[lookup_indices][:, 1])

# One row per cell: the bin index of each feature followed by the prediction.
combinated_array = np.c_[all_bins_indices, predictions]
tuple_clf_df = pd.DataFrame(combinated_array, columns=list(features.columns) + ["pred"])
# No dtype conversion is applied here: the old `convert_objects(...)` call discarded
# its result, so it never changed the frame. The generated C++ reader binds every
# branch as Double_t, so the columns are expected to stay floating point.
# NOTE(review): assumes `predictions` is a float array, so np.c_ yields floats -- confirm.

to_root(tuple_clf_df, '../Source/tuple_classifer.root', key='tree')

In [379]:
# Preview the per-cell table (bin indices plus prediction) that was written to the ROOT file.
tuple_clf_df.head()

Unnamed: 0,x,y,pred
0,0,0,0
1,1,0,1
2,0,1,0
3,1,1,1


# Generate C++ test class

In [381]:
def generate_header():
    """Return the text of the C++ header declaring ``PatBBDTSeedClassifier``.

    The declared class exposes ``initialize``, ``getMvaValue`` and
    ``getBinIndices`` and holds the bin-edge table, the bin-indices-to-prediction
    map and the ROOT branch buffers for the ``x``, ``y`` and ``pred`` branches.

    The header includes ``<string>`` and ``<utility>`` explicitly because the
    class members use ``std::string`` and ``std::pair``; without them the header
    only compiles when another include happens to pull those in.

    Returns:
        str: the complete header file content.
    """
    return """    
    
#pragma once

#include <vector>
#include <map>
#include <string>
#include <utility>
#include <TROOT.h>
#include <TFile.h>
#include <TTree.h>


class PatBBDTSeedClassifier 
{
public:
    PatBBDTSeedClassifier();

    void initialize();
    double getMvaValue(const std::vector<double>& parametersVector );
    std::vector<int> getBinIndices(const std::vector<double>& parametersVector);


private:
    //initialization phase
    void initBinEdgeMaps();
    void initTupleClassifier();

    // model prediction phase
    double getBBDTPrediction(const std::vector<int>& binIndices);


    std::vector <std::pair<std::string, std::vector<double>>> m_binsEdgeMap;
    std::map <std::vector<int>, double > m_tupleClassifier;


    // tree specific member fields

    // Declaration of leaf types
    std::vector<Double_t*> m_leafTypes;

    Double_t           x;
    Double_t           y;
    Double_t        pred;

    // List of branches
    TBranch        *b_x;   //!
    TBranch        *b_y;   //!
    TBranch        *b_pred;   //!
};


"""

In [382]:
def generate_source_file():
    """Return the fixed part of ``PatBBDTSeedClassifier.cpp`` as text.

    The returned C++ defines the constructor, ``initialize``, ``getMvaValue``,
    ``getBinIndices``, ``getBBDTPrediction`` and ``initTupleClassifier``.
    The definition of ``initBinEdgeMaps`` is not part of this text.

    Changes in the emitted C++ compared with the previous version:

    * ``getBinIndices`` no longer hard-codes two bins per feature. It counts
      the edges the value has reached and pushes exactly one index per
      feature, so it works for any number of edges (edges are expected in
      ascending order, as before).
    * ``initTupleClassifier`` returns after reporting a missing tree instead
      of dereferencing the null pointer.

    Returns:
        str: C++ source text.
    """
    return """

#include "PatBBDTSeedClassifier.h"
#include <iostream>

using namespace std;


double PatBBDTSeedClassifier::getMvaValue(const std::vector<double>& parametersVector )
{
    auto binIndices = getBinIndices(parametersVector);
    return getBBDTPrediction(binIndices);
}


PatBBDTSeedClassifier::PatBBDTSeedClassifier( )
{
    initialize();
}

void PatBBDTSeedClassifier::initialize()
{
    initBinEdgeMaps();
    initTupleClassifier();
}

std::vector<int> PatBBDTSeedClassifier::getBinIndices(const std::vector<double>& parametersVector)
{
    int actualFeature = 0;
    std::vector<int> binIndicesMap;
    for (const auto& featurePair: m_binsEdgeMap){
        // The bin index is the number of edges the value has reached, so a
        // feature with k edges always yields exactly one index in [0, k].
        int binNumber = 0;
        for(const auto& binValue : featurePair.second) {
            if (parametersVector[actualFeature] < binValue) break;
            binNumber++;
        }
        binIndicesMap.push_back(binNumber);
        actualFeature++;
    }
    return binIndicesMap;
}

double PatBBDTSeedClassifier::getBBDTPrediction(const std::vector<int>& binIndices)
{
    return m_tupleClassifier[binIndices];
}

void PatBBDTSeedClassifier::initTupleClassifier() {
    TTree *tree = 0;
    TFile *f = (TFile *) gROOT->GetListOfFiles()->FindObject("Source/tuple_classifer.root");
    if (!f || !f->IsOpen()) {
        f = new TFile("Source/tuple_classifer.root");
    }
    f->GetObject("tree", tree);

    if (!tree) {
        // Nothing to load: leave the lookup map empty instead of dereferencing null.
        cout << "Tree is null" << endl;
        return;
    }

    tree->SetBranchAddress("x", &x, &b_x);
    tree->SetBranchAddress("y", &y, &b_y);
    tree->SetBranchAddress("pred", &pred, &b_pred);

    m_leafTypes.push_back(&x);
    m_leafTypes.push_back(&y);


    Long64_t nentries = tree->GetEntriesFast();
    for (Long64_t jentry = 0; jentry < nentries; jentry++) {
        tree->GetEntry(jentry);
        std::vector<int> binIndices;
        for (const auto &leaf : m_leafTypes) {
            binIndices.push_back(*leaf);
        }
        m_tupleClassifier.insert(std::make_pair(binIndices, pred));
    }
    
}
"""

In [383]:
def generate_InitBinEdgeMap_function(bin_edges=None):
    """Render the C++ definition of ``PatBBDTSeedClassifier::initBinEdgeMaps``.

    The emitted function assigns ``m_binsEdgeMap`` a braced list with one
    ``{"<feature>", {<edge>,<edge>,...}}`` entry per feature, in the iteration
    order of ``bin_edges``.

    Args:
        bin_edges: mapping of feature name to an iterable of numeric bin
            edges. When omitted, ``classifier.bin_edges`` of the notebook-level
            fitted classifier is used, which is what the function always did
            before this parameter existed.

    Returns:
        str: C++ source text for the function definition.
    """
    if bin_edges is None:
        bin_edges = classifier.bin_edges

    function_body = """
void PatBBDTSeedClassifier::initBinEdgeMaps()
{
    m_binsEdgeMap ={
    """
    # `items()` is available on both Python 2 and Python 3 mappings,
    # whereas `iteritems()` exists on Python 2 only.
    for feature, bins_edges in bin_edges.items():
        # repr(float(...)) keeps enough digits to round-trip the double;
        # str() of a float keeps only 12 significant digits on Python 2.
        edges_text = "".join(repr(float(bin_edge)) + "," for bin_edge in bins_edges)
        function_body += "\t {\"" + str(feature) + "\", {" + edges_text + "}},\n"

    function_body += """\n };
}
    """
    return function_body


In [384]:
# Write the generated header. The context manager closes the handle even if the
# write raises, and avoids rebinding the Python 2 builtin name `file`.
with open("../Source/PatBBDTSeedClassifier.h", 'w') as header_file:
    header_file.write(generate_header())

# The .cpp is the fixed class implementation followed by the generated
# initBinEdgeMaps definition.
file_content = generate_source_file()
file_content += generate_InitBinEdgeMap_function()

with open("../Source/PatBBDTSeedClassifier.cpp", 'w') as source_file:
    source_file.write(file_content)