In [64]:
import re
import xml.etree.ElementTree as ET
# Regex fragment matching a decimal number with optional sign, fraction and exponent.
regex_float_pattern = r'[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?'

def build_tree(xgtree, xgtree_json, base_xml_element, var_indices):
    """Append one dumped tree to ``base_xml_element`` as nested ``<Node>`` elements.

    Parameters
    ----------
    xgtree : str
        Text dump of a single tree, one node per line. A leaf line has the form
        ``<tabs><id>:leaf=<float>``; a split line has the form
        ``<tabs><id>:[<var><<cut>] yes=<id>,no=<id>...``. The number of leading
        tab characters is written out as the node depth. Empty lines and lines
        containing ``booster`` are skipped.
    xgtree_json : mapping
        Must provide ``'base_weights'``, indexable by integer node id; the value
        is written as the ``res`` attribute of split nodes.
    base_xml_element : xml.etree.ElementTree.Element
        Element that receives the root node (node id ``0``).
    var_indices : dict
        Maps the variable name found in a split line to the value written as
        ``IVar``. An unknown name raises ``KeyError``.

    Raises
    ------
    ValueError
        If a line matches neither the leaf nor the split pattern.
    """
    # Compile once per call instead of re-formatting the pattern for every line.
    leaf_re = re.compile(r'(\t*)(\d+):leaf=({0})$'.format(regex_float_pattern))
    split_re = re.compile(
        r'(\t*)([0-9]+):\[(?P<var>.+)<(?P<cut>{0})\]\syes=(?P<yes>\d+),no=(?P<no>\d+)'.format(regex_float_pattern))

    # Both maps are keyed by node id (as a string); a split line registers its
    # two children before their own lines are reached.
    parent_element_dict = {'0': base_xml_element}
    pos_dict = {'0': 's'}

    for line in xgtree.split('\n'):
        if not line or 'booster' in line:
            continue
        if ':leaf=' in line:
            # leaf node
            result = leaf_re.match(line)
            if not result:
                raise ValueError("unrecognised leaf line in tree dump: {!r}".format(line))
            depth = result.group(1).count('\t')
            inode = result.group(2)
            res = result.group(3)
            ET.SubElement(parent_element_dict[inode], "Node", pos=str(pos_dict[inode]),
                          depth=str(depth), NCoef="0", IVar="-1", Cut="0.0e+00", cType="1",
                          res=str(res), rms="0.0e+00", purity="0.0e+00", nType="-99")
        else:
            # split node, e.g. \t\t3:[var_topcand_mass<138.19] yes=7,no=8,missing=7
            result = split_re.match(line)
            if not result:
                raise ValueError("unrecognised split line in tree dump: {!r}".format(line))
            depth = result.group(1).count('\t')
            inode = result.group(2)
            var = result.group('var')
            cut = result.group('cut')
            lnode = result.group('yes')
            rnode = result.group('no')
            # The 'yes' child is recorded as left, the 'no' child as right.
            pos_dict[lnode] = 'l'
            pos_dict[rnode] = 'r'
            node_elementTree = ET.SubElement(parent_element_dict[inode], "Node", pos=str(pos_dict[inode]),
                                             depth=str(depth), NCoef="0", IVar=str(var_indices[var]), Cut=str(cut),
                                             cType="1", res=str(xgtree_json['base_weights'][int(inode)]),
                                             rms="0.0e+00", purity="0.0e+00", nType="0")
            parent_element_dict[lnode] = node_elementTree
            parent_element_dict[rnode] = node_elementTree
            
def convert_model(model, model_json, input_variables, output_xml):
    """Write a dumped boosted-tree model to ``output_xml`` as a ``MethodSetup`` XML document.

    The document contains ``GeneralInfo``, ``Options``, ``Variables`` and
    ``Weights`` sections, with one ``BinaryTree`` element per entry of ``model``.
    NOTE(review): the layout looks like a ROOT TMVA BDT weight file - confirm
    against the consumer of the XML.

    Parameters
    ----------
    model : sequence of str
        One text dump per tree; each is passed to ``build_tree``.
    model_json : mapping
        JSON form of the same model; the per-tree entries are read from
        ``model_json['learner']['gradient_booster']['model']['trees']``.
    input_variables : sequence
        One ``(name, type, min, max)`` entry per input variable, in the order
        that defines the variable index. ``min``/``max`` may be strings or
        numbers; ``"e+00"`` is appended to their string form.
    output_xml : str or file object
        Destination handed to ``ElementTree.write``. The output is not
        indented; it can be formatted afterwards with ``xmllint --format``.
    """
    n_trees = len(model)
    var_indices = {}

    # <MethodSetup>
    method_setup = ET.Element("MethodSetup", Method="BDT::BDT")

    # <GeneralInfo>
    general_info = ET.SubElement(method_setup, "GeneralInfo")
    ET.SubElement(general_info, "Info", name="Creator", value="Jona Motta")
    ET.SubElement(general_info, "Info", name="AnalysisType", value="Regression")

    # <Options>
    options = ET.SubElement(method_setup, "Options")
    ET.SubElement(options, "Option", name="NodePurityLimit", modified="No").text = "5.00e-01"
    ET.SubElement(options, "Option", name="BoostType", modified="Yes").text = "Grad"

    # <Variables>
    variables = ET.SubElement(method_setup, "Variables", NVar=str(len(input_variables)))
    for ind, val in enumerate(input_variables):
        name, var_type, var_min, var_max = val[:4]
        var_indices[name] = ind
        # str() lets numeric bounds through; string bounds are unchanged.
        ET.SubElement(variables, "Variable", VarIndex=str(ind), Type=var_type,
                      Expression=name, Label=name, Title=name, Unit="", Internal=name,
                      Min=str(var_min) + "e+00", Max=str(var_max) + "e+00")

    # <Weights>
    weights = ET.SubElement(method_setup, "Weights", NTrees=str(n_trees), AnalysisType="1")

    for itree in range(n_trees):
        binary_tree = ET.SubElement(weights, "BinaryTree", type="DecisionTree",
                                    boostWeight="1.0e+00", itree=str(itree))
        build_tree(model[itree], model_json['learner']['gradient_booster']['model']['trees'][itree],
                   binary_tree, var_indices)

    ET.ElementTree(method_setup).write(output_xml)
    # format it with 'xmllint --format'
    
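# example
# bst = xgb.train(param, d_train, num_round, watchlist)
# bst.save_model('model.json')            # JSON form of the model, read for the per-node base_weights
# with open('model.json') as f: model_json = json.load(f)
# model = bst.get_dump()
# # each input variable is (name, type, min, max), with min/max given as strings
# convert_model(model, model_json, input_variables=[('var1','F','0.0','1.0'),('var2','I','0','10')], output_xml='xgboost.xml')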

In [65]:
import pickle
import json

def save_obj(obj, dest):
    """Pickle ``obj`` into the file ``dest`` using the highest pickle protocol.

    ``dest`` is opened in binary write mode, so an existing file is overwritten.
    """
    with open(dest, 'wb') as pkl_file:
        pickle.dump(obj, pkl_file, protocol=pickle.HIGHEST_PROTOCOL)

def load_obj(source):
    """Return the object unpickled from the file ``source``.

    Unpickling can execute arbitrary code, so only load files from a trusted origin.
    """
    with open(source, 'rb') as pkl_file:
        loaded = pickle.load(pkl_file)
    return loaded

In [66]:
# Load the pickled C2 model; the path is absolute and specific to the author's filesystem.
C2model = load_obj(
    '/home/llr/cms/motta/HGCAL/CMSSW_11_1_0/src/GRAPHAnalysis/L1BDT/pklModels/calibration_C1skimC2C3_xgb/model_c2_th_PU200.pkl'
)

In [67]:
# Read the JSON form of the C2 model from the working directory.
# No cell shown here writes C2model.json, so it must already exist.
with open('C2model.json') as c2_json_file:
    C2json = json.load(c2_json_file)

In [68]:
# One [name, type, min, max] entry per input variable; list order sets the variable index.
C2InputVariables = [
    ['cl3d_abseta', 'F', '1.5', '3.0'],
    ['cl3d_coreshowerlength', 'F', '0.0', '36.0'],
    ['cl3d_meanz', 'F', '320.0', '515.0'],
    ['cl3d_showerlength', 'F', '0.0', '50.0'],
    ['cl3d_spptot', 'F', '0.0', '0.015'],
    ['cl3d_srrmean', 'F', '0.0', '0.01'],
]

In [69]:
# Convert the C2 model (text dump plus JSON) into the temporary, unformatted XML file.
convert_model(
    C2model.get_booster().get_dump(),
    C2json,
    C2InputVariables,
    'tmp.xml',
)

In [70]:
# Pretty-print the converter output into the final XML file. '>' truncates any
# previous copy, so no separate delete (which errors when the file is absent) is needed.
!xmllint --format tmp.xml > C2model_nonRscld_xgb.xml
!rm -f tmp.xml

In [3]:
# Load the pickled PU-rejection model; the path is absolute and specific to the author's filesystem.
PUmodel = load_obj(
    '/home/llr/cms/motta/HGCAL/CMSSW_11_1_0/src/GRAPHAnalysis/L1BDT/pklModels/PUrejection_skimPUnoPt/model_PUrejection_th_PU200.pkl'
)

In [4]:
# One [name, type, min, max] entry per input variable; list order sets the variable index.
PUInputVariables = [
    ['cl3d_c3', 'F', '0.0', '15.0'],
    ['cl3d_coreshowerlength', 'F', '0.0', '36.0'],
    ['cl3d_srrtot', 'F', '0.0', '0.015'],
    ['cl3d_srrmean', 'F', '0.0', '0.01'],
    ['cl3d_hoe', 'F', '0.0', '2300.0'],
    ['cl3d_meanz', 'F', '320.0', '515.0'],
]

In [5]:
# convert_model takes the JSON form of the model as its second argument (it is
# read for the per-node base_weights); the previous three-argument call raised TypeError.
# NOTE(review): assumes PUmodel is an xgboost Booster whose save_model() writes
# JSON when given a '.json' path - confirm for the installed xgboost version.
PUmodel.save_model('PUmodel.json')
with open('PUmodel.json') as pu_json_file:
    PUjson = json.load(pu_json_file)
convert_model(PUmodel.get_dump(), PUjson, PUInputVariables, "tmp.xml")

In [6]:
# Pretty-print the converter output into the final XML file. '>' truncates any
# previous copy, so no separate delete (which errors when the file is absent) is needed.
!xmllint --format tmp.xml > PUmodel_nonRscld.xml
!rm -f tmp.xml

In [7]:
# Load the pickled isolation model; the path is absolute and specific to the author's filesystem.
ISOmodel = load_obj(
    '/home/llr/cms/motta/HGCAL/CMSSW_11_1_0/src/GRAPHAnalysis/L1BDT/pklModels/isolation_skimPUnoPt_skimISO90hardPUrej_againstPU/model_isolation_PUWP90_th_PU200.pkl'
)

In [8]:
# One [name, type, min, max] entry per input variable; list order sets the variable index.
ISOInputVariables = [
    ['cl3d_pt_tr', 'F', '0.', '1000.'],
    ['cl3d_abseta', 'F', '1.5', '3.0'],
    ['cl3d_spptot', 'F', '0.', '0.015'],
    ['cl3d_srrtot', 'F', '0.', '0.015'],
    ['cl3d_srrmean', 'F', '0.', '0.01'],
    ['cl3d_hoe', 'F', '0.', '2300.'],
    ['cl3d_meanz', 'F', '320.', '515.'],
    ['cl3d_NclIso_dR4', 'F', '0', '20'],
    ['tower_etSgn_dRsgn1', 'F', '0.', '600.'],
    ['tower_etSgn_dRsgn2', 'F', '0.', '700.'],
    ['tower_etIso_dRsgn1_dRiso3', 'F', '0.', '1100.'],
]

In [9]:
# convert_model takes the JSON form of the model as its second argument (it is
# read for the per-node base_weights); the previous three-argument call raised TypeError.
# NOTE(review): assumes ISOmodel is an xgboost Booster whose save_model() writes
# JSON when given a '.json' path - confirm for the installed xgboost version.
ISOmodel.save_model('ISOmodel.json')
with open('ISOmodel.json') as iso_json_file:
    ISOjson = json.load(iso_json_file)
convert_model(ISOmodel.get_dump(), ISOjson, ISOInputVariables, "tmp.xml")

In [10]:
# Pretty-print the converter output into the final XML file. '>' truncates any
# previous copy, so no separate delete (which errors when the file is absent) is needed.
!xmllint --format tmp.xml > ISOmodel_nonRscld.xml
!rm -f tmp.xml