# Notebook Setup

In [None]:
import xgboost as xgb
import matplotlib.pyplot as plt
import numpy as np
import conifer
import os
import sys
# Put $XILINX_VITIS/bin at the front of PATH for this process
os.environ['PATH'] = ':'.join([os.environ['XILINX_VITIS'] + '/bin', os.environ['PATH']])

# May need to update path to match local installation
helpers_dir = "/home/{}/Locked-in-Leptons/helpers/".format(os.environ['USER'])
sys.path.append(helpers_dir)
import plotting
from helper import get_traintest

In [None]:
# Send log records to stdout; the root logger stays at WARNING while the
# 'conifer' logger is lowered to DEBUG to get more output from conifer.
import logging

logging.basicConfig(level=logging.WARNING, stream=sys.stdout)
logger = logging.getLogger('conifer')
logger.setLevel(logging.DEBUG)

print(f'Using conifer version {conifer.__version__}')

# Load the Data 

In [None]:
# parquet data files need to be in same directory
# Obtain the train/test features and labels from the project helper
# `get_traintest` (imported from `helper`).
# NOTE(review): presumably this reads the parquet files mentioned above and
# performs the split itself — confirm against helper.py.
x_train, x_test, y_train, y_test = get_traintest()

In [None]:
# Wrap each split, together with its labels, in an xgboost DMatrix
dtrain = xgb.DMatrix(data=x_train, label=y_train)
dtest = xgb.DMatrix(data=x_test, label=y_test)

# Train the Model (BDT)

In [None]:
# Booster settings handed to xgb.train (keys are xgboost parameter names)
param = {
    'max_depth': 5,
    'eta': 1,
    'objective': 'binary:logistic',
}
# Number of boosting rounds passed to xgb.train
num_round = 25

In [None]:
%%time
# The %%time cell magic above must remain the first line of the cell.
# Train the booster on `dtrain` with the settings in `param` for `num_round`
# boosting rounds; the result `bst` is used by all later cells.
bst = xgb.train(param, dtrain, num_round)

Check the classifier's feature importances for clues about how the model works. The features need to be labeled manually here first so that the output is easy to read.

In [None]:
from sklearn.metrics import accuracy_score
from xgboost import plot_importance

# Score the test set with the trained booster and round each score to
# obtain a 0/1 class prediction
y_xgb = bst.predict(dtest)
print(y_xgb)
predictions_xgb = [round(score) for score in y_xgb]

# Human-readable labels attached to the booster so the importance plot
# shows names; the order here must match the column order of the inputs.
bst.feature_names = [
    # jets
    "Jet0_pt",
    "Jet0_eta",
    "Jet0_phi",
    "Jet1_pt",
    "Jet1_eta",
    "Jet1_phi",
    "Jet2_pt",
    "Jet2_eta",
    "Jet2_phi",
    # leptons
    "Lep0_pt",
    "Lep0_eta",
    "Lep0_phi",
    "Lep1_pt",
    "Lep1_eta",
    "Lep1_phi",
    # missing transverse energy
    "Met_et",
    "Met_phi",
]

# Draw the feature-importance chart for the booster
plot_importance(bst)
plt.show()

# Validate Performance

In [None]:
from sklearn.metrics import roc_curve, auc

print(f'Accuracy xgboost:   {accuracy_score(y_test, predictions_xgb):.5f}')

# ROC points (false/true positive rates per threshold) and the area under them
fpr, tpr, thresholds = roc_curve(y_test, y_xgb)
roc_auc = auc(fpr, tpr)

# Draw background efficiency (fpr) against signal efficiency (tpr) on a
# logarithmic y axis
fig, ax = plt.subplots(figsize=(9, 9))
ax.plot(tpr, fpr, color='blue', label='{}, AUC = {:.1f}%'.format("xgboost", roc_auc * 100.0),)

ax.semilogy()
ax.set_xlabel("Signal Efficiency")
ax.set_ylabel("Background Efficiency")
ax.set_xlim(0.0, 1.0)
ax.set_ylim(0.001, 1)
ax.grid(True)
# NOTE(review): the 'hls4ml' tag looks carried over from another tutorial — confirm
fig.text(0.25, 0.90, 'hls4ml', fontweight='bold', wrap=True, horizontalalignment='right', fontsize=14)
ax.legend(loc="lower right")
ax.set_yscale('log')
plt.show()

In [None]:
from pickle import dump

# Persist the trained booster as a pickle (protocol 5) named after the number
# of boosting rounds. The target directory is created first, because open()
# in "wb" mode raises FileNotFoundError when the parent directory is missing.
os.makedirs("model_5_vitis", exist_ok=True)
with open("model_5_vitis/xgb_BC_nRound_{}.pkl".format(num_round), "wb") as f:
    dump(bst, f, protocol=5)

# Conifer Conversion
<img src="https://github.com/thesps/conifer/blob/master/conifer_v1.png?raw=true" width="250" alt="conifer" />

In [None]:
# Start from the xilinxhls backend's automatic configuration and override
# the output directory and the Xilinx part entries
cfg = conifer.backends.xilinxhls.auto_config()
cfg['OutputDir'] = 'model_bdt_xgboost/'
cfg['XilinxPart'] = 'xcu250-figd2104-2L-e'

# Print the modified configuration, framed by divider lines
divider = '-' * 50
print('Modified Configuration\n' + divider)
plotting.print_dict(cfg)
print(divider)

In [None]:
# convert the model to the conifer representation
# (inputs: the trained booster `bst` and the configuration `cfg` built above)
conifer_model = conifer.converters.convert_from_xgboost(bst, cfg)
# write the project (writing HLS project to disk)
# NOTE(review): presumably written under cfg['OutputDir'] — confirm
conifer_model.write()

# Emulation

In [None]:
# Compile the written project; the next cell evaluates the compiled model
# through `decision_function`.
# NOTE(review): presumably this builds a C++ emulation library and needs the
# Xilinx tool headers on the path — confirm against the conifer docs.
conifer_model.compile()

In [None]:
from scipy.special import expit

# Evaluate the conifer model on the test set, map its raw decision values
# through the logistic sigmoid (expit), and round to 0/1 predictions
y_hls = conifer_model.decision_function(x_test)
y_hls_proba = expit(y_hls)
predictions = list(map(round, y_hls_proba))

## Verify Model Performance in Emulation

In [None]:
from sklearn.metrics import roc_curve, auc

print(f'Accuracy xgboost:   {accuracy_score(y_test, predictions_xgb):.5f}')
print(f'Accuracy conifer:   {accuracy_score(y_test, predictions):.5f}')

fig, ax = plt.subplots(figsize=(9, 9))

# One ROC curve per model: xgboost with the default line style, conifer
# dashed. For each, compute the false positive rate, true positive rate,
# thresholds and AUC, then draw fpr against tpr.
roc_inputs = (
    ("xgboost", y_xgb, {}),
    ("conifer", y_hls_proba, {'ls': "--"}),
)
for model_name, model_scores, line_style in roc_inputs:
    fpr, tpr, thresholds = roc_curve(y_test, model_scores)
    roc_auc = auc(fpr, tpr)
    ax.plot(tpr, fpr, color='blue', label='{}, AUC = {:.1f}%'.format(model_name, roc_auc * 100.0), **line_style)

ax.semilogy()
ax.set_xlabel("Signal Efficiency")
ax.set_ylabel("Background Efficiency")
ax.set_xlim(0.0, 1.0)
ax.set_ylim(0.001, 1)
ax.grid(True)
# NOTE(review): the 'hls4ml' tag looks carried over from another tutorial — confirm
fig.text(0.25, 0.90, 'hls4ml', fontweight='bold', wrap=True, horizontalalignment='right', fontsize=14)
ax.legend(loc="lower right")
ax.set_yscale('log')
plt.show()

# Synthesize

In [None]:
# Run the conifer build with both the `synth` and `vsynth` steps enabled.
# NOTE(review): presumably `synth` is HLS C synthesis and `vsynth` is Vivado
# logic synthesis, and this can take a long time — confirm in the conifer docs.
conifer_model.build(synth=True, vsynth=True)

In [None]:
# Read the report for the built model and print it with the project's
# dictionary printer from the `plotting` helper module.
# NOTE(review): presumably contains resource/latency figures from synthesis — confirm
report = conifer_model.read_report()
plotting.print_dict(report)

# Precision Comparison

In [None]:
from scipy.special import expit
from sklearn.metrics import accuracy_score, auc, roc_curve

# Fixed-point type strings to scan; each one is written to cfg['Precision']
precisions = ['ap_fixed<16,6>', 'ap_fixed<18,8>', 'ap_fixed<24,10>', 'ap_fixed<32,14>']

# Baseline accuracy of the xgboost predictions (labelled "xgboost" to match
# the earlier validation cells; it was previously mislabelled "sklearn")
print(f'Accuracy xgboost:   {accuracy_score(y_test, predictions_xgb):.5f}')

# All curves are drawn on `ax` explicitly rather than through pyplot's
# implicit "current axes", so they end up on this figure even if code called
# inside the loop touches pyplot state.
fig, ax = plt.subplots(figsize=(9, 9))

# Reference ROC curve: false positive rate, true positive rate and AUC of
# the xgboost scores
fpr, tpr, thresholds = roc_curve(y_test, y_xgb)
roc_auc = auc(fpr, tpr)
ax.plot(tpr, fpr, color='blue', label='{}, AUC = {:.1f}%'.format("XGBoost", roc_auc * 100.0))

for precision in precisions:
    # Fresh configuration per precision, each with its own output directory
    cfg = conifer.backends.xilinxhls.auto_config()
    # NOTE(review): the directory name embeds '<', ',' and '>' from the
    # precision string; these need quoting in shells and are not valid in
    # Windows paths — consider sanitising if the tools complain.
    cfg['OutputDir'] = 'model_bdt_xgboost_{}/'.format(precision)
    cfg['XilinxPart'] = 'xcu250-figd2104-2L-e'
    cfg['Precision'] = precision
    # print the config
    print('Conifer HLS Configuration\n' + '-' * 50)
    plotting.print_dict(cfg)
    print('-' * 50)

    # convert the model to the conifer representation
    conifer_model = conifer.converters.convert_from_xgboost(bst, cfg)
    # write the project (writing HLS project to disk), then compile it so it
    # can be evaluated below
    conifer_model.write()
    conifer_model.compile()

    # Despite the `y_sklearn*` names, these are the conifer model's outputs;
    # the names are kept because they are notebook-level globals.
    y_sklearn = conifer_model.decision_function(x_test)
    y_sklearn_proba = expit(y_sklearn)
    predictions = [round(value) for value in y_sklearn_proba]
    print("Precision: ", precision)
    print(f'Accuracy conifer:   {accuracy_score(y_test, predictions):.5f}')

    # ROC curve for this precision
    fpr, tpr, thresholds = roc_curve(y_test, y_sklearn_proba)
    roc_auc = auc(fpr, tpr)
    ax.plot(tpr, fpr, label='{} {}, AUC = {:.1f}%'.format("conifer", precision, roc_auc * 100.0))

    # Build with both synthesis steps enabled and print the resulting report
    conifer_model.build(synth=True, vsynth=True)
    report = conifer_model.read_report()
    plotting.print_dict(report)

# Axis cosmetics: logarithmic background-efficiency axis (set once)
ax.set_yscale('log')
ax.set_xlabel("Signal Efficiency")
ax.set_ylabel("Background Efficiency")
ax.set_xlim(0.0, 1.0)
ax.set_ylim(0.001, 1)
ax.grid(True)
fig.text(0.25, 0.90, 'Conifer XGBoost BDT Conversion', fontweight='bold', wrap=True, horizontalalignment='right', fontsize=14)
ax.legend(loc="lower right")

# Create the output directory first: savefig raises FileNotFoundError if
# "plots/" is missing, which would otherwise only surface after every
# synthesis run above has already finished.
os.makedirs("plots", exist_ok=True)
fig.savefig("plots/xgboost_precision_optimization.png")
plt.show()