# Notebook Setup

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import numpy as np
import conifer
import os
import sys
# Put the Vitis bin directory at the front of PATH for this process.
# A KeyError here means XILINX_VITIS is not set in the environment.
vitis_bin_prefix = os.environ['XILINX_VITIS'] + '/bin:'
os.environ['PATH'] = vitis_bin_prefix + os.environ['PATH']

# Make the project's helper modules importable. The directory is derived
# from $USER, so edit it if the repository is checked out somewhere else.
helpers_dir = "/home/{}/Locked-in-Leptons/helpers/".format(os.environ['USER'])
sys.path.append(helpers_dir)
import plotting
from helper import get_traintest

In [None]:
# Send log records to stdout; everything stays at WARNING except the
# 'conifer' logger, which is turned up to DEBUG for more detailed output.
import logging

logging.basicConfig(level=logging.WARNING, stream=sys.stdout)
logger = logging.getLogger('conifer')
logger.setLevel('DEBUG')

conifer_version = conifer.__version__
print(f'Using conifer version {conifer_version}')

# Load the Data 

In [None]:
# get_traintest() comes from the project's helper module; the parquet data
# files need to be in the same directory for it to find them.
train_test_split = get_traintest()
x_train, x_test, y_train, y_test = train_test_split

In [None]:
# Prepare the targets.
#   y_test  -> one-hot int64 rows; later cells recover the class index with
#              np.argmax(y_test, axis=1).
#   y_train -> flat 1-D array, the target shape scikit-learn estimators expect.
classes = [0, 1]
# fit_transform() learns the label mapping from y_test itself, so a separate
# fit(classes) beforehand would be discarded immediately and is omitted.
le = LabelEncoder()
y_test = le.fit_transform(y_test)
y_test = to_categorical(y_test, len(classes)).astype(np.int64)

# A column vector of shape (n_samples, 1) makes scikit-learn emit a
# DataConversionWarning and flatten it anyway; hand it a 1-D target directly.
y_train = np.ravel(y_train)

# Train the Model (BDT)

In [None]:
%%time
clf = GradientBoostingClassifier(n_estimators=25, max_depth=5, learning_rate=1, verbose = 2).fit(x_train, y_train)

# Validate Performance

In [None]:
from matplotlib.legend import Legend
from matplotlib.lines import Line2D
from sklearn.metrics import accuracy_score

# Class probabilities predicted by the trained sklearn model on the test set
y_skl = clf.predict_proba(x_test)
print(y_skl)

# Both arrays have one column per class, so argmax along axis 1 gives the class index
true_labels = np.argmax(y_test, axis = 1)
skl_labels = np.argmax(y_skl, axis=1)
print(f'Accuracy sklearn:   {accuracy_score(true_labels, skl_labels):.5f}')

# ROC curves via the project's plotting helper, using human-readable class names
classes = ["Z w/o MET", "W w/ MET"]
fig, ax = plt.subplots(figsize=(9, 9))
_ = plotting.makeRoc(y_test, y_skl, classes, linestyle='-')

# Extra hand-built legend with a single solid-line entry labelled 'sklearn'
lines = [Line2D([0], [0], ls='-')]
leg = Legend(ax, lines, labels=['sklearn'], loc='lower right', frameon=False)
ax.add_artist(leg)

ax.set_yscale('log')

# Conifer Conversion
<img src="https://github.com/thesps/conifer/blob/master/conifer_v1.png?raw=true" width="250" alt="conifer" />

In [None]:
# Start from conifer's default Xilinx HLS configuration and override the
# output directory and the target FPGA part.
cfg = conifer.backends.xilinxhls.auto_config()
cfg_overrides = {
    'OutputDir': 'model_bdt_sklearn/',
    'XilinxPart': 'xcu250-figd2104-2L-e',
}
for cfg_key, cfg_value in cfg_overrides.items():
    cfg[cfg_key] = cfg_value

# Show the resulting configuration between two horizontal rules
separator = '-' * 50
print('Conifer HLS Configuration\n' + separator)
plotting.print_dict(cfg)
print(separator)

In [None]:
# Translate the fitted sklearn BDT into conifer's model representation using
# the configuration in `cfg`, then write the HLS project to disk.
convert_from_sklearn = conifer.converters.convert_from_sklearn
conifer_model = convert_from_sklearn(clf, cfg)
conifer_model.write()

# Emulation

In [None]:
# Compile the written project so the model can be evaluated from Python
# (the next cell calls conifer_model.decision_function on the test data).
# NOTE(review): presumably this invokes a local C++ compiler to build the
# emulation library; confirm against the conifer documentation.
conifer_model.compile()

In [None]:
from scipy.special import expit

# Raw decision scores from the conifer model, mapped into (0, 1) with the
# logistic sigmoid so they can be compared with sklearn's probabilities.
# (The names say "sklearn" but these values come from conifer.)
y_sklearn = conifer_model.decision_function(x_test)
y_sklearn_proba = expit(y_sklearn)
# Round every probability to the nearest integer to obtain a hard class label
predictions = list(map(round, y_sklearn_proba))

## Verify Model Performance in Emulation

In [None]:
# Compare the sklearn model with its conifer emulation: print both accuracies
# and overlay their ROC curves (signal efficiency on x, background efficiency
# on a log-scaled y), then save the figure under plots/.
from sklearn.metrics import accuracy_score, auc, roc_curve

# y_test is one-hot encoded; reduce it once to integer class labels
y_true = np.argmax(y_test, axis=1)
print(f'Accuracy sklearn:   {accuracy_score(y_true, np.argmax(y_skl, axis=1)):.5f}')
print(f'Accuracy conifer:   {accuracy_score(y_true, predictions):.5f}')

fig, ax = plt.subplots(figsize=(9, 9))
# One ROC curve per model: (legend name, positive-class score, line colour)
roc_inputs = (
    ("sklearn", y_skl[:, 1], 'blue'),
    ("conifer", y_sklearn_proba, 'orange'),
)
for curve_name, curve_scores, curve_color in roc_inputs:
    # Calculate the false positive rate, true positive rate, and thresholds
    fpr, tpr, thresholds = roc_curve(y_true, curve_scores)
    roc_auc = auc(fpr, tpr)
    plt.plot(tpr, fpr, color=curve_color, label='{}, AUC = {:.1f}%'.format(curve_name, roc_auc * 100.0))

plt.semilogy()
plt.xlabel("Signal Efficiency")
plt.ylabel("Background Efficiency")
plt.xlim(0.0, 1.0)
plt.ylim(0.001, 1)
plt.grid(True)
plt.figtext(0.25, 0.90, 'Conifer BDT Conversion', fontweight='bold', wrap=True, horizontalalignment='right', fontsize=14)
plt.legend(loc="lower right")
ax.set_yscale('log')

# savefig does not create missing directories, so make sure plots/ exists.
os.makedirs("plots", exist_ok=True)
# Tag the file with the precision of the active conifer config. The loop
# variable `precision` only exists after the precision-scan cell has run, so
# relying on it here raised NameError when executing the notebook in order.
plt.savefig("plots/sklearn_{}.png".format(cfg.get('Precision', 'default')))
plt.show()

# Synthesize

In [None]:
# Run conifer's build step with both synthesis flags enabled.
# NOTE(review): presumably synth=True runs HLS C synthesis and vsynth=True
# additionally runs Vivado logic synthesis, which can take a long time and
# requires the Xilinx tools on PATH; confirm against the conifer documentation.
conifer_model.build(synth=True, vsynth=True)

In [None]:
# Fetch the report for this model and pretty-print it with the project's
# dictionary printer.
# NOTE(review): presumably these are the latency/resource figures produced by
# the build step above; confirm against the conifer documentation.
report = conifer_model.read_report()
plotting.print_dict(report)

# Precision Optimization Test

In [None]:
# Precision scan: convert the same sklearn BDT to conifer at several fixed-point
# precisions, emulate each one, and overlay the ROC curves on the sklearn
# reference to see how much precision is needed to reproduce it.
from scipy.special import expit
from sklearn.metrics import accuracy_score, auc, roc_curve

precisions = ['ap_fixed<16,6>', 'ap_fixed<18,8>','ap_fixed<24,10>', 'ap_fixed<32,14>']

y_skl = clf.predict_proba(x_test)
# y_test is one-hot encoded; reduce it once to integer class labels instead of
# recomputing the argmax for every metric inside the loop.
y_true = np.argmax(y_test, axis=1)
print(f'Accuracy sklearn:   {accuracy_score(y_true, np.argmax(y_skl, axis=1)):.5f}')

# Plot the ROC curve
fig, ax = plt.subplots(figsize=(9, 9))
# Calculate the false positive rate, true positive rate, and thresholds
fpr, tpr, thresholds = roc_curve(y_true, y_skl[:, 1])
roc_auc = auc(fpr, tpr)
plt.plot(tpr, fpr, color='blue', label='{}, AUC = {:.1f}%'.format("sklearn", roc_auc * 100.0))

for precision in precisions:
    # Fresh default config per precision, each with its own output directory.
    # NOTE(review): the directory name contains '<', ',' and '>' from the
    # ap_fixed type string; verify that the downstream tools accept such paths.
    cfg = conifer.backends.xilinxhls.auto_config()
    cfg['OutputDir'] = 'model_bdt_sklearn_{}/'.format(precision)
    cfg['XilinxPart'] = 'xcu250-figd2104-2L-e'
    cfg['Precision'] = precision
    # print the config
    print('Conifer HLS Configuration\n' + '-' * 50)
    plotting.print_dict(cfg)
    print('-' * 50)

    # convert the model to the conifer representation, write the HLS project
    # to disk and compile it for emulation
    conifer_model = conifer.converters.convert_from_sklearn(clf, cfg)
    conifer_model.write()
    conifer_model.compile()

    # Decision scores from the conifer model -> probabilities -> hard labels
    y_sklearn = conifer_model.decision_function(x_test)
    y_sklearn_proba = expit(y_sklearn)
    predictions = [round(value) for value in y_sklearn_proba]
    print(f'Accuracy conifer:   {accuracy_score(y_true, predictions):.5f}')

    fpr, tpr, thresholds = roc_curve(y_true, y_sklearn_proba)
    roc_auc = auc(fpr, tpr)
    plt.plot(tpr, fpr, label='{} {}, AUC = {:.1f}%'.format("conifer", precision, roc_auc * 100.0),)

plt.semilogy()
plt.xlabel("Signal Efficiency")
plt.ylabel("Background Efficiency")
plt.xlim(0.0, 1.0)
plt.ylim(0.001, 1)
plt.grid(True)
plt.figtext(0.25, 0.90, 'Conifer Sci-kit BDT Conversion', fontweight='bold', wrap=True, horizontalalignment='right', fontsize=14)
plt.legend(loc="lower right")
ax.set_yscale('log')

# savefig does not create missing directories, so make sure plots/ exists.
os.makedirs("plots", exist_ok=True)
plt.savefig("plots/sklearn_precision_optimization.png")
plt.show()