# imports:

In [1]:
from sklearn.svm import SVR as svr
from sklearn.svm import NuSVR
from sklearn.ensemble import ExtraTreesRegressor as etr
from sklearn.ensemble import RandomForestRegressor as rfr
from sklearn.neighbors import KNeighborsRegressor as Kneigh
from sklearn.ensemble import AdaBoostRegressor as Ada
from sklearn.ensemble import HistGradientBoostingRegressor as HG
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
import uproot
import pickle
import pandas as pd
import numpy as np

# load events

In [2]:
# Read the four pickled event tables in one pass; the i-th file name below is
# bound to the i-th variable of the unpacking target.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
(
    eventsWithReg_pt0to10_EB,
    eventsWithReg_pt10to500_EB,
    eventsWithReg_pt500to1000_EB,
    eventsWithReg_pt1000to1500_EB,
) = map(
    pd.read_pickle,
    (
        "eventsWithReg_pt0to10_EB.pkl",
        "eventsWithReg_pt10to500_EB.pkl",
        "eventsWithReg_pt500to1000_EB.pkl",
        "eventsWithReg_pt1000to1500_EB.pkl",
    ),
)

# get regression input variables and target variable:

In [3]:
# Columns used as regression inputs and as the regression target.
REG_INPUT_COLUMNS = ["clusrawE", "clusIetaIx", "clusIphiIy", "ietamod20", "iphimod20", "nhits_mod"]
TARGET_COLUMNS = ["genEnergy"]
# Maximum number of rows kept from each event table.
MAX_EVENTS = 100000


def select_inputs_and_target(events, max_events=MAX_EVENTS):
    """Split an event table into regression inputs and target.

    Parameters
    ----------
    events : pandas.DataFrame
        Table containing at least REG_INPUT_COLUMNS and TARGET_COLUMNS.
    max_events : int, optional
        Number of leading rows to keep (default MAX_EVENTS).

    Returns
    -------
    (inputs, target) : tuple of pandas.DataFrame
        ``inputs`` holds the REG_INPUT_COLUMNS columns, ``target`` is a
        single-column frame with TARGET_COLUMNS; both are truncated to the
        first ``max_events`` rows.

    Raises
    ------
    KeyError
        If any requested column is missing from ``events``.
    """
    inputs = events[REG_INPUT_COLUMNS][:max_events]
    target = events[TARGET_COLUMNS][:max_events]
    return inputs, target


regVars_pt0to10_EB, tgtvar_pt0to10_EB = select_inputs_and_target(eventsWithReg_pt0to10_EB)

regVars_pt10to500_EB, tgtvar_pt10to500_EB = select_inputs_and_target(eventsWithReg_pt10to500_EB)

regVars_pt500to1000_EB, tgtvar_pt500to1000_EB = select_inputs_and_target(eventsWithReg_pt500to1000_EB)

regVars_pt1000to1500_EB, tgtvar_pt1000to1500_EB = select_inputs_and_target(eventsWithReg_pt1000to1500_EB)

# split data:

In [4]:
# Split every sample into a training part and a held-out test part.
#
# random_state is fixed so that a "Restart & Run All" reproduces the same
# split; without it the pickled test sets and every trained model would change
# from run to run.
SPLIT_SEED = 0
TEST_FRACTION = 0.15

xtrain_pt0to10, xtest_pt0to10, ytrain_pt0to10, ytest_pt0to10 = train_test_split(
    regVars_pt0to10_EB, tgtvar_pt0to10_EB, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)
# Flatten the single-column training target to a 1-D array for fitting.  The
# test target is left as a DataFrame because it is written with .to_pickle().
ytrain_pt0to10 = np.ravel(ytrain_pt0to10)

xtrain_pt10to500, xtest_pt10to500, ytrain_pt10to500, ytest_pt10to500 = train_test_split(
    regVars_pt10to500_EB, tgtvar_pt10to500_EB, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)
ytrain_pt10to500 = np.ravel(ytrain_pt10to500)

xtrain_pt500to1000, xtest_pt500to1000, ytrain_pt500to1000, ytest_pt500to1000 = train_test_split(
    regVars_pt500to1000_EB, tgtvar_pt500to1000_EB, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)
ytrain_pt500to1000 = np.ravel(ytrain_pt500to1000)

xtrain_pt1000to1500, xtest_pt1000to1500, ytrain_pt1000to1500, ytest_pt1000to1500 = train_test_split(
    regVars_pt1000to1500_EB, tgtvar_pt1000to1500_EB, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)
ytrain_pt1000to1500 = np.ravel(ytrain_pt1000to1500)


# run models:

In [5]:
# XGBoost regressor for the pt0to10 sample, with library logging turned off.
XGBR_pt0to10 = XGBRegressor(verbosity=0).fit(
    X=xtrain_pt0to10,
    y=ytrain_pt0to10,
)

# Extra-trees regressor for the same sample: 100 trees, seed fixed to 0.
ETR_pt0to10 = etr(n_estimators=100, random_state=0).fit(
    X=xtrain_pt0to10,
    y=ytrain_pt0to10,
)

In [11]:
# Remaining regressors for the pt0to10 sample.
# XGBR_pt0to10 is intentionally not refit here: it is already trained on the
# same data in the preceding cell, so a second fit only repeated that work.

# Nu support-vector regression with every hyper-parameter spelled out.
NSVR_pt0to10 = NuSVR(C=1.0, cache_size=200, coef0=0.0, degree=3, gamma='scale', kernel='rbf',
      max_iter=-1, nu=0.5, shrinking=True, tol=0.001, verbose=False).fit(xtrain_pt0to10, ytrain_pt0to10)

# Epsilon support-vector regression.
SVR_pt0to10 = svr(C=1.0, epsilon=0.2).fit(xtrain_pt0to10, ytrain_pt0to10)

# Histogram-based gradient boosting with library defaults.
HGBR_pt0to10 = HG().fit(xtrain_pt0to10, ytrain_pt0to10)

# AdaBoost: 100 estimators, fixed seed.
ADA_pt0to10 = Ada(random_state=0, n_estimators=100).fit(xtrain_pt0to10, ytrain_pt0to10)

# k-nearest-neighbours regression using the 2 nearest neighbours.
KNR_pt0to10 = Kneigh(n_neighbors=2).fit(xtrain_pt0to10, ytrain_pt0to10)

# Random forest limited to depth-2 trees, fixed seed.
RFR_pt0to10 = rfr(max_depth=2, random_state=0).fit(xtrain_pt0to10, ytrain_pt0to10)

In [6]:
# XGBoost regressor for the pt10to500 sample, with library logging turned off.
XGBR_pt10to500 = XGBRegressor(verbosity=0).fit(
    X=xtrain_pt10to500,
    y=ytrain_pt10to500,
)

# Extra-trees regressor for the same sample: 100 trees, seed fixed to 0.
ETR_pt10to500 = etr(n_estimators=100, random_state=0).fit(
    X=xtrain_pt10to500,
    y=ytrain_pt10to500,
)

In [None]:
# Remaining regressors for the pt10to500 sample.
# Each model is bound to a "<MODEL>_pt10to500" name: these are the names the
# save cell dumps, and generic names (NSVR, SVR, ...) would be overwritten by
# the cells that train the other samples.

# Nu support-vector regression with every hyper-parameter spelled out.
NSVR_pt10to500 = NuSVR(C=1.0, cache_size=200, coef0=0.0, degree=3, gamma='scale', kernel='rbf',
      max_iter=-1, nu=0.5, shrinking=True, tol=0.001, verbose=False).fit(xtrain_pt10to500, ytrain_pt10to500)

# Epsilon support-vector regression.
SVR_pt10to500 = svr(C=1.0, epsilon=0.2).fit(xtrain_pt10to500, ytrain_pt10to500)

# Histogram-based gradient boosting with library defaults.
HGBR_pt10to500 = HG().fit(xtrain_pt10to500, ytrain_pt10to500)

# AdaBoost: 100 estimators, fixed seed.
ADA_pt10to500 = Ada(random_state=0, n_estimators=100).fit(xtrain_pt10to500, ytrain_pt10to500)

# k-nearest-neighbours regression using the 2 nearest neighbours.
KNR_pt10to500 = Kneigh(n_neighbors=2).fit(xtrain_pt10to500, ytrain_pt10to500)

# Random forest limited to depth-2 trees, fixed seed.
RFR_pt10to500 = rfr(max_depth=2, random_state=0).fit(xtrain_pt10to500, ytrain_pt10to500)

In [7]:
# XGBoost regressor for the pt500to1000 sample, with library logging turned off.
XGBR_pt500to1000 = XGBRegressor(verbosity=0).fit(xtrain_pt500to1000, ytrain_pt500to1000)

# Extra-trees regressor: 100 trees, seed fixed to 0.
# Fit on the pt500to1000 training split (it was previously fit on the
# pt1000to1500 split, which made it a duplicate of ETR_pt1000to1500).
ETR_pt500to1000 = etr(n_estimators=100, random_state=0).fit(xtrain_pt500to1000, ytrain_pt500to1000)

In [None]:
# Remaining regressors for the pt500to1000 sample.
# They are trained on the pt500to1000 training split and bound to
# "<MODEL>_pt500to1000" names so they neither reuse another sample's data nor
# collide with the models of the other samples.

# Nu support-vector regression with every hyper-parameter spelled out.
NSVR_pt500to1000 = NuSVR(C=1.0, cache_size=200, coef0=0.0, degree=3, gamma='scale', kernel='rbf',
      max_iter=-1, nu=0.5, shrinking=True, tol=0.001, verbose=False).fit(xtrain_pt500to1000, ytrain_pt500to1000)

# Epsilon support-vector regression.
SVR_pt500to1000 = svr(C=1.0, epsilon=0.2).fit(xtrain_pt500to1000, ytrain_pt500to1000)

# Histogram-based gradient boosting with library defaults.
HGBR_pt500to1000 = HG().fit(xtrain_pt500to1000, ytrain_pt500to1000)

# AdaBoost: 100 estimators, fixed seed.
ADA_pt500to1000 = Ada(random_state=0, n_estimators=100).fit(xtrain_pt500to1000, ytrain_pt500to1000)

# k-nearest-neighbours regression using the 2 nearest neighbours.
KNR_pt500to1000 = Kneigh(n_neighbors=2).fit(xtrain_pt500to1000, ytrain_pt500to1000)

# Random forest limited to depth-2 trees, fixed seed.
RFR_pt500to1000 = rfr(max_depth=2, random_state=0).fit(xtrain_pt500to1000, ytrain_pt500to1000)

In [8]:
# XGBoost regressor for the pt1000to1500 sample, with library logging turned off.
XGBR_pt1000to1500 = XGBRegressor(verbosity=0).fit(
    X=xtrain_pt1000to1500,
    y=ytrain_pt1000to1500,
)

# Extra-trees regressor for the same sample: 100 trees, seed fixed to 0.
ETR_pt1000to1500 = etr(n_estimators=100, random_state=0).fit(
    X=xtrain_pt1000to1500,
    y=ytrain_pt1000to1500,
)

In [None]:
# Remaining regressors for the pt1000to1500 sample.
# Each model is bound to a "<MODEL>_pt1000to1500" name so that it does not
# share a generic name (NSVR, SVR, ...) with the models of the other samples.

# Nu support-vector regression with every hyper-parameter spelled out.
NSVR_pt1000to1500 = NuSVR(C=1.0, cache_size=200, coef0=0.0, degree=3, gamma='scale', kernel='rbf',
      max_iter=-1, nu=0.5, shrinking=True, tol=0.001, verbose=False).fit(xtrain_pt1000to1500, ytrain_pt1000to1500)

# Epsilon support-vector regression.
SVR_pt1000to1500 = svr(C=1.0, epsilon=0.2).fit(xtrain_pt1000to1500, ytrain_pt1000to1500)

# Histogram-based gradient boosting with library defaults.
HGBR_pt1000to1500 = HG().fit(xtrain_pt1000to1500, ytrain_pt1000to1500)

# AdaBoost: 100 estimators, fixed seed.
ADA_pt1000to1500 = Ada(random_state=0, n_estimators=100).fit(xtrain_pt1000to1500, ytrain_pt1000to1500)

# k-nearest-neighbours regression using the 2 nearest neighbours.
KNR_pt1000to1500 = Kneigh(n_neighbors=2).fit(xtrain_pt1000to1500, ytrain_pt1000to1500)

# Random forest limited to depth-2 trees, fixed seed.
RFR_pt1000to1500 = rfr(max_depth=2, random_state=0).fit(xtrain_pt1000to1500, ytrain_pt1000to1500)

# save models:

In [9]:
# Persist every trained model to "<MODEL>_model_<sample>.sav".
#
# A model is looked up in the notebook namespace under "<MODEL>_<sample>"
# (e.g. XGBR_pt0to10) and written to the file carrying the same sample tag, so
# each sample gets its own eight files instead of the pt0to10 models being
# dumped repeatedly.  Names are resolved through globals() so that a model
# whose training cell was not run is reported and skipped rather than aborting
# the whole cell with a NameError.
SAMPLE_TAGS = ("pt0to10", "pt10to500", "pt500to1000", "pt1000to1500")
MODEL_TAGS = ("XGBR", "ETR", "NSVR", "SVR", "HGBR", "ADA", "KNR", "RFR")

models_not_saved = []
for sample_tag in SAMPLE_TAGS:
    for model_tag in MODEL_TAGS:
        model_name = f"{model_tag}_{sample_tag}"
        if model_name not in globals():
            models_not_saved.append(model_name)
            continue
        # The context manager closes the file handle even if pickling fails.
        with open(f"{model_tag}_model_{sample_tag}.sav", "wb") as model_file:
            pickle.dump(globals()[model_name], model_file)

if models_not_saved:
    print(
        "WARNING: these models are not defined in this session and were not saved "
        "(any existing .sav file for them is left untouched): "
        + ", ".join(models_not_saved)
    )


# Write the held-out test inputs and targets of every sample to disk, one
# pickle file per frame, in the same order as the samples were split.
for held_out_frame, pickle_path in (
    (xtest_pt0to10, "xtest_pt0to10.pkl"),
    (ytest_pt0to10, "ytest_pt0to10.pkl"),
    (xtest_pt10to500, "xtest_pt10to500.pkl"),
    (ytest_pt10to500, "ytest_pt10to500.pkl"),
    (xtest_pt500to1000, "xtest_pt500to1000.pkl"),
    (ytest_pt500to1000, "ytest_pt500to1000.pkl"),
    (xtest_pt1000to1500, "xtest_pt1000to1500.pkl"),
    (ytest_pt1000to1500, "ytest_pt1000to1500.pkl"),
):
    held_out_frame.to_pickle(pickle_path)

In [10]:
# Sanity check: number of rows in the pt0to10 test input frame.
len(xtest_pt0to10)

12306