Which variables should be used for training? We compare the performance of the default training with that of trainings in which one variable has been removed. If the performance improves when a variable is removed, we know that variable is making the training worse. Further, the size of the change in performance lets us rank the variables to first order.

# Initialization

In [1]:
from bdt_training_scikit_tools import load_trimmed_sample, \
    test_train_samples, prep_samples, default_training, calc_performance, get_fraction_of_events, \
    default_training_variable_list
import matplotlib.pyplot as plt
plt.rc('font', size=14)
from matplotlib.colors import LogNorm
import pandas as pd
import numpy as np
import multiprocessing as mp
import itertools

# Load Data Samples

In [2]:
# Load the samples for job "133"; the loader prints the per-sample event counts shown below.
all_events_all = load_trimmed_sample("133")

Job 133:
  BIB: 800000 events
  Multijet: 800000 events
  Signal: 800000 events
  [800000, 800000, 473600]


In [3]:
# Work with a reduced version of each sample. NOTE(review): 200000 is presumably
# the target number of events per sample -- confirm against get_fraction_of_events.
all_events = get_fraction_of_events(all_events_all, 200000)
# Show how many index entries each reduced sample contains.
print ([len(e.index) for e in all_events])

[200387, 199675, 196736]


# Determine Full Variable List
We have to determine what variable list we want to start with before we start removing them according to their performance. Reasons for removal:

- Weight variables exist only in MC
- Variables that have nothing to do with physics (like run #)
- DR to closest track because it is used later in the analysis
- JetPhi because physics *shouldn't* depend on that

In [4]:
# List every column available in the first loaded sample.
all_events_all[0].columns

Index(['RunNumber', 'Weight', 'WeightMCEvent', 'WeightXSection',
       'WeightFlatten', 'mc_Lxy', 'mc_Lz', 'MHTOverHT', 'JetPt', 'JetPhi',
       'CalRatio', 'JetEta', 'NTracks', 'SumPtOfAllTracks', 'MaxTrackPt',
       'EventNumber', 'JetET', 'JetWidth', 'JetDRTo2GeVTrack', 'EnergyDensity',
       'HadronicLayer1Fraction', 'JetLat', 'JetLong', 'FirstClusterRadius',
       'NumberOfClusters', 'ShowerCenter', 'BIBDeltaTimingM',
       'BIBDeltaTimingP', 'FirstCellTiming', 'InteractionsPerCrossing',
       'RPredictedLxy', 'RPredictedLz', 'PredictedLxy', 'PredictedLz',
       'PredictedLxyHighEta', 'PredictedLxyLowEta', 'PredictedLzHighEta',
       'PredictedLzLowEta'],
      dtype='object')

In [5]:
# Candidate training variables: every column of the first sample except the
# ones excluded below. The markdown above gives the reasons for the weight,
# run-number, JetPhi and DR-to-track exclusions; the remaining exclusions are
# not explained there.
variable_list = set(all_events_all[0].columns).difference({
    'RunNumber',
    'Weight', 'WeightMCEvent', 'WeightXSection', 'WeightFlatten',
    'mc_Lxy', 'mc_Lz',
    'MHTOverHT',
    'JetPhi',
    'EventNumber',
    'InteractionsPerCrossing',
    'JetDRTo2GeVTrack',
    'JetET',
})
variable_list

{'BIBDeltaTimingM',
 'BIBDeltaTimingP',
 'CalRatio',
 'EnergyDensity',
 'FirstCellTiming',
 'FirstClusterRadius',
 'HadronicLayer1Fraction',
 'JetEta',
 'JetLat',
 'JetLong',
 'JetPt',
 'JetWidth',
 'MaxTrackPt',
 'NTracks',
 'NumberOfClusters',
 'PredictedLxy',
 'PredictedLxyHighEta',
 'PredictedLxyLowEta',
 'PredictedLz',
 'PredictedLzHighEta',
 'PredictedLzLowEta',
 'RPredictedLxy',
 'RPredictedLz',
 'ShowerCenter',
 'SumPtOfAllTracks'}

# Drop First Variable

In [6]:
%%writefile get_training_performance.py
from bdt_training_scikit_tools import load_default_samples, default_training_variable_list, \
    test_train_samples, prep_samples, default_training, calc_performance
    
def do_training(vlist):
    '''Run one training described by an (all_events, training_list) pair.

    Everything is packed into a single argument so the function can be used
    directly with a map-style call that passes one item per job.

    Returns:
        Whatever get_training_performance returns for that pair.
    '''
    events, variables = vlist
    return get_training_performance(events, variables)
    
def get_training_performance(all_events, training_list):
    '''Train on the given variables and return a performance table.

    Arguments:
        all_events - the samples; handed unchanged to test_train_samples
        training_list - names of the variables to train on

    Returns:
        A one-entry dict whose key is tuple(training_list) and whose value is
        the result of calc_performance for the trained BDT on the test split.
    '''
    # Hold part of the input back so the BDT is evaluated on events it did not see.
    train, test = test_train_samples(all_events)

    # Build the training inputs from the first three training samples.
    prepared = prep_samples(
        train[0], train[1], train[2],
        training_variable_list=training_list,
    )
    # The fourth value (the evaluation weight) is not needed here.
    features, labels, fit_weight, _evaluation_weight = prepared

    bdt = default_training(features, fit_weight, labels, estimators=400)

    # Key the result by the variables used so the caller can tell trainings apart.
    performance = calc_performance(bdt, test, training_variables=training_list)
    return {tuple(training_list): performance}

Overwriting get_training_performance.py


In [7]:
def all_but_one(vlist, number_to_drop=1):
    '''Return every way of dropping number_to_drop items from vlist, plus vlist itself.

    Arguments:
        vlist - any iterable of items (list, tuple, set, generator, ...). It is
                read exactly once, so one-shot iterables are supported.
        number_to_drop - how many items to leave out of each combination

    Returns:
        A list of tuples. First come all combinations containing
        len(vlist)-number_to_drop items, then, as the last entry, a tuple
        holding every item. With number_to_drop=0 the full tuple therefore
        appears twice.

    Raises:
        ValueError - from itertools.combinations, when number_to_drop is larger
                     than the number of items.
    '''
    # Materialise once: this fixes the iteration order shared by the
    # combinations and the full tuple, and gives a length even for generators.
    items = tuple(vlist)
    reduced = itertools.combinations(items, len(items) - number_to_drop)
    return list(reduced) + [items]

In [8]:
import get_training_performance
# Pool of 10 worker processes used by calc_var_removal to run trainings in parallel.
# NOTE(review): no close()/join() call for this pool appears in the cells shown here.
pool = mp.Pool(processes=10)

def unused_var(original_list, used_list):
    '''Return the entries of original_list that do not appear in used_list.

    Returns:
        A tuple of the missing entries, in the iteration order of
        original_list. When nothing is missing, the placeholder tuple
        ('None',) is returned instead of an empty tuple.
    '''
    missing = tuple(name for name in original_list if name not in used_list)
    if missing:
        return missing
    return ('None',)

def calc_var_removal(all_events, training_list = default_training_variable_list):
    '''Train with training_list and with each single variable left out.

    The trainings are dispatched through the module-level ``pool``.

    Arguments:
        all_events - the samples handed to every training
        training_list - the variables to start from

    Returns:
        A DataFrame with one row per training. Each row is labelled by the
        tuple of variables left out of that training; the training that used
        every variable is labelled ('None',).
    '''
    jobs = [(all_events, candidate) for candidate in all_but_one(training_list)]
    outcomes = pool.map(get_training_performance.do_training, jobs)

    # Every outcome is a small dict keyed by the variables that were used;
    # fold them into one dict, later entries winning on duplicate keys.
    by_used = {used: perf for outcome in outcomes for used, perf in outcome.items()}

    # Re-label each training by what it left out rather than by what it used.
    by_dropped = {unused_var(training_list, used): perf for used, perf in by_used.items()}
    return pd.DataFrame(by_dropped).T

In [9]:
def remove_all_variables(all_events, training_list = variable_list):
    '''Drop variables one at a time for as long as dropping one helps.

    Each iteration runs calc_var_removal on the current variable set and sorts
    the result by "HSSSsqrtB" (ascending, so the last row has the highest
    value). If that last row is the training with nothing removed, the loop
    stops. Otherwise the variable named in that row is removed from the set
    and the procedure repeats.

    Arguments:
        all_events - the samples handed to every training
        training_list - the set of variables to start from (must support
                        set subtraction)

    Returns:
        A list holding the sorted result table of every iteration, in order.
    '''
    history = []
    iteration = 0
    while True:
        iteration += 1
        print ("Iteration #{0}".format(iteration))
        print ('  Training with variable list:')
        print ('  ' + str(training_list))

        ranked = calc_var_removal(all_events, training_list=training_list).sort_values("HSSSsqrtB")
        history.append(ranked)
        print (ranked.HSSSsqrtB)

        # The row label is a tuple; its first entry names the dropped variable.
        best_to_drop = ranked.index[-1][0]
        if best_to_drop == 'None':
            # Nothing beats keeping every remaining variable: we are finished.
            return history

        # Build a new set rather than mutating the caller's (or the default) set.
        training_list = training_list - {best_to_drop}

In [10]:
%%time
#r_drop_first = calc_var_removal(all_events)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.53 µs


In [11]:
#print (r_drop_first.sort_values("HSSSsqrtB").HSSSsqrtB)

In [12]:
%%time
# Run the iterative removal starting from the full variable_list; r receives one sorted result table per iteration.
r = remove_all_variables(all_events, variable_list)

Iteration #1
  Training with variable list:
  {'PredictedLz', 'HadronicLayer1Fraction', 'SumPtOfAllTracks', 'ShowerCenter', 'RPredictedLxy', 'JetEta', 'PredictedLxy', 'PredictedLxyHighEta', 'RPredictedLz', 'FirstClusterRadius', 'BIBDeltaTimingP', 'JetWidth', 'JetLat', 'NumberOfClusters', 'CalRatio', 'PredictedLzHighEta', 'FirstCellTiming', 'MaxTrackPt', 'NTracks', 'JetPt', 'EnergyDensity', 'JetLong', 'PredictedLzLowEta', 'BIBDeltaTimingM', 'PredictedLxyLowEta'}
JetPt                     147.028807
FirstCellTiming           147.450753
CalRatio                  162.559416
NTracks                   164.253331
RPredictedLxy             170.724881
PredictedLxy              171.902400
JetWidth                  173.856741
MaxTrackPt                203.645356
HadronicLayer1Fraction    210.197526
ShowerCenter              211.001034
PredictedLxyHighEta       211.523058
JetLat                    216.511655
PredictedLxyLowEta        217.477752
SumPtOfAllTracks          217.925181
BIBDeltaTimingP 

In [13]:
# Number of removal iterations that were run (one table is stored per iteration).
len(r)

3

In [14]:
# Sorted result table from the final iteration.
r[-1]

Unnamed: 0,BIBBack,BIBEff,BIBSsqrtB,BIBTotalCount,BIBTotalWeight,BIBinBIB,BIBinHSS,BIBinMJ,HSSBack,HSSEff,...,HSSinHSS,HSSinMJ,MJBack,MJEff,MJSsqrtB,MJTotalCount,MJTotalWeight,MJinBIB,MJinHSS,MJinMJ
JetPt,1805890.0,0.965245,48.070671,66925.0,66925.0,64599.0,2134.0,192.0,876915.341987,0.943811,...,148689.0,2148.0,2340.0,0.958252,1268791.0,66589.0,64049880.0,1799186.0,874781.341987,61375910.0
CalRatio,3199952.0,0.970908,36.324081,66925.0,66925.0,64978.0,1714.0,233.0,781812.204634,0.913984,...,143990.0,2631.0,2864.0,0.938031,1122661.0,66589.0,64049880.0,3189032.0,780098.204634,60080750.0
FirstCellTiming,2788562.0,0.969668,38.86164,66925.0,66925.0,64895.0,1823.0,207.0,751540.189834,0.902933,...,142249.0,2609.0,2816.0,0.944955,1140547.0,66589.0,64049880.0,2775879.0,749717.189834,60524280.0
NTracks,2709389.0,0.97666,39.709673,66925.0,66925.0,65363.0,1386.0,176.0,730791.27856,0.930501,...,146592.0,1944.0,2120.0,0.946451,1316584.0,66589.0,64049880.0,2700384.0,729405.27856,60620090.0
PredictedLxy,2668618.0,0.972596,39.845359,66925.0,66925.0,65091.0,1634.0,200.0,586365.778552,0.914594,...,144086.0,2635.0,2835.0,0.949375,1142035.0,66589.0,64049880.0,2657798.0,584731.778552,60807350.0
SumPtOfAllTracks,2655034.0,0.973104,39.968025,66925.0,66925.0,65125.0,1612.0,188.0,577477.017636,0.914746,...,144110.0,2584.0,2772.0,0.949726,1155367.0,66589.0,64049880.0,2644187.0,575865.017636,60829830.0
PredictedLxyLowEta,2643388.0,0.972551,40.033216,66925.0,66925.0,65088.0,1629.0,208.0,420199.676127,0.912486,...,143754.0,2578.0,2786.0,0.952369,1155668.0,66589.0,64049880.0,2632179.0,418570.676127,60999130.0
PredictedLxyHighEta,2526305.0,0.97285,40.962974,66925.0,66925.0,65108.0,1618.0,199.0,407366.855614,0.914219,...,144027.0,2635.0,2834.0,0.954392,1148273.0,66589.0,64049880.0,2515426.0,405748.855614,61128700.0
PredictedLzHighEta,2334141.0,0.972626,42.606,66925.0,66925.0,65093.0,1631.0,201.0,400658.375908,0.914809,...,144120.0,2645.0,2846.0,0.957496,1149576.0,66589.0,64049880.0,2323365.0,399027.375908,61327490.0
JetEta,2347869.0,0.972835,42.490392,66925.0,66925.0,65107.0,1624.0,194.0,392310.559119,0.914809,...,144120.0,2593.0,2787.0,0.957412,1161579.0,66589.0,64049880.0,2337041.0,390686.559119,61322150.0


In [15]:
# Sorted result table from the second-to-last iteration.
r[-2]

Unnamed: 0,BIBBack,BIBEff,BIBSsqrtB,BIBTotalCount,BIBTotalWeight,BIBinBIB,BIBinHSS,BIBinMJ,HSSBack,HSSEff,...,HSSinHSS,HSSinMJ,MJBack,MJEff,MJSsqrtB,MJTotalCount,MJTotalWeight,MJinBIB,MJinHSS,MJinMJ
JetPt,1941957.0,0.968323,46.503829,66925.0,66925.0,64805.0,1919.0,201.0,1066447.0,0.939127,...,147951.0,2109.0,2310.0,0.953177,1270240.0,66589.0,64049880.0,1934476.0,1064528.0,61050870.0
FirstCellTiming,2639046.0,0.969757,39.951021,66925.0,66925.0,64901.0,1821.0,203.0,761163.5,0.905745,...,142692.0,2642.0,2845.0,0.947132,1137333.0,66589.0,64049880.0,2626839.0,759342.5,60663700.0
NTracks,2329069.0,0.977168,42.851561,66925.0,66925.0,65397.0,1350.0,178.0,733908.2,0.9325,...,146907.0,1930.0,2108.0,0.952335,1328535.0,66589.0,64049880.0,2320365.0,732558.2,60996950.0
CalRatio,3229938.0,0.970968,36.157299,66925.0,66925.0,64982.0,1709.0,234.0,708193.6,0.916441,...,144377.0,2521.0,2755.0,0.938707,1145481.0,66589.0,64049880.0,3219295.0,706484.6,60124100.0
,2354058.0,0.972865,42.435809,66925.0,66925.0,65109.0,1624.0,192.0,670093.8,0.917323,...,144516.0,2579.0,2771.0,0.952973,1159526.0,66589.0,64049880.0,2343612.0,668469.8,61037800.0
RPredictedLxy,2364800.0,0.972432,42.320453,66925.0,66925.0,65080.0,1652.0,193.0,650363.4,0.917958,...,144616.0,2561.0,2754.0,0.953112,1163270.0,66589.0,64049880.0,2354436.0,648711.4,61046730.0
SumPtOfAllTracks,2331982.0,0.973328,42.656495,66925.0,66925.0,65140.0,1587.0,198.0,620702.1,0.916974,...,144461.0,2567.0,2765.0,0.954089,1162143.0,66589.0,64049880.0,2321469.0,619115.1,61109290.0
PredictedLxy,2568888.0,0.972761,40.6183,66925.0,66925.0,65102.0,1624.0,199.0,581954.9,0.917298,...,144512.0,2576.0,2775.0,0.950995,1156285.0,66589.0,64049880.0,2558435.0,580330.9,60911110.0
ShowerCenter,2442219.0,0.972955,41.666663,66925.0,66925.0,65115.0,1621.0,189.0,435489.4,0.915755,...,144269.0,2662.0,2851.0,0.955262,1145888.0,66589.0,64049880.0,2431609.0,433868.4,61184400.0
BIBDeltaTimingP,2358877.0,0.972626,42.382016,66925.0,66925.0,65093.0,1642.0,190.0,435543.6,0.917755,...,144584.0,2564.0,2754.0,0.956559,1167476.0,66589.0,64049880.0,2348484.0,433901.6,61267490.0


# Look at high $\eta$ and low $\eta$ list

In [16]:
%%time
# Start from the variable set that uses only the separate low-eta / high-eta
# Predicted* variables (no full-range PredictedLxy/PredictedLz, no RPredicted*).
r_low_high = remove_all_variables(all_events, {
    'PredictedLxyLowEta',
    'CalRatio', 'HadronicLayer1Fraction', 'EnergyDensity',
    'JetEta', 'SumPtOfAllTracks', 'ShowerCenter',
    'JetPt', 'NTracks', 'NumberOfClusters',
    'FirstCellTiming', 'BIBDeltaTimingM', 'JetLat', 'JetWidth', 'FirstClusterRadius',
    'PredictedLzLowEta', 'BIBDeltaTimingP', 'PredictedLzHighEta',
    'PredictedLxyHighEta', 'JetLong', 'MaxTrackPt',
})

Iteration #1
  Training with variable list:
  {'HadronicLayer1Fraction', 'SumPtOfAllTracks', 'ShowerCenter', 'JetEta', 'PredictedLxyHighEta', 'FirstClusterRadius', 'BIBDeltaTimingP', 'JetWidth', 'JetLat', 'NumberOfClusters', 'CalRatio', 'PredictedLzHighEta', 'FirstCellTiming', 'MaxTrackPt', 'NTracks', 'JetPt', 'EnergyDensity', 'JetLong', 'PredictedLzLowEta', 'BIBDeltaTimingM', 'PredictedLxyLowEta'}
JetPt                     128.442961
HadronicLayer1Fraction    157.527794
ShowerCenter              160.457798
JetLong                   161.555923
NumberOfClusters          162.096123
MaxTrackPt                162.402702
NTracks                   163.100311
SumPtOfAllTracks          163.280128
None                      163.511721
BIBDeltaTimingM           163.750333
PredictedLzLowEta         168.284459
JetWidth                  168.699996
BIBDeltaTimingP           169.149988
JetLat                    169.329569
FirstCellTiming           169.605331
EnergyDensity             171.226467
JetEta

JetPt                     111.317308
FirstCellTiming           177.809027
NTracks                   201.154912
None                      201.854129
HadronicLayer1Fraction    204.415598
JetWidth                  212.820248
PredictedLzHighEta        229.005201
FirstClusterRadius        236.903253
JetEta                    238.198207
NumberOfClusters          244.650115
PredictedLxyLowEta        252.648375
MaxTrackPt                271.688000
PredictedLxyHighEta       275.798230
SumPtOfAllTracks          277.863357
Name: HSSSsqrtB, dtype: float64
Iteration #10
  Training with variable list:
  {'NumberOfClusters', 'HadronicLayer1Fraction', 'PredictedLzHighEta', 'FirstCellTiming', 'PredictedLxyHighEta', 'MaxTrackPt', 'NTracks', 'FirstClusterRadius', 'JetPt', 'JetEta', 'JetWidth', 'PredictedLxyLowEta'}
JetPt                     105.913953
FirstCellTiming           125.741998
JetWidth                  175.804113
NTracks                   189.087155
HadronicLayer1Fraction    210.676505
Predict

In [17]:
# Sorted result table from the final iteration of the low/high-eta run.
r_low_high[-1]

Unnamed: 0,BIBBack,BIBEff,BIBSsqrtB,BIBTotalCount,BIBTotalWeight,BIBinBIB,BIBinHSS,BIBinMJ,HSSBack,HSSEff,...,HSSinHSS,HSSinMJ,MJBack,MJEff,MJSsqrtB,MJTotalCount,MJTotalWeight,MJinBIB,MJinHSS,MJinMJ
JetPt,2731542.0,0.956728,38.741172,66925.0,66925.0,64029.0,2658.0,238.0,1995538.0,0.933839,...,147118.0,2039.0,2277.0,0.926369,1243429.0,66589.0,64049880.0,2723158.0,1992880.0,59333840.0
FirstCellTiming,5659758.0,0.940949,26.470092,66925.0,66925.0,62973.0,3706.0,246.0,273279.8,0.536216,...,84476.0,2327.0,2573.0,0.908531,1147197.0,66589.0,64049880.0,5589020.0,269573.8,58191280.0
JetWidth,4294577.0,0.963302,31.109337,66925.0,66925.0,64469.0,2174.0,282.0,588613.1,0.881098,...,138809.0,2322.0,2604.0,0.92405,1159827.0,66589.0,64049880.0,4278167.0,586439.1,59185270.0
MaxTrackPt,4127858.0,0.963571,31.740209,66925.0,66925.0,64487.0,2161.0,277.0,411633.2,0.882107,...,138968.0,2710.0,2987.0,0.929407,1089198.0,66589.0,64049880.0,4111995.0,409472.2,59528410.0
PredictedLzHighEta,4505452.0,0.964079,30.397084,66925.0,66925.0,64521.0,2114.0,290.0,417326.7,0.88933,...,140106.0,2274.0,2564.0,0.923411,1168031.0,66589.0,64049880.0,4490291.0,415212.7,59144370.0
NTracks,4103194.0,0.967516,31.96579,66925.0,66925.0,64751.0,1897.0,277.0,360988.2,0.90287,...,142239.0,2145.0,2422.0,0.930537,1211057.0,66589.0,64049880.0,4090037.0,359091.2,59600750.0
HadronicLayer1Fraction,4457404.0,0.961868,30.490375,66925.0,66925.0,64373.0,2287.0,265.0,319073.2,0.886899,...,139723.0,2226.0,2491.0,0.925705,1187966.0,66589.0,64049880.0,4441812.0,316786.2,59291280.0
PredictedLxyHighEta,4732687.0,0.96393,29.653768,66925.0,66925.0,64511.0,2127.0,287.0,289434.8,0.890809,...,140339.0,2316.0,2603.0,0.921856,1157296.0,66589.0,64049880.0,4717801.0,287307.8,59044770.0
NumberOfClusters,4722503.0,0.964557,29.70505,66925.0,66925.0,64553.0,2099.0,273.0,287844.1,0.892403,...,140590.0,2251.0,2524.0,0.922037,1175498.0,66589.0,64049880.0,4707803.0,285745.1,59056330.0
JetEta,5079107.0,0.963437,28.610001,66925.0,66925.0,64478.0,2150.0,297.0,272144.7,0.889476,...,140129.0,2314.0,2611.0,0.916721,1149085.0,66589.0,64049880.0,5064009.0,269994.7,58715870.0


# Look at full $\eta$ range predictions

In [18]:
# Start from the variable set that uses only the full-range PredictedLxy and
# PredictedLz variables (no low/high-eta variants, no RPredicted*).
r_full = remove_all_variables(all_events, {
    'PredictedLxy',
    'CalRatio', 'HadronicLayer1Fraction', 'EnergyDensity',
    'JetEta', 'SumPtOfAllTracks', 'ShowerCenter',
    'JetPt', 'NTracks', 'NumberOfClusters',
    'FirstCellTiming', 'BIBDeltaTimingM', 'JetLat', 'JetWidth', 'FirstClusterRadius',
    'PredictedLz', 'BIBDeltaTimingP',
    'JetLong', 'MaxTrackPt',
})

Iteration #1
  Training with variable list:
  {'NumberOfClusters', 'PredictedLxy', 'CalRatio', 'HadronicLayer1Fraction', 'PredictedLz', 'FirstCellTiming', 'SumPtOfAllTracks', 'ShowerCenter', 'NTracks', 'FirstClusterRadius', 'MaxTrackPt', 'JetPt', 'BIBDeltaTimingP', 'EnergyDensity', 'JetEta', 'BIBDeltaTimingM', 'JetWidth', 'JetLat', 'JetLong'}
NTracks                   134.396543
JetPt                     137.859663
HadronicLayer1Fraction    159.864160
JetEta                    162.473386
JetWidth                  165.475649
SumPtOfAllTracks          167.090512
MaxTrackPt                167.475195
ShowerCenter              168.139958
PredictedLz               168.667466
None                      169.385510
PredictedLxy              170.623653
EnergyDensity             170.868948
NumberOfClusters          171.430412
BIBDeltaTimingP           174.156600
BIBDeltaTimingM           174.196528
FirstClusterRadius        174.371387
JetLong                   174.707323
JetLat                    

In [19]:
# Sorted result table from the final iteration of the full-eta-range run.
r_full[-1]

Unnamed: 0,BIBBack,BIBEff,BIBSsqrtB,BIBTotalCount,BIBTotalWeight,BIBinBIB,BIBinHSS,BIBinMJ,HSSBack,HSSEff,...,HSSinHSS,HSSinMJ,MJBack,MJEff,MJSsqrtB,MJTotalCount,MJTotalWeight,MJinBIB,MJinHSS,MJinMJ
JetPt,2826853.0,0.956444,38.07117,66925.0,66925.0,64010.0,2669.0,246.0,1652804.0,0.916606,...,144403.0,2508.0,2754.0,0.930268,1135388.0,66589.0,64049880.0,2816223.0,1650135.0,59583520.0
NTracks,4455811.0,0.968681,30.711847,66925.0,66925.0,64829.0,1884.0,212.0,634627.2,0.910887,...,143502.0,2173.0,2385.0,0.920739,1207565.0,66589.0,64049880.0,4443945.0,632743.2,58973190.0
BIBDeltaTimingM,4271320.0,0.960538,31.104402,66925.0,66925.0,64284.0,2391.0,250.0,389230.7,0.824052,...,129822.0,2417.0,2667.0,0.927668,1150534.0,66589.0,64049880.0,4246018.0,386839.7,59417020.0
HadronicLayer1Fraction,4601429.0,0.959417,29.932953,66925.0,66925.0,64209.0,2461.0,255.0,392370.6,0.879301,...,138526.0,2681.0,2936.0,0.922326,1090247.0,66589.0,64049880.0,4585095.0,389909.6,59074870.0
BIBDeltaTimingP,4548319.0,0.962809,30.213643,66925.0,66925.0,64436.0,2248.0,241.0,287595.8,0.808932,...,127440.0,2455.0,2696.0,0.924964,1140994.0,66589.0,64049880.0,4520673.0,285347.8,59243860.0
JetEta,4654511.0,0.962899,29.869777,66925.0,66925.0,64442.0,2224.0,259.0,294613.7,0.876781,...,138129.0,2627.0,2886.0,0.923027,1100486.0,66589.0,64049880.0,4637726.0,292389.7,59119760.0
PredictedLz,4700285.0,0.961808,29.690307,66925.0,66925.0,64369.0,2313.0,243.0,284615.7,0.880971,...,138789.0,2522.0,2765.0,0.922461,1123618.0,66589.0,64049880.0,4684055.0,282302.7,59083520.0
PredictedLxy,4254167.0,0.960164,31.154924,66925.0,66925.0,64259.0,2424.0,242.0,275587.3,0.878698,...,138431.0,2474.0,2716.0,0.929575,1142452.0,66589.0,64049880.0,4237531.0,273163.3,59539180.0
NumberOfClusters,4472993.0,0.962734,30.464621,66925.0,66925.0,64431.0,2243.0,251.0,273901.8,0.882856,...,139086.0,2758.0,3009.0,0.926168,1081426.0,66589.0,64049880.0,4457296.0,271658.8,59320920.0
SumPtOfAllTracks,4631761.0,0.963317,29.956052,66925.0,66925.0,64470.0,2207.0,248.0,272920.7,0.881828,...,138924.0,2654.0,2902.0,0.923708,1098257.0,66589.0,64049880.0,4615798.0,270713.7,59163370.0


In [20]:
# Sorted result table from the second-to-last iteration of the full-eta-range run.
r_full[-2]

Unnamed: 0,BIBBack,BIBEff,BIBSsqrtB,BIBTotalCount,BIBTotalWeight,BIBinBIB,BIBinHSS,BIBinMJ,HSSBack,HSSEff,...,HSSinHSS,HSSinMJ,MJBack,MJEff,MJSsqrtB,MJTotalCount,MJTotalWeight,MJinBIB,MJinHSS,MJinMJ
JetPt,3111732.0,0.96263,36.521331,66925.0,66925.0,64424.0,2258.0,243.0,1458628.0,0.932214,...,146862.0,2215.0,2458.0,0.928811,1199927.0,66589.0,64049880.0,3103268.0,1456370.0,59490240.0
PredictedLxy,4128359.0,0.967845,31.879043,66925.0,66925.0,64773.0,1919.0,233.0,564809.7,0.899759,...,141749.0,2549.0,2782.0,0.926963,1125647.0,66589.0,64049880.0,4115116.0,562890.7,59371870.0
BIBDeltaTimingM,4446413.0,0.968278,30.731481,66925.0,66925.0,64802.0,1872.0,251.0,516070.9,0.906005,...,142733.0,2505.0,2756.0,0.922743,1125795.0,66589.0,64049880.0,4434110.0,514198.9,59101570.0
,4248337.0,0.968427,31.44459,66925.0,66925.0,64812.0,1864.0,249.0,508213.6,0.906024,...,142736.0,2484.0,2733.0,0.925958,1134462.0,66589.0,64049880.0,4236016.0,506349.6,59307510.0
NTracks,4900633.0,0.971999,29.385148,66925.0,66925.0,65051.0,1664.0,210.0,528124.5,0.924521,...,145650.0,2037.0,2247.0,0.915422,1236909.0,66589.0,64049880.0,4890779.0,526460.5,58632640.0
HadronicLayer1Fraction,4477125.0,0.966918,30.582887,66925.0,66925.0,64711.0,1970.0,244.0,411339.4,0.905402,...,142638.0,2547.0,2791.0,0.923901,1120118.0,66589.0,64049880.0,4464769.0,409369.4,59175740.0
JetEta,4474856.0,0.968487,30.640277,66925.0,66925.0,64816.0,1849.0,260.0,410862.4,0.905631,...,142674.0,2514.0,2774.0,0.923942,1123595.0,66589.0,64049880.0,4462503.0,409013.4,59178360.0
MaxTrackPt,4156668.0,0.968427,31.789431,66925.0,66925.0,64812.0,1872.0,241.0,411196.6,0.907986,...,143045.0,2463.0,2704.0,0.9289,1144152.0,66589.0,64049880.0,4144635.0,409324.6,59495920.0
JetWidth,4245963.0,0.967396,31.419895,66925.0,66925.0,64743.0,1929.0,253.0,375958.6,0.90478,...,142540.0,2481.0,2734.0,0.928064,1136834.0,66589.0,64049880.0,4233443.0,374029.6,59442400.0
SumPtOfAllTracks,4039946.0,0.968502,32.24788,66925.0,66925.0,64817.0,1863.0,245.0,376310.5,0.906291,...,142778.0,2528.0,2773.0,0.93127,1132710.0,66589.0,64049880.0,4027711.0,374447.5,59647720.0


# Using original Rachel $L_{xy}$ and $L_{z}$ predictions

In [21]:
# Start from the variable set that uses only the RPredictedLxy and
# RPredictedLz variables (none of the Predicted* variants).
r_rachel = remove_all_variables(all_events, {
    'RPredictedLxy',
    'CalRatio', 'HadronicLayer1Fraction', 'EnergyDensity',
    'JetEta', 'SumPtOfAllTracks', 'ShowerCenter',
    'JetPt', 'NTracks', 'NumberOfClusters',
    'FirstCellTiming', 'BIBDeltaTimingM', 'JetLat', 'JetWidth', 'FirstClusterRadius',
    'RPredictedLz', 'BIBDeltaTimingP',
    'JetLong', 'MaxTrackPt',
})

Iteration #1
  Training with variable list:
  {'NumberOfClusters', 'CalRatio', 'HadronicLayer1Fraction', 'FirstCellTiming', 'SumPtOfAllTracks', 'RPredictedLz', 'ShowerCenter', 'RPredictedLxy', 'NTracks', 'FirstClusterRadius', 'JetPt', 'BIBDeltaTimingP', 'EnergyDensity', 'JetEta', 'BIBDeltaTimingM', 'MaxTrackPt', 'JetWidth', 'JetLat', 'JetLong'}
FirstCellTiming           145.551161
NTracks                   155.218947
JetPt                     158.215490
HadronicLayer1Fraction    162.964118
CalRatio                  166.952844
JetWidth                  168.263313
BIBDeltaTimingM           168.653280
JetEta                    168.772058
BIBDeltaTimingP           170.593249
ShowerCenter              170.846248
NumberOfClusters          170.897665
JetLat                    170.994720
EnergyDensity             171.067778
MaxTrackPt                171.782128
SumPtOfAllTracks          172.145175
None                      174.691754
FirstClusterRadius        175.566942
JetLong                 

In [22]:
# Number of removal iterations that were run for the RPredicted* variable set.
len(r_rachel)

8

In [23]:
# Sorted result table from the final iteration of the RPredicted* run.
r_rachel[-1]

Unnamed: 0,BIBBack,BIBEff,BIBSsqrtB,BIBTotalCount,BIBTotalWeight,BIBinBIB,BIBinHSS,BIBinMJ,HSSBack,HSSEff,...,HSSinHSS,HSSinMJ,MJBack,MJEff,MJSsqrtB,MJTotalCount,MJTotalWeight,MJinBIB,MJinHSS,MJinMJ
JetPt,2355920.0,0.959163,41.8216,66925.0,66925.0,64192.0,2539.0,194.0,1254747.0,0.912455,...,143749.0,2524.0,2718.0,0.943843,1159560.0,66589.0,64049880.0,2344652.0,1252208.0,60453020.0
NTracks,3342243.0,0.970265,35.518929,66925.0,66925.0,64935.0,1794.0,196.0,801363.5,0.910633,...,143462.0,2289.0,2485.0,0.935519,1202009.0,66589.0,64049880.0,3330453.0,799569.5,59919860.0
HadronicLayer1Fraction,3619443.0,0.962854,33.87099,66925.0,66925.0,64439.0,2268.0,218.0,723285.9,0.88078,...,138759.0,2896.0,3114.0,0.932481,1070284.0,66589.0,64049880.0,3603557.0,721017.9,59725300.0
FirstClusterRadius,3349609.0,0.965917,35.320854,66925.0,66925.0,64644.0,2071.0,210.0,711260.0,0.884386,...,139327.0,2918.0,3128.0,0.936869,1072912.0,66589.0,64049880.0,3334313.0,709189.0,60006380.0
RPredictedLz,3407045.0,0.965723,35.014822,66925.0,66925.0,64631.0,2075.0,219.0,680526.8,0.877308,...,138212.0,3011.0,3230.0,0.936469,1055383.0,66589.0,64049880.0,3390727.0,678451.8,59980700.0
EnergyDensity,3399738.0,0.965693,35.051348,66925.0,66925.0,64629.0,2074.0,222.0,625770.5,0.883186,...,139138.0,2965.0,3187.0,0.937424,1063563.0,66589.0,64049880.0,3384300.0,623696.5,60041880.0
CalRatio,3835678.0,0.963571,32.926918,66925.0,66925.0,64487.0,2185.0,253.0,568243.3,0.877264,...,138205.0,3101.0,3354.0,0.93153,1030229.0,66589.0,64049880.0,3819443.0,566058.3,59664380.0
BIBDeltaTimingP,3198466.0,0.969623,36.284431,66925.0,66925.0,64892.0,1811.0,222.0,382546.6,0.746352,...,117581.0,3257.0,3479.0,0.944692,1025844.0,66589.0,64049880.0,3161763.0,380735.6,60507380.0
BIBDeltaTimingM,3733869.0,0.96768,33.515116,66925.0,66925.0,64762.0,1942.0,221.0,365081.5,0.757885,...,119398.0,3171.0,3392.0,0.93658,1029995.0,66589.0,64049880.0,3698897.0,363139.5,59987840.0
SumPtOfAllTracks,3230641.0,0.965738,35.958641,66925.0,66925.0,64632.0,2068.0,225.0,473906.3,0.877657,...,138267.0,3413.0,3638.0,0.942441,1000786.0,66589.0,64049880.0,3214780.0,471838.3,60363260.0


In [24]:
# Sorted result table from the second-to-last iteration of the RPredicted* run.
r_rachel[-2]

Unnamed: 0,BIBBack,BIBEff,BIBSsqrtB,BIBTotalCount,BIBTotalWeight,BIBinBIB,BIBinHSS,BIBinMJ,HSSBack,HSSEff,...,HSSinHSS,HSSinMJ,MJBack,MJEff,MJSsqrtB,MJTotalCount,MJTotalWeight,MJinBIB,MJinHSS,MJinMJ
JetPt,2392912.0,0.959731,41.521644,66925.0,66925.0,64230.0,2505.0,190.0,1277773.0,0.909903,...,143347.0,2491.0,2681.0,0.942912,1166383.0,66589.0,64049880.0,2381209.0,1275268.0,60393400.0
HadronicLayer1Fraction,3209670.0,0.962346,35.949216,66925.0,66925.0,64405.0,2300.0,220.0,717974.8,0.882183,...,138980.0,2809.0,3029.0,0.93896,1092737.0,66589.0,64049880.0,3193918.0,715674.8,60140280.0
NTracks,3924036.0,0.970728,32.795905,66925.0,66925.0,64966.0,1766.0,193.0,744024.1,0.911407,...,143584.0,2181.0,2374.0,0.92733,1219024.0,66589.0,64049880.0,3912260.0,742258.1,59395360.0
RPredictedLz,3566864.0,0.965992,34.230916,66925.0,66925.0,64649.0,2070.0,206.0,688284.6,0.878387,...,138382.0,2952.0,3158.0,0.93385,1064362.0,66589.0,64049880.0,3550657.0,686214.6,59813010.0
EnergyDensity,3083331.0,0.965573,36.80134,66925.0,66925.0,64621.0,2079.0,225.0,622967.8,0.882158,...,138976.0,2923.0,3148.0,0.942411,1075824.0,66589.0,64049880.0,3067689.0,620888.8,60361300.0
FirstClusterRadius,3712825.0,0.965812,33.545087,66925.0,66925.0,64637.0,2081.0,207.0,609994.1,0.884125,...,139286.0,2850.0,3057.0,0.932782,1080564.0,66589.0,64049880.0,3697420.0,607913.1,59744540.0
BIBDeltaTimingP,3266131.0,0.970026,35.921549,66925.0,66925.0,64919.0,1783.0,223.0,403439.5,0.752065,...,118481.0,3094.0,3317.0,0.943297,1049045.0,66589.0,64049880.0,3230165.0,401656.5,60418060.0
CalRatio,4028789.0,0.96384,32.13706,66925.0,66925.0,64505.0,2168.0,252.0,550555.6,0.880914,...,138780.0,2991.0,3243.0,0.928784,1044622.0,66589.0,64049880.0,4013019.0,548387.6,59488470.0
BIBDeltaTimingM,3380756.0,0.967949,35.231733,66925.0,66925.0,64780.0,1925.0,220.0,378343.2,0.759015,...,119576.0,3134.0,3354.0,0.941884,1041679.0,66589.0,64049880.0,3345925.0,376418.2,60327530.0
SumPtOfAllTracks,3525958.0,0.965588,34.414531,66925.0,66925.0,64622.0,2086.0,217.0,467612.5,0.878184,...,138350.0,3307.0,3524.0,0.93793,1011977.0,66589.0,64049880.0,3510074.0,465526.5,60074280.0
