## Core Functions
These handle importing necessary libraries, preparation of the feature arrays for Machine Learning, and execution of Machine Learning training and testing.

In [2]:
import ROOT
from os import path
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
import numpy as np
from statistics import mean, median
import math
from array import array
import csv
from datetime import datetime



def Build_ML_Feature_Arrays_ptTrue(
    input_file_path, input_tree_name, pt_true_min, pt_true_max) :
    """
    Creates an array of all features exported from ROOT.
    Applies a cut using pT_True values between pt_true_min and pt_true_max.
    """

    input_file = None;
    if (ROOT.gSystem.AccessPathName(input_file_path)) :
        print("Input file path does not exist:", input_file)
        exit()
    else :
        input_file = ROOT.TFile.Open(input_file_path, "READ")
        print("Input file accessed successfully. Output file generated.")
    
    print("Accessing input tree...")
    input_tree = input_file.Get(input_tree_name)
    print("Input tree accessed successfully.")
    
    # Setup Arrays
    X_values_A  = []  # Array of arrays of inputs corresponding to pT_true as PYTHIA jet pT
    y_values_A  = []  # Array of targets for regression, pT_true is PYTHIA jet pT
    
    X_values_B  = []  # Array of arrays of inputs corresponding to pT_true as jet pT * PYTHIA pT / const. pT
    y_values_B  = []  # Array of targets for regression, pT_true is jet pT * PYTHIA pT / const. pT

    # Predictors
    jet_pt_raw       = None  # Raw/uncorrected jet pt
    jet_pt_corr      = None  # Corrected jet pt
    jet_mass         = None
    jet_area         = None
    jet_area_err     = None
    jet_const_n      = None
    const_pt_mean    = None  # Mean pt of jet constituents
    const_pt_median  = None  # Mean pt of jet constituents
    const_1_pt       = None  # pt of jet constituent particle 1
    const_2_pt       = None  # pt of jet constituent particle 2
    const_3_pt       = None  # pt of jet constituent particle 3
    const_4_pt       = None  # pt of jet constituent particle 4
    const_5_pt       = None  # pt of jet constituent particle 5
    const_6_pt       = None  # pt of jet constituent particle 6
    const_7_pt       = None  # pt of jet constituent particle 7
    const_8_pt       = None  # pt of jet constituent particle 8
    const_9_pt       = None  # pt of jet constituent particle 9
    const_10_pt      = None  # pt of jet constituent particle 10
    jet_y            = None
    jet_phi          = None
    jet_rho          = None

    # Targets
    jet_pt_true_A    = None  # True jet pt (determined from PYTHIA jets)
    jet_pt_true_B    = None

    # Helper Variables
    event_counter    = 0
    event_n_total    = 20000
    jet_n            = None  # Number of jets in an event
    jet_n_counter_A  = 0
    jet_n_counter_B  = 0
    jet_const_pt_arr = []    # Array of jet constituents and their values
    sc_correction_arr_A = []
    sc_correction_arr_B = []
    
    print("Preparing to collect data from tree...")

    # Collecting from TTree
    for event in input_tree :  
        jet_n = event.jet_n

        for jet in range(0, jet_n) :
            jet_pt_raw      = input_tree.jet_pt_raw[jet]
            jet_pt_corr     = input_tree.jet_pt_corr[jet]
            jet_mass        = input_tree.jet_mass[jet]
            jet_area        = input_tree.jet_area[jet]
            jet_area_err    = input_tree.jet_area_err[jet]
            jet_const_n     = input_tree.jet_const_n[jet]
            const_pt_mean   = input_tree.const_pt_mean[jet]
            const_pt_median = input_tree.const_pt_median[jet]
            const_1_pt      = input_tree.const_1_pt[jet]
            const_2_pt      = input_tree.const_2_pt[jet]
            const_3_pt      = input_tree.const_3_pt[jet]
            const_4_pt      = input_tree.const_4_pt[jet]
            const_5_pt      = input_tree.const_5_pt[jet]
            const_6_pt      = input_tree.const_6_pt[jet]
            const_7_pt      = input_tree.const_7_pt[jet]
            const_8_pt      = input_tree.const_8_pt[jet]
            const_9_pt      = input_tree.const_9_pt[jet]
            const_10_pt     = input_tree.const_10_pt[jet]
            jet_y           = input_tree.jet_y[jet]
            jet_phi         = input_tree.jet_phi[jet]
            jet_rho         = input_tree.jet_rho[jet]
            
            jet_pt_true_A   = input_tree.jet_pt_true_pythia[jet]
            jet_pt_true_B   = input_tree.jet_pt_true_paper[jet]
            
            temp_jet_arr = [
                    jet_pt_raw,      jet_pt_corr,     jet_mass,        jet_area, 
                    jet_area_err,    jet_const_n,     const_pt_mean,   const_pt_median, 
                    const_1_pt,      const_2_pt,      const_3_pt,      const_4_pt,
                    const_5_pt,      const_6_pt,      const_7_pt,      const_8_pt,
                    const_9_pt,      const_10_pt,     jet_y,           jet_phi,
                    jet_rho]
            
            if (jet_pt_true_A != 0.0) and (jet_pt_true_A > pt_true_min) and (jet_pt_true_A < pt_true_max) :
                X_values_A.append(temp_jet_arr)

                y_values_A.append(jet_pt_true_A)
                
                sc_correction_arr_A.append(jet_pt_corr)
                
                jet_n_counter_A   = jet_n_counter_A + 1
                
                if event_counter % 1000 == 0 : print(f"Event: {event_counter:3.0f} | Jet: {jet:2.0f} | pTraw: {jet_pt_raw:3.3f} | pTcorr: {jet_pt_corr: 3.3f} | pTtrue_A: {jet_pt_true_A: 5.3f}")
            
            if (jet_pt_true_B != 0.0) and (jet_pt_true_B > pt_true_min) and (jet_pt_true_B < pt_true_max) :
                X_values_B.append(temp_jet_arr)
                
                y_values_B.append(jet_pt_true_B)

                sc_correction_arr_B.append(jet_pt_corr)
                
                jet_n_counter_B   = jet_n_counter_B + 1

                if event_counter % 1000 == 0 : print(f"Event: {event_counter:3.0f} | Jet: {jet:2.0f} | pTraw: {jet_pt_raw:3.3f} | pTcorr: {jet_pt_corr: 3.3f} | pTtrue_B: {jet_pt_true_B: 5.3f}")
            
        event_counter += 1

    print(f"All data transferred to array. Testing with {jet_n_counter_A} A-jets and {jet_n_counter_B} B-jets.\n")
    print(f"Training set A: {len(X_values_A)} / {len(y_values_A)} / {len(sc_correction_arr_A)}")
    print(f"Training set B: {len(X_values_B)} / {len(y_values_B)} / {len(sc_correction_arr_B)}")

    input_file.Close()
    print("Input file closed.")
    
    return X_values_A, y_values_A, sc_correction_arr_A, X_values_B, y_values_B, sc_correction_arr_B

    

def Build_ML_Feature_Arrays_ptCorr(
    input_file_path, input_tree_name, pt_corr_min, pt_corr_max) :
    """
    WARNING: CODE MAY BE OUTDATED
    Creates an array of all features exported from ROOT.
    Applies a cut using pT_Corrected values between pt_corr_min and pt_corr_max.
    This was used once for an alternative selection method and may no longer work.
    """

    input_file = None;
    if (ROOT.gSystem.AccessPathName(input_file_path)) :
        print("Input file path does not exist:", input_file)
        exit()
    else :
        input_file = ROOT.TFile.Open(input_file_path, "READ")
        print("Input file accessed successfully. Output file generated.")
    
    print("Accessing input tree...")
    input_tree = input_file.Get(input_tree_name)
    print("Input tree accessed successfully.")

    # Setup Arrays
    X_values_A  = []  # Array of arrays of inputs corresponding to pT_true as PYTHIA jet pT
    y_values_A  = []  # Array of targets for regression, pT_true is PYTHIA jet pT
    
    X_values_B  = []  # Array of arrays of inputs corresponding to pT_true as jet pT * PYTHIA pT / const. pT
    y_values_B  = []  # Array of targets for regression, pT_true is jet pT * PYTHIA pT / const. pT

    # Predictors
    jet_pt_raw       = None  # Raw/uncorrected jet pt
    jet_pt_corr      = None  # Corrected jet pt
    jet_mass         = None
    jet_area         = None
    jet_area_err     = None
    jet_const_n      = None
    const_pt_mean    = None  # Mean pt of jet constituents
    const_pt_median  = None  # Mean pt of jet constituents
    const_1_pt       = None  # pt of jet constituent particle 1
    const_2_pt       = None  # pt of jet constituent particle 2
    const_3_pt       = None  # pt of jet constituent particle 3
    const_4_pt       = None  # pt of jet constituent particle 4
    const_5_pt       = None  # pt of jet constituent particle 5
    const_6_pt       = None  # pt of jet constituent particle 6
    const_7_pt       = None  # pt of jet constituent particle 7
    const_8_pt       = None  # pt of jet constituent particle 8
    const_9_pt       = None  # pt of jet constituent particle 9
    const_10_pt      = None  # pt of jet constituent particle 10
    jet_y            = None
    jet_phi          = None
    jet_rho          = None
    

    # Targets
    jet_pt_true_A    = None  # True jet pt (determined from PYTHIA jets)
    jet_pt_true_B    = None

    # Helper Variables
    event_n          = 0
    event_n_total    = 20000
    jet_n            = None  # Number of jets in an event
    jet_n_counter_A  = 0
    jet_n_counter_B  = 0
    jet_const_pt_arr = []    # Array of jet constituents and their values
    sc_correction_arr_A = []
    sc_correction_arr_B = []

    print("Preparing to collect data from tree...")
    
    # Collecting from TTree
    for event in input_tree :  
        jet_n = event.jet_n
        
        for jet in range(0, jet_n) :
            jet_pt_raw      = input_tree.jet_pt_raw[jet]
            jet_pt_corr     = input_tree.jet_pt_corr[jet]
            jet_mass        = input_tree.jet_mass[jet]
            jet_area        = input_tree.jet_area[jet]
            jet_area_err    = input_tree.jet_area_err[jet]
            jet_const_n     = input_tree.jet_const_n[jet]
            const_pt_mean   = input_tree.const_pt_mean[jet]
            const_pt_median = input_tree.const_pt_median[jet]
            const_1_pt      = input_tree.const_1_pt[jet]
            const_2_pt      = input_tree.const_2_pt[jet]
            const_3_pt      = input_tree.const_3_pt[jet]
            const_4_pt      = input_tree.const_4_pt[jet]
            const_5_pt      = input_tree.const_5_pt[jet]
            const_6_pt      = input_tree.const_6_pt[jet]
            const_7_pt      = input_tree.const_7_pt[jet]
            const_8_pt      = input_tree.const_8_pt[jet]
            const_9_pt      = input_tree.const_9_pt[jet]
            const_10_pt     = input_tree.const_10_pt[jet]
            jet_y           = input_tree.jet_y[jet]
            jet_phi         = input_tree.jet_phi[jet]
            jet_rho         = input_tree.jet_rho[jet]
            
            jet_pt_true_A   = input_tree.jet_pt_true_pythia[jet]
            jet_pt_true_B   = input_tree.jet_pt_true_paper[jet]
            
            temp_jet_arr = [
                    jet_pt_raw,      jet_pt_corr,     jet_mass,        jet_area, 
                    jet_area_err,    jet_const_n,     const_pt_mean,   const_pt_median, 
                    const_1_pt,      const_2_pt,      const_3_pt,      const_4_pt,
                    const_5_pt,      const_6_pt,      const_7_pt,      const_8_pt,
                    const_9_pt,      const_10_pt,     jet_y,           jet_phi,
                    jet_rho]
            
            if (jet_pt_true_A != 0.0) and (jet_pt_corr > pt_corr_min) and (jet_pt_corr < pt_corr_max) :
                
                X_values_A.append(temp_jet_arr)

                y_values_A.append(jet_pt_true_A)
                
                sc_correction_arr_A.append(jet_pt_corr)
                
                jet_n_counter_A   = jet_n_counter_A + 1
                
                if jet_n_counter_A % 10 == 0 :
                    print(f"Event: {event_n:3.0f} | Jet: {jet:2.0f} | pTraw: {jet_pt_raw:3.3f} | pTcorr: {jet_pt_corr: 3.3f} | pTtrue_A: {jet_pt_true_A: 5.3f}")
            
            if (jet_pt_true_B != 0.0) and (jet_pt_corr > pt_corr_min) and (jet_pt_corr < pt_corr_max) :
                X_values_B.append(temp_jet_arr)
                
                y_values_B.append(jet_pt_true_B)

                sc_correction_arr_B.append(jet_pt_corr)
                
                jet_n_counter_B   = jet_n_counter_B + 1

                if jet_n_counter_B % 10 == 0 :
                    print(f"Event: {event_n:3.0f} | Jet: {jet:2.0f} | pTraw: {jet_pt_raw:3.3f} | pTcorr: {jet_pt_corr: 3.3f} | pTtrue_B: {jet_pt_true_A: 5.3f}")

        event_n = event_n + 1

    print(f"All data transferred to array. Testing with {jet_n_counter_A} A-jets and {jet_n_counter_B} B-jets.\n")
    print(f"Training set A: {len(X_values_A)} / {len(y_values_A)} / {len(sc_correction_arr_A)}")
    print(f"Training set B: {len(X_values_B)} / {len(y_values_B)} / {len(sc_correction_arr_B)}")

    input_file.Close()
    print("Input file closed.")
    
    return X_values_A, y_values_A, sc_correction_arr_A, X_values_B, y_values_B, sc_correction_arr_B
    
    
    
def Train_ML_pt_Estimators(
    X_train, # Array of array of input features
    y_train, # Array of target values
    features_labels):
    """
    Function for training Machine Learning Estimators.
    Takes in feature array (X_train) - this is actually an array of arrays, 
    a target array (y_train), and an array of feature labels (feature_arr_labels).
    
    Note that X_train and y_train MUST be the same length, 
    and feature_arr_lables should be 1 longer than the number of features used.
    """
    
    # --- LINEAR REGRESSION ---
    
    print("\n----- Fitting Linear Regression Estimator -----\n")

    # Creates a linear regression model in a pipeline
    lr_estimator = LinearRegression()
    lr_pipeline = make_pipeline(
#         StandardScaler,
        lr_estimator )

    # Fits the regression model
    output = lr_pipeline.fit(X_train, y_train)
    print("\nLinear Regression Fit:\n", output)

    # Outputs regression coefficients
    output = lr_estimator.coef_
    print("Regression Coefficients:\n", output)

    # --- RANDOM FOREST REGRESSION ---
    
    print("\n----- Fitting Random Forest Regression Estimator -----\n")

    # Creates a random forest estimator in a pipeline
    rf_estimator = RandomForestRegressor()
    rf_pipeline = make_pipeline(
#         StandardScaler,
        rf_estimator )

    # Fits the regression model
    output = rf_pipeline.fit(X_train, y_train)
    print("\nRandom Tree Regression Fit:\n", output)

    # Outputs regression coefficients
    features_arr = rf_estimator.feature_importances_
    
    print(features_labels[0])
    for i in range(len(features_arr)) :
        print(features_labels[i+1], features_arr[i])
        
    # --- MULTILAYER PERCEPTRON REGRESSION ---
    
    print("\n----- Fitting Neural Network Regression Estimator -----\n")

    # Creates a MLP estimator in a pipeline
    nn_estimator = MLPRegressor(max_iter=1000)
    nn_pipeline = make_pipeline(
#         StandardScaler,
        nn_estimator )

    # Fits the regression model
    output = nn_pipeline.fit(X_train, y_train)
    print("\nMultilayer Perceptron Regression Fit:\n", output)
    
    return lr_pipeline, rf_pipeline, nn_pipeline, features_arr
    
    

def Test_ML_pt_Estimators(
    X_test, # Array of arrays of input features
    y_test, # Array of target values for pT
    sc_correction_arr, # Array of simple correction values to compare against
    lr_pipeline, rf_pipeline, nn_pipeline, features_arr, # Estimators and their feature array
    file_prefix,
    output_file_path,
    x_bins, x_min, x_max,
    bool_features,
    bool_compare,
    X_values) :
    """
    Function for testing the Machine Learning estimators on separate testing data sets.
    
    Note: The estimators used for this MUST be trained with the same features used to test!
    You cannot change (increase, decrease, or swap) the estimators between training and testing.
    Note: The number of datapoints for testing CAN be different between training and testing.
    For example, you can train with 100k points and test 500k points.
    """
    
    output_file = ROOT.TFile.Open(output_file_path, "UPDATE")
    output_tree = ROOT.TTree("Tree_ML_" + file_prefix[5:10], "TTree of data from machine learning")
    
    min_GeV = file_prefix[5:7]
    max_GeV = file_prefix[8:10]
    
    # --- LINEAR REGRESSION ---
    
    print("\n----- Testing Linear Regression Estimator -----\n")

    # Outputs mean square error
    output = np.mean((lr_pipeline.predict(X_test) - y_test)**2)
    print("Mean Square Error:\n", output)

    # Outputs the variance score
    output = lr_pipeline.score(X_test, y_test)
    print("Variance Score:\n", output)

    # --- RANDOM FOREST REGRESSION ---
    
    print("\n----- Testing Random Forest Regression Estimator -----\n")

    # Outputs mean square error
    output = np.mean((rf_pipeline.predict(X_test) - y_test)**2)
    print("Mean Square Error:\n", output)

    # Outputs the variance score
    output = rf_pipeline.score(X_test, y_test)
    print("Variance Score:\n", output)

    # --- MULTILAYER PERCEPTRON REGRESSION ---
    
    print("\n----- Testing Neural Network Regression Estimator -----\n")

    # Outputs mean square error
    output = np.mean((nn_pipeline.predict(X_test) - y_test)**2)
    print("Mean Square Error:\n", output)

    # Outputs the variance score
    output = nn_pipeline.score(X_test, y_test)
    print("Variance Score:\n", output)
    
    # --- GENERATE HISTOGRAMS ---
    
    th1d_data_feature_importance = ROOT.TH1D(
        "th1d_" + file_prefix + "_feature_importance","", len(features_arr), 0, 1)
    
    name_simple_correction = "th1d_" + file_prefix + "_simple_correction"
    name_linear_regression = "th1d_" + file_prefix + "_linear_regression"
    name_random_forest     = "th1d_" + file_prefix + "_random_forest"
    name_neural_network    = "th1d_" + file_prefix + "_neural_network"
    title_infix            = min_GeV + " GeV to " + max_GeV + " GeV"
    
    th1d_simple_correction = ROOT.TH1D(
        name_simple_correction,
        "Jet p_{T} Delta for " + title_infix + ", Background Subtraction; (p_{T, reco} - p_{T, true})/p_{T, true} [GeV]; N_{ch jets}",
        x_bins, x_min, x_max)
    th1d_linear_regression = ROOT.TH1D(
        name_linear_regression,
        "Jet p_{T} Delta for " + title_infix + ", Linear Regression; (p_{T, reco} - p_{T, true})/p_{T, true} [GeV]; N_{ch jets}",
        x_bins, x_min, x_max)
    th1d_random_forest = ROOT.TH1D(
        name_random_forest,
        "Jet p_{T} Delta for " + title_infix + ", Random Forest; (p_{T, reco} - p_{T, true})/p_{T, true} [GeV]; N_{ch jets}",
        x_bins, x_min, x_max)
    th1d_neural_network = ROOT.TH1D(
        name_neural_network,
        "Jet p_{T} Delta for " + title_infix + ", Neural Network; (p_{T, reco} - p_{T, true})/p_{T, true} [GeV]; N_{ch jets}",
        x_bins, x_min, x_max)

    th1d_simple_correction.SetDirectory(0)
    th1d_linear_regression.SetDirectory(0)
    th1d_random_forest.SetDirectory(0)
    th1d_neural_network.SetDirectory(0)

    th1d_simple_correction.Sumw2()
    th1d_linear_regression.Sumw2()
    th1d_random_forest.Sumw2()
    th1d_neural_network.Sumw2()
        
    for i in range(len(features_arr)): th1d_data_feature_importance.SetBinContent(i+1, features_arr[i])
    
    lr_prediction_arr = lr_pipeline.predict(X_test)
    rf_prediction_arr = rf_pipeline.predict(X_test)
    nn_prediction_arr = nn_pipeline.predict(X_test)

    output_file.cd()
    
    for i in range(len(X_test)) :
        
        # Normal
        sc_correction = sc_correction_arr[i]
        lr_prediction = lr_prediction_arr[i]
        rf_prediction = rf_prediction_arr[i]
        nn_prediction = nn_prediction_arr[i]

        target = y_test[i]
        
        sc_delta = sc_correction - target
        lr_delta = lr_prediction - target
        rf_delta = rf_prediction - target
        nn_delta = nn_prediction - target
        
        if bool_compare:
            th1d_simple_correction.Fill( sc_delta )
            th1d_linear_regression.Fill( lr_delta )
            th1d_random_forest.Fill( rf_delta )
            th1d_neural_network.Fill( nn_delta )
        else:
            if target != 0 :
                th1d_simple_correction.Fill( sc_delta / target )
                th1d_linear_regression.Fill( lr_delta / target )
                th1d_random_forest.Fill( rf_delta / target )
                th1d_neural_network.Fill( nn_delta / target )
        
        if i % 100 == 0 :
            print(f"Test {i:4.0f}: True: {y_test[i]:3.3f} , ", end="")
            print(f"Pred(line): {lr_prediction:4.3f}({lr_delta: 4.3f}) , ", end="")
            print(f"Pred(tree): {rf_prediction:4.3f}({rf_delta: 4.3f}) , ", end="")
            print(f"Pred(perc): {nn_prediction:4.3f}({nn_delta: 4.3f})")
    
    th1d_simple_correction.Write("", ROOT.TObject.kOverwrite)
    th1d_linear_regression.Write("", ROOT.TObject.kOverwrite)
    th1d_random_forest.Write("", ROOT.TObject.kOverwrite)
    th1d_neural_network.Write("", ROOT.TObject.kOverwrite)
    th1d_data_feature_importance.Write("", ROOT.TObject.kOverwrite)
    
    output_file.Write()
    print("Output file written to.")

    output_file.Close()
    print("Output file closed.")
    
    output_csv  = open((output_file_path[:-5] + "_" + file_prefix + ".csv"), 'w', newline='')
    csv_writer  = csv.writer(output_csv)
    csv_header = ['Jet Area', 'Jet pT Raw', 'Jet pT Corrected', 
                'Jet pT True', 'Jet pT ML-LR', 'Jet pT ML-RF', 'Jet pT ML-NN']
    
    o_jet_area     = 0.
    o_jet_pt_raw   = 0.
    o_jet_pt_corr  = 0.
    o_jet_pt_true  = 0.
    o_jet_pt_ml_lr = 0.
    o_jet_pt_ml_rf = 0.
    o_jet_pt_ml_nn = 0.
    
    for i in range(len(X_test)) :
        
        # Normal
        sc_correction = sc_correction_arr[i]
        lr_prediction = lr_prediction_arr[i]
        rf_prediction = rf_prediction_arr[i]
        nn_prediction = nn_prediction_arr[i]

        target = y_test[i]
        
        sc_delta = sc_correction - target
        lr_delta = lr_prediction - target
        rf_delta = rf_prediction - target
        nn_delta = nn_prediction - target
        
        # Adds data to output CSV
        o_jet_area     = X_values[i][3]
        o_jet_pt_raw   = X_values[i][0]
        o_jet_pt_true  = target
        o_jet_pt_corr  = sc_correction
        o_jet_pt_ml_lr = lr_prediction
        o_jet_pt_ml_rf = rf_prediction
        o_jet_pt_ml_nn = nn_prediction
        
        csv_row = [o_jet_area, o_jet_pt_raw, o_jet_pt_true, 
                   o_jet_pt_corr, o_jet_pt_ml_lr, o_jet_pt_ml_rf, o_jet_pt_ml_nn]
        
        csv_writer.writerow(csv_row)
        
        if i % 100 == 0 :
            print(f"Test {i:4.0f}: True: {y_test[i]:3.3f} , ", end="")
            print(f"Pred(line): {lr_prediction:4.3f}({lr_delta: 4.3f}) , ", end="")
            print(f"Pred(tree): {rf_prediction:4.3f}({rf_delta: 4.3f}) , ", end="")
            print(f"Pred(perc): {nn_prediction:4.3f}({nn_delta: 4.3f})")
    
    output_csv.close()
    
    print("Predictions and histogram filling complete.")
    
    return


now = datetime.now()
dt_string = now.strftime("%Y/%m/%d %H:%M:%S")

print("Ready!", dt_string)

Welcome to JupyROOT 6.26/04
Ready! 2022/10/07 13:53:15


## Data Preparation
Sets up input directories/files for training and testing. Creates initial training data arrays.

In [3]:
train_directory    = "../Files/ALICE_kinematics_trial2/Data/"
train_file_name    = "ML_Prep_10_90_Train.root"
train_tree_name    = "Tree_10_90_Train"
train_file_path    = train_directory + train_file_name

test_directory     = "../Files/ALICE_kinematics_trial2/Data/"
test_file_names    = ["ML_Prep_20_40_Test.root", "ML_Prep_40_60_Test.root", "ML_Prep_60_80_Test.root"]
# test_file_names    = ["ML_Prep_40_60_Test.root"]
test_tree_names    = ["Tree_20_40_Test", "Tree_40_60_Test", "Tree_60_80_Test"]
# test_tree_names    = ["Tree_40_60_Test"]
test_file_paths    = [test_directory + test_file_names[i] for i in range(len(test_file_names))]

output_directory   = "../Files/ALICE_kinematics_trial2/Data/"
output_file_name   = "ML_Results_10_90.root"
output_file_path   = output_directory + output_file_name

X_train_A, y_train_A, sc_corr_train_arr_A, X_train_B, y_train_B, sc_corr_train_arr_B = Build_ML_Feature_Arrays_ptTrue(
    train_file_path, train_tree_name, 10.0, 90.0)

now = datetime.now()
dt_string = now.strftime("%Y/%m/%d %H:%M:%S")

print("Ready!", dt_string)

Input file accessed successfully. Output file generated.
Accessing input tree...
Input tree accessed successfully.
Preparing to collect data from tree...
Event:   0 | Jet:  2 | pTraw: 75.736 | pTcorr:  18.050 | pTtrue_A:  10.595
Event: 1000 | Jet:  1 | pTraw: 72.102 | pTcorr:  22.519 | pTtrue_A:  11.410
Event: 2000 | Jet:  2 | pTraw: 134.037 | pTcorr:  83.440 | pTtrue_A:  84.135
Event: 2000 | Jet:  2 | pTraw: 134.037 | pTcorr:  83.440 | pTtrue_B:  83.734
Event: 2000 | Jet:  3 | pTraw: 73.835 | pTcorr:  14.627 | pTtrue_A:  10.835
Event: 2000 | Jet:  7 | pTraw: 55.182 | pTcorr:  13.198 | pTtrue_A:  10.341
Event: 2000 | Jet:  7 | pTraw: 55.182 | pTcorr:  13.198 | pTtrue_B:  10.303
Event: 3000 | Jet:  2 | pTraw: 85.841 | pTcorr:  42.181 | pTtrue_A:  56.269
Event: 3000 | Jet:  2 | pTraw: 85.841 | pTcorr:  42.181 | pTtrue_B:  56.026
Event: 4000 | Jet:  1 | pTraw: 133.919 | pTcorr:  70.242 | pTtrue_A:  81.457
Event: 4000 | Jet:  1 | pTraw: 133.919 | pTcorr:  70.242 | pTtrue_B:  33.882
Event: 

Event: 34000 | Jet:  0 | pTraw: 154.200 | pTcorr:  96.654 | pTtrue_A:  86.432
Event: 34000 | Jet:  0 | pTraw: 154.200 | pTcorr:  96.654 | pTtrue_B:  85.532
Event: 34000 | Jet:  1 | pTraw: 121.597 | pTcorr:  71.557 | pTtrue_A:  74.278
Event: 34000 | Jet:  2 | pTraw: 102.405 | pTcorr:  46.111 | pTtrue_A:  29.878
Event: 34000 | Jet:  2 | pTraw: 102.405 | pTcorr:  46.111 | pTtrue_B:  29.575
Event: 35000 | Jet:  2 | pTraw: 117.852 | pTcorr:  65.578 | pTtrue_A:  59.556
Event: 35000 | Jet:  2 | pTraw: 117.852 | pTcorr:  65.578 | pTtrue_B:  48.972
Event: 36000 | Jet:  2 | pTraw: 66.533 | pTcorr:  9.411 | pTtrue_A:  14.272
Event: 36000 | Jet:  2 | pTraw: 66.533 | pTcorr:  9.411 | pTtrue_B:  14.093
Event: 37000 | Jet:  2 | pTraw: 93.129 | pTcorr:  60.285 | pTtrue_A:  58.114
Event: 37000 | Jet:  2 | pTraw: 93.129 | pTcorr:  60.285 | pTtrue_B:  57.307
Event: 37000 | Jet:  3 | pTraw: 86.581 | pTcorr:  36.349 | pTtrue_A:  39.642
Event: 37000 | Jet:  4 | pTraw: 82.337 | pTcorr:  39.833 | pTtrue_A:  3

Event: 73000 | Jet:  1 | pTraw: 100.355 | pTcorr:  58.035 | pTtrue_A:  50.939
Event: 74000 | Jet:  1 | pTraw: 151.762 | pTcorr:  80.974 | pTtrue_A:  77.382
Event: 74000 | Jet: 12 | pTraw: 60.242 | pTcorr:  5.184 | pTtrue_A:  17.441
Event: 74000 | Jet: 12 | pTraw: 60.242 | pTcorr:  5.184 | pTtrue_B:  17.152
Event: 75000 | Jet:  2 | pTraw: 110.330 | pTcorr:  56.411 | pTtrue_A:  56.898
Event: 75000 | Jet:  2 | pTraw: 110.330 | pTcorr:  56.411 | pTtrue_B:  56.225
Event: 75000 | Jet:  3 | pTraw: 78.479 | pTcorr:  35.344 | pTtrue_A:  35.189
Event: 76000 | Jet:  2 | pTraw: 115.244 | pTcorr:  56.367 | pTtrue_A:  50.158
Event: 76000 | Jet:  2 | pTraw: 115.244 | pTcorr:  56.367 | pTtrue_B:  49.875
Event: 76000 | Jet:  3 | pTraw: 74.168 | pTcorr:  21.179 | pTtrue_A:  20.338
Event: 77000 | Jet:  1 | pTraw: 72.163 | pTcorr:  28.929 | pTtrue_A:  20.574
Event: 78000 | Jet:  2 | pTraw: 152.420 | pTcorr:  91.786 | pTtrue_B:  46.548
Event: 78000 | Jet:  3 | pTraw: 104.747 | pTcorr:  52.436 | pTtrue_A:  

Event: 111000 | Jet:  0 | pTraw: 73.314 | pTcorr:  27.498 | pTtrue_A:  35.838
Event: 111000 | Jet:  0 | pTraw: 73.314 | pTcorr:  27.498 | pTtrue_B:  34.342
Event: 112000 | Jet:  1 | pTraw: 110.289 | pTcorr:  45.867 | pTtrue_A:  50.411
Event: 112000 | Jet:  4 | pTraw: 68.456 | pTcorr:  14.770 | pTtrue_A:  19.935
Event: 112000 | Jet:  4 | pTraw: 68.456 | pTcorr:  14.770 | pTtrue_B:  19.673
Event: 113000 | Jet:  3 | pTraw: 63.104 | pTcorr:  6.227 | pTtrue_A:  12.468
Event: 113000 | Jet:  3 | pTraw: 63.104 | pTcorr:  6.227 | pTtrue_B:  11.982
Event: 114000 | Jet:  1 | pTraw: 125.273 | pTcorr:  78.173 | pTtrue_A:  74.766
Event: 114000 | Jet:  2 | pTraw: 92.668 | pTcorr:  39.145 | pTtrue_A:  38.674
Event: 114000 | Jet:  2 | pTraw: 92.668 | pTcorr:  39.145 | pTtrue_B:  37.760
Event: 115000 | Jet:  3 | pTraw: 68.359 | pTcorr:  21.872 | pTtrue_A:  30.444
Event: 115000 | Jet:  3 | pTraw: 68.359 | pTcorr:  21.872 | pTtrue_B:  30.333
Event: 115000 | Jet: 23 | pTraw: 18.313 | pTcorr:  9.218 | pTtru

Event: 147000 | Jet:  1 | pTraw: 114.157 | pTcorr:  56.707 | pTtrue_A:  48.405
Event: 147000 | Jet:  6 | pTraw: 66.911 | pTcorr:  20.702 | pTtrue_A:  15.661
Event: 147000 | Jet:  6 | pTraw: 66.911 | pTcorr:  20.702 | pTtrue_B:  15.461
Event: 148000 | Jet:  2 | pTraw: 72.972 | pTcorr:  15.477 | pTtrue_A:  14.388
Event: 148000 | Jet:  2 | pTraw: 72.972 | pTcorr:  15.477 | pTtrue_B:  14.215
Event: 149000 | Jet:  0 | pTraw: 130.609 | pTcorr:  80.150 | pTtrue_A:  77.103
Event: 149000 | Jet:  0 | pTraw: 130.609 | pTcorr:  80.150 | pTtrue_B:  76.426
Event: 149000 | Jet:  1 | pTraw: 97.563 | pTcorr:  39.255 | pTtrue_A:  47.021
Event: 149000 | Jet:  3 | pTraw: 69.625 | pTcorr:  15.802 | pTtrue_A:  26.657
Event: 150000 | Jet:  0 | pTraw: 127.107 | pTcorr:  80.030 | pTtrue_A:  64.951
Event: 150000 | Jet:  2 | pTraw: 80.125 | pTcorr:  37.142 | pTtrue_A:  45.834
Event: 150000 | Jet:  2 | pTraw: 80.125 | pTcorr:  37.142 | pTtrue_B:  44.544
Event: 150000 | Jet: 17 | pTraw: 31.486 | pTcorr:  14.088 | 

Event: 188000 | Jet:  2 | pTraw: 99.921 | pTcorr:  44.058 | pTtrue_A:  32.594
Event: 188000 | Jet:  2 | pTraw: 99.921 | pTcorr:  44.058 | pTtrue_B:  32.392
Event: 188000 | Jet:  3 | pTraw: 82.358 | pTcorr:  24.066 | pTtrue_A:  17.613
Event: 188000 | Jet:  3 | pTraw: 82.358 | pTcorr:  24.066 | pTtrue_B:  17.400
Event: 188000 | Jet:  5 | pTraw: 81.668 | pTcorr:  30.662 | pTtrue_A:  28.174
Event: 188000 | Jet:  7 | pTraw: 64.336 | pTcorr:  7.258 | pTtrue_A:  15.699
Event: 189000 | Jet:  0 | pTraw: 116.865 | pTcorr:  62.122 | pTtrue_A:  46.025
Event: 189000 | Jet:  1 | pTraw: 112.932 | pTcorr:  59.354 | pTtrue_A:  48.358
Event: 189000 | Jet:  1 | pTraw: 112.932 | pTcorr:  59.354 | pTtrue_B:  47.882
Event: 190000 | Jet:  1 | pTraw: 81.384 | pTcorr:  22.497 | pTtrue_A:  26.920
Event: 190000 | Jet: 21 | pTraw: 23.759 | pTcorr:  5.221 | pTtrue_A:  15.578
Event: 191000 | Jet:  1 | pTraw: 98.448 | pTcorr:  49.117 | pTtrue_A:  48.300
Event: 191000 | Jet:  2 | pTraw: 88.190 | pTcorr:  24.163 | pTt

Event: 230000 | Jet:  3 | pTraw: 71.711 | pTcorr: -0.252 | pTtrue_A:  13.586
Event: 231000 | Jet:  2 | pTraw: 81.452 | pTcorr:  27.758 | pTtrue_A:  29.085
Event: 231000 | Jet:  2 | pTraw: 81.452 | pTcorr:  27.758 | pTtrue_B:  28.814
Event: 232000 | Jet:  0 | pTraw: 137.307 | pTcorr:  70.838 | pTtrue_A:  83.472
Event: 232000 | Jet:  0 | pTraw: 137.307 | pTcorr:  70.838 | pTtrue_B:  80.237
Event: 233000 | Jet:  0 | pTraw: 111.527 | pTcorr:  73.554 | pTtrue_A:  69.740
Event: 233000 | Jet:  1 | pTraw: 108.669 | pTcorr:  84.645 | pTtrue_A:  88.592
Event: 233000 | Jet:  1 | pTraw: 108.669 | pTcorr:  84.645 | pTtrue_B:  87.987
Event: 234000 | Jet:  2 | pTraw: 80.360 | pTcorr:  29.136 | pTtrue_A:  24.716
Event: 234000 | Jet:  2 | pTraw: 80.360 | pTcorr:  29.136 | pTtrue_B:  23.252
Event: 235000 | Jet:  0 | pTraw: 134.996 | pTcorr:  75.799 | pTtrue_A:  80.762
Event: 235000 | Jet:  0 | pTraw: 134.996 | pTcorr:  75.799 | pTtrue_B:  80.094
Event: 235000 | Jet:  1 | pTraw: 100.149 | pTcorr:  59.940

Event: 269000 | Jet:  3 | pTraw: 86.686 | pTcorr:  15.696 | pTtrue_A:  20.456
Event: 269000 | Jet:  3 | pTraw: 86.686 | pTcorr:  15.696 | pTtrue_B:  20.269
Event: 270000 | Jet:  2 | pTraw: 76.702 | pTcorr:  27.563 | pTtrue_A:  18.125
Event: 270000 | Jet:  2 | pTraw: 76.702 | pTcorr:  27.563 | pTtrue_B:  17.967
Event: 271000 | Jet:  2 | pTraw: 79.931 | pTcorr:  22.561 | pTtrue_A:  34.329
Event: 271000 | Jet:  2 | pTraw: 79.931 | pTcorr:  22.561 | pTtrue_B:  34.080
Event: 271000 | Jet:  6 | pTraw: 55.562 | pTcorr:  14.881 | pTtrue_A:  20.426
Event: 272000 | Jet:  1 | pTraw: 62.448 | pTcorr:  14.033 | pTtrue_A:  12.453
Event: 273000 | Jet: 14 | pTraw: 50.812 | pTcorr: -2.135 | pTtrue_A:  14.178
Event: 273000 | Jet: 17 | pTraw: 41.672 | pTcorr: -4.657 | pTtrue_A:  16.225
Event: 273000 | Jet: 17 | pTraw: 41.672 | pTcorr: -4.657 | pTtrue_B:  11.693
Event: 274000 | Jet:  2 | pTraw: 101.269 | pTcorr:  55.363 | pTtrue_A:  35.893
Event: 274000 | Jet:  2 | pTraw: 101.269 | pTcorr:  55.363 | pTtru

Event: 306000 | Jet:  0 | pTraw: 129.544 | pTcorr:  72.838 | pTtrue_A:  74.984
Event: 306000 | Jet:  0 | pTraw: 129.544 | pTcorr:  72.838 | pTtrue_B:  73.779
Event: 306000 | Jet:  1 | pTraw: 102.943 | pTcorr:  46.237 | pTtrue_A:  52.632
Event: 307000 | Jet:  1 | pTraw: 88.030 | pTcorr:  36.991 | pTtrue_A:  31.514
Event: 307000 | Jet:  6 | pTraw: 54.406 | pTcorr:  6.491 | pTtrue_A:  12.328
Event: 307000 | Jet: 15 | pTraw: 40.338 | pTcorr:  8.047 | pTtrue_A:  14.776
Event: 307000 | Jet: 15 | pTraw: 40.338 | pTcorr:  8.047 | pTtrue_B:  14.292
Event: 308000 | Jet:  2 | pTraw: 93.401 | pTcorr:  50.532 | pTtrue_A:  44.502
Event: 308000 | Jet:  2 | pTraw: 93.401 | pTcorr:  50.532 | pTtrue_B:  17.942
Event: 309000 | Jet:  2 | pTraw: 83.088 | pTcorr:  28.376 | pTtrue_A:  11.153
Event: 309000 | Jet:  6 | pTraw: 75.922 | pTcorr:  25.588 | pTtrue_A:  14.985
Event: 309000 | Jet:  6 | pTraw: 75.922 | pTcorr:  25.588 | pTtrue_B:  14.765
Event: 310000 | Jet:  5 | pTraw: 62.305 | pTcorr:  13.124 | pTtr

Event: 344000 | Jet:  0 | pTraw: 138.507 | pTcorr:  82.860 | pTtrue_A:  80.556
Event: 344000 | Jet:  0 | pTraw: 138.507 | pTcorr:  82.860 | pTtrue_B:  79.961
Event: 344000 | Jet:  1 | pTraw: 92.441 | pTcorr:  36.794 | pTtrue_A:  27.281
Event: 345000 | Jet:  0 | pTraw: 117.101 | pTcorr:  57.261 | pTtrue_A:  66.890
Event: 345000 | Jet:  1 | pTraw: 101.191 | pTcorr:  65.730 | pTtrue_A:  80.580
Event: 345000 | Jet:  1 | pTraw: 101.191 | pTcorr:  65.730 | pTtrue_B:  80.260
Event: 345000 | Jet:  6 | pTraw: 61.000 | pTcorr:  10.025 | pTtrue_A:  14.685
Event: 345000 | Jet:  6 | pTraw: 61.000 | pTcorr:  10.025 | pTtrue_B:  14.537
Event: 347000 | Jet:  1 | pTraw: 84.068 | pTcorr:  30.621 | pTtrue_A:  30.538
Event: 348000 | Jet:  1 | pTraw: 60.442 | pTcorr:  12.938 | pTtrue_A:  23.302
Event: 349000 | Jet:  1 | pTraw: 88.268 | pTcorr:  44.720 | pTtrue_A:  35.555
Event: 349000 | Jet:  5 | pTraw: 68.057 | pTcorr:  11.562 | pTtrue_A:  30.897
Event: 349000 | Jet:  5 | pTraw: 68.057 | pTcorr:  11.562 |

Event: 385000 | Jet:  0 | pTraw: 82.254 | pTcorr:  12.861 | pTtrue_A:  39.881
Event: 385000 | Jet:  0 | pTraw: 82.254 | pTcorr:  12.861 | pTtrue_B:  29.644
Event: 385000 | Jet: 15 | pTraw: 51.794 | pTcorr: -3.993 | pTtrue_B:  11.329
Event: 386000 | Jet:  0 | pTraw: 126.208 | pTcorr:  62.700 | pTtrue_A:  64.069
Event: 386000 | Jet:  0 | pTraw: 126.208 | pTcorr:  62.700 | pTtrue_B:  63.446
Event: 386000 | Jet: 12 | pTraw: 55.073 | pTcorr:  0.457 | pTtrue_A:  10.436
Event: 387000 | Jet:  1 | pTraw: 80.964 | pTcorr:  16.281 | pTtrue_A:  16.789
Event: 388000 | Jet:  2 | pTraw: 87.028 | pTcorr:  57.642 | pTtrue_A:  63.148
Event: 388000 | Jet:  2 | pTraw: 87.028 | pTcorr:  57.642 | pTtrue_B:  44.187
Event: 388000 | Jet:  3 | pTraw: 85.200 | pTcorr:  40.204 | pTtrue_A:  31.241
Event: 388000 | Jet:  4 | pTraw: 62.502 | pTcorr:  21.179 | pTtrue_A:  20.910
Event: 388000 | Jet:  4 | pTraw: 62.502 | pTcorr:  21.179 | pTtrue_B:  18.127
Event: 389000 | Jet:  1 | pTraw: 125.572 | pTcorr:  69.429 | pTt

Event: 422000 | Jet:  2 | pTraw: 96.615 | pTcorr:  48.943 | pTtrue_A:  48.694
Event: 422000 | Jet:  2 | pTraw: 96.615 | pTcorr:  48.943 | pTtrue_B:  42.689
Event: 423000 | Jet:  2 | pTraw: 68.794 | pTcorr:  17.688 | pTtrue_A:  13.418
Event: 423000 | Jet:  4 | pTraw: 57.683 | pTcorr:  10.749 | pTtrue_A:  10.635
Event: 423000 | Jet:  4 | pTraw: 57.683 | pTcorr:  10.749 | pTtrue_B:  10.493
Event: 423000 | Jet:  6 | pTraw: 55.964 | pTcorr:  7.986 | pTtrue_A:  14.952
Event: 423000 | Jet:  6 | pTraw: 55.964 | pTcorr:  7.986 | pTtrue_B:  14.768
Event: 424000 | Jet:  0 | pTraw: 108.616 | pTcorr:  59.577 | pTtrue_A:  52.307
Event: 424000 | Jet:  0 | pTraw: 108.616 | pTcorr:  59.577 | pTtrue_B:  49.974
Event: 424000 | Jet:  1 | pTraw: 90.708 | pTcorr:  35.131 | pTtrue_A:  43.527
Event: 424000 | Jet:  2 | pTraw: 74.821 | pTcorr:  26.871 | pTtrue_A:  17.790
Event: 424000 | Jet:  2 | pTraw: 74.821 | pTcorr:  26.871 | pTtrue_B:  16.457
Event: 425000 | Jet:  1 | pTraw: 109.726 | pTcorr:  50.532 | pTt

Event: 462000 | Jet:  2 | pTraw: 102.122 | pTcorr:  46.469 | pTtrue_A:  57.098
Event: 462000 | Jet:  2 | pTraw: 102.122 | pTcorr:  46.469 | pTtrue_B:  10.901
Event: 463000 | Jet:  1 | pTraw: 61.059 | pTcorr:  7.734 | pTtrue_A:  10.059
Event: 463000 | Jet:  2 | pTraw: 59.868 | pTcorr:  14.300 | pTtrue_A:  13.413
Event: 463000 | Jet:  2 | pTraw: 59.868 | pTcorr:  14.300 | pTtrue_B:  12.196
Event: 464000 | Jet:  2 | pTraw: 73.532 | pTcorr:  14.455 | pTtrue_A:  19.895
Event: 464000 | Jet:  2 | pTraw: 73.532 | pTcorr:  14.455 | pTtrue_B:  19.723
Event: 464000 | Jet: 12 | pTraw: 49.182 | pTcorr:  1.688 | pTtrue_A:  15.103
Event: 465000 | Jet:  0 | pTraw: 66.291 | pTcorr:  22.217 | pTtrue_A:  16.880
Event: 465000 | Jet:  0 | pTraw: 66.291 | pTcorr:  22.217 | pTtrue_B:  16.717
Event: 466000 | Jet:  0 | pTraw: 135.048 | pTcorr:  88.582 | pTtrue_A:  83.524
Event: 466000 | Jet:  0 | pTraw: 135.048 | pTcorr:  88.582 | pTtrue_B:  82.826
Event: 467000 | Jet:  1 | pTraw: 85.590 | pTcorr:  23.584 | pT

Event: 499000 | Jet:  1 | pTraw: 123.491 | pTcorr:  66.463 | pTtrue_A:  66.241
All data transferred to array. Testing with 895510 A-jets and 471838 B-jets.

Training set A: 895510 / 895510 / 895510
Training set B: 471838 / 471838 / 471838
Input file closed.
Ready! 2022/10/07 17:50:30


# --- code below can be run in any order without errors ---

## Training & Testing - 3 Input Features
Uses: pt_corr, jet_area, jet_rho

In [7]:
# Set Features to train with
#X_values[
#    0  jet_pt_raw,      1  jet_pt_corr,     2  jet_mass,        3  jet_area, 
#    4  jet_area_err,    5  jet_const_n,     6  const_pt_mean,   7  const_pt_median, 
#    8  const_1_pt,      9  const_2_pt,      10 const_3_pt,      11 const_4_pt,
#    12 const_5_pt,      13 const_6_pt,      14 const_7_pt,      15 const_8_pt,
#    16 const_9_pt,      17 const_10_pt,     18 jet_y,           19 jet_phi,
#    20 jet_rho]

# Training with 3 features
features_labels_3feat = [
    "Feature Importances:", 
    "    jet_pt_corr:    ", "    jet_area:       ", "    jet_rho:        "]

# Train and test with pT_True = pT_PYTHIA

X_3feat_train_A = [ 
    [X_train_A[i][1],  X_train_A[i][3],  X_train_A[i][20]]
    for i in range(len(X_train_A)) ]

lr_pipeline_A, rf_pipeline_A, nn_pipeline_A, features_arr_A = Train_ML_pt_Estimators(
    X_3feat_train_A, y_train_A, features_labels_3feat)

for i in range(len(test_file_names)) :
    
    pt_true_min = float(test_tree_names[i][5:7])
    pt_true_max = float(test_tree_names[i][8:10])
    print(pt_true_min, pt_true_max)
    
    X_test_A, y_test_A, sc_corr_test_arr_A, X_test_B, y_test_B, sc_corr_test_arr_B = Build_ML_Feature_Arrays_ptTrue(
        test_file_paths[i], test_tree_names[i], pt_true_min, pt_true_max)
    
    X_3feat_test_A = [ 
        [X_test_A[i][1],  X_test_A[i][3],  X_test_A[i][20]]
        for i in range(len(X_test_A)) ]
    
    Test_ML_pt_Estimators(
        X_3feat_test_A, 
        y_test_A, 
        sc_corr_test_arr_A,
        lr_pipeline_A, rf_pipeline_A, nn_pipeline_A, features_arr_A,
        test_tree_names[i] + "_3feat_ptTruePythia", 
        output_file_path, 
        40, -40., 40.,
        True, # If true, outputs feature importance
        True, # If true, compares to paper plots using 40, -40., 40., as limits
        X_test_A)
    
    
    
# Train and test with pT_True = pT_raw * SUM(pT_constituents, PYTHIA)/SUM(pt_constituents, Jet)
use_ptTrue_paper = False
if use_ptTrue_paper :
    
    X_3feat_train_B = [ 
    [X_train_B[i][1],  X_train_B[i][3],  X_train_B[i][20]]
    for i in range(len(X_train_B)) ]
    
    lr_pipeline_B, rf_pipeline_B, nn_pipeline_B, features_arr_B = Train_ML_pt_Estimators(
    X_3feat_train_B, y_train_B, features_labels_3feat)
    
    for i in range(len(test_file_names)) :
        
        pt_true_min = float(test_tree_names[i][5:7])
        pt_true_max = float(test_tree_names[i][8:10])
        print(pt_true_min, pt_true_max)
        
        X_test_A, y_test_A, sc_corr_test_arr_A, X_test_B, y_test_B, sc_corr_test_arr_B = Build_ML_Feature_Arrays_ptTrue(
            test_file_paths[i], test_tree_name[i], pt_true_min, pt_true_max)
        
        X_3feat_test_B = [ 
            [X_test_B[i][1],  X_test_B[i][3],  X_test_B[i][20]]
            for i in range(len(X_test_B)) ]

        Test_ML_pt_Estimators(
            X_3feat_test_B, 
            y_test_B, 
            sc_corr_test_arr_B,
            lr_pipeline_B, rf_pipeline_B, nn_pipeline_B, features_arr_B,
            test_tree_names[i] + "_3feat_ptTruePaper", 
            output_file_path, 
            40, -40., 40.,
            True, # If true, outputs feature importance
            True, # If true, compares to paper plots using 40, -40., 40., as limits
            X_test_B)

now = datetime.now()
dt_string = now.strftime("%Y/%m/%d %H:%M:%S")
    
print("Complete!", dt_string)

895510 895510

----- Fitting Linear Regression Estimator -----


Linear Regression Fit:
 Pipeline(steps=[('linearregression', LinearRegression())])
Regression Coefficients:
 [ 0.83334943 -4.2224771   0.05810091]

----- Fitting Random Forest Regression Estimator -----


Random Tree Regression Fit:
 Pipeline(steps=[('randomforestregressor', RandomForestRegressor())])
Feature Importances:
    jet_pt_corr:     0.9241317304169581
    jet_area:        0.017936379836373934
    jet_rho:         0.057931889746667964

----- Fitting Neural Network Regression Estimator -----


Multilayer Perceptron Regression Fit:
 Pipeline(steps=[('mlpregressor', MLPRegressor(max_iter=1000))])
40.0 60.0
Input file accessed successfully. Output file generated.
Accessing input tree...
Input tree accessed successfully.
Preparing to collect data from tree...
Event:   0 | Jet:  0 | pTraw: 86.186 | pTcorr:  32.942 | pTtrue_A:  43.324
Event:   0 | Jet:  0 | pTraw: 86.186 | pTcorr:  32.942 | pTtrue_B:  42.961
Event: 1000

Event: 50000 | Jet:  0 | pTraw: 102.667 | pTcorr:  44.915 | pTtrue_A:  42.647
Event: 50000 | Jet:  0 | pTraw: 102.667 | pTcorr:  44.915 | pTtrue_B:  42.361
Event: 51000 | Jet:  0 | pTraw: 113.849 | pTcorr:  61.564 | pTtrue_A:  45.363
Event: 51000 | Jet:  0 | pTraw: 113.849 | pTcorr:  61.564 | pTtrue_B:  44.934
Event: 52000 | Jet:  1 | pTraw: 83.100 | pTcorr:  28.701 | pTtrue_A:  40.060
Event: 53000 | Jet:  0 | pTraw: 115.643 | pTcorr:  57.510 | pTtrue_A:  42.211
Event: 53000 | Jet:  0 | pTraw: 115.643 | pTcorr:  57.510 | pTtrue_B:  41.717
Event: 53000 | Jet:  1 | pTraw: 93.305 | pTcorr:  31.539 | pTtrue_A:  41.603
Event: 54000 | Jet:  0 | pTraw: 88.440 | pTcorr:  38.562 | pTtrue_A:  40.245
Event: 55000 | Jet:  0 | pTraw: 95.201 | pTcorr:  39.856 | pTtrue_A:  44.111
Event: 55000 | Jet:  0 | pTraw: 95.201 | pTcorr:  39.856 | pTtrue_B:  42.866
Event: 56000 | Jet:  0 | pTraw: 109.386 | pTcorr:  45.385 | pTtrue_A:  44.189
Event: 56000 | Jet:  0 | pTraw: 109.386 | pTcorr:  45.385 | pTtrue_B:

Test 104600: True: 40.595 , Pred(line): 44.105( 3.510) , Pred(tree): 44.331( 3.736) , Pred(perc): 43.842( 3.247)
Test 104700: True: 44.637 , Pred(line): 33.501(-11.135) , Pred(tree): 31.881(-12.756) , Pred(perc): 31.726(-12.910)
Test 104800: True: 57.180 , Pred(line): 50.601(-6.579) , Pred(tree): 48.749(-8.431) , Pred(perc): 52.709(-4.471)
Output file written to.
Output file closed.
Test    0: True: 43.324 , Pred(line): 34.299(-9.024) , Pred(tree): 26.244(-17.080) , Pred(perc): 31.593(-11.731)
Test  100: True: 47.524 , Pred(line): 48.243( 0.719) , Pred(tree): 47.216(-0.308) , Pred(perc): 49.614( 2.090)
Test  200: True: 47.250 , Pred(line): 52.029( 4.778) , Pred(tree): 56.600( 9.349) , Pred(perc): 55.105( 7.855)
Test  300: True: 41.902 , Pred(line): 60.635( 18.732) , Pred(tree): 57.466( 15.563) , Pred(perc): 65.101( 23.198)
Test  400: True: 41.461 , Pred(line): 34.870(-6.591) , Pred(tree): 31.090(-10.371) , Pred(perc): 32.631(-8.830)
Test  500: True: 49.186 , Pred(line): 54.498( 5.313) 

Test 40700: True: 44.224 , Pred(line): 42.006(-2.218) , Pred(tree): 38.256(-5.969) , Pred(perc): 41.151(-3.073)
Test 40800: True: 44.831 , Pred(line): 36.767(-8.064) , Pred(tree): 40.112(-4.720) , Pred(perc): 35.563(-9.269)
Test 40900: True: 52.108 , Pred(line): 33.853(-18.255) , Pred(tree): 34.728(-17.380) , Pred(perc): 32.270(-19.838)
Test 41000: True: 55.741 , Pred(line): 48.197(-7.544) , Pred(tree): 54.808(-0.933) , Pred(perc): 49.110(-6.631)
Test 41100: True: 40.884 , Pred(line): 50.771( 9.887) , Pred(tree): 48.486( 7.602) , Pred(perc): 53.838( 12.954)
Test 41200: True: 43.106 , Pred(line): 44.584( 1.478) , Pred(tree): 40.392(-2.714) , Pred(perc): 44.480( 1.374)
Test 41300: True: 40.875 , Pred(line): 34.622(-6.253) , Pred(tree): 34.826(-6.049) , Pred(perc): 33.699(-7.176)
Test 41400: True: 45.029 , Pred(line): 36.369(-8.660) , Pred(tree): 42.950(-2.079) , Pred(perc): 35.164(-9.865)
Test 41500: True: 43.266 , Pred(line): 40.771(-2.495) , Pred(tree): 37.223(-6.043) , Pred(perc): 39.

Test 99900: True: 44.706 , Pred(line): 45.065( 0.359) , Pred(tree): 45.359( 0.652) , Pred(perc): 45.073( 0.366)
Test 100000: True: 42.427 , Pred(line): 37.037(-5.390) , Pred(tree): 36.192(-6.235) , Pred(perc): 36.787(-5.640)
Test 100100: True: 40.128 , Pred(line): 31.725(-8.403) , Pred(tree): 29.538(-10.590) , Pred(perc): 29.917(-10.211)
Test 100200: True: 49.710 , Pred(line): 50.968( 1.257) , Pred(tree): 53.253( 3.542) , Pred(perc): 53.938( 4.228)
Test 100300: True: 46.182 , Pred(line): 55.295( 9.113) , Pred(tree): 56.156( 9.974) , Pred(perc): 58.008( 11.825)
Test 100400: True: 44.093 , Pred(line): 44.621( 0.528) , Pred(tree): 37.845(-6.248) , Pred(perc): 44.860( 0.767)
Test 100500: True: 48.211 , Pred(line): 39.881(-8.330) , Pred(tree): 36.601(-11.610) , Pred(perc): 39.485(-8.726)
Test 100600: True: 44.220 , Pred(line): 43.993(-0.226) , Pred(tree): 46.485( 2.266) , Pred(perc): 43.658(-0.562)
Test 100700: True: 44.286 , Pred(line): 35.232(-9.054) , Pred(tree): 24.881(-19.405) , Pred(p

## Training & Testing - 8 Input Features
Uses: jet_pt_raw, jet_pt_corr, const_pt_mean, const_pt_median, const_1_pt, const_2_pt, const_3_pt, const_4_pt

In [11]:
# Set Features to train with
#X_values[
#    0  jet_pt_raw,      1  jet_pt_corr,     2  jet_mass,        3  jet_area, 
#    4  jet_area_err,    5  jet_const_n,     6  const_pt_mean,   7  const_pt_median, 
#    8  const_1_pt,      9  const_2_pt,      10 const_3_pt,      11 const_4_pt,
#    12 const_5_pt,      13 const_6_pt,      14 const_7_pt,      15 const_8_pt,
#    16 const_9_pt,      17 const_10_pt,     18 jet_y,           19 jet_phi,
#    20 jet_rho]

# Training with 8 features
features_labels_8feat = [
    "Feature Importances:", 
    "    jet_pt_raw:     ", "    jet_pt_corr:    ", "    const_pt_mean:  ", "    const_pt_median:", 
    "    const_1_pt:     ", "    const_2_pt:     ", "    const_3_pt:     ", "    const_4_pt:     "]

# Train and test with pT_True = pT_PYTHIA

X_8feat_train_A = [ 
    [X_train_A[i][0],  X_train_A[i][1],  X_train_A[i][6],  X_train_A[i][7],
     X_train_A[i][8],  X_train_A[i][9],  X_train_A[i][10], X_train_A[i][11]]
    for i in range(len(X_train_A)) ]
        
lr_pipeline_A, rf_pipeline_A, nn_pipeline_A, features_arr_A = Train_ML_pt_Estimators(
    X_8feat_train_A, y_train_A, features_labels_8feat)

for i in range(len(test_file_names)) :
    
    pt_true_min = float(test_tree_names[i][5:7])
    pt_true_max = float(test_tree_names[i][8:10])
    print(pt_true_min, pt_true_max)
    
    X_test_A, y_test_A, sc_corr_test_arr_A, X_test_B, y_test_B, sc_corr_test_arr_B = Build_ML_Feature_Arrays_ptTrue(
        test_file_paths[i], test_tree_names[i], pt_true_min, pt_true_max)
    
    X_8feat_test_A = [ 
        [X_test_A[i][0],  X_test_A[i][1],  X_test_A[i][6],  X_test_A[i][7],
         X_test_A[i][8],  X_test_A[i][9],  X_test_A[i][10], X_test_A[i][11]]
        for i in range(len(X_test_A)) ]
    
    Test_ML_pt_Estimators(
        X_8feat_test_A, 
        y_test_A, 
        sc_corr_test_arr_A,
        lr_pipeline_A, rf_pipeline_A, nn_pipeline_A, features_arr_A,
        test_tree_names[i] + "_8feat_ptTruePythia", 
        output_file_path, 
        40, -40., 40.,
        True, # If true, outputs feature importance
        True, # If true, compares to paper plots using 40, -40., 40., as limits
        X_test_A)

    
    
# Train and test with pT_True = pT_raw * SUM(pT_constituents, PYTHIA)/SUM(pt_constituents, Jet)

use_ptTrue_paper = False
if use_ptTrue_paper : 

    X_8feat_train_B = [ 
        [X_train_B[i][0],  X_train_B[i][1],  X_train_B[i][6],  X_train_B[i][7],
        X_train_B[i][8],  X_train_B[i][9],  X_train_B[i][10], X_train_B[i][11]]
        for i in range(len(X_train_B)) ]
    
    lr_pipeline_B, rf_pipeline_B, nn_pipeline_B, features_arr_B = Train_ML_pt_Estimators(
        X_8feat_train_B, y_train_B, features_labels_8feat)
    
    for testing_tree_name in testing_tree_names :
        
        pt_true_min = float(testing_tree_name[5:7])
        pt_true_max = float(testing_tree_name[8:10])
        print(pt_true_min, pt_true_max)
        
        X_8feat_test_B = [ 
            [X_test_B[i][0],  X_test_B[i][1],  X_test_B[i][6],  X_test_B[i][7],
             X_test_B[i][8],  X_test_B[i][9],  X_test_B[i][10], X_test_B[i][11]]
            for i in range(len(X_test_B)) ]

        Test_ML_pt_Estimators(
            X_8feat_test_B, 
            y_test_B, 
            sc_corr_test_arr_B,
            lr_pipeline_B, rf_pipeline_B, nn_pipeline_B, features_arr_B,
            test_tree_names[i] + "_8feat_ptTruePaper", 
            output_file_path, 
            40, -40., 40.,
            True, # If true, outputs feature importance
            True, # If true, compares to paper plots using 40, -40., 40., as limits
            X_test_B)
        
        

        
now = datetime.now()
dt_string = now.strftime("%Y/%m/%d %H:%M:%S")
    
print("Complete!", dt_string)


----- Fitting Linear Regression Estimator -----


Linear Regression Fit:
 Pipeline(steps=[('linearregression', LinearRegression())])
Regression Coefficients:
 [  0.15161215   0.06322086  20.91012072 -19.1660505   -0.18805413
  -0.22549045  -0.22756319  -0.22692017]

----- Fitting Random Forest Regression Estimator -----


Random Tree Regression Fit:
 Pipeline(steps=[('randomforestregressor', RandomForestRegressor())])
Feature Importances:
    jet_pt_raw:      0.14124678521781353
    jet_pt_corr:     0.2405749704920108
    const_pt_mean:   0.21265129912976025
    const_pt_median: 0.13933756723770674
    const_1_pt:      0.06635569569391943
    const_2_pt:      0.06670595958207276
    const_3_pt:      0.06631942638807656
    const_4_pt:      0.06680829625864004

----- Fitting Neural Network Regression Estimator -----


Multilayer Perceptron Regression Fit:
 Pipeline(steps=[('mlpregressor', MLPRegressor(max_iter=1000))])

----- Fitting Linear Regression Estimator -----


Linear Regressio

Event: 42000 | Jet:  0 | pTraw: 115.660 | pTcorr:  62.628 | pTtrue_A:  48.329
Event: 42000 | Jet:  0 | pTraw: 115.660 | pTcorr:  62.628 | pTtrue_B:  48.069
Event: 43000 | Jet:  0 | pTraw: 82.579 | pTcorr:  37.599 | pTtrue_A:  41.771
Event: 44000 | Jet:  0 | pTraw: 102.490 | pTcorr:  52.479 | pTtrue_A:  45.285
Event: 44000 | Jet:  0 | pTraw: 102.490 | pTcorr:  52.479 | pTtrue_B:  45.059
Event: 45000 | Jet:  0 | pTraw: 113.819 | pTcorr:  51.082 | pTtrue_A:  47.749
Event: 45000 | Jet:  0 | pTraw: 113.819 | pTcorr:  51.082 | pTtrue_B:  47.305
Event: 46000 | Jet:  0 | pTraw: 108.341 | pTcorr:  57.965 | pTtrue_A:  57.599
Event: 46000 | Jet:  0 | pTraw: 108.341 | pTcorr:  57.965 | pTtrue_B:  57.072
Event: 46000 | Jet:  1 | pTraw: 107.450 | pTcorr:  56.106 | pTtrue_A:  55.858
Event: 47000 | Jet:  0 | pTraw: 100.689 | pTcorr:  45.584 | pTtrue_A:  42.862
Event: 47000 | Jet:  0 | pTraw: 100.689 | pTcorr:  45.584 | pTtrue_B:  42.454
Event: 48000 | Jet:  0 | pTraw: 109.107 | pTcorr:  54.669 | pTtru

Event: 98000 | Jet:  0 | pTraw: 117.412 | pTcorr:  60.572 | pTtrue_A:  58.232
Event: 98000 | Jet:  0 | pTraw: 117.412 | pTcorr:  60.572 | pTtrue_B:  52.602
Event: 98000 | Jet:  1 | pTraw: 106.389 | pTcorr:  42.729 | pTtrue_A:  43.927
Event: 99000 | Jet:  1 | pTraw: 99.761 | pTcorr:  36.338 | pTtrue_A:  46.837
All data transferred to array. Testing with 104877 A-jets and 82889 B-jets.

Training set A: 104877 / 104877 / 104877
Training set B: 82889 / 82889 / 82889
Input file closed.

----- Starting ML testing for 40_60_Test -----




Starting testing with 8 input features.


----- Testing Linear Regression Estimator -----

Mean Square Error:
 16.576880494372237
Variance Score:
 0.39608774041959494

----- Testing Random Forest Regression Estimator -----

Mean Square Error:
 15.726029312101998
Variance Score:
 0.42708509605752243

----- Testing Neural Network Regression Estimator -----

Mean Square Error:
 15.24471202610635
Variance Score:
 0.4446199639634243
Starting event 0...
Test    0:

Starting event 16505...
Starting event 16506...
Starting event 16507...
Starting event 16508...
Starting event 16509...
Starting event 16510...
Starting event 16511...
Starting event 16512...
Starting event 16513...
Starting event 16514...
Starting event 16515...
Starting event 16516...
Starting event 16517...
Starting event 16518...
Starting event 16519...
Starting event 16520...
Starting event 16521...
Starting event 16522...
Starting event 16523...
Starting event 16524...
Starting event 16525...
Starting event 16526...
Starting event 16527...
Starting event 16528...
Starting event 16529...
Starting event 16530...
Starting event 16531...
Starting event 16532...
Starting event 16533...
Starting event 16534...
Starting event 16535...
Starting event 16536...
Starting event 16537...
Starting event 16538...
Starting event 16539...
Starting event 16540...
Starting event 16541...
Starting event 16542...
Starting event 16543...
Starting event 16544...
Starting event 16545...
Starting event 1

Starting event 64825...
Starting event 64826...
Starting event 64827...
Starting event 64828...
Starting event 64829...
Starting event 64830...
Starting event 64831...
Starting event 64832...
Starting event 64833...
Starting event 64834...
Starting event 64835...
Starting event 64836...
Starting event 64837...
Starting event 64838...
Starting event 64839...
Starting event 64840...
Starting event 64841...
Starting event 64842...
Starting event 64843...
Starting event 64844...
Starting event 64845...
Starting event 64846...
Starting event 64847...
Starting event 64848...
Starting event 64849...
Starting event 64850...
Starting event 64851...
Starting event 64852...
Starting event 64853...
Starting event 64854...
Starting event 64855...
Starting event 64856...
Starting event 64857...
Starting event 64858...
Starting event 64859...
Starting event 64860...
Starting event 64861...
Starting event 64862...
Starting event 64863...
Starting event 64864...
Starting event 64865...
Starting event 6

Mean Square Error:
 13.70272060990778
Variance Score:
 0.46314757072622914

----- Testing Neural Network Regression Estimator -----

Mean Square Error:
 16.07297522969151
Variance Score:
 0.3702844826685038
Starting event 0...
Test    0: True: 42.961 , Pred(line): 44.313( 1.352) , Pred(tree): 42.348(-0.612) , Pred(perc): 45.763( 2.802)
Starting event 1...
Starting event 2...
Starting event 3...
Starting event 4...
Starting event 5...
Starting event 6...
Starting event 7...
Starting event 8...
Starting event 9...
Starting event 10...
Starting event 11...
Starting event 12...
Starting event 13...
Starting event 14...
Starting event 15...
Starting event 16...
Starting event 17...
Starting event 18...
Starting event 19...
Starting event 20...
Starting event 21...
Starting event 22...
Starting event 23...
Starting event 24...
Starting event 25...
Starting event 26...
Starting event 27...
Starting event 28...
Starting event 29...
Starting event 30...
Starting event 31...
Starting event 32...

Starting event 17892...
Starting event 17893...
Starting event 17894...
Starting event 17895...
Starting event 17896...
Starting event 17897...
Starting event 17898...
Starting event 17899...
Starting event 17900...
Test 17900: True: 45.735 , Pred(line): 43.893(-1.842) , Pred(tree): 43.761(-1.974) , Pred(perc): 45.296(-0.439)
Starting event 17901...
Starting event 17902...
Starting event 17903...
Starting event 17904...
Starting event 17905...
Starting event 17906...
Starting event 17907...
Starting event 17908...
Starting event 17909...
Starting event 17910...
Starting event 17911...
Starting event 17912...
Starting event 17913...
Starting event 17914...
Starting event 17915...
Starting event 17916...
Starting event 17917...
Starting event 17918...
Starting event 17919...
Starting event 17920...
Starting event 17921...
Starting event 17922...
Starting event 17923...
Starting event 17924...
Starting event 17925...
Starting event 17926...
Starting event 17927...
Starting event 17928...


Starting event 44918...
Starting event 44919...
Starting event 44920...
Starting event 44921...
Starting event 44922...
Starting event 44923...
Starting event 44924...
Starting event 44925...
Starting event 44926...
Starting event 44927...
Starting event 44928...
Starting event 44929...
Starting event 44930...
Starting event 44931...
Starting event 44932...
Starting event 44933...
Starting event 44934...
Starting event 44935...
Starting event 44936...
Starting event 44937...
Starting event 44938...
Starting event 44939...
Starting event 44940...
Starting event 44941...
Starting event 44942...
Starting event 44943...
Starting event 44944...
Starting event 44945...
Starting event 44946...
Starting event 44947...
Starting event 44948...
Starting event 44949...
Starting event 44950...
Starting event 44951...
Starting event 44952...
Starting event 44953...
Starting event 44954...
Starting event 44955...
Starting event 44956...
Starting event 44957...
Starting event 44958...
Starting event 4

## Training with 12 Features
#### WARNING: NOT YET TESTED!

In [10]:
# Set Features to train with
#X_values[
#    0  jet_pt_raw,      1  jet_pt_corr,     2  jet_mass,        3  jet_area, 
#    4  jet_area_err,    5  jet_const_n,     6  const_pt_mean,   7  const_pt_median, 
#    8  const_1_pt,      9  const_2_pt,      10 const_3_pt,      11 const_4_pt,
#    12 const_5_pt,      13 const_6_pt,      14 const_7_pt,      15 const_8_pt,
#    16 const_9_pt,      17 const_10_pt,     18 jet_y,           19 jet_phi,
#    20 jet_rho]

# Training with 12 features
features_labels_12feat = [
    "Feature Importances:", 
    "    jet_pt_raw:     ", "    jet_pt_corr:    ", "    jet_mass:       ", "    jet_area:       ", 
    "    jet_const_n:    ", "    const_pt_mean:  ", "    const_1_pt:     ", "    const_2_pt:     ",
    "    const_3_pt:     ", "    const_4_pt:     ", "    jet_y:          ", "    jet_rho:        "]

# Train and test with pT_True = pT_PYTHIA

X_12feat_train_A = [ 
    [X_train_A[i][0],  X_train_A[i][1],  X_train_A[i][2],  X_train_A[i][3],
     X_train_A[i][5],  X_train_A[i][6],  X_train_A[i][8],  X_train_A[i][9],
     X_train_A[i][10], X_train_A[i][11], X_train_A[i][18], X_train_A[i][20]]
    for i in range(len(X_train_A)) ]

lr_pipeline_A, rf_pipeline_A, nn_pipeline_A, features_arr_A = Train_ML_pt_Estimators(
    X_12feat_train_A, y_train_A, features_labels_12feat)

for i in range(len(test_file_names)) :
    
    pt_true_min = float(test_tree_names[i][5:7])
    pt_true_max = float(test_tree_names[i][8:10])
    print(pt_true_min, pt_true_max)
    
    X_test_A, y_test_A, sc_corr_test_arr_A, X_test_B, y_test_B, sc_corr_test_arr_B = Build_ML_Feature_Arrays_ptTrue(
        test_file_paths[i], test_tree_names[i], pt_true_min, pt_true_max)
    
    X_12feat_test_A = [ 
        [X_test_A[i][0],  X_test_A[i][1],  X_test_A[i][2],  X_test_A[i][3],
         X_test_A[i][5],  X_test_A[i][6],  X_test_A[i][8],  X_test_A[i][9],
         X_test_A[i][10], X_test_A[i][11], X_test_A[i][18], X_test_A[i][20]]
        for i in range(len(X_test_A)) ]
    
    Test_ML_pt_Estimators(
        X_12feat_test_A, 
        y_test_A, 
        sc_corr_test_arr_A,
        lr_pipeline_A, rf_pipeline_A, nn_pipeline_A, features_arr_A,
        test_tree_names[i] + "_12feat_ptTruePythia", 
        output_file_path, 
        40, -40., 40.,
        True, # If true, outputs feature importance
        True, # If true, compares to paper plots using 40, -40., 40., as limits
        X_test_A)

now = datetime.now()
dt_string = now.strftime("%Y/%m/%d %H:%M:%S")
    
print("Complete!", dt_string)


----- Fitting Linear Regression Estimator -----


Linear Regression Fit:
 Pipeline(steps=[('linearregression', LinearRegression())])
Regression Coefficients:
 [ 0.54526227  0.33159283  0.27309236 -0.55033334 -0.43472298  0.75897621
 -0.98699078 -0.95467312 -0.93194075 -0.952019    0.00439249  0.02078717]

----- Fitting Random Forest Regression Estimator -----


Random Tree Regression Fit:
 Pipeline(steps=[('randomforestregressor', RandomForestRegressor())])
Feature Importances:
    jet_pt_raw:      0.01909797138187302
    jet_pt_corr:     0.8576344023880679
    jet_mass:        0.0115637058734918
    jet_area:        0.004272150292271757
    jet_const_n:     0.0027860897367777402
    const_pt_mean:   0.050159053656443894
    const_1_pt:      0.0071717951814764475
    const_2_pt:      0.0072116107112265865
    const_3_pt:      0.0073006814940647
    const_4_pt:      0.007307201006323738
    jet_y:           0.018920006809082495
    jet_rho:         0.006575331468899958

----- Fitting N

Event: 47000 | Jet:  0 | pTraw: 100.689 | pTcorr:  40.497 | pTtrue_A:  42.862
Event: 47000 | Jet:  0 | pTraw: 100.689 | pTcorr:  40.497 | pTtrue_B:  42.454
Event: 48000 | Jet:  0 | pTraw: 109.107 | pTcorr:  59.008 | pTtrue_A:  42.728
Event: 48000 | Jet:  0 | pTraw: 109.107 | pTcorr:  59.008 | pTtrue_B:  42.127
Event: 49000 | Jet:  0 | pTraw: 75.542 | pTcorr:  28.228 | pTtrue_A:  46.305
Event: 49000 | Jet:  0 | pTraw: 75.542 | pTcorr:  28.228 | pTtrue_B:  45.966
Event: 50000 | Jet:  0 | pTraw: 102.667 | pTcorr:  44.915 | pTtrue_A:  42.647
Event: 50000 | Jet:  0 | pTraw: 102.667 | pTcorr:  44.915 | pTtrue_B:  42.361
Event: 51000 | Jet:  0 | pTraw: 113.849 | pTcorr:  61.564 | pTtrue_A:  45.363
Event: 51000 | Jet:  0 | pTraw: 113.849 | pTcorr:  61.564 | pTtrue_B:  44.934
Event: 52000 | Jet:  1 | pTraw: 83.100 | pTcorr:  28.701 | pTtrue_A:  40.060
Event: 53000 | Jet:  0 | pTraw: 115.643 | pTcorr:  57.510 | pTtrue_A:  42.211
Event: 53000 | Jet:  0 | pTraw: 115.643 | pTcorr:  57.510 | pTtrue_

Variance Score:
 -0.9866939373531707

----- Testing Neural Network Regression Estimator -----

Mean Square Error:
 52.122865981416204
Variance Score:
 -0.8988878988015976
Test    0: True: 43.324 , Pred(line): 38.215(-5.108) , Pred(tree): 39.047(-4.277) , Pred(perc): 40.374(-2.950)
Test  100: True: 47.524 , Pred(line): 48.687( 1.163) , Pred(tree): 50.276( 2.752) , Pred(perc): 50.420( 2.897)
Test  200: True: 47.250 , Pred(line): 53.077( 5.827) , Pred(tree): 52.590( 5.339) , Pred(perc): 49.065( 1.814)
Test  300: True: 41.902 , Pred(line): 53.448( 11.546) , Pred(tree): 53.796( 11.894) , Pred(perc): 51.322( 9.420)
Test  400: True: 41.461 , Pred(line): 33.063(-8.398) , Pred(tree): 37.614(-3.848) , Pred(perc): 36.327(-5.134)
Test  500: True: 49.186 , Pred(line): 48.362(-0.823) , Pred(tree): 50.876( 1.691) , Pred(perc): 51.091( 1.905)
Test  600: True: 44.661 , Pred(line): 51.095( 6.435) , Pred(tree): 53.434( 8.773) , Pred(perc): 54.355( 9.694)
Test  700: True: 40.501 , Pred(line): 39.519(-0.98

Test 95000: True: 45.758 , Pred(line): 38.632(-7.125) , Pred(tree): 39.871(-5.887) , Pred(perc): 41.864(-3.894)
Test 95100: True: 42.864 , Pred(line): 34.550(-8.314) , Pred(tree): 34.826(-8.038) , Pred(perc): 37.851(-5.013)
Test 95200: True: 50.875 , Pred(line): 46.921(-3.954) , Pred(tree): 49.890(-0.985) , Pred(perc): 49.581(-1.294)
Test 95300: True: 44.191 , Pred(line): 43.226(-0.965) , Pred(tree): 46.697( 2.506) , Pred(perc): 42.486(-1.704)
Test 95400: True: 45.469 , Pred(line): 47.441( 1.972) , Pred(tree): 49.010( 3.541) , Pred(perc): 50.479( 5.010)
Test 95500: True: 53.318 , Pred(line): 51.078(-2.241) , Pred(tree): 56.360( 3.041) , Pred(perc): 56.179( 2.860)
Test 95600: True: 53.286 , Pred(line): 54.379( 1.092) , Pred(tree): 58.138( 4.852) , Pred(perc): 56.843( 3.557)
Test 95700: True: 43.180 , Pred(line): 38.882(-4.297) , Pred(tree): 44.339( 1.160) , Pred(perc): 43.581( 0.401)
Test 95800: True: 56.352 , Pred(line): 53.130(-3.222) , Pred(tree): 60.106( 3.754) , Pred(perc): 60.498(

Test 29300: True: 49.757 , Pred(line): 41.466(-8.291) , Pred(tree): 42.688(-7.069) , Pred(perc): 42.459(-7.297)
Test 29400: True: 47.739 , Pred(line): 47.897( 0.158) , Pred(tree): 50.311( 2.571) , Pred(perc): 51.611( 3.871)
Test 29500: True: 52.928 , Pred(line): 54.592( 1.664) , Pred(tree): 56.758( 3.830) , Pred(perc): 57.335( 4.408)
Test 29600: True: 42.782 , Pred(line): 45.140( 2.358) , Pred(tree): 46.185( 3.404) , Pred(perc): 45.144( 2.362)
Test 29700: True: 47.862 , Pred(line): 39.868(-7.994) , Pred(tree): 44.373(-3.490) , Pred(perc): 42.011(-5.851)
Test 29800: True: 47.089 , Pred(line): 55.241( 8.152) , Pred(tree): 58.853( 11.764) , Pred(perc): 57.801( 10.713)
Test 29900: True: 54.756 , Pred(line): 54.644(-0.113) , Pred(tree): 60.639( 5.883) , Pred(perc): 59.408( 4.651)
Test 30000: True: 41.318 , Pred(line): 39.150(-2.168) , Pred(tree): 40.901(-0.417) , Pred(perc): 37.275(-4.043)
Test 30100: True: 44.902 , Pred(line): 43.309(-1.594) , Pred(tree): 47.585( 2.682) , Pred(perc): 45.81

Test 63900: True: 50.202 , Pred(line): 64.239( 14.038) , Pred(tree): 65.059( 14.857) , Pred(perc): 67.131( 16.930)
Test 64000: True: 47.507 , Pred(line): 44.456(-3.051) , Pred(tree): 45.257(-2.249) , Pred(perc): 48.097( 0.590)
Test 64100: True: 58.997 , Pred(line): 57.307(-1.690) , Pred(tree): 59.928( 0.930) , Pred(perc): 60.153( 1.156)
Test 64200: True: 46.748 , Pred(line): 43.571(-3.177) , Pred(tree): 51.271( 4.523) , Pred(perc): 47.911( 1.163)
Test 64300: True: 40.843 , Pred(line): 40.664(-0.179) , Pred(tree): 39.572(-1.271) , Pred(perc): 38.455(-2.388)
Test 64400: True: 44.389 , Pred(line): 45.408( 1.020) , Pred(tree): 50.642( 6.254) , Pred(perc): 49.629( 5.241)
Test 64500: True: 50.251 , Pred(line): 61.583( 11.333) , Pred(tree): 64.312( 14.062) , Pred(perc): 65.556( 15.306)
Test 64600: True: 55.668 , Pred(line): 49.095(-6.573) , Pred(tree): 57.812( 2.144) , Pred(perc): 54.045(-1.623)
Test 64700: True: 40.996 , Pred(line): 35.323(-5.674) , Pred(tree): 37.267(-3.729) , Pred(perc): 3

Test 101800: True: 51.230 , Pred(line): 48.567(-2.663) , Pred(tree): 51.413( 0.183) , Pred(perc): 49.650(-1.580)
Test 101900: True: 40.092 , Pred(line): 42.484( 2.391) , Pred(tree): 40.175( 0.083) , Pred(perc): 36.971(-3.122)
Test 102000: True: 43.003 , Pred(line): 43.565( 0.562) , Pred(tree): 45.483( 2.480) , Pred(perc): 42.146(-0.857)
Test 102100: True: 41.267 , Pred(line): 43.877( 2.610) , Pred(tree): 47.816( 6.549) , Pred(perc): 46.233( 4.966)
Test 102200: True: 40.259 , Pred(line): 46.158( 5.898) , Pred(tree): 48.611( 8.352) , Pred(perc): 48.694( 8.435)
Test 102300: True: 40.412 , Pred(line): 34.609(-5.803) , Pred(tree): 37.358(-3.054) , Pred(perc): 36.343(-4.069)
Test 102400: True: 41.181 , Pred(line): 42.393( 1.212) , Pred(tree): 45.358( 4.177) , Pred(perc): 44.669( 3.488)
Test 102500: True: 43.415 , Pred(line): 41.591(-1.825) , Pred(tree): 39.148(-4.267) , Pred(perc): 40.482(-2.934)
Test 102600: True: 46.325 , Pred(line): 50.927( 4.602) , Pred(tree): 47.901( 1.576) , Pred(perc)

## Testing with Individual Features
#### WARNING: THIS CODE IS OUT OF DATE AND NOT EXPECTED TO WORK

In [5]:
# Testing with individual Features

# NOTE: This block tests with individual features (IF) by replacing the ML Estimators 
# after each feature is checked.

X_ptRaw_train_A    = [ [X_train_A[i][0]] for i in range(len(X_train_A)) ]
X_ptRaw_train_B    = [ [X_train_B[i][0]] for i in range(len(X_train_B)) ]
X_ptCorr_train_A   = [ [X_train_A[i][1]] for i in range(len(X_train_A)) ]
X_ptCorr_train_B   = [ [X_train_B[i][1]] for i in range(len(X_train_B)) ]
#X_ptMean_train   = [ [X_values_A[i][6]] for i in range(len(X_values_A)) ]
#X_ptMedian_train = [ [X_values_A[i][7]] for i in range(len(X_values_A)) ]
#X_ptConst1_train = [ [X_values_A[i][8]] for i in range(len(X_values_A)) ]
#X_ptConst2_train = [ [X_values_A[i][9]] for i in range(len(X_values_A)) ]
#X_ptConst3_train = [ [X_values_A[i][10]] for i in range(len(X_values_A)) ]
#X_ptConst4_train = [ [X_values_A[i][11]] for i in range(len(X_values_A)) ]

lr_pipeline_ptRaw, rf_pipeline_ptRaw, nn_pipeline_ptRaw, features_arr_ptRaw = Train_ML_pt_Estimators(
    X_ptRaw_train_A, y_train_A)
lr_pipeline_ptCorr, rf_pipeline_ptCorr, nn_pipeline_ptCorr, features_arr_ptCorr = Train_ML_pt_Estimators(
    X_ptCorr_train_A, y_train_A)

for testing_tree_name in testing_tree_names :
    output_base_name = testing_tree_name[5:]
    pt_true_min = float(testing_tree_name[5:7])
    pt_true_max = float(testing_tree_name[8:10])
    print(pt_true_min, pt_true_max)
    
    X_test_A, y_test_A, sc_corr_test_arr_A, X_test_B, y_test_B, sc_corr_test_arr_B = Build_ML_Feature_Arrays(
        input_file_path, testing_tree_name, pt_true_min, pt_true_max)
    
    X_ptRaw_A  = [ [X_test_A[i][0]] for i in range(len(X_test_A)) ]
    X_ptCorr_A = [ [X_test_A[i][1]] for i in range(len(X_test_A)) ]
    #X_ptMean   = [ [X_values_A[i][6]] for i in range(len(X_values_A)) ]
    #X_ptMedian = [ [X_values_A[i][7]] for i in range(len(X_values_A)) ]
    #X_ptConst1 = [ [X_values_A[i][8]] for i in range(len(X_values_A)) ]
    #X_ptConst2 = [ [X_values_A[i][9]] for i in range(len(X_values_A)) ]
    #X_ptConst3 = [ [X_values_A[i][10]] for i in range(len(X_values_A)) ]
    #X_ptConst4 = [ [X_values_A[i][11]] for i in range(len(X_values_A)) ]

    Test_ML_pt_Estimators(
        X_ptRaw_A, 
        y_test_A, 
        sc_corr_test_arr_A,
        lr_pipeline_ptRaw, rf_pipeline_ptRaw, nn_pipeline_ptRaw, features_arr_ptRaw,
        output_base_name + "_only_ptRaw_ptTrueA_compare", 
        input_file_path, 
        40, -40., 40.,
        False, # If true, outputs feature importance
        True) # If true, compares to paper plots using 40, -40., 40., as limits

    Test_ML_pt_Estimators(
        X_ptCorr_A, 
        y_test_A, 
        sc_corr_test_arr_A,
        lr_pipeline_ptCorr, rf_pipeline_ptCorr, nn_pipeline_ptCorr, features_arr_ptCorr,
        output_base_name + "_only_ptCorr_ptTrueA_compare", 
        input_file_path,
        40, -40., 40.,
        False, # If true, outputs feature importance
        True) # If true, compares to paper plots using 40, -40., 40., as limits
    
print("Complete!")


----- Fitting Linear Regression Estimator -----


Linear Regression Fit:
 Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearregression', LinearRegression())])
Regression Coefficients:
 [2.37426878]

----- Fitting Random Forest Regression Estimator -----


Random Tree Regression Fit:
 Pipeline(steps=[('standardscaler', StandardScaler()),
                ('randomforestregressor', RandomForestRegressor())])

----- Fitting Neural Network Regression Estimator -----


Multilayer Perceptron Regression Fit:
 Pipeline(steps=[('standardscaler', StandardScaler()),
                ('mlpregressor', MLPRegressor(max_iter=1000))])

----- Fitting Linear Regression Estimator -----


Linear Regression Fit:
 Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearregression', LinearRegression())])
Regression Coefficients:
 [2.69166762]

----- Fitting Random Forest Regression Estimator -----


Random Tree Regression Fit:
 Pipeline(steps=[('standardscaler'

Mean Square Error:
 177.22514464090773
Variance Score:
 -86.35759756548218

----- Testing Neural Network Regression Estimator -----

Mean Square Error:
 167.63720289942077
Variance Score:
 -81.63152126388738
Test    0: True: 32.320 , Pred(line): 40.685( 8.366) , Pred(tree): 42.612( 10.292) , Pred(perc): 44.061( 11.741)
Test  100: True: 32.742 , Pred(line): 47.016( 14.274) , Pred(tree): 44.013( 11.271) , Pred(perc): 46.526( 13.784)
Test  200: True: 30.123 , Pred(line): 50.145( 20.022) , Pred(tree): 52.423( 22.300) , Pred(perc): 50.659( 20.536)
Test  300: True: 34.147 , Pred(line): 44.720( 10.574) , Pred(tree): 41.592( 7.445) , Pred(perc): 44.633( 10.486)
Test  400: True: 32.650 , Pred(line): 44.960( 12.310) , Pred(tree): 42.262( 9.612) , Pred(perc): 44.736( 12.085)
Test  500: True: 32.279 , Pred(line): 48.416( 16.137) , Pred(tree): 50.740( 18.460) , Pred(perc): 48.402( 16.123)
Test  600: True: 31.149 , Pred(line): 43.858( 12.709) , Pred(tree): 46.597( 15.448) , Pred(perc): 44.094( 12.94

Event: 2000 | Jet:  1 | pTraw: 101.643 | pTcorr:  31.621 | pTtrue_A:  35.155
Event: 4000 | Jet:  0 | pTraw: 86.517 | pTcorr:  37.874 | pTtrue_A:  39.727
Event: 4000 | Jet:  0 | pTraw: 86.517 | pTcorr:  37.874 | pTtrue_B:  39.727
Event: 6000 | Jet:  0 | pTraw: 86.605 | pTcorr:  33.852 | pTtrue_A:  39.433
Event: 6000 | Jet:  0 | pTraw: 86.605 | pTcorr:  33.852 | pTtrue_B:  39.433
Event: 8000 | Jet:  0 | pTraw: 103.382 | pTcorr:  37.318 | pTtrue_A:  39.213
Event: 8000 | Jet:  0 | pTraw: 103.382 | pTcorr:  37.318 | pTtrue_B:  39.213
Event: 10000 | Jet:  0 | pTraw: 94.222 | pTcorr:  44.807 | pTtrue_A:  39.047
Event: 10000 | Jet:  0 | pTraw: 94.222 | pTcorr:  44.807 | pTtrue_B:  39.047
Event: 12000 | Jet:  0 | pTraw: 80.976 | pTcorr:  27.169 | pTtrue_A:  38.587
Event: 12000 | Jet:  0 | pTraw: 80.976 | pTcorr:  27.169 | pTtrue_B:  38.587
Event: 14000 | Jet:  0 | pTraw: 81.294 | pTcorr:  25.363 | pTtrue_A:  38.042
Event: 14000 | Jet:  0 | pTraw: 81.294 | pTcorr:  25.363 | pTtrue_B:  38.042
Eve

Mean Square Error:
 87.8389505094384
Variance Score:
 -42.38809335491045

----- Testing Neural Network Regression Estimator -----

Mean Square Error:
 78.91306944658798
Variance Score:
 -37.97914995811784
Test    0: True: 37.012 , Pred(line): 45.154( 8.142) , Pred(tree): 45.154( 8.142) , Pred(perc): 44.802( 7.790)
Test  100: True: 37.957 , Pred(line): 41.961( 4.003) , Pred(tree): 41.089( 3.132) , Pred(perc): 43.651( 5.694)
Test  200: True: 37.255 , Pred(line): 44.205( 6.950) , Pred(tree): 48.126( 10.871) , Pred(perc): 44.275( 7.020)
Test  300: True: 37.144 , Pred(line): 47.109( 9.965) , Pred(tree): 50.582( 13.438) , Pred(perc): 46.650( 9.506)
Test  400: True: 37.315 , Pred(line): 49.317( 12.002) , Pred(tree): 53.372( 16.058) , Pred(perc): 49.609( 12.294)
Test  500: True: 35.896 , Pred(line): 43.575( 7.678) , Pred(tree): 43.079( 7.182) , Pred(perc): 43.956( 8.060)
Test  600: True: 39.903 , Pred(line): 47.478( 7.575) , Pred(tree): 54.311( 14.408) , Pred(perc): 47.145( 7.242)
Test  700: T

Event: 2000 | Jet:  0 | pTraw: 105.659 | pTcorr:  58.277 | pTtrue_A:  42.339
Event: 2000 | Jet:  0 | pTraw: 105.659 | pTcorr:  58.277 | pTtrue_B:  42.339
Event: 4000 | Jet:  0 | pTraw: 93.174 | pTcorr:  32.203 | pTtrue_A:  44.957
Event: 6000 | Jet:  0 | pTraw: 88.398 | pTcorr:  28.402 | pTtrue_A:  43.846
Event: 8000 | Jet:  0 | pTraw: 88.698 | pTcorr:  42.697 | pTtrue_A:  40.226
Event: 10000 | Jet:  1 | pTraw: 101.773 | pTcorr:  45.188 | pTtrue_A:  43.082
Event: 10000 | Jet:  1 | pTraw: 101.773 | pTcorr:  45.188 | pTtrue_B:  43.082
Event: 12000 | Jet:  1 | pTraw: 89.067 | pTcorr:  31.081 | pTtrue_A:  42.932
Event: 12000 | Jet:  1 | pTraw: 89.067 | pTcorr:  31.081 | pTtrue_B:  42.932
Event: 14000 | Jet:  1 | pTraw: 102.415 | pTcorr:  49.139 | pTtrue_A:  43.692
Event: 14000 | Jet:  1 | pTraw: 102.415 | pTcorr:  49.139 | pTtrue_B:  43.692
Event: 16000 | Jet:  0 | pTraw: 97.547 | pTcorr:  50.491 | pTtrue_A:  41.607
Event: 16000 | Jet:  0 | pTraw: 97.547 | pTcorr:  50.491 | pTtrue_B:  41.60

Mean Square Error:
 37.17735507670468
Variance Score:
 -17.428206570785832

----- Testing Neural Network Regression Estimator -----

Mean Square Error:
 27.705537765602283
Variance Score:
 -12.733181718974567
Test    0: True: 40.226 , Pred(line): 46.808( 6.582) , Pred(tree): 43.728( 3.502) , Pred(perc): 46.247( 6.021)
Test  100: True: 43.808 , Pred(line): 48.403( 4.595) , Pred(tree): 51.775( 7.967) , Pred(perc): 48.384( 4.576)
Test  200: True: 40.529 , Pred(line): 47.043( 6.514) , Pred(tree): 43.556( 3.027) , Pred(perc): 46.562( 6.033)
Test  300: True: 40.394 , Pred(line): 47.988( 7.595) , Pred(tree): 48.207( 7.813) , Pred(perc): 47.829( 7.435)
Test  400: True: 40.691 , Pred(line): 51.044( 10.353) , Pred(tree): 47.392( 6.701) , Pred(perc): 51.760( 11.070)
Test  500: True: 41.328 , Pred(line): 47.586( 6.258) , Pred(tree): 42.785( 1.457) , Pred(perc): 47.289( 5.961)
Test  600: True: 40.748 , Pred(line): 49.018( 8.270) , Pred(tree): 50.630( 9.882) , Pred(perc): 49.208( 8.460)
Test  700: T

Event: 2000 | Jet:  0 | pTraw: 97.284 | pTcorr:  49.155 | pTtrue_A:  49.029
Event: 2000 | Jet:  0 | pTraw: 97.284 | pTcorr:  49.155 | pTtrue_B:  49.029
Event: 4000 | Jet:  0 | pTraw: 115.718 | pTcorr:  62.020 | pTtrue_A:  46.960
Event: 4000 | Jet:  0 | pTraw: 115.718 | pTcorr:  62.020 | pTtrue_B:  46.960
Event: 6000 | Jet:  0 | pTraw: 98.683 | pTcorr:  49.835 | pTtrue_A:  49.760
Event: 6000 | Jet:  0 | pTraw: 98.683 | pTcorr:  49.835 | pTtrue_B:  49.760
Event: 8000 | Jet:  0 | pTraw: 100.575 | pTcorr:  41.222 | pTtrue_A:  49.615
Event: 8000 | Jet:  0 | pTraw: 100.575 | pTcorr:  41.222 | pTtrue_B:  49.615
Event: 10000 | Jet:  0 | pTraw: 103.002 | pTcorr:  46.122 | pTtrue_A:  46.441
Event: 10000 | Jet:  0 | pTraw: 103.002 | pTcorr:  46.122 | pTtrue_B:  46.441
Event: 12000 | Jet:  0 | pTraw: 90.786 | pTcorr:  25.096 | pTtrue_A:  45.350
Event: 14000 | Jet:  0 | pTraw: 97.512 | pTcorr:  42.162 | pTtrue_A:  49.497
Event: 14000 | Jet:  0 | pTraw: 97.512 | pTcorr:  42.162 | pTtrue_B:  49.497
E

Mean Square Error:
 18.718011566988586
Variance Score:
 -8.335351681757944

----- Testing Neural Network Regression Estimator -----

Mean Square Error:
 8.548297429511296
Variance Score:
 -3.2633461625536375
Test    0: True: 49.200 , Pred(line): 50.921( 1.722) , Pred(tree): 51.298( 2.098) , Pred(perc): 51.614( 2.414)
Test  100: True: 47.157 , Pred(line): 47.292( 0.135) , Pred(tree): 51.908( 4.751) , Pred(perc): 46.895(-0.262)
Test  200: True: 46.537 , Pred(line): 48.407( 1.870) , Pred(tree): 53.292( 6.755) , Pred(perc): 48.389( 1.852)
Test  300: True: 46.237 , Pred(line): 53.314( 7.076) , Pred(tree): 55.801( 9.564) , Pred(perc): 53.886( 7.648)
Test  400: True: 46.922 , Pred(line): 47.618( 0.695) , Pred(tree): 47.371( 0.448) , Pred(perc): 47.332( 0.410)
Test  500: True: 48.415 , Pred(line): 45.951(-2.464) , Pred(tree): 53.261( 4.846) , Pred(perc): 45.284(-3.131)
Test  600: True: 49.055 , Pred(line): 47.355(-1.701) , Pred(tree): 48.111(-0.945) , Pred(perc): 46.980(-2.076)
Test  700: True

Event: 2000 | Jet:  0 | pTraw: 113.944 | pTcorr:  54.206 | pTtrue_A:  54.981
Event: 2000 | Jet:  0 | pTraw: 113.944 | pTcorr:  54.206 | pTtrue_B:  54.981
Event: 4000 | Jet:  0 | pTraw: 107.116 | pTcorr:  62.462 | pTtrue_A:  54.033
Event: 4000 | Jet:  0 | pTraw: 107.116 | pTcorr:  62.462 | pTtrue_B:  54.033
Event: 6000 | Jet:  0 | pTraw: 107.223 | pTcorr:  46.898 | pTtrue_A:  51.611
Event: 8000 | Jet:  0 | pTraw: 99.058 | pTcorr:  53.588 | pTtrue_A:  54.169
Event: 8000 | Jet:  0 | pTraw: 99.058 | pTcorr:  53.588 | pTtrue_B:  54.169
Event: 10000 | Jet:  0 | pTraw: 101.284 | pTcorr:  40.610 | pTtrue_A:  53.155
Event: 12000 | Jet:  0 | pTraw: 137.404 | pTcorr:  69.883 | pTtrue_A:  50.998
Event: 14000 | Jet:  0 | pTraw: 113.595 | pTcorr:  55.326 | pTtrue_A:  52.214
Event: 16000 | Jet:  0 | pTraw: 125.903 | pTcorr:  70.282 | pTtrue_A:  53.737
Event: 16000 | Jet:  0 | pTraw: 125.903 | pTcorr:  70.282 | pTtrue_B:  53.737
Event: 18000 | Jet:  0 | pTraw: 107.609 | pTcorr:  55.517 | pTtrue_A:  51

Mean Square Error:
 28.48741247657458
Variance Score:
 -12.93443394971074

----- Testing Neural Network Regression Estimator -----

Mean Square Error:
 16.256574279170298
Variance Score:
 -6.951798385618181
Test    0: True: 50.895 , Pred(line): 49.272(-1.623) , Pred(tree): 45.571(-5.324) , Pred(perc): 49.548(-1.347)
Test  100: True: 51.828 , Pred(line): 46.886(-4.942) , Pred(tree): 46.808(-5.020) , Pred(perc): 46.352(-5.476)
Test  200: True: 50.423 , Pred(line): 52.410( 1.987) , Pred(tree): 54.051( 3.627) , Pred(perc): 53.258( 2.834)
Test  300: True: 54.220 , Pred(line): 53.500(-0.720) , Pred(tree): 48.873(-5.347) , Pred(perc): 54.049(-0.170)
Test  400: True: 51.269 , Pred(line): 46.633(-4.636) , Pred(tree): 45.089(-6.181) , Pred(perc): 46.013(-5.257)
Test  500: True: 52.600 , Pred(line): 45.382(-7.217) , Pred(tree): 44.968(-7.631) , Pred(perc): 44.892(-7.707)
Test  600: True: 50.548 , Pred(line): 50.715( 0.168) , Pred(tree): 50.622( 0.074) , Pred(perc): 51.348( 0.800)
Test  700: True:

Event: 2000 | Jet:  0 | pTraw: 121.300 | pTcorr:  68.213 | pTtrue_A:  57.547
Event: 2000 | Jet:  0 | pTraw: 121.300 | pTcorr:  68.213 | pTtrue_B:  57.547
Event: 4000 | Jet:  0 | pTraw: 126.431 | pTcorr:  76.160 | pTtrue_A:  59.005
Event: 4000 | Jet:  0 | pTraw: 126.431 | pTcorr:  76.160 | pTtrue_B:  59.005
Event: 6000 | Jet:  1 | pTraw: 108.152 | pTcorr:  58.530 | pTtrue_A:  57.039
Event: 6000 | Jet:  1 | pTraw: 108.152 | pTcorr:  58.530 | pTtrue_B:  57.039
Event: 8000 | Jet:  0 | pTraw: 109.391 | pTcorr:  53.822 | pTtrue_A:  59.543
Event: 8000 | Jet:  0 | pTraw: 109.391 | pTcorr:  53.822 | pTtrue_B:  59.543
Event: 10000 | Jet:  0 | pTraw: 111.047 | pTcorr:  56.219 | pTtrue_A:  56.037
Event: 10000 | Jet:  0 | pTraw: 111.047 | pTcorr:  56.219 | pTtrue_B:  56.037
Event: 12000 | Jet:  0 | pTraw: 123.660 | pTcorr:  71.747 | pTtrue_A:  59.413
Event: 12000 | Jet:  0 | pTraw: 123.660 | pTcorr:  71.747 | pTtrue_B:  59.413
Event: 14000 | Jet:  0 | pTraw: 117.942 | pTcorr:  69.831 | pTtrue_A:  5

Mean Square Error:
 64.20522578853884
Variance Score:
 -30.3487974132997

----- Testing Neural Network Regression Estimator -----

Mean Square Error:
 50.36157698283881
Variance Score:
 -23.58950739382238
Test    0: True: 55.110 , Pred(line): 52.734(-2.376) , Pred(tree): 55.713( 0.602) , Pred(perc): 53.470(-1.641)
Test  100: True: 59.729 , Pred(line): 51.206(-8.523) , Pred(tree): 52.828(-6.901) , Pred(perc): 51.954(-7.775)
Test  200: True: 57.250 , Pred(line): 48.894(-8.356) , Pred(tree): 47.017(-10.233) , Pred(perc): 49.041(-8.209)
Test  300: True: 55.813 , Pred(line): 50.840(-4.973) , Pred(tree): 52.213(-3.600) , Pred(perc): 51.517(-4.296)
Test  400: True: 55.470 , Pred(line): 58.918( 3.447) , Pred(tree): 54.739(-0.732) , Pred(perc): 53.846(-1.625)
Test  500: True: 56.540 , Pred(line): 52.000(-4.540) , Pred(tree): 52.737(-3.803) , Pred(perc): 52.894(-3.646)
Test  600: True: 57.358 , Pred(line): 52.945(-4.413) , Pred(tree): 57.300(-0.057) , Pred(perc): 53.621(-3.737)
Test  700: True: 

Event: 2000 | Jet:  1 | pTraw: 102.663 | pTcorr:  60.134 | pTtrue_A:  62.066
Event: 4000 | Jet:  0 | pTraw: 129.174 | pTcorr:  71.428 | pTtrue_A:  63.460
Event: 4000 | Jet:  0 | pTraw: 129.174 | pTcorr:  71.428 | pTtrue_B:  63.460
Event: 6000 | Jet:  0 | pTraw: 121.815 | pTcorr:  61.672 | pTtrue_A:  62.315
Event: 6000 | Jet:  0 | pTraw: 121.815 | pTcorr:  61.672 | pTtrue_B:  62.315
Event: 8000 | Jet:  0 | pTraw: 98.746 | pTcorr:  48.790 | pTtrue_A:  60.013
Event: 10000 | Jet:  0 | pTraw: 130.960 | pTcorr:  62.106 | pTtrue_A:  63.438
Event: 10000 | Jet:  0 | pTraw: 130.960 | pTcorr:  62.106 | pTtrue_B:  63.438
Event: 12000 | Jet:  1 | pTraw: 121.818 | pTcorr:  62.898 | pTtrue_A:  60.788
Event: 12000 | Jet:  1 | pTraw: 121.818 | pTcorr:  62.898 | pTtrue_B:  60.788
Event: 12000 | Jet:  2 | pTraw: 115.544 | pTcorr:  55.446 | pTtrue_A:  64.684
Event: 12000 | Jet:  2 | pTraw: 115.544 | pTcorr:  55.446 | pTtrue_B:  64.684
Event: 14000 | Jet:  0 | pTraw: 114.292 | pTcorr:  57.397 | pTtrue_A:  

Mean Square Error:
 128.29327601592104
Variance Score:
 -61.396969771130344

----- Testing Neural Network Regression Estimator -----

Mean Square Error:
 113.34947148816615
Variance Score:
 -54.12887164205702
Test    0: True: 60.661 , Pred(line): 49.811(-10.849) , Pred(tree): 47.745(-12.916) , Pred(perc): 50.271(-10.390)
Test  100: True: 63.092 , Pred(line): 51.270(-11.823) , Pred(tree): 44.751(-18.341) , Pred(perc): 52.035(-11.058)
Test  200: True: 61.208 , Pred(line): 48.960(-12.249) , Pred(tree): 46.095(-15.114) , Pred(perc): 49.130(-12.078)
Test  300: True: 63.727 , Pred(line): 52.168(-11.559) , Pred(tree): 51.303(-12.424) , Pred(perc): 53.036(-10.691)
Test  400: True: 61.946 , Pred(line): 50.004(-11.942) , Pred(tree): 47.094(-14.853) , Pred(perc): 50.497(-11.449)
Test  500: True: 60.655 , Pred(line): 53.245(-7.410) , Pred(tree): 52.582(-8.073) , Pred(perc): 53.837(-6.818)
Test  600: True: 63.622 , Pred(line): 53.399(-10.222) , Pred(tree): 54.497(-9.125) , Pred(perc): 53.950(-9.672

Event: 2000 | Jet:  0 | pTraw: 120.923 | pTcorr:  65.994 | pTtrue_A:  69.714
Event: 2000 | Jet:  0 | pTraw: 120.923 | pTcorr:  65.994 | pTtrue_B:  69.714
Event: 2000 | Jet:  1 | pTraw: 110.764 | pTcorr:  68.424 | pTtrue_A:  69.259
Event: 2000 | Jet:  1 | pTraw: 110.764 | pTcorr:  68.424 | pTtrue_B:  69.259
Event: 4000 | Jet:  0 | pTraw: 127.072 | pTcorr:  68.939 | pTtrue_A:  69.261
Event: 4000 | Jet:  0 | pTraw: 127.072 | pTcorr:  68.939 | pTtrue_B:  69.261
Event: 6000 | Jet:  0 | pTraw: 125.427 | pTcorr:  76.861 | pTtrue_A:  65.476
Event: 6000 | Jet:  0 | pTraw: 125.427 | pTcorr:  76.861 | pTtrue_B:  65.476
Event: 8000 | Jet:  0 | pTraw: 135.081 | pTcorr:  69.292 | pTtrue_A:  68.772
Event: 8000 | Jet:  0 | pTraw: 135.081 | pTcorr:  69.292 | pTtrue_B:  68.772
Event: 10000 | Jet:  0 | pTraw: 123.890 | pTcorr:  72.527 | pTtrue_A:  65.354
Event: 12000 | Jet:  0 | pTraw: 138.805 | pTcorr:  83.919 | pTtrue_B:  70.477
Event: 14000 | Jet:  0 | pTraw: 133.346 | pTcorr:  72.589 | pTtrue_A:  68.

Mean Square Error:
 226.85679775088138
Variance Score:
 -107.96682897328641

----- Testing Neural Network Regression Estimator -----

Mean Square Error:
 210.69503444412814
Variance Score:
 -100.20379909887359
Test    0: True: 66.167 , Pred(line): 51.297(-14.870) , Pred(tree): 46.747(-19.420) , Pred(perc): 52.073(-14.094)
Test  100: True: 65.520 , Pred(line): 51.884(-13.635) , Pred(tree): 52.036(-13.484) , Pred(perc): 52.795(-12.725)
Test  200: True: 67.646 , Pred(line): 56.244(-11.402) , Pred(tree): 51.152(-16.494) , Pred(perc): 55.018(-12.628)
Test  300: True: 65.463 , Pred(line): 49.661(-15.802) , Pred(tree): 52.650(-12.813) , Pred(perc): 50.069(-15.394)
Test  400: True: 69.902 , Pred(line): 48.962(-20.940) , Pred(tree): 52.671(-17.231) , Pred(perc): 49.133(-20.770)
Test  500: True: 68.612 , Pred(line): 53.459(-15.153) , Pred(tree): 51.499(-17.113) , Pred(perc): 54.007(-14.604)
Test  600: True: 66.922 , Pred(line): 55.243(-11.679) , Pred(tree): 55.125(-11.797) , Pred(perc): 55.457(-

Event: 2000 | Jet:  0 | pTraw: 104.020 | pTcorr:  53.517 | pTtrue_A:  48.571
Event: 2000 | Jet:  0 | pTraw: 104.020 | pTcorr:  53.517 | pTtrue_B:  48.571
Event: 4000 | Jet:  0 | pTraw: 118.363 | pTcorr:  63.432 | pTtrue_A:  50.021
Event: 4000 | Jet:  0 | pTraw: 118.363 | pTcorr:  63.432 | pTtrue_B:  50.021
Event: 6000 | Jet:  0 | pTraw: 124.345 | pTcorr:  64.672 | pTtrue_A:  58.833
Event: 6000 | Jet:  0 | pTraw: 124.345 | pTcorr:  64.672 | pTtrue_B:  58.833
Event: 8000 | Jet:  0 | pTraw: 116.274 | pTcorr:  57.630 | pTtrue_A:  48.636
Event: 8000 | Jet:  0 | pTraw: 116.274 | pTcorr:  57.630 | pTtrue_B:  48.636
Event: 10000 | Jet:  0 | pTraw: 98.387 | pTcorr:  38.531 | pTtrue_A:  40.572
Event: 10000 | Jet:  0 | pTraw: 98.387 | pTcorr:  38.531 | pTtrue_B:  40.572
Event: 12000 | Jet:  0 | pTraw: 117.776 | pTcorr:  68.281 | pTtrue_A:  46.697
Event: 12000 | Jet:  0 | pTraw: 117.776 | pTcorr:  68.281 | pTtrue_B:  46.697
Event: 14000 | Jet:  0 | pTraw: 99.828 | pTcorr:  51.268 | pTtrue_A:  47.7

Mean Square Error:
 33.31806699791512
Variance Score:
 -0.14175336568763774

----- Testing Neural Network Regression Estimator -----

Mean Square Error:
 23.086545516352874
Variance Score:
 0.20886343595364742
Test    0: True: 52.150 , Pred(line): 48.442(-3.708) , Pred(tree): 46.849(-5.301) , Pred(perc): 48.436(-3.714)
Test  100: True: 59.083 , Pred(line): 52.544(-6.539) , Pred(tree): 57.525(-1.558) , Pred(perc): 53.345(-5.737)
Test  200: True: 42.071 , Pred(line): 45.129( 3.059) , Pred(tree): 46.279( 4.209) , Pred(perc): 44.794( 2.723)
Test  300: True: 54.143 , Pred(line): 48.576(-5.566) , Pred(tree): 52.226(-1.917) , Pred(perc): 48.616(-5.526)
Test  400: True: 42.779 , Pred(line): 46.771( 3.992) , Pred(tree): 54.019( 11.240) , Pred(perc): 46.197( 3.418)
Test  500: True: 47.775 , Pred(line): 49.118( 1.343) , Pred(tree): 47.627(-0.148) , Pred(perc): 49.342( 1.567)
Test  600: True: 44.066 , Pred(line): 42.755(-1.311) , Pred(tree): 43.066(-1.000) , Pred(perc): 43.736(-0.330)
Test  700: T

Event: 2000 | Jet:  0 | pTraw: 107.570 | pTcorr:  54.529 | pTtrue_A:  47.536
Event: 2000 | Jet:  0 | pTraw: 107.570 | pTcorr:  54.529 | pTtrue_B:  47.536
Event: 4000 | Jet:  0 | pTraw: 96.127 | pTcorr:  29.619 | pTtrue_A:  41.793
Event: 4000 | Jet:  0 | pTraw: 96.127 | pTcorr:  29.619 | pTtrue_B:  41.793
Event: 6000 | Jet:  0 | pTraw: 108.625 | pTcorr:  47.721 | pTtrue_A:  48.237
Event: 6000 | Jet:  0 | pTraw: 108.625 | pTcorr:  47.721 | pTtrue_B:  48.237
Event: 8000 | Jet:  0 | pTraw: 105.604 | pTcorr:  47.711 | pTtrue_A:  42.306
Event: 8000 | Jet:  0 | pTraw: 105.604 | pTcorr:  47.711 | pTtrue_B:  42.306
Event: 10000 | Jet:  0 | pTraw: 87.294 | pTcorr:  42.176 | pTtrue_A:  44.002
Event: 10000 | Jet:  0 | pTraw: 87.294 | pTcorr:  42.176 | pTtrue_B:  44.002
Event: 12000 | Jet:  0 | pTraw: 94.177 | pTcorr:  37.132 | pTtrue_A:  45.612
Event: 12000 | Jet:  0 | pTraw: 94.177 | pTcorr:  37.132 | pTtrue_B:  45.612
Event: 14000 | Jet:  1 | pTraw: 78.376 | pTcorr:  42.534 | pTtrue_A:  42.723
E

Mean Square Error:
 30.02747402702082
Variance Score:
 -2.7445748620287014

----- Testing Neural Network Regression Estimator -----

Mean Square Error:
 20.306765310625007
Variance Score:
 -1.5323542980290838
Test    0: True: 49.200 , Pred(line): 49.186(-0.014) , Pred(tree): 51.375( 2.175) , Pred(perc): 49.433( 0.233)
Test  100: True: 41.763 , Pred(line): 43.691( 1.928) , Pred(tree): 43.762( 1.999) , Pred(perc): 44.006( 2.243)
Test  200: True: 43.281 , Pred(line): 46.682( 3.401) , Pred(tree): 46.929( 3.648) , Pred(perc): 46.078( 2.797)
Test  300: True: 48.520 , Pred(line): 44.706(-3.814) , Pred(tree): 48.376(-0.144) , Pred(perc): 44.622(-3.898)
Test  400: True: 45.645 , Pred(line): 46.496( 0.851) , Pred(tree): 51.479( 5.833) , Pred(perc): 45.854( 0.208)
Test  500: True: 41.996 , Pred(line): 51.483( 9.487) , Pred(tree): 49.824( 7.828) , Pred(perc): 52.341( 10.345)
Test  600: True: 43.849 , Pred(line): 49.226( 5.377) , Pred(tree): 50.863( 7.014) , Pred(perc): 49.487( 5.637)
Test  700: Tr

Event: 2000 | Jet:  0 | pTraw: 100.497 | pTcorr:  61.289 | pTtrue_A:  40.231
Event: 4000 | Jet:  0 | pTraw: 104.580 | pTcorr:  46.124 | pTtrue_A:  40.024
Event: 6000 | Jet:  0 | pTraw: 89.120 | pTcorr:  25.139 | pTtrue_A:  40.142
Event: 8000 | Jet:  0 | pTraw: 95.141 | pTcorr:  38.827 | pTtrue_A:  40.091
Event: 10000 | Jet:  0 | pTraw: 86.360 | pTcorr:  31.495 | pTtrue_A:  40.565
Event: 10000 | Jet:  0 | pTraw: 86.360 | pTcorr:  31.495 | pTtrue_B:  40.565
Event: 14000 | Jet:  0 | pTraw: 90.744 | pTcorr:  34.878 | pTtrue_A:  40.086
Event: 16000 | Jet:  0 | pTraw: 74.252 | pTcorr:  20.712 | pTtrue_A:  40.027
Event: 18000 | Jet:  0 | pTraw: 107.455 | pTcorr:  51.290 | pTtrue_A:  40.466
Event: 18000 | Jet:  0 | pTraw: 107.455 | pTcorr:  51.290 | pTtrue_B:  40.466
All data transferred to array. Testing with 19266 A-jets and 7708 B-jets.

Training set A: 19266 / 19266 / 19266
Training set B: 7708 / 7708 / 7708
Input file closed.

----- Testing Linear Regression Estimator -----

Mean Square E

Mean Square Error:
 50.662274000019536
Variance Score:
 -608.0028394319847

----- Testing Neural Network Regression Estimator -----

Mean Square Error:
 41.105014852237204
Variance Score:
 -493.1166036071873
Test    0: True: 40.226 , Pred(line): 46.203( 5.977) , Pred(tree): 47.414( 7.188) , Pred(perc): 45.548( 5.322)
Test  100: True: 40.564 , Pred(line): 47.121( 6.556) , Pred(tree): 45.717( 5.153) , Pred(perc): 46.666( 6.102)
Test  200: True: 40.848 , Pred(line): 48.898( 8.050) , Pred(tree): 50.085( 9.237) , Pred(perc): 49.047( 8.199)
Test  300: True: 40.141 , Pred(line): 49.104( 8.963) , Pred(tree): 44.184( 4.044) , Pred(perc): 49.322( 9.182)
Test  400: True: 40.097 , Pred(line): 44.056( 3.959) , Pred(tree): 45.657( 5.560) , Pred(perc): 44.197( 4.101)
Test  500: True: 40.341 , Pred(line): 47.730( 7.389) , Pred(tree): 45.770( 5.430) , Pred(perc): 47.482( 7.141)
Test  600: True: 40.044 , Pred(line): 44.478( 4.434) , Pred(tree): 42.072( 2.028) , Pred(perc): 44.457( 4.413)
Test  700: True