## Core Functions
This section imports the required libraries and defines helper functions that prepare the feature arrays for machine learning and run the training and testing.

In [1]:
from ML_Python.ML_Python_Build_FeatureArrays_FromROOT import Build_FeatureArrays_FromROOT
from ML_Python.ML_Python_TrainTest import (
    Build_FeatureArrays_FromCSV,
    Write_MLResults_ToCSV,
    Write_MLWeights_ToCSV,
    Train_All_Estimators,
    Train_LinearRegression,
    Train_RandomForestRegression,
    Train_MLPRegression,
    Test_Estimator,
    Test_All_Estimators)
from datetime import datetime



def Build_SelectFeatureArray(
    X_features,
    feature_index
    ) :
    """
    Extracts a subset of feature columns from every row of a feature array.

    Parameters:
        X_features    : Sequence of rows; each row is indexed with the entries of feature_index
        feature_index : Sequence of column indices to keep, in the order they should appear

    Returns:
        A new list with one list per input row, holding only the selected columns.
    """

    print("Selecting data from master array...")

    # One output row per input row, keeping the columns in feature_index order
    X_features_select = [
        [row[column] for column in feature_index]
        for row in X_features
    ]

    print("Data ready. Feature array length:", len(X_features_select), "\n")

    return X_features_select

    

def TestAndSave_LinearRegression(
    feature_label,    # Array of labels corresponding to each feature
    feature_index,    # Array of indices for each feature used in X_train
    lr_pipeline,      # Trained Linear Regression Pipeline
    lr_coeffs,        # Array of coefficient values from trained linear regression pipeline
    X_test_select,    # Array of testing data features
    y_test,           # Array of testing data targets
    sc_test,          # Array of testing data simple corrections
    pt_test_min,      # Float of min pT to test with
    pt_test_max,      # Float of max pT to test with
    output_filename,  # Directory path + name for output csv file
    use_scaler = True # If true, rescales data
    ) :
    """
    Tests a trained linear regression pipeline on one pT window and writes the results to csv.

    Only test entries whose target lies strictly between pt_test_min and pt_test_max
    (both bounds excluded) are kept. The kept entries are passed to Test_Estimator and
    the outcome is handed to Write_MLResults_ToCSV together with output_filename.

    NOTE: feature_index, lr_coeffs and use_scaler are accepted but not used in this body.
    Returns None.
    """

    # Positions of the test entries whose target falls inside the open pT window
    in_window = [
        idx for idx in range(len(y_test))
        if pt_test_min < y_test[idx] < pt_test_max
    ]

    X_test_temp  = [X_test_select[idx] for idx in in_window]
    y_test_temp  = [y_test[idx] for idx in in_window]
    sc_test_temp = [sc_test[idx] for idx in in_window]

    # Debug output: shows the type of the estimator object about to be tested
    print(type(lr_pipeline))

    # Tests estimator; the second returned value (the deltas) is not needed here
    lr_results, _ = Test_Estimator(lr_pipeline, X_test_temp, y_test_temp)

    # Writes outputs to a csv file
    Write_MLResults_ToCSV(
        output_filename, y_test_temp, sc_test_temp, lr_results, X_test_temp, feature_label)

    return



def TrainTestPlot_All_Estimators(
    feature_label,    # Array of labels corresponding to each feature
    feature_index,    # Array of indices for each feature used in X_train
    X_train,          # Array of training data features
    y_train,          # Array of training data targets
    sc_train,         # Array of training data simple correction values
    X_test,           # Array of testing data features
    y_test,           # Array of testing data targets
    sc_test,          # Array of testing data simple corrections
    output_file_path, # File path for outputs
    use_scaler = True,
    use_lr = True,
    use_rf = True,
    use_mlp = True,
    ) :
    """
    Trains the requested estimators on the selected feature columns, then tests them.

    The columns listed in feature_index are pulled out of every row of X_train and
    X_test. The training selection goes to Train_All_Estimators (with the use_* flags
    forwarded) and the resulting pipelines are evaluated with Test_All_Estimators.

    NOTE: sc_train, sc_test and output_file_path are accepted but not used in this
    body, and the test results are neither saved, plotted, nor returned.
    Returns None.
    """

    # Builds training data set
    print("Selecting training data...")
    X_train_select = [[row[column] for column in feature_index] for row in X_train]
    # The status messages below end with a real newline ("\n"); they previously
    # printed the literal characters "/n".
    print("Training data ready. X/Y length:", len(X_train_select), len(y_train), "\n")

    # Builds pipelines from selected training features
    print("Building estimator pipelines...")
    lr_pipeline, rf_pipeline, mlp_pipeline, lr_coeffs, rf_features = Train_All_Estimators(
        X_train_select, y_train, feature_label,
        use_StandardScaler = use_scaler,
        use_LinearRegression = use_lr,
        use_RandomForest = use_rf,
        use_MLP = use_mlp)
    print("Pipelines built.\n")

    # Builds testing data set with the same column selection
    print("Selecting testing data...")
    X_test_select = [[row[column] for column in feature_index] for row in X_test]
    print("Testing data ready. X/Y length:", len(X_test_select), len(y_test), "\n")

    # Test estimators (results are unpacked to check the return shape, then discarded)
    print("Testing all estimators...")
    lr_results, lr_results_delta, rf_results, rf_results_delta, mlp_results, mlp_results_delta = Test_All_Estimators(
        X_test_select,
        y_test,
        lr_pipeline,
        rf_pipeline,
        mlp_pipeline)
    print("Estimator testing complete!\n")

    return



# Prints a timestamp so the cell output shows when the definitions above were (re)loaded
now = datetime.now()
dt_string = f"{now:%Y/%m/%d %H:%M:%S}"
print("\nReady!", dt_string)

Welcome to JupyROOT 6.26/04

Ready! 2022/12/11 23:38:58


## Data Preparation
Sets up the input directories and files for training and testing, loads the training and testing data arrays, and defines the feature sets used for training.

In [4]:
# Directory holding the input ROOT files and their csv backups
file_directory   = "../Files/Comparison_Trial/Data/"

# Training input: ROOT file, tree name, derived paths, and (min, max) range
train_file_name  = "ML_Prep_10_90_B8_Train.root"
train_tree_name  = "Tree_10_90_B8_Train"
train_file_path  = f"{file_directory}{train_file_name}"
# [:-5] drops the 5-character ".root" extension before the backup suffix is added
train_csv_path   = f"{file_directory}{train_file_name[:-5]}_Backup.csv"
train_range      = (10., 90.)

# Testing input: same layout as the training input
test_file_name   = "ML_Prep_10_90_B8_Test.root"
test_tree_name   = "Tree_10_90_B8_Test"
test_file_path   = f"{file_directory}{test_file_name}"
test_csv_path    = f"{file_directory}{test_file_name[:-5]}_Backup.csv"
test_range       = (10., 90.)

# Common path prefix for the result files; suffixes are appended when results are written
output_csv_name  = f"{file_directory}ML_Results/Train_B8"



# Two ways to obtain the (features, targets, simple corrections) arrays:
#
#   1. From the ROOT files (currently disabled):
#        X_train, y_train, sc_train = Build_FeatureArrays_FromROOT(
#            train_file_path, train_tree_name, train_csv_path, train_range[0], train_range[1])
#        X_test, y_test, sc_test = Build_FeatureArrays_FromROOT(
#            test_file_path,  test_tree_name,  test_csv_path,  test_range[0],  test_range[1])
#
#   2. From the csv backup files (MUCH faster if the csv has been made already) -- used here.
#      The training file is read first, then the testing file.
(X_train, y_train, sc_train), (X_test, y_test, sc_test) = (
    Build_FeatureArrays_FromCSV(train_csv_path),
    Build_FeatureArrays_FromCSV(test_csv_path),
)



# Set Features to train with
#
# feature_names_all lists the columns of each X_train / X_test row; the position of a
# name in this list is its column index (0 jet_pt_raw ... 20 jet_rho).
# NOTE(review): this order is taken from the column table previously kept here as a
# comment -- confirm it against the arrays built by Build_FeatureArrays_* if columns change.
feature_names_all = [
    "jet_pt_raw",    "jet_pt_corr",   "jet_mass",      "jet_area",
    "jet_area_err",  "jet_const_n",   "const_pt_mean", "const_pt_median",
    "const_1_pt",    "const_2_pt",    "const_3_pt",    "const_4_pt",
    "const_5_pt",    "const_6_pt",    "const_7_pt",    "const_8_pt",
    "const_9_pt",    "const_10_pt",   "jet_y",         "jet_phi",
    "jet_rho"]

# The index lists are derived from the labels, so labels and indices cannot drift
# apart; a misspelled label raises ValueError here instead of silently training on
# the wrong column.

# Training with 1 feature
feature_label_1feat = [
    "jet_pt_raw"]
feature_index_1feat = [feature_names_all.index(label) for label in feature_label_1feat]

# Training with 3 features
feature_label_3feat = [
    "jet_pt_raw", "jet_area", "jet_rho"]
feature_index_3feat = [feature_names_all.index(label) for label in feature_label_3feat]

# Training with 12 features
feature_label_12feat = [
    "jet_pt_raw",    "jet_pt_corr",    "jet_mass",      "jet_area",
    "jet_const_n",   "const_pt_mean",  "const_1_pt",    "const_2_pt",
    "const_3_pt",    "const_4_pt",     "jet_y",         "jet_rho"]
feature_index_12feat = [feature_names_all.index(label) for label in feature_label_12feat]



# Prints a timestamp so the cell output shows when the data preparation finished
now = datetime.now()
dt_string = f"{now:%Y/%m/%d %H:%M:%S}"
print("\nReady!", dt_string)

Preparing to collect data from csv backup file...
Jet: 10000 | pTraw: 69.750 | pTcorr:  19.722 | pTtrue:  21.433
Jet: 20000 | pTraw: 87.747 | pTcorr:  24.601 | pTtrue:  27.341
Jet: 30000 | pTraw: 47.990 | pTcorr:  3.316 | pTtrue:  13.372
Jet: 40000 | pTraw: 73.657 | pTcorr:  17.213 | pTtrue:  10.965
Jet: 50000 | pTraw: 155.385 | pTcorr:  84.131 | pTtrue:  84.254
Jet: 60000 | pTraw: 67.146 | pTcorr:  25.140 | pTtrue:  19.837
Jet: 70000 | pTraw: 36.326 | pTcorr:  14.513 | pTtrue:  15.810
Jet: 80000 | pTraw: 118.382 | pTcorr:  62.321 | pTtrue:  62.664
Jet: 90000 | pTraw: 118.381 | pTcorr:  77.046 | pTtrue:  84.791
Jet: 100000 | pTraw: 142.244 | pTcorr:  85.009 | pTtrue:  86.439
Jet: 110000 | pTraw: 105.900 | pTcorr:  35.717 | pTtrue:  54.590
Jet: 120000 | pTraw: 69.499 | pTcorr:  26.358 | pTtrue:  21.755
Jet: 130000 | pTraw: 87.513 | pTcorr:  40.364 | pTtrue:  27.490
Jet: 140000 | pTraw: 76.336 | pTcorr:  15.606 | pTtrue:  10.107
Jet: 150000 | pTraw: 12.131 | pTcorr:  6.389 | pTtrue:  10.

Jet: 380000 | pTraw: 54.713 | pTcorr:  1.425 | pTtrue:  18.211
Jet: 390000 | pTraw: 95.811 | pTcorr:  36.267 | pTtrue:  37.133
Jet: 400000 | pTraw: 97.870 | pTcorr:  53.228 | pTtrue:  63.752
Jet: 410000 | pTraw: 69.686 | pTcorr:  24.229 | pTtrue:  23.439
Jet: 420000 | pTraw: 57.549 | pTcorr:  10.268 | pTtrue:  14.003
Jet: 430000 | pTraw: 135.320 | pTcorr:  85.527 | pTtrue:  83.276
Jet: 440000 | pTraw: 6.352 | pTcorr:  1.876 | pTtrue:  18.875
Jet: 450000 | pTraw: 85.462 | pTcorr:  6.723 | pTtrue:  20.491
Jet: 460000 | pTraw: 89.105 | pTcorr:  37.067 | pTtrue:  43.989
Jet: 470000 | pTraw: 137.306 | pTcorr:  88.177 | pTtrue:  86.986
Jet: 480000 | pTraw: 109.332 | pTcorr:  48.552 | pTtrue:  26.061
Jet: 490000 | pTraw: 93.846 | pTcorr:  41.155 | pTtrue:  47.725
Jet: 500000 | pTraw: 126.347 | pTcorr:  64.560 | pTtrue:  61.546
Jet: 510000 | pTraw: 66.645 | pTcorr:  24.968 | pTtrue:  17.403
Jet: 520000 | pTraw: 122.057 | pTcorr:  66.335 | pTtrue:  68.934
Jet: 530000 | pTraw: 131.511 | pTcorr: 

## Training & Testing - 1, 3, and 12 Input Features
1 Feature: jet_pt_raw ONLY

3 Features: jet_pt_raw, jet_area, jet_rho

12 Features: jet_pt_raw, jet_pt_corr, jet_mass, jet_area, jet_const_n, const_pt_mean, const_1_pt, const_2_pt, const_3_pt, const_4_pt, jet_y, jet_rho

In [6]:
# [min, max] pT windows (GeV) to test on: 4 GeV wide, centred on 20, 30, ..., 80
test_min_max_array = [[center - 2, center + 2] for center in range(20, 90, 10)]
# Each entry pairs a list of feature labels with the matching column indices
feature_bundle = [
    [feature_label_1feat,  feature_index_1feat],
    [feature_label_3feat,  feature_index_3feat],
    [feature_label_12feat, feature_index_12feat],
]
# NOTE: a train_bundle of [X_train, y_train, sc_train] entries may be implemented later
# to iterate through multiple training sets; only one training set is used for now.

# For every feature set: select the columns, train a linear regression, save its
# coefficients, then test and save the results for every pT window.
for feature_set in feature_bundle:
    feature_label, feature_index = feature_set

    # Shared prefix of all output files of this feature set:
    # <output_csv_name>_F<number of features>_<train min>_<train max>
    output_csv_name_2 = f"{output_csv_name}_F{len(feature_label)}_{int(train_range[0])}_{int(train_range[1])}"

    # Builds training and testing arrays
    print("\nBuilding training and testing selected feature arrays...")
    X_train_select = Build_SelectFeatureArray(X_train, feature_index)
    X_test_select  = Build_SelectFeatureArray(X_test, feature_index)

    # Trains estimator
    print("\nTraining linear regression estimator...")
    lr_pipeline, lr_coeffs = Train_LinearRegression(
        X_train_select, y_train, feature_label, use_scaler = True)
    # Debug output: shows the type of the trained estimator object
    print(type(lr_pipeline))

    Write_MLWeights_ToCSV(f"{output_csv_name_2}_LR_Coeffs.csv", lr_coeffs, feature_label)

    # Tests estimator and saves results, one csv file per pT window
    for min_max in test_min_max_array:

        output = f"\nTesting {len(feature_index)} features on {min_max[0]}-{min_max[1]} GeV..."
        print(output)

        csv_path = f"{output_csv_name_2}_Test_{int(min_max[0])}_{int(min_max[1])}.csv"

        TestAndSave_LinearRegression(
            feature_label, feature_index,
            lr_pipeline, lr_coeffs,
            X_test_select, y_test, sc_test,
            min_max[0], min_max[1],
            csv_path,
            use_scaler = True)

        print("Test and save complete!\n")

    

# Prints a timestamp so the cell output shows when training and testing finished
now = datetime.now()
dt_string = f"{now:%Y/%m/%d %H:%M:%S}"
print("\nComplete!", dt_string)


Building training and testing selected feature arrays...
Selecting data from master array...
Data ready. Feature array length: 915784 

Selecting data from master array...
Data ready. Feature array length: 961986 


Training linear regression estimator...

----- Fitting Linear Regression Estimator -----


Using StandardScaler. Data will be recentered and normalized.


Linear Regression Fit:
 Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearregression', LinearRegression())])
Regression Coefficients:
jet_pt_raw 18.287659172677568
<class 'sklearn.pipeline.Pipeline'>
<class 'sklearn.pipeline.Pipeline'>
ML weights .csv file closed.

Testing 1 features on 18-22 GeV...
<class 'sklearn.pipeline.Pipeline'>
ML results .csv file closed.
Test and save complete!


Testing 1 features on 28-32 GeV...
<class 'sklearn.pipeline.Pipeline'>
ML results .csv file closed.
Test and save complete!


Testing 1 features on 38-42 GeV...
<class 'sklearn.pipeline.Pipeline'>
ML results 

## OLD CODE
#### WARNING: ALL CODE BELOW IS OUT OF DATE AND DOES NOT WORK

In [None]:
# Testing with individual Features
# (out-of-date cell: the arrays and the Train/Test/Build functions it uses are not
# defined in the visible cells of this notebook)

# NOTE: This block tests with individual features (IF); a separate set of ML
# estimators is trained for each feature that is checked.

# Single-column training arrays: column 0 feeds the ptRaw set, column 1 the ptCorr set
X_ptRaw_train_A    = [[row[0]] for row in X_train_A]
X_ptRaw_train_B    = [[row[0]] for row in X_train_B]
X_ptCorr_train_A   = [[row[1]] for row in X_train_A]
X_ptCorr_train_B   = [[row[1]] for row in X_train_B]
# Disabled single-feature candidates, built the same way from X_values_A:
#   ptMean (column 6), ptMedian (7), ptConst1 (8), ptConst2 (9), ptConst3 (10), ptConst4 (11)

(lr_pipeline_ptRaw, rf_pipeline_ptRaw,
 nn_pipeline_ptRaw, features_arr_ptRaw) = Train_ML_pt_Estimators(X_ptRaw_train_A, y_train_A)
(lr_pipeline_ptCorr, rf_pipeline_ptCorr,
 nn_pipeline_ptCorr, features_arr_ptCorr) = Train_ML_pt_Estimators(X_ptCorr_train_A, y_train_A)

for testing_tree_name in testing_tree_names :
    # The tree name is sliced by position: [5:] gives the output base name,
    # [5:7] and [8:10] are converted to the min and max true pT.
    output_base_name = testing_tree_name[5:]
    pt_true_min = float(testing_tree_name[5:7])
    pt_true_max = float(testing_tree_name[8:10])
    print(pt_true_min, pt_true_max)

    (X_test_A, y_test_A, sc_corr_test_arr_A,
     X_test_B, y_test_B, sc_corr_test_arr_B) = Build_ML_Feature_Arrays(
        input_file_path, testing_tree_name, pt_true_min, pt_true_max)

    # Single-column testing arrays, matching the training arrays above
    X_ptRaw_A  = [[row[0]] for row in X_test_A]
    X_ptCorr_A = [[row[1]] for row in X_test_A]
    # Disabled candidates: same columns 6-11 of X_values_A as listed for training

    # ptRaw-only test
    Test_ML_pt_Estimators(
        X_ptRaw_A, y_test_A, sc_corr_test_arr_A,
        lr_pipeline_ptRaw, rf_pipeline_ptRaw, nn_pipeline_ptRaw, features_arr_ptRaw,
        output_base_name + "_only_ptRaw_ptTrueA_compare",
        input_file_path,
        40, -40., 40.,
        False,  # If true, outputs feature importance
        True)   # If true, compares to paper plots using 40, -40., 40., as limits

    # ptCorr-only test
    Test_ML_pt_Estimators(
        X_ptCorr_A, y_test_A, sc_corr_test_arr_A,
        lr_pipeline_ptCorr, rf_pipeline_ptCorr, nn_pipeline_ptCorr, features_arr_ptCorr,
        output_base_name + "_only_ptCorr_ptTrueA_compare",
        input_file_path,
        40, -40., 40.,
        False,  # If true, outputs feature importance
        True)   # If true, compares to paper plots using 40, -40., 40., as limits

print("Complete!")

In [None]:
# Generates a distribution plot of jets used for training
#
# Fills a 1D histogram with every true pT value in y_train and writes it to the
# ROOT output file, which is opened in "UPDATE" mode.
# NOTE(review): out-of-date cell -- ROOT, output_file_path, train_min and train_max are
# not defined in the visible cells of this notebook. train_min / train_max are
# concatenated into names below, so they are expected to be strings such as "10" / "90".

output_file = ROOT.TFile.Open(output_file_path, "UPDATE")

# NOTE: x_bin_min, x_bin_max and x_bin_count are not used below; the histogram binning
# is derived from train_min / train_max instead.
x_bin_min = 10.
x_bin_max = 90.
x_bin_count = (x_bin_max - x_bin_min)/2

print("Output file opened and prepared.")

# One bin per 2 GeV. Floor division keeps the bin count an int: the TH1D constructor
# takes an integer number of bins, and "/" would pass a float.
th1_ptTrue_distribution = ROOT.TH1D(
    "th1_"+train_min+"_"+train_max+"_Train_ptTrue_distribution",
    "Jet p_{T}^{True} distribution for training with "+train_min+" to "+train_max+" GeV; p_{T}^{True} [GeV]; N_{ch jets}",
    (int(train_max) - int(train_min)) // 2, float(train_min), float(train_max))

th1_ptTrue_distribution.SetDirectory(0)

th1_ptTrue_distribution.Sumw2()

print("Writing to output file.")
counter = 0
for ptTrue in y_train:
    th1_ptTrue_distribution.Fill(ptTrue)
    # Progress message every 1000 entries
    if (counter % 1000 == 0): print("Processed event:", counter)
    counter += 1

th1_ptTrue_distribution.Write("", ROOT.TObject.kOverwrite)

output_file.Write()
print("Output file written to.")

output_file.Close()
print("Output file closed.")

now = datetime.now()
dt_string = now.strftime("%Y/%m/%d %H:%M:%S")
print("Ready!", dt_string)

In [None]:
    sc_values_arr,   # Array of simple correction values to compare against
    lr_coeffs_arr,   # Feature coefficient values from Linear Regression
    rf_features_arr  # Feature importance values from Random Forest
    
    sc_results = [sc_values_arr[i] for i in range(len(sc_values_arr))]
    sc_results_delta = [sc_results[i] - y_test[i] for i in range(len(sc_results))]
    
    (file_prefix,
    output_file_path,
    x_bins, x_min, x_max,
    bool_features,
    bool_compare,
    X_values)
    
    output_file = ROOT.TFile.Open(output_file_path, "UPDATE")
    output_tree = ROOT.TTree("Tree_ML_" + file_prefix[5:10], "TTree of data from machine learning")
    
    min_GeV = file_prefix[5:7]
    max_GeV = file_prefix[8:10]
    
    # --- LINEAR REGRESSION ---
    
    print("\n----- Testing Linear Regression Estimator -----\n")

    # Outputs mean square error
    output = np.mean((lr_pipeline.predict(X_test) - y_test)**2)
    print("Mean Square Error:\n", output)

    # Outputs the variance score
    output = lr_pipeline.score(X_test, y_test)
    print("Variance Score:\n", output)

    # --- RANDOM FOREST REGRESSION ---
    
    print("\n----- Testing Random Forest Regression Estimator -----\n")

    # Outputs mean square error
    output = np.mean((rf_pipeline.predict(X_test) - y_test)**2)
    print("Mean Square Error:\n", output)

    # Outputs the variance score
    output = rf_pipeline.score(X_test, y_test)
    print("Variance Score:\n", output)

    # --- MULTILAYER PERCEPTRON REGRESSION ---
    
    print("\n----- Testing Neural Network Regression Estimator -----\n")

    # Outputs mean square error
    output = np.mean((nn_pipeline.predict(X_test) - y_test)**2)
    print("Mean Square Error:\n", output)

    # Outputs the variance score
    output = nn_pipeline.score(X_test, y_test)
    print("Variance Score:\n", output)
    
    # --- GENERATE HISTOGRAMS ---
    
    th1d_data_feature_importance = ROOT.TH1D(
        "th1d_" + file_prefix + "_feature_importance","", len(features_arr), 0, 1)
    
    name_simple_correction = "th1d_" + file_prefix + "_simple_correction"
    name_linear_regression = "th1d_" + file_prefix + "_linear_regression"
    name_random_forest     = "th1d_" + file_prefix + "_random_forest"
    name_neural_network    = "th1d_" + file_prefix + "_neural_network"
    title_infix            = min_GeV + " GeV to " + max_GeV + " GeV"
    
    th1d_simple_correction = ROOT.TH1D(
        name_simple_correction,
        "Jet p_{T} Delta for " + title_infix + ", Background Subtraction; (p_{T, reco} - p_{T, true})/p_{T, true} [GeV]; N_{ch jets}",
        x_bins, x_min, x_max)
    th1d_linear_regression = ROOT.TH1D(
        name_linear_regression,
        "Jet p_{T} Delta for " + title_infix + ", Linear Regression; (p_{T, reco} - p_{T, true})/p_{T, true} [GeV]; N_{ch jets}",
        x_bins, x_min, x_max)
    th1d_random_forest = ROOT.TH1D(
        name_random_forest,
        "Jet p_{T} Delta for " + title_infix + ", Random Forest; (p_{T, reco} - p_{T, true})/p_{T, true} [GeV]; N_{ch jets}",
        x_bins, x_min, x_max)
    th1d_neural_network = ROOT.TH1D(
        name_neural_network,
        "Jet p_{T} Delta for " + title_infix + ", Neural Network; (p_{T, reco} - p_{T, true})/p_{T, true} [GeV]; N_{ch jets}",
        x_bins, x_min, x_max)

    th1d_simple_correction.SetDirectory(0)
    th1d_linear_regression.SetDirectory(0)
    th1d_random_forest.SetDirectory(0)
    th1d_neural_network.SetDirectory(0)

    th1d_simple_correction.Sumw2()
    th1d_linear_regression.Sumw2()
    th1d_random_forest.Sumw2()
    th1d_neural_network.Sumw2()
        
    for i in range(len(features_arr)): th1d_data_feature_importance.SetBinContent(i+1, features_arr[i])
    
    lr_prediction_arr = lr_pipeline.predict(X_test)
    rf_prediction_arr = rf_pipeline.predict(X_test)
    nn_prediction_arr = nn_pipeline.predict(X_test)

    output_file.cd()
    
    for i in range(len(X_test)) :
        
        # Normal
        sc_correction = sc_correction_arr[i]
        lr_prediction = lr_prediction_arr[i]
        rf_prediction = rf_prediction_arr[i]
        nn_prediction = nn_prediction_arr[i]

        target = y_test[i]
        
        sc_delta = sc_correction - target
        lr_delta = lr_prediction - target
        rf_delta = rf_prediction - target
        nn_delta = nn_prediction - target
        
        if bool_compare:
            th1d_simple_correction.Fill( sc_delta )
            th1d_linear_regression.Fill( lr_delta )
            th1d_random_forest.Fill( rf_delta )
            th1d_neural_network.Fill( nn_delta )
        else:
            if target != 0 :
                th1d_simple_correction.Fill( sc_delta / target )
                th1d_linear_regression.Fill( lr_delta / target )
                th1d_random_forest.Fill( rf_delta / target )
                th1d_neural_network.Fill( nn_delta / target )
        
        if i % 100 == 0 :
            print(f"Test {i:4.0f}: True: {y_test[i]:3.3f} , ", end="")
            print(f"Pred(line): {lr_prediction:4.3f}({lr_delta: 4.3f}) , ", end="")
            print(f"Pred(tree): {rf_prediction:4.3f}({rf_delta: 4.3f}) , ", end="")
            print(f"Pred(perc): {nn_prediction:4.3f}({nn_delta: 4.3f})")
    
    th1d_simple_correction.Write("", ROOT.TObject.kOverwrite)
    th1d_linear_regression.Write("", ROOT.TObject.kOverwrite)
    th1d_random_forest.Write("", ROOT.TObject.kOverwrite)
    th1d_neural_network.Write("", ROOT.TObject.kOverwrite)
    th1d_data_feature_importance.Write("", ROOT.TObject.kOverwrite)
    
    output_file.Write()
    print("Output file written to.")

    output_file.Close()
    print("Output file closed.")
    
    output_csv  = open((output_file_path[:-5] + "_Train_" + file_prefix[6:] + ".csv"), 'w', newline='')
    csv_writer  = csv.writer(output_csv)
    csv_header = ['Jet Area', 'Jet pT Raw', 'Jet pT True', 'Jet pT Corrected', 
                  'Jet pT ML-LR', 'Jet pT ML-RF', 'Jet pT ML-NN']
    
    o_jet_area     = 0.
    o_jet_pt_raw   = 0.
    o_jet_pt_corr  = 0.
    o_jet_pt_true  = 0.
    o_jet_pt_ml_lr = 0.
    o_jet_pt_ml_rf = 0.
    o_jet_pt_ml_nn = 0.
    
    for i in range(len(X_test)) :
        
        # Normal
        sc_correction = sc_correction_arr[i]
        lr_prediction = lr_prediction_arr[i]
        rf_prediction = rf_prediction_arr[i]
        nn_prediction = nn_prediction_arr[i]

        target = y_test[i]
        
        sc_delta = sc_correction - target
        lr_delta = lr_prediction - target
        rf_delta = rf_prediction - target
        nn_delta = nn_prediction - target
        
        # Adds data to output CSV
        o_jet_area     = X_values[i][3]
        o_jet_pt_raw   = X_values[i][0]
        o_jet_pt_true  = target
        o_jet_pt_corr  = sc_correction
        o_jet_pt_ml_lr = lr_prediction
        o_jet_pt_ml_rf = rf_prediction
        o_jet_pt_ml_nn = nn_prediction
        
        csv_row = [o_jet_area, o_jet_pt_raw, o_jet_pt_true, 
                   o_jet_pt_corr, o_jet_pt_ml_lr, o_jet_pt_ml_rf, o_jet_pt_ml_nn]
        
        csv_writer.writerow(csv_row)
        
        if i % 100 == 0 :
            print(f"Test {i:4.0f}: True: {y_test[i]:3.3f} , ", end="")
            print(f"Pred(line): {lr_prediction:4.3f}({lr_delta: 4.3f}) , ", end="")
            print(f"Pred(tree): {rf_prediction:4.3f}({rf_delta: 4.3f}) , ", end="")
            print(f"Pred(perc): {nn_prediction:4.3f}({nn_delta: 4.3f})")
    
    output_csv.close()
    
    print("Predictions and histogram filling complete.")
    
    return