Skip to content

Commit

Permalink
Merge pull request #134 from HealthyPear/feature-model_weights_and_RF…
Browse files Browse the repository at this point in the history
…_error

Add choice of estimation weigths and standard deviation for RandomForestRegressor models
  • Loading branch information
HealthyPear committed May 11, 2021
2 parents ff7ea05 + 24c15a9 commit b1e0a9d
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 7 deletions.
2 changes: 2 additions & 0 deletions protopipe/aux/example_config_files/analysis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,12 @@ Reconstruction:
EnergyRegressor:
# Name of the regression method (e.g. AdaBoostRegressor, etc.)
method_name: 'AdaBoostRegressor'
estimation_weight: 'STD'

# Parameters for g/h separation
GammaHadronClassifier:
# Name of the classification method (e.g. AdaBoostRegressor, etc.)
method_name: 'RandomForestClassifier'
# Use probability output or score
use_proba: True
estimation_weight: hillas_intensity**0.54 # empirical value from CTAMARS
18 changes: 15 additions & 3 deletions protopipe/scripts/data_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,12 @@ def main():
signal_handler = SignalHandler()
signal.signal(signal.SIGINT, signal_handler)

# Regressor method
# Regressor information
regressor_method = cfg["EnergyRegressor"]["method_name"]
try:
estimation_weight = cfg["EnergyRegressor"]["estimation_weight"]
except KeyError:
estimation_weight = "STD"

# wrapper for the scikit-learn regressor
if args.estimate_energy is True:
Expand Down Expand Up @@ -366,8 +370,16 @@ def main():

############################################################

energy_tel[idx] = model.predict(features_values)
weight_tel[idx] = moments.intensity
if estimation_weight == "STD":
# Get an array of trees
predictions_trees = np.array([tree.predict(features_values) for tree in model.estimators_])
energy_tel[idx] = np.mean(predictions_trees, axis=0)
weight_tel[idx] = np.std(predictions_trees, axis=0)
else:
data.eval(f'estimation_weight = {estimation_weight}', inplace=True)
energy_tel[idx] = model.predict(features_values)
weight_tel[idx] = data["estimation_weight"]

reco_energy_tel[tel_id] = energy_tel[idx]

reco_energy = np.sum(weight_tel * energy_tel) / sum(weight_tel)
Expand Down
3 changes: 3 additions & 0 deletions protopipe/scripts/tests/test_config_analysis_north.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,13 @@ Reconstruction:
EnergyRegressor:
# Name of the regression method (e.g. AdaBoostRegressor, etc.)
method_name: 'RandomForestRegressor'
estimation_weight: 'STD'

# Parameters for g/h separation
GammaHadronClassifier:
# Name of the classification method (e.g. AdaBoostRegressor, etc.)
method_name: 'RandomForestClassifier'
# Use probability output or score
use_proba: True
estimation_weight: hillas_intensity**0.54 # empirical value from CTAMARS

2 changes: 2 additions & 0 deletions protopipe/scripts/tests/test_config_analysis_south.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,12 @@ Reconstruction:
EnergyRegressor:
# Name of the regression method (e.g. AdaBoostRegressor, etc.)
method_name: 'RandomForestRegressor'
estimation_weight: 'STD'

# Parameters for g/h separation
GammaHadronClassifier:
# Name of the classification method (e.g. AdaBoostRegressor, etc.)
method_name: 'RandomForestClassifier'
# Use probability output or score
use_proba: True
estimation_weight: hillas_intensity**0.54 # empirical value from CTAMARS
21 changes: 17 additions & 4 deletions protopipe/scripts/write_dl2.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,12 @@ def main():

# Regressor and classifier methods
regressor_method = cfg["EnergyRegressor"]["method_name"]
try:
estimation_weight_energy = cfg["EnergyRegressor"]["estimation_weight"]
except KeyError:
estimation_weight_energy = "STD"
classifier_method = cfg["GammaHadronClassifier"]["method_name"]
estimation_weight_classification = cfg["GammaHadronClassifier"]["estimation_weight"]
use_proba_for_classifier = cfg["GammaHadronClassifier"]["use_proba"]

if regressor_method in ["None", "none", None]:
Expand Down Expand Up @@ -410,13 +415,18 @@ class RecoEvent(tb.IsDescription):

############################################################

if good_for_reco[tel_id] == 1:
if (good_for_reco[tel_id] == 1) and (estimation_weight_energy == "STD"):
# Get an array of trees
predictions_trees = np.array([tree.predict(features_values) for tree in model.estimators_])
energy_tel[idx] = np.mean(predictions_trees, axis=0)
weight_tel[idx] = np.std(predictions_trees, axis=0)
elif (good_for_reco[tel_id] == 1):
data.eval(f'estimation_weight_energy = {estimation_weight_energy}', inplace=True)
energy_tel[idx] = model.predict(features_values)
weight_tel[idx] = data["estimation_weight_energy"]
else:
energy_tel[idx] = np.nan

weight_tel[idx] = moments.intensity

# Record the values regardless of the validity
# We don't use this now, but it should be recorded
energy_tel_classifier[tel_id] = energy_tel[idx]
Expand Down Expand Up @@ -502,6 +512,9 @@ class RecoEvent(tb.IsDescription):

############################################################

# add weigth to event dataframe
data.eval(f'estimation_weight_classification = {estimation_weight_classification}', inplace=True)

# Here we check for valid telescope-wise energies
# Because it means that it's a good image
# WARNING: currently we should REQUIRE to estimate both
Expand All @@ -512,7 +525,7 @@ class RecoEvent(tb.IsDescription):
score_tel[idx] = model.decision_function(features_values)
else:
gammaness_tel[idx] = model.predict_proba(features_values)[:, 1]
weight_tel[idx] = np.sqrt(moments.intensity)
weight_tel[idx] = data["estimation_weight_classification"]
else:
# WARNING:
# this is true only because we use telescope-wise
Expand Down

0 comments on commit b1e0a9d

Please sign in to comment.