Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Custom arrays, example configs and aux scripts #2

Merged
merged 3 commits into from
Jun 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
71 changes: 71 additions & 0 deletions aux/example_config_files/grid/grid.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
General:
# Path of protopipe configuration file (will be uploaded on the GRID)
# Please refer to directory structure shown at Lugano
config_path: $PATH_CONTAINING_ANALYSIS_YAML_CONFIG_FILE
# Name of the configuration file for the protopipe analysis
config_file: 'analysis.yaml'

# Type of cleaning (wave - for "wavelets" - or tail - for 'tailcut' - )
mode: 'tail'

# Type of the particle to process (gamma, proton or electron) - NOT a list!
particle: 'gamma'

# If 'True' estimate energy (need regressor file)
estimate_energy: True

# Force tailcut cleaning for energy/score estimation
# possible only if using 'wave' mode - if 'tail', leave 'False'
force_tailcut_for_extended_cleaning: False # only evt level

GRID:
# Username on GRID
user_name: '$USERNAME'

# Home on GRID
home_grid: '/vo.cta.in2p3.fr/user/$INITIAL/$USERNAME/'

# Output directories on the GRID home_grid/outdir
outdir: 'cta/ana/'

# Directory for DL1 (energy estimation)
dl1_dir_energy: 'dl1_energy'

# Directory for DL1 (gamma/hadron discrimination)
dl1_dir_discrimination: 'dl1_discrimination'

# home_grid/outdir/models
model_dir: 'estimators'

# Directory for DL2
dl2_dir: 'dl2'

# Number of files per job
n_file_per_job: 10

# Maximum number of jobs (-1 is no limit)
n_jobs_max: -1

# List of banned sites
banned_sites: ['LCG.CPPM.fr']

# The following DL0 paths refer to directory structure shown at Lugano
# you will find it together with the rest of the files

EnergyRegressor:
# This list is used to build an energy regressor, if output_type=DL1 and
# if estimate_energy is False
gamma_list: '../../data/DL0/gamma_energy.list'

GammaHadronClassifier:
# Those lists are used to build a g/h classifier, if output_type=DL1 and
# if estimate_energy is True
gamma_list: '../../data/DL0/gamma_classification.list'
proton_list: '../../data/DL0/proton_classification.list'

Performance:
# Those lists are used to build event lists for performance estimation,
# if output_type=DL2
gamma_list: '../../data/DL0/gamma_perf.list'
proton_list: '../../data/DL0/proton_perf.list'
electron_list: '../../data/DL0/electron_perf.list'
90 changes: 90 additions & 0 deletions aux/example_config_files/protopipe/analysis.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# General information
General:
config_name: 'lapalma_N_fullarray_MARStailcut_corrected_minNtel2'
site: 'north' # North or South
array: 'full_array' # subarray_LSTs, subarray_MSTs, full_array
cam_id_list: ['LSTCam', 'NectarCam'] # Camera identifiers (Should be read in scripts?)

# Cleaning for reconstruction
ImageCleaning:

# Cleaning for reconstruction
biggest:
tail: #
thresholds: # picture, boundary
- LSTCam: [6, 3]
- NectarCam: [8, 4] # TBC
keep_isolated_pixels: False
min_number_picture_neighbors: 2

wave:
# Directory to write temporary files
#tmp_files_directory: '/dev/shm/'
tmp_files_directory: './'
options:
LSTCam:
type_of_filtering: 'hard_filtering'
filter_thresholds: [3, 0.2]
last_scale_treatment: 'drop'
kill_isolated_pixels: True
detect_only_positive_structures: False
clusters_threshold: 0
NectarCam: # TBC
type_of_filtering: 'hard_filtering'
filter_thresholds: [3, 0.2]
last_scale_treatment: 'drop'
kill_isolated_pixels: True
detect_only_positive_structures: False
clusters_threshold: 0

# Cleaning for energy/score estimation
extended:
tail: #
thresholds: # picture, boundary
- LSTCam: [6, 3]
- NectarCam: [8, 4] # TBC
keep_isolated_pixels: False
min_number_picture_neighbors: 2

wave:
# Directory to write temporary files
#tmp_files_directory: '/dev/shm/'
tmp_files_directory: './'
options:
LSTCam:
type_of_filtering: 'hard_filtering'
filter_thresholds: [3, 0.2]
last_scale_treatment: 'posmask'
kill_isolated_pixels: True
detect_only_positive_structures: False
clusters_threshold: 0
NectarCam: # TBC
type_of_filtering: 'hard_filtering'
filter_thresholds: [3, 0.2]
last_scale_treatment: 'posmask'
kill_isolated_pixels: True
detect_only_positive_structures: False
clusters_threshold: 0

# Cut for image selection
ImageSelection:
charge: [50., 1e10]
pixel: [3, 1e10]
ellipticity: [0.1, 0.6]
nominal_distance: [0., 0.8] # in camera radius

# Minimal number of telescopes to consider events
Reconstruction:
min_tel: 2

# Parameters for energy estimation
EnergyRegressor:
# Name of the regression method (e.g. AdaBoostRegressor, etc.)
method_name: 'AdaBoostRegressor'

# Parameters for g/h separation
GammaHadronClassifier:
# Name of the classification method (e.g. AdaBoostClassifier, RandomForestClassifier)
method_name: 'RandomForestClassifier'
# Use probability output or score
use_proba: True
48 changes: 48 additions & 0 deletions aux/example_config_files/protopipe/classifier.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
General:
model_type: 'classifier'
# [...] = your analysis local path
# Please, refer to directory structure shown at Lugano
data_dir: '[...]/data/DL1/for_classification/'
data_sig_file: 'dl1_tail_gamma_merged.h5'
data_bkg_file: 'dl1_tail_proton_merged.h5'
cam_id_list: ['LSTCam', 'NectarCam']
table_name_template: 'feature_events_' # Will be completed with cam_ids
outdir: '[...]/estimators/gamma_hadron_classifier'

Split:
train_fraction: 0.8
use_same_number_of_sig_and_bkg_for_training: False # Lowest statistics will drive the split

Method:
name: 'RandomForestClassifier' # AdaBoostClassifier or RandomForestClassifier
target_name: 'label'
tuned_parameters:
n_estimators: [200]
max_depth: [10] # null for None
min_samples_split: [10]
min_samples_leaf: [10]
scoring: 'roc_auc'
cv: 2
use_proba: True # If False, the output is the score
calibrate_output: False # If true calibrate probability

FeatureList:
- 'log10_reco_energy'
- 'width'
- 'length'
- 'skewness'
- 'kurtosis'
- 'h_max'

SigFiducialCuts:
- 'offset <= 0.5'

BkgFiducialCuts:
- 'offset <= 1.'

Diagnostic:
# Energy binning (used for reco and true energy)
energy:
nbins: 4
min: 0.0125
max: 125
80 changes: 80 additions & 0 deletions aux/example_config_files/protopipe/performance.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
general:
# Directory with input data file
# [...] = your analysis local path
# Please refer to directory structure shown at Lugano
indir: '[...]/data/DL2'
# Template name for input file
template_input_file: 'dl2_{}_{}_merged.h5' # filled with mode and particle type
# Directory for output files
outdir: '[...]/performance'
# Output table name
output_table_name: 'table_best_cutoff'

analysis:

# Theta square cut optimisation (opti, fixed, r68)
thsq_opt:
type: 'opti'
value: 0.2 # In degrees, required when type is 'fixed'

# Normalisation between ON and OFF regions
alpha: 0.2

# Minimal significance
min_sigma: 5

# Minimal number of gamma-ray-like events
min_excess: 10

# Minimal fraction of background events for excess comparison
bkg_syst: 0.05

# Reco energy binning
ereco_binning: # TeV
emin: 0.012589254
emax: 199.52623
nbin: 21

# True energy binning
etrue_binning: # TeV
emin: 0.019952623
emax: 199.52623
nbin: 42

particle_information:
gamma:
n_events_per_file: 1000000 # 10**5 * 10
e_min: 0.003
e_max: 330
gen_radius: 1400
diff_cone: 0
gen_gamma: 2

proton:
n_events_per_file: 4000000 # 2 * 10**5 * 20
e_min: 0.004
e_max: 600
gen_radius: 1900
diff_cone: 10
gen_gamma: 2
offset_cut: 1.

electron:
n_events_per_file: 2000000 # 10**5 * 20
e_min: 0.003
e_max: 330
gen_radius: 1900
diff_cone: 10
gen_gamma: 2
offset_cut: 1.

column_definition:
# Column name for true energy
mc_energy: 'mc_energy'
# Column name for reconstructed energy
reco_energy: 'reco_energy'
# Column name for classification output
classification_output:
name: 'gammaness'
range: [0, 1]
angular_distance_to_the_src: 'xi'
41 changes: 41 additions & 0 deletions aux/example_config_files/protopipe/regressor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
General:
model_type: 'regressor'
# [...] = your analysis local path
# Please, refer to directory structure shown at Lugano
data_dir: '[...]/data/DL1/for_energy_estimation'
data_file: 'dl1_{}_gamma_merged.h5'
outdir: '[...]/estimators/energy_regressor'
cam_id_list: ['LSTCam', 'NectarCam']
table_name_template: 'feature_events_'

Split:
train_fraction: 0.8

Method:
name: 'AdaBoostRegressor'
target_name: 'mc_energy'
tuned_parameters:
learning_rate: [0.3]
n_estimators: [100]
base_estimator__max_depth: [null] # null is equivalent to None
base_estimator__min_samples_split: [2]
base_estimator__min_samples_leaf: [10]
scoring: 'explained_variance'
cv: 2

FeatureList:
- 'log10_charge'
- 'log10_impact'
- 'width'
- 'length'
- 'h_max'

SigFiducialCuts:
- 'xi <= 0.5'

Diagnostic:
# Energy binning (used for reco and true energy)
energy:
nbins: 15
min: 0.0125
max: 125
58 changes: 58 additions & 0 deletions aux/scripts/create_dir_structure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""Ctapipe/protopipe analysis directory structure.

Author: Dr. Michele Peresano
Affiliation: CEA-Saclay/Irfu

"""

import os
import sys

# FUNCTIONS


def makedir(name):
    """Create the directory ``name`` unless it already exists.

    Only the last path component is created (``os.mkdir``); a missing parent
    directory is reported as a failure, it is not created.

    Parameters
    ----------
    name : str
        Path of the directory to create.

    Returns
    -------
    None
        The outcome is reported on standard output. OS errors are printed
        together with their description and are never raised.

    """
    # Try the creation directly instead of testing for existence first:
    # this avoids a race between the check and the call to os.mkdir.
    try:
        os.mkdir(name)
    except FileExistsError:
        # Path already present: nothing to create and nothing to report,
        # so the script can be re-run on an existing analysis tree.
        pass
    except OSError as error:
        print("Creation of the directory {} failed: {}".format(name, error))
    else:
        print("Successfully created the directory {}".format(name))
    return None


# MAIN

# Input can be included in a better way later
# along the rest of the protopipe configuration

# Both positional arguments are mandatory: stop with a usage message rather
# than an IndexError traceback when one of them is missing.
if len(sys.argv) < 3:
    sys.exit(
        "Usage: python create_dir_structure.py WORKING_DIRECTORY ANALYSIS_NAME"
    )

# read name of working directory as 1st argument
wd = sys.argv[1]
# read name of the analysis as 2nd argument
analysisName = sys.argv[2]

# Create analysis parent folder
analysis = os.path.join(wd, analysisName)
makedir(analysis)

# First-level folders of the analysis (keys) and the folders created
# directly inside each of them (values).
subdirectories = {
    "configs": ["grid", "protopipe"],
    "data": ["DL0", "DL1", "DL2", "DL3"],
    "estimators": ["energy_regressor", "gamma_hadron_classifier"],
    "performance": [],  # here no subdirectories, make_performance.py will do it
}

for d, children in subdirectories.items():
    subdir = os.path.join(analysis, d)
    makedir(subdir)
    for dd in children:
        subsubdir = os.path.join(subdir, dd)
        makedir(subsubdir)
        # DL1 gets one extra level, one folder per kind of training data
        if dd == "DL1":
            makedir(os.path.join(subsubdir, "for_classification"))
            makedir(os.path.join(subsubdir, "for_energy_estimation"))

print("Directory structure ready for protopipe analysis on DIRAC.")