Update Diffuse Gamma Preprocessor to correctly generate files
jacobbieker committed Jul 5, 2020
1 parent 4668117 commit d7883aa
Showing 3 changed files with 197 additions and 157 deletions.
examples/create_event_files.py: 91 changes (30 additions, 61 deletions)
@@ -1,6 +1,6 @@
 import os
-from os import sys, path
-sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
+from os import path
+#sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
 os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 from factnn import ProtonPreprocessor, GammaPreprocessor, GammaDiffusePreprocessor
@@ -10,15 +10,15 @@
 from functools import partial


-base_dir = "/home/jacob/ihp-pc41.ethz.ch/public/phs/"
+base_dir = "/run/media/jacob/SSD_Backup/phs/"
 obs_dir = [base_dir + "public/"]
-gamma_dir = [base_dir + "sim/gamma/"]
+gamma_dir = [base_dir + "sim/gamma/werner", base_dir + "sim/gamma/gustav"]
 proton_dir = [base_dir + "sim/proton/"]
-gamma_dl2 = "../gamma_simulations_diffuse_facttools_dl2.hdf5"
+gamma_dl2 = "/run/media/jacob/SSD_Backup/gamma_simulations_diffuse_facttools_dl2.hdf5"


 #output_path = "/home/jacob/Documents/cleaned_event_files_test"
-output_path = "/home/jacob/Documents/iact_events/"
+output_path = "/run/media/jacob/SSD_Backup/iact_events"

 shape = [30,70]
 rebin_size = 5
@@ -30,12 +30,13 @@
         for file in files:
             if file.endswith("phs.jsonl.gz"):
                 gamma_paths.append(os.path.join(root, file))
+print(len(gamma_paths))

 def gf(clump_size, path):
     print("Gamma_Diffuse")
     print("Size: ", clump_size)
     gamma_configuration = {
-        'rebin_size': 5,
+        'rebin_size': clump_size,
         'output_file': "../gamma.hdf5",
         'shape': shape,
         'paths': [path],
@@ -46,40 +47,6 @@ def gf(clump_size, path):
     gamma_train_preprocessor.event_processor(directory=os.path.join(output_path, "gamma_diffuse"), clean_images=True, only_core=True, clump_size=clump_size)


-def f(clump_size, path):
-    print("Gamma")
-    print("Size: ", clump_size)
-    gamma_configuration = {
-        'rebin_size': 5,
-        'output_file': "../gamma.hdf5",
-        'shape': shape,
-        'paths': [path]
-    }
-
-    gamma_train_preprocessor = GammaPreprocessor(config=gamma_configuration)
-    gamma_train_preprocessor.event_processor(os.path.join(output_path, "gammaFeature"), clean_images=True, only_core=True, clump_size=clump_size)
-
-# Get paths from the directories
-crab_paths = []
-for directory in proton_dir:
-    for root, dirs, files in os.walk(directory):
-        for file in files:
-            if file.endswith("phs.jsonl.gz"):
-                crab_paths.append(os.path.join(root, file))
-
-def d(clump_size, path):
-    print(len(path))
-    proton_configuration = {
-        'rebin_size': rebin_size,
-        'output_file': "../proton.hdf5",
-        'shape': shape,
-        'paths': [path],
-    }
-
-    proton_train_preprocessor = ProtonPreprocessor(config=proton_configuration)
-    proton_train_preprocessor.event_processor(os.path.join(output_path, "protonFeature"), clean_images=True, only_core=True, clump_size=clump_size)
-
-
 # Now do the Kfold Cross validation Part for both sets of paths


@@ -93,48 +60,50 @@ def d(clump_size, path):
     if not os.path.exists(path):
         os.makedirs(path)

-proton_pool = Pool(8)
-gamma_pool = Pool(8)
-dunc = partial(d, clump_size)
+#proton_pool = Pool(8)
+gamma_pool = Pool(1)
+#dunc = partial(d, clump_size)
+#gunc = partial(gf, clump_size)
-g = proton_pool.map_async(dunc, crab_paths)
-func = partial(f, clump_size)
-r = gamma_pool.map_async(func, gamma_paths)
+#g = proton_pool.map_async(dunc, crab_paths)
+func = partial(gf, clump_size)
+r = gamma_pool.map(func, gamma_paths)

-g.wait()
-print("\n\n\n\n\n\n\n----------------------------------Done Proton------------------------------------------------\n\n\n\n\n\n\n\n")
+#g.wait()
+#print("\n\n\n\n\n\n\n----------------------------------Done Proton------------------------------------------------\n\n\n\n\n\n\n\n")
 r.wait()

 clump_size_2 = 10
-output_paths = [os.path.join(output_path, "proton", "no_clean"),os.path.join(output_path, "proton", "clump"+str(clump_size)),os.path.join(output_path, "proton", "core"+str(clump_size)),
-                os.path.join(output_path, "gamma", "no_clean"),os.path.join(output_path, "gamma", "clump"+str(clump_size)),os.path.join(output_path, "gamma", "core"+str(clump_size))]
+output_paths = [os.path.join(output_path, "protonFeature", "no_clean"),os.path.join(output_path, "protonFeature", "clump"+str(clump_size)),os.path.join(output_path, "protonFeature", "core"+str(clump_size)),
+                os.path.join(output_path, "gammaFeature", "no_clean"),os.path.join(output_path, "gammaFeature", "clump"+str(clump_size)),os.path.join(output_path, "gammaFeature", "core"+str(clump_size)),
+                os.path.join(output_path, "gamma_diffuse", "no_clean"),os.path.join(output_path, "gamma_diffuse", "clump"+str(clump_size)),os.path.join(output_path, "gamma_diffuse", "core"+str(clump_size))]
 for path in output_paths:
     if not os.path.exists(path):
         os.makedirs(path)

-dunc = partial(d, clump_size_2)
-g = proton_pool.map_async(dunc, crab_paths)
+#dunc = partial(d, clump_size_2)
+#g = proton_pool.map_async(dunc, crab_paths)
 r.wait()

-func = partial(f, clump_size_2)
+func = partial(gf, clump_size_2)
 r = gamma_pool.map_async(func, gamma_paths)

 r.wait()
-g.wait()
+#g.wait()

 clump_size_3 = 15
-output_paths = [os.path.join(output_path, "proton", "no_clean"),os.path.join(output_path, "proton", "clump"+str(clump_size)),os.path.join(output_path, "proton", "core"+str(clump_size)),
-                os.path.join(output_path, "gamma", "no_clean"),os.path.join(output_path, "gamma", "clump"+str(clump_size)),os.path.join(output_path, "gamma", "core"+str(clump_size))]
+output_paths = [os.path.join(output_path, "protonFeature", "no_clean"),os.path.join(output_path, "protonFeature", "clump"+str(clump_size)),os.path.join(output_path, "protonFeature", "core"+str(clump_size)),
+                os.path.join(output_path, "gammaFeature", "no_clean"),os.path.join(output_path, "gammaFeature", "clump"+str(clump_size)),os.path.join(output_path, "gammaFeature", "core"+str(clump_size)),
+                os.path.join(output_path, "gamma_diffuse", "no_clean"),os.path.join(output_path, "gamma_diffuse", "clump"+str(clump_size)),os.path.join(output_path, "gamma_diffuse", "core"+str(clump_size))]
 for path in output_paths:
     if not os.path.exists(path):
         os.makedirs(path)
-func = partial(f, clump_size_3)
-dunc = partial(d, clump_size_3)
+func = partial(gf, clump_size_3)
+#dunc = partial(d, clump_size_3)
 r = gamma_pool.map_async(func, gamma_paths)
-g = proton_pool.map_async(dunc, crab_paths)
+#g = proton_pool.map_async(dunc, crab_paths)

 r.wait()
-g.wait()
+#g.wait()
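A note on the pattern used above: binding clump_size with functools.partial turns the two-argument worker gf into the one-argument function that Pool.map expects, and Pool(1) runs the gamma-diffuse jobs one at a time. A minimal, self-contained sketch of that pattern, with a hypothetical worker process_path and made-up paths standing in for gf and the real file list:

    from functools import partial
    from multiprocessing import Pool

    def process_path(clump_size, path):
        # Hypothetical stand-in for gf(): one preprocessor run per input file.
        print("clump size", clump_size, "file", path)

    if __name__ == "__main__":
        paths = ["a.phs.jsonl.gz", "b.phs.jsonl.gz"]  # made-up inputs
        func = partial(process_path, 5)   # bind clump_size; path stays free
        with Pool(1) as pool:             # Pool(1) mirrors the serialized gamma_pool
            pool.map(func, paths)         # map blocks until every path is done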
factnn/data/preprocess/simulation_preprocessors.py: 168 changes (84 additions, 84 deletions)
@@ -588,92 +588,92 @@ def event_processor(self, directory, clean_images=False, only_core=True, clump_s
         for index, file in enumerate(self.paths):
             mc_truth = file.split(".phs")[0] + ".ch.gz"
             file_name = file.split("/")[-1].split(".phs")[0]
-            #try:
-            sim_reader = ps.SimulationReader(
-                photon_stream_path=file,
-                mmcs_corsika_path=mc_truth
-            )
-            counter = 0
-            for event in sim_reader:
-                df_event = self.dl2_file.loc[(np.isclose(self.dl2_file['corsika_event_header_total_energy'],
-                                                         event.simulation_truth.air_shower.energy)) &
-                                             (self.dl2_file['run_id'] == event.simulation_truth.run)]
-                counter += 1
-                if not df_event.empty:
-
-                    if os.path.isfile(os.path.join(directory, "clump"+str(clump_size), str(file_name) + "_" + str(counter))) \
-                            and os.path.isfile(os.path.join(directory, "core"+str(clump_size), str(file_name) + "_" + str(counter))):
-                        print("True: " + str(file_name) + "_" + str(counter))
-                        continue
-
-                    if clean_images:
-                        all_photons, clump_photons, core_photons = self.clean_image(event, only_core=only_core,min_samples=clump_size)
-                        if core_photons is None:
-                            print("No Clumps, skip")
-                            continue
+            try:
+                sim_reader = ps.SimulationReader(
+                    photon_stream_path=file,
+                    mmcs_corsika_path=mc_truth
+                )
+                counter = 0
+                for event in sim_reader:
+                    print(f"Event Count: {counter}")
+                    df_event = self.dl2_file.loc[(np.isclose(self.dl2_file['corsika_event_header_total_energy'],
+                                                             event.simulation_truth.air_shower.energy)) &
+                                                 (self.dl2_file['run_id'] == event.simulation_truth.run)]
+                    counter += 1
+                    if not df_event.empty:
+                        if os.path.isfile(os.path.join(directory, "clump"+str(clump_size), str(file_name) + "_" + str(counter))) \
+                                and os.path.isfile(os.path.join(directory, "core"+str(clump_size), str(file_name) + "_" + str(counter))):
+                            print("True: " + str(file_name) + "_" + str(counter))
+                            continue
-                        else:
-                            for key, photon_set in {"no_clean": all_photons, "clump": clump_photons, "core": core_photons}.items():
-                                event.photon_stream.raw = photon_set
-                                # Now extract parameters from the available photons and save them to a file
-                                features = extract_single_simulation_features(event, min_samples=1)
-                                # In the event chosen from the file
-                                # Each event is the same as each line below
-                                cog_x = df_event['cog_x'].values[0]
-                                cog_y = df_event['cog_y'].values[0]
-                                act_sky_source_zero = df_event['source_position_x'].values[0]
-                                act_sky_source_one = df_event['source_position_y'].values[0]
-                                event_photons = event.photon_stream.list_of_lists
-                                zd_deg = event.zd
-                                az_deg = event.az
-                                delta = df_event['delta'].values[0]
-                                energy = event.simulation_truth.air_shower.energy
-                                sky_source_zd = df_event['source_position_zd'].values[0]
-                                sky_source_az = df_event['source_position_az'].values[0]
-                                zd_deg1 = df_event['aux_pointing_position_az'].values[0]
-                                az_deg1 = df_event['aux_pointing_position_zd'].values[0]
-                                data_dict = [[event_photons, act_sky_source_zero, act_sky_source_one,
-                                              cog_x, cog_y, zd_deg, az_deg, sky_source_zd, sky_source_az, delta,
-                                              energy, zd_deg1, az_deg1],
-                                             {'Image': 0, 'Source_X': 1, 'Source_Y': 2, 'COG_X': 3, 'COG_Y': 4, 'Zd_Deg': 5,
-                                              'Az_Deg': 6,
-                                              'Source_Zd': 7, 'Source_Az': 8, 'Delta': 9, 'Energy': 10, 'Pointing_Zd': 11,
-                                              'Pointing_Az': 12}, features]
-                                if key != "no_clean":
-                                    with open(os.path.join(directory, key+str(clump_size), str(file_name) + "_" + str(counter)), "wb") as event_file:
-                                        pickle.dump(data_dict, event_file)
-                                else:
-                                    if not os.path.isfile(os.path.join(directory, key, str(file_name) + "_" + str(counter))):
-                                        with open(os.path.join(directory, key, str(file_name) + "_" + str(counter)), "wb") as event_file:
-                                            pickle.dump(data_dict, event_file)
+
+                        if clean_images:
+                            all_photons, clump_photons, core_photons, dbscan = self.clean_image(event, only_core=only_core,min_samples=clump_size)
+                            if core_photons is None:
+                                print("No Clumps, skip")
+                                continue
+                            else:
+                                for key, photon_set in {"no_clean": all_photons, "clump": clump_photons, "core": core_photons}.items():
+                                    event.photon_stream.raw = photon_set
+                                    # Now extract parameters from the available photons and save them to a file
+                                    features = extract_single_simulation_features(event, min_samples=1)
+                                    # In the event chosen from the file
+                                    # Each event is the same as each line below
+                                    cog_x = df_event['cog_x'].values[0]
+                                    cog_y = df_event['cog_y'].values[0]
+                                    act_sky_source_zero = df_event['source_position_x'].values[0]
+                                    act_sky_source_one = df_event['source_position_y'].values[0]
+                                    event_photons = event.photon_stream.list_of_lists
+                                    zd_deg = event.zd
+                                    az_deg = event.az
+                                    delta = df_event['delta'].values[0]
+                                    energy = event.simulation_truth.air_shower.energy
+                                    sky_source_zd = df_event['source_position_zd'].values[0]
+                                    sky_source_az = df_event['source_position_az'].values[0]
+                                    zd_deg1 = df_event['aux_pointing_position_az'].values[0]
+                                    az_deg1 = df_event['aux_pointing_position_zd'].values[0]
+                                    data_dict = [[event_photons, act_sky_source_zero, act_sky_source_one,
+                                                  cog_x, cog_y, zd_deg, az_deg, sky_source_zd, sky_source_az, delta,
+                                                  energy, zd_deg1, az_deg1],
+                                                 {'Image': 0, 'Source_X': 1, 'Source_Y': 2, 'COG_X': 3, 'COG_Y': 4, 'Zd_Deg': 5,
+                                                  'Az_Deg': 6,
+                                                  'Source_Zd': 7, 'Source_Az': 8, 'Delta': 9, 'Energy': 10, 'Pointing_Zd': 11,
+                                                  'Pointing_Az': 12}, features]
+                                    if key != "no_clean":
+                                        with open(os.path.join(directory, key+str(clump_size), str(file_name) + "_" + str(counter)), "wb") as event_file:
+                                            pickle.dump(data_dict, event_file)
-                    else:
-                        # In the event chosen from the file
-                        # Each event is the same as each line below
-                        features = extract_single_simulation_features(event)
-                        cog_x = df_event['cog_x'].values[0]
-                        cog_y = df_event['cog_y'].values[0]
-                        act_sky_source_zero = df_event['source_position_x'].values[0]
-                        act_sky_source_one = df_event['source_position_y'].values[0]
-                        event_photons = event.photon_stream.list_of_lists
-                        zd_deg = event.zd
-                        az_deg = event.az
-                        delta = df_event['delta'].values[0]
-                        energy = event.simulation_truth.air_shower.energy
-                        sky_source_zd = df_event['source_position_zd'].values[0]
-                        sky_source_az = df_event['source_position_az'].values[0]
-                        zd_deg1 = df_event['aux_pointing_position_az'].values[0]
-                        az_deg1 = df_event['aux_pointing_position_zd'].values[0]
-                        data_dict = [[event_photons, act_sky_source_zero, act_sky_source_one,
-                                      cog_x, cog_y, zd_deg, az_deg, sky_source_zd, sky_source_az, delta,
-                                      energy, zd_deg1, az_deg1],
-                                     {'Image': 0, 'Source_X': 1, 'Source_Y': 2, 'COG_X': 3, 'COG_Y': 4, 'Zd_Deg': 5,
-                                      'Az_Deg': 6,
-                                      'Source_Zd': 7, 'Source_Az': 8, 'Delta': 9, 'Energy': 10, 'Pointing_Zd': 11,
-                                      'Pointing_Az': 12}, features]
-                        with open(os.path.join(directory, str(file_name) + "_" + str(counter)), "wb") as event_file:
-                            pickle.dump(data_dict, event_file)
-            #except Exception as e:
-            #    print(str(e))
-            #    pass
+                                    else:
+                                        if not os.path.isfile(os.path.join(directory, key, str(file_name) + "_" + str(counter))):
+                                            with open(os.path.join(directory, key, str(file_name) + "_" + str(counter)), "wb") as event_file:
+                                                pickle.dump(data_dict, event_file)
+                        else:
+                            # In the event chosen from the file
+                            # Each event is the same as each line below
+                            features = extract_single_simulation_features(event)
+                            cog_x = df_event['cog_x'].values[0]
+                            cog_y = df_event['cog_y'].values[0]
+                            act_sky_source_zero = df_event['source_position_x'].values[0]
+                            act_sky_source_one = df_event['source_position_y'].values[0]
+                            event_photons = event.photon_stream.list_of_lists
+                            zd_deg = event.zd
+                            az_deg = event.az
+                            delta = df_event['delta'].values[0]
+                            energy = event.simulation_truth.air_shower.energy
+                            sky_source_zd = df_event['source_position_zd'].values[0]
+                            sky_source_az = df_event['source_position_az'].values[0]
+                            zd_deg1 = df_event['aux_pointing_position_az'].values[0]
+                            az_deg1 = df_event['aux_pointing_position_zd'].values[0]
+                            data_dict = [[event_photons, act_sky_source_zero, act_sky_source_one,
+                                          cog_x, cog_y, zd_deg, az_deg, sky_source_zd, sky_source_az, delta,
+                                          energy, zd_deg1, az_deg1],
+                                         {'Image': 0, 'Source_X': 1, 'Source_Y': 2, 'COG_X': 3, 'COG_Y': 4, 'Zd_Deg': 5,
+                                          'Az_Deg': 6,
+                                          'Source_Zd': 7, 'Source_Az': 8, 'Delta': 9, 'Energy': 10, 'Pointing_Zd': 11,
+                                          'Pointing_Az': 12}, features]
+                            with open(os.path.join(directory, str(file_name) + "_" + str(counter)), "wb") as event_file:
+                                pickle.dump(data_dict, event_file)
+            except Exception as e:
+                print(str(e))
+                pass

     def batch_processor(self, clean_images=False):
         for index, file in enumerate(self.paths):
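The core of the fix above wraps the reader loop in a try/except and matches each photon-stream event against the DL2 file before anything is written: a row counts as a match when the simulated shower energy agrees (np.isclose, since the energies are floats) and the run id is equal. A self-contained sketch of that matching step, using a toy DataFrame with made-up values in place of self.dl2_file:

    import numpy as np
    import pandas as pd

    # Toy stand-in for self.dl2_file, reduced to the two columns used for matching
    dl2 = pd.DataFrame({
        'corsika_event_header_total_energy': [812.3, 1024.7, 530.1],
        'run_id': [101, 101, 102],
    })

    event_energy, event_run = 1024.7000001, 101  # values a simulated event would carry
    df_event = dl2.loc[np.isclose(dl2['corsika_event_header_total_energy'], event_energy)
                       & (dl2['run_id'] == event_run)]
    if not df_event.empty:
        print("matched DL2 row", df_event.index[0])  # only matched events get written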

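Each event file written by event_processor is a pickled three-element list: the raw values, a name-to-index mapping, and the extracted features. A minimal sketch of reading one back, assuming a hypothetical output path:

    import pickle

    # Hypothetical path to one event file produced by event_processor
    with open("iact_events/gamma_diffuse/core5/some_run_1", "rb") as event_file:
        values, index_map, features = pickle.load(event_file)

    # index_map maps field names to positions in values, e.g. 'Energy' -> 10
    energy = values[index_map['Energy']]
    source_x = values[index_map['Source_X']]
    print(energy, source_x)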