Include cleaning images
Cleaning is now included in all the base preprocessors. EventFile still does not have it, because the cleaning relies on the PhotonStream event existing for parts of it.

Once the phs files are reprocessed, this should be a drop-in replacement for the current images.
jacobbieker committed Nov 26, 2018
1 parent a252d1b commit 47f562e
Showing 5 changed files with 164 additions and 123 deletions.
111 changes: 107 additions & 4 deletions factnn/data/preprocess/base_preprocessor.py
@@ -1,6 +1,8 @@
import numpy as np
import fact
from fact.instrument import get_pixel_coords
from fact.instrument.constants import PIXEL_SPACING_MM
from sklearn.cluster import DBSCAN
import pickle
import os

@@ -177,13 +179,13 @@ def generate_rebinning(self, size):
hex_to_grid = [chid_to_pixel, pixel_index_to_grid]
return hex_to_grid

def batch_processor(self):
def batch_processor(self, clean_images=False):
return NotImplemented

def single_processor(self, normalize=False, collapse_time=False, final_slices=5):
def single_processor(self, normalize=False, collapse_time=False, final_slices=5, clean_images=False):
return NotImplemented

def on_batch_processor(self, filepath, size, sample=False, normalize=False, collapse_time=False, final_slices=5):
def on_batch_processor(self, filepath, size, sample=False, normalize=False, collapse_time=False, final_slices=5, clean_images=False):
"""
        Returns at most size elements from the file at filepath; if sample=True, does reservoir sampling of the entire
        file, otherwise takes the first size elements
@@ -195,7 +197,7 @@ def on_batch_processor(self, filepath, size, sample=False, normalize=False, collapse_time=False, final_slices=5):
"""
        raise NotImplementedError
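
For reference, a minimal sketch of the reservoir sampling the docstring refers to (Algorithm R); an illustration only, not the factnn implementation, which the subclasses provide:

    import random

    def reservoir_sample(iterable, size):
        # Keep the first `size` items, then replace them with decreasing probability,
        # so every item of the stream ends up in the sample with equal probability.
        reservoir = []
        for i, item in enumerate(iterable):
            if i < size:
                reservoir.append(item)
            else:
                j = random.randint(0, i)
                if j < size:
                    reservoir[j] = item
        return reservoir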

def event_processor(self, directory):
def event_processor(self, directory, clean_images=False):
"""
        Goes through each event in all the files specified in self.paths and returns each event individually, including the
        default photon-stream representation and auxiliary data, and saves it to a new file based on the
@@ -298,5 +300,106 @@ def convert_to_gaussian_image(self, phs_photons, sigma, size, delta=PIXEL_SPACING_MM):

        raise NotImplementedError
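
As a rough sketch of what convert_to_gaussian_image could do, assuming phs_photons are (x, y) grid coordinates to be smeared onto a square image (an assumption; the actual implementation lives in the subclasses):

    import numpy as np

    def gaussian_image_sketch(photon_xy, sigma, size):
        # Sum a 2D Gaussian centred on each photon position onto a size x size grid
        grid = np.zeros((size, size))
        ys, xs = np.mgrid[0:size, 0:size]
        for px, py in photon_xy:
            grid += np.exp(-((xs - px) ** 2 + (ys - py) ** 2) / (2.0 * sigma ** 2))
        return grid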

def select_clustered_photons(self, dbscan, point_cloud):
"""
        Take the DBSCAN output on the point cloud and translate it back to a list of lists of photons.
        Useful for using the clustering to reduce the noise in the image stacks later, i.e. image cleaning.
        The raw photons are stored in order of CHID, with 255 separating the different pixels and the
        numbers indicating the arrival time slice of each photon.
        The cleaned photons can be found by taking the core samples from DBSCAN and only keeping those photons.
        Core samples define the clusters; photons that are not core samples lie on the fringe of a cluster,
        and we can test discarding them to clean the image further, so only dbscan.core_sample_indices_ is
        really needed.
        Raw to point cloud is given below, where cx and cy give the pixel geometry (not needed here):
def raw_phs_to_point_cloud(raw_phs, cx, cy):
number_photons = len(raw_phs) - NUMBER_OF_PIXELS
cloud = np.zeros(shape=(number_photons, 3))
pixel_chid = 0
p = 0
for s in raw_phs:
if s == io.binary.LINEBREAK:
pixel_chid += 1
else:
cloud[p, 0] = cx[pixel_chid]
cloud[p, 1] = cy[pixel_chid]
cloud[p, 2] = s*TIME_SLICE_DURATION_S
p += 1
return cloud
        :param dbscan: the fitted sklearn DBSCAN instance
        :param point_cloud: the point cloud the DBSCAN instance was fitted on
        :return: the clustered photons in the raw photon-stream format
"""
TIME_SLICE_DURATION_S = 0.5e-9 # Taken from FACT magic constants

core_sample = dbscan.core_sample_indices_
print(core_sample)
# Now go backwards through the raw photons and gather those ones in a new raw photon format
# That format will be passed to raw_photons_to_list_of_lists to create a new list_of_lists repr

pixels = fact.instrument.get_pixel_dataframe()
pixels.sort_values('CHID', inplace=True)

x_angle = np.deg2rad(pixels.x_angle.values)
y_angle = np.deg2rad(pixels.y_angle.values)
new_list_of_list = [[] for _ in range(1440)]
for element in core_sample:
# Now have each index into the point cloud, so start backwards
current_photon = point_cloud[element]
for index in range(1440):
if np.isclose(current_photon[0], x_angle[index]) and np.isclose(current_photon[1], y_angle[index]):
time_slice = int(np.round(current_photon[2] / TIME_SLICE_DURATION_S))
# Now add to new_raw
new_list_of_list[index].append(time_slice)

# Convert back to raw
new_raw = []
for sublist in new_list_of_list:
            # Go through each sublist in order, terminating each pixel's list with 255
            if not sublist:
                # List is empty, so just add the terminator
                new_raw.append(255)
            else:
                # Add the sublist's photons
                for item in sublist:
                    new_raw.append(item)
                # Done with sublist, so add the terminator
                new_raw.append(255)

return new_raw
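
To make the raw format concrete, a hypothetical three-pixel example of the encoding this method rebuilds (the real camera has 1440 pixels):

    # pixel 0 saw photons in time slices 30 and 32, pixel 1 saw none, pixel 2 saw one in slice 45
    list_of_lists = [[30, 32], [], [45]]
    raw = []
    for sublist in list_of_lists:
        raw.extend(sublist)
        raw.append(255)  # 255 terminates each pixel's photon list
    assert raw == [30, 32, 255, 255, 45, 255]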

def clean_image(self, event, min_samples=20, eps=0.1):
"""
        Clean the image with various methods; currently only DBSCAN clustering.
        The DBSCAN code is taken almost directly from pyfact.
        :param event: PhotonStream event
        :param min_samples: minimum number of samples in a neighborhood for DBSCAN to consider a point a core point
        :param eps: maximal distance between two samples for them to be considered part of the same
            neighborhood, given as a fraction of the camera diameter
        :return: the same PhotonStream event with only the photons that belong to a cluster
"""

point_cloud = event.photon_stream.point_cloud

        # Scale the time axis to an angular coordinate so that a single eps applies to all three axes
        deg_over_s = 0.35e9
        xyt = event.photon_stream.point_cloud.copy()
        xyt[:, 2] *= np.deg2rad(deg_over_s)

        # eps is interpreted as a fraction of the camera diameter, so convert it to an absolute distance
        fov_radius = np.deg2rad(fact.instrument.camera.FOV_RADIUS)
        abs_eps = eps * (2.0 * fov_radius)

dbscan = DBSCAN(eps=abs_eps, min_samples=min_samples).fit(xyt)

event.photon_stream.raw = self.select_clustered_photons(dbscan, point_cloud)

return event
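
As a worked example of the eps scaling, assuming FACT's FOV_RADIUS is 2.25 degrees (an assumption; the real value comes from fact.instrument.camera):

    import numpy as np

    fov_radius = np.deg2rad(2.25)        # assumed FOV radius, ~0.0393 rad
    abs_eps = 0.1 * (2.0 * fov_radius)   # default eps=0.1 gives ~0.0079 rad

Expressing eps as a fraction of the camera diameter keeps the default meaningful regardless of the angular scale of the point cloud.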

def format(self, batch):
return NotImplemented
2 changes: 1 addition & 1 deletion factnn/data/preprocess/eventfile_preprocessor.py
@@ -36,7 +36,7 @@ def on_files_processor(self, paths, collapse_time=True, final_slices=5):
return all_data


def single_processor(self, normalize=False, collapse_time=False, final_slices=5):
def single_processor(self, normalize=False, collapse_time=False, final_slices=5, clean_images=False):
pass

def event_file_processor(self, filepath, normalize=False, collapse_time=False, final_slices=5):
8 changes: 6 additions & 2 deletions factnn/data/preprocess/observation_preprocessors.py
@@ -17,7 +17,7 @@ def init(self):
"timestamp", "pointing_position_az",
"pointing_position_zd"])

def batch_processor(self):
def batch_processor(self, clean_images=False):
self.init()
for index, file in enumerate(self.paths):
print(file)
@@ -29,6 +29,8 @@ def batch_processor(self):
(self.dl2_file['night'] == event.observation_info.night) &
(self.dl2_file['run_id'] == event.observation_info.run)]
if not df_event.empty:
if clean_images:
event = self.clean_image(event)
# In the event chosen from the file
# Each event is the same as each line below
source_pos_x = df_event['source_position_x'].values[0]
@@ -64,7 +66,7 @@ def batch_processor(self):
except Exception as e:
print(str(e))

def single_processor(self, normalize=False, collapse_time=False, final_slices=5, as_channels=False):
def single_processor(self, normalize=False, collapse_time=False, final_slices=5, as_channels=False, clean_images=False):
while True:
print("New Crab")
for index, file in enumerate(self.paths):
@@ -76,6 +78,8 @@ def single_processor(self, normalize=False, collapse_time=False, final_slices=5, as_channels=False):
(self.dl2_file['night'] == event.observation_info.night) &
(self.dl2_file['run_id'] == event.observation_info.run)]
if not df_event.empty:
if clean_images:
event = self.clean_image(event)
# In the event chosen from the file
# Each event is the same as each line below
source_pos_x = df_event['source_position_x'].values[0]
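
A minimal sketch of driving the updated processors with cleaning enabled; the class name and constructor arguments here are hypothetical, since they are not shown in this diff:

    # Hypothetical usage; ObservationPreprocessor and its arguments are assumptions
    preprocessor = ObservationPreprocessor(paths=["path/to/crab.phs.jsonl.gz"])
    single_gen = preprocessor.single_processor(collapse_time=True, final_slices=5, clean_images=True)
    batch_gen = preprocessor.batch_processor(clean_images=True)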
