Skip to content

Commit

Permalink
Fix errors with flow add normalization
Browse files Browse the repository at this point in the history
Can now normalize on a per-image basis for the single_processor.

Also, flow_from_files_separation is now, for some reason, predicting very well, whereas before it did not work at all. I think something is wrong....

Other misc fixes and testing done.
  • Loading branch information
jacobbieker committed Oct 31, 2018
1 parent 9e710bd commit 34ddc51
Show file tree
Hide file tree
Showing 8 changed files with 123 additions and 35 deletions.
8 changes: 5 additions & 3 deletions examples/flow_from_files_separation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
gamma_dir = [base_dir + "sim/gamma/"]
proton_dir = [base_dir + "sim/proton/"]

shape = [25,90]
rebin_size = 3
shape = [30,70]
rebin_size = 5

# Get paths from the directories
gamma_paths = []
Expand Down Expand Up @@ -112,5 +112,7 @@
separation_model.train_generator = separation_train
separation_model.validate_generator = separation_validate

separation_model.train(train_generator=separation_train, validate_generator=separation_validate, val_num=int(150000*0.8*0.2), num_events=int(150000*0.8*0.8))
from examples.open_crab_sample_constants import NUM_EVENTS_GAMMA, NUM_EVENTS_PROTON

separation_model.train(train_generator=separation_train, validate_generator=separation_validate, val_num=int(NUM_EVENTS_PROTON*0.8*0.2), num_events=int(NUM_EVENTS_PROTON*0.8*0.8))

34 changes: 18 additions & 16 deletions examples/flow_from_files_source_detection.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from factnn import GammaDiffusePreprocessor, DispGenerator, DispModel
from factnn import GammaDiffusePreprocessor, DispGenerator, DispModel, SignGenerator, SignModel
import os.path
from factnn.utils import kfold

Expand All @@ -7,7 +7,7 @@
gamma_dir = [base_dir + "sim/gamma/"]
gamma_dl2 = "../gamma_simulations_diffuse_facttools_dl2.hdf5"

shape = [0,70]
shape = [30,70]
rebin_size = 5

# Get paths from the directories
Expand Down Expand Up @@ -40,7 +40,7 @@

source_generator_configuration = {
'seed': 1337,
'batch_size': 4,
'batch_size': 8,
'start_slice': 0,
'number_slices': shape[1]-shape[0],
'mode': 'train',
Expand All @@ -51,9 +51,9 @@
}


disp_train = DispGenerator(config=source_generator_configuration)
disp_validate = DispGenerator(config=source_generator_configuration)
disp_test = DispGenerator(config=source_generator_configuration)
disp_train = SignGenerator(config=source_generator_configuration)
disp_validate = SignGenerator(config=source_generator_configuration)
disp_test = SignGenerator(config=source_generator_configuration)

disp_train.train_preprocessor = gamma_train_preprocessor
disp_train.validate_preprocessor = gamma_validate_preprocessor
Expand All @@ -68,23 +68,23 @@
'conv_dropout': 0.2,
'lstm_dropout': 0.3,
'fc_dropout': 0.5,
'num_conv3d': 2,
'kernel_conv3d': 2,
'num_conv3d': 0,
'kernel_conv3d': 5,
'strides_conv3d': 1,
'num_lstm': 1,
'kernel_lstm': 2,
'num_lstm': 4,
'kernel_lstm': 3,
'strides_lstm': 1,
'num_fc': 2,
'num_fc': 1,
'pooling': True,
'neurons': [16, 16, 32, 24, 36],
'neurons': [16, 16, 16, 16, 32],
'shape': [shape[1]-shape[0], gamma_train_preprocessor.shape[2], gamma_train_preprocessor.shape[1], 1],
'start_slice': 0,
'number_slices': 38,
'number_slices': shape[1]-shape[0],
'activation': 'relu',
}

disp_model = DispModel(config=source_model_configuration)

disp_model = SignModel(config=source_model_configuration)
print(disp_model)
"""
Now run the models with the generators!
Expand All @@ -95,4 +95,6 @@
disp_model.validate_generator = disp_validate
disp_model.test_generator = disp_test

disp_model.train(train_generator=disp_train, validate_generator=disp_validate, num_events=gamma_train_preprocessor.count_events(), val_num=gamma_validate_preprocessor.count_events())
# Use fixed fractions of a precomputed event total as an approximation of the real event counts; this is much faster than counting the events
from examples.open_crab_sample_constants import NUM_EVENTS_DIFFUSE
disp_model.train(train_generator=disp_train, validate_generator=disp_validate, num_events=int(NUM_EVENTS_DIFFUSE*0.8*0.8), val_num=int(NUM_EVENTS_DIFFUSE*0.8*0.2))
4 changes: 4 additions & 0 deletions examples/open_crab_sample_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
NUM_EVENTS_GAMMA = 829853
NUM_EVENTS_PROTON = 152165
NUM_EVENTS_DIFFUSE = 552381

21 changes: 19 additions & 2 deletions factnn/data/augment.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
from sklearn.utils import shuffle
import h5py
from keras.utils import to_categorical


def image_augmenter(images):
Expand Down Expand Up @@ -251,6 +252,14 @@ def true_delta(cog_y, source_y, cog_x, source_x):
cog_x - source_x
)

def true_sign(source_x, source_y, cog_x, cog_y, delta):
    """
    Compute the sign label comparing ``delta`` with the source-to-COG direction.

    The reference direction is the angle (in radians, from ``np.arctan2``) of
    the vector pointing from the source position to the centre of gravity.

    :param source_x: x coordinate of the source position
    :param source_y: y coordinate of the source position
    :param cog_x: x coordinate of the centre of gravity
    :param cog_y: y coordinate of the centre of gravity
    :param delta: angle in radians to compare with the reference direction
                  (NOTE(review): presumably the reconstructed shower
                  orientation - confirm against the preprocessor)
    :return: -1 where ``|delta - reference|`` is below pi/2, +1 where it is
             above pi/2, and 0 where it equals pi/2 (scalar or array,
             following NumPy broadcasting of the inputs)
    """
    # Local names deliberately differ from the module-level ``true_delta``
    # function and from this function's own name, so nothing is shadowed.
    reference_angle = np.arctan2(
        cog_y - source_y,
        cog_x - source_x,
    )
    angular_offset = np.abs(delta - reference_angle)
    return np.sign(angular_offset - np.pi / 2)

def get_random_from_paths(preprocessor, size, time_slice, total_slices,
proton_preprocessor=None, type_training=None, augment=True, swap=True, shape=None):
Expand Down Expand Up @@ -298,9 +307,17 @@ def get_random_from_paths(preprocessor, size, time_slice, total_slices,
labels = np.array(labels)
training_data = [item[data_format["Image"]] for item in training_data]
elif type_training == "Sign":
labels = [true_delta(item[data_format['Source_X']], item[data_format['Source_Y']],
item[data_format['COG_X']], item[data_format['COG_Y']]) for item in training_data]
labels = [true_sign(item[data_format['Source_X']], item[data_format['Source_Y']],
item[data_format['COG_X']], item[data_format['COG_Y']], item[data_format['Delta']]) for item in training_data]
labels = np.array(labels)
# Build the one-hot labels by hand, since there are only two classes (sign < 0 and sign >= 0)
new_labels = np.zeros((labels.shape[0],2))
for index, element in enumerate(labels):
if element < 0:
new_labels[index][0] = 1.
else:
new_labels[index][1] = 1.
labels = new_labels
training_data = [item[data_format["Image"]] for item in training_data]

training_data = np.array(training_data)
Expand Down
11 changes: 8 additions & 3 deletions factnn/models/source_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def init(self):
self.model_type = "Sign"
self.auc = 0.0
if self.name is None:
self.name = self.model_type + "_" + self.num_lstm + "LSTM_" + self.num_conv3d + "Conv3D_" + self.num_fc + \
self.name = self.model_type + "_" + str(self.num_lstm) + "LSTM_" + str(self.num_conv3d) + "Conv3D_" + str(self.num_fc) + \
"FC" + ".hdf5"

def create(self):
Expand All @@ -106,12 +106,15 @@ def create(self):
padding='same', input_shape=self.shape, activation=self.activation,
dropout=self.conv_dropout, recurrent_dropout=self.lstm_dropout,
recurrent_activation='hard_sigmoid', return_sequences=True))
if self.pooling:
model.add(MaxPooling3D())
for i in range(self.num_lstm - 1):
model.add(ConvLSTM2D(self.neurons[i + 1], kernel_size=self.kernel_lstm, strides=self.strides_lstm,
padding='same', activation=self.activation,
dropout=self.conv_dropout, recurrent_dropout=self.lstm_dropout,
recurrent_activation='hard_sigmoid', return_sequences=True))

if self.pooling:
model.add(MaxPooling3D())
for i in range(self.num_conv3d):
model.add(Conv3D(self.neurons[self.num_lstm + i],
kernel_size=self.kernel_conv3d, strides=self.strides_conv3d,
Expand All @@ -123,6 +126,8 @@ def create(self):
model.add(Conv3D(self.neurons[0], input_shape=self.shape,
kernel_size=self.kernel_conv3d, strides=self.strides_conv3d,
padding='same', activation=self.activation))
if self.pooling:
model.add(MaxPooling3D())
for i in range(self.num_conv3d - 1):
model.add(Conv3D(self.neurons[i + 1],
kernel_size=self.kernel_conv3d, strides=self.strides_conv3d,
Expand All @@ -137,7 +142,7 @@ def create(self):
model.add(Dropout(self.fc_dropout))

# Final Dense layer
model.add(Dense(1, activation='softmax'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy',
metrics=['acc'])

Expand Down
21 changes: 21 additions & 0 deletions factnn/preprocess/base_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,27 @@ def count_events(self):
"""
return NotImplementedError

def normalize_image(self, iamge):
    """
    Standardize every slice of every data cube in ``iamge`` independently.

    Each slice has its own mean subtracted and is divided by the larger of
    its standard deviation and ``1 / sqrt(number of elements)``, so a
    constant slice does not cause a division by zero.

    Designed for the single processor.
    NOTE(review): assumes ``iamge`` is in the layout produced by ``reformat``
    and holds a single data cube - confirm against the callers.

    :param iamge: iterable of data cubes, each an iterable of NumPy array slices
    :return: array of shape ``(1, n_slices, d1, d2)``, where ``n_slices``
             counts the slices of all cubes together and ``d1``/``d2`` are the
             first two axes of a slice. Only three axes of the stacked slices
             are kept, so a further axis must have length 1 or the reshape fails.
    """

    def standardize(frame):
        # Lower bound on the divisor keeps flat slices finite.
        floor = 1.0 / np.sqrt(frame.size)
        divisor = np.max([np.std(frame), floor])
        return (frame - np.mean(frame)) / divisor

    # Slices of all cubes end up in one flat sequence, in iteration order.
    stacked = np.array([standardize(frame) for cube in iamge for frame in cube])

    # Prepend a batch axis of length one in front of the first three axes.
    leading = tuple(stacked.shape[axis] for axis in range(3))
    return stacked.reshape((1,) + leading)

def reformat(self, image):
"""
Reformats image to what is needed for LSTM with time, width, height, channel order
Expand Down
31 changes: 23 additions & 8 deletions factnn/preprocess/simulation_preprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def batch_processor(self):
except Exception as e:
print(str(e))

def single_processor(self):
def single_processor(self, normalize=False):
while True:
self.paths = shuffle(self.paths)
for index, file in enumerate(self.paths):
Expand Down Expand Up @@ -72,7 +72,12 @@ def single_processor(self):
input_matrix[coords[0]][coords[1]][value - self.start] += element[1] * 1
data.append([np.fliplr(np.rot90(input_matrix, 3)), energy, zd_deg, az_deg, act_phi, act_theta])
data_format = {'Image': 0, 'Energy': 1, 'Zd_Deg': 2, 'Az_Deg': 3, 'COG_Y': 4, 'Phi': 5, 'Theta': 6,}
yield self.format(data), data_format
data = self.format(data)
if normalize:
data = list(data)
data[0] = self.normalize_image(data[0])
data = tuple(data)
yield data, data_format

except Exception as e:
print(str(e))
Expand Down Expand Up @@ -198,7 +203,7 @@ def batch_processor(self):
except Exception as e:
print(str(e))

def single_processor(self):
def single_processor(self, normalize=False):
while True:
self.paths = shuffle(self.paths)
for index, file in enumerate(self.paths):
Expand Down Expand Up @@ -226,10 +231,15 @@ def single_processor(self):
coords = pixel_index_to_grid[element[0]]
for value in event_photons[index]:
if self.end > value > self.start:
input_matrix[coords[0]][coords[1]][value - self.start] += element[1] * 1
input_matrix[coords[0]][coords[1]][value - self.start] += element[1] * 100
data.append([np.fliplr(np.rot90(input_matrix, 3)), energy, zd_deg, az_deg, act_phi, act_theta])
data_format = {'Image': 0, 'Energy': 1, 'Zd_Deg': 2, 'Az_Deg': 3, 'COG_Y': 4, 'Phi': 5, 'Theta': 6,}
yield self.format(data), data_format
data = self.format(data)
if normalize:
data = list(data)
data[0] = self.normalize_image(data[0])
data = tuple(data)
yield data, data_format

except Exception as e:
print(str(e))
Expand Down Expand Up @@ -369,7 +379,7 @@ def batch_processor(self):
except Exception as e:
print(str(e))

def single_processor(self):
def single_processor(self, normalize=False):
while True:
self.paths = shuffle(self.paths)
for index, file in enumerate(self.paths):
Expand Down Expand Up @@ -410,15 +420,20 @@ def single_processor(self):
for value in event_photons[index]:
if self.end > value > self.start:
input_matrix[coords[0]][coords[1]][value - self.start] += element[1] * 1

data.append([np.fliplr(np.rot90(input_matrix, 3)), act_sky_source_zero, act_sky_source_one,
cog_x, cog_y, zd_deg, az_deg, sky_source_zd, sky_source_az, delta,
energy, zd_deg1, az_deg1])
# Add an associated structure that gives the name?
data_format = {'Image': 0, 'Source_X': 1, 'Source_Y': 2, 'COG_X': 3, 'COG_Y': 4, 'Zd_Deg': 5, 'Az_Deg': 6,
'Source_Zd': 7, 'Source_Az': 8, 'Delta': 9, 'Energy': 10, 'Pointing_Zd': 11, 'Pointing_Az': 12}

if len(data) != 0:
yield self.format(data), data_format
data = self.format(data)
if normalize:
data = list(data)
data[0] = self.normalize_image(data[0])
data = tuple(data)
yield data, data_format

except Exception as e:
print(str(e))
Expand Down
28 changes: 25 additions & 3 deletions factnn/utils/cross_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from sklearn.model_selection import train_test_split, KFold
from sklearn.utils import shuffle


def split_data(indicies, kfolds, seeds=None):
"""
Splits the data into the indicies for kfold validation
Expand All @@ -16,7 +17,7 @@ def split_data(indicies, kfolds, seeds=None):
if seeds is None:
seeds = []
for fold in range(kfolds):
seeds.append(np.random.randint(0,2**32-1))
seeds.append(np.random.randint(0, 2 ** 32 - 1))

# Now split into kfolds,
list_of_training = []
Expand All @@ -30,10 +31,31 @@ def split_data(indicies, kfolds, seeds=None):

for train_index, test_index in kf.split(indicies):
# Need to split train_index into validation data
train_data, validate_data = train_test_split(indicies[train_index], train_size=(1.0-validate_fraction), test_size=validate_fraction)
train_data, validate_data = train_test_split(indicies[train_index], train_size=(1.0 - validate_fraction),
test_size=validate_fraction)
list_of_training.append(train_data)
list_of_validate.append(validate_data)
list_of_testing.append(indicies[test_index])

# Now convert to a numpy array
return list_of_training, list_of_validate, list_of_testing
return list_of_training, list_of_validate, list_of_testing


def cross_validate(model, data, generator, proton_data=None, proton_generator=None, kfolds=5, preprocessor=None,
                   proton_preprocessor=None, plot=False):
    """
    Placeholder for k-fold cross validation of a model on the given data.

    Not implemented yet: none of the arguments are used and the call always
    raises. The intended meaning of the arguments is:

    :param model: Keras Model instance to train
    :param data: Data to cross validate on (NOTE(review): expected format is not defined yet)
    :param generator: Generator for Keras fit_generator
    :param proton_data: Proton data, if used, default is None (NOTE(review): expected format is not defined yet)
    :param proton_generator: Generator for Keras fit_generator if using proton data, default None
    :param kfolds: Number of folds to do for the k-fold validation
    :param preprocessor: Preprocessor for gamma data, if used, usually for streaming files, default None
    :param proton_preprocessor: Preprocessor for the proton data, if used, default is None
    :param plot: Whether to plot the output
    :raises NotImplementedError: always, until the cross validation is written
    """
    # Raise instead of returning the exception class: a returned class is a
    # truthy object that callers could silently mistake for a real result.
    raise NotImplementedError("cross_validate is not implemented yet")

0 comments on commit 34ddc51

Please sign in to comment.