Sampling redshifts from a random uniform distribution before training, instead of just choosing the same redshift intervals for each. Every host, SN, age, and snCoeff has a different set of random redshifts. Hopefully this makes the trained set more agnostic to redshift.
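In essence (a minimal sketch, not the repository's exact code; the bounds and count are taken from the driver script below), the change swaps a fixed redshift grid for a fresh uniform draw per template:

import numpy as np

minZ, maxZ, numOfRedshifts = 0.0, 0.8, 20

# Old behaviour: every template was redshifted onto the same evenly spaced grid
redshifts_old = np.linspace(minZ, maxZ, numOfRedshifts + 1)

# New behaviour: each (host, SN, age, snCoeff) combination gets its own random set
redshifts_new = np.random.uniform(low=minZ, high=maxZ, size=numOfRedshifts)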
daniel-muthukrishna committed Apr 15, 2018
1 parent a202fb1 commit f6a7afd
Showing 7 changed files with 39 additions and 39 deletions.
dash/classify.py: 1 addition & 1 deletion
@@ -56,7 +56,7 @@ def __init__(self, filenames=[], redshifts=[], smooth=6, minWave=3500, maxWave=1
def _get_images(self, filename, redshift):
if redshift in list(catalogDict.keys()):
redshift = 0
- loadInputSpectra = LoadInputSpectra(filename, redshift, redshift, self.smooth, self.pars, self.minWave, self.maxWave, self.classifyHost)
+ loadInputSpectra = LoadInputSpectra(filename, redshift, self.smooth, self.pars, self.minWave, self.maxWave, self.classifyHost)
inputImage, inputRedshift, typeNamesList, nw, nBins, inputMinMaxIndex = loadInputSpectra.input_spectra()

return inputImage, typeNamesList, nw, nBins, inputMinMaxIndex, inputRedshift
dash/create_and_save_all_data_files.py: 2 additions & 2 deletions
@@ -24,7 +24,7 @@
f.write("Classify Host: False\n")
f.write("Redshift: Zero\n")
f.write("Redshift Range: 0 to 0.8\n")
f.write("Redshift Precision: 0.05\n")
f.write("Num of Redshifts: 20\n")
f.write("Fraction of Training Set Used: 0.8\n")
f.write("Training Amount: 50 x 500000\n")
f.write("Changed wavelength range to 3000 to 10000A\n")
@@ -39,7 +39,7 @@
print("time spent: {0:.2f}".format(t2 - t1))

# CREATE TRAINING SET FILES
- trainingSetFilename = create_training_set_files(dataDirName, minZ=0, maxZ=0.8, redshiftPrecision=0.05, trainWithHost=True, classifyHost=False)
+ trainingSetFilename = create_training_set_files(dataDirName, minZ=0., maxZ=0.8, numOfRedshifts=20, trainWithHost=True, classifyHost=False)
dataFilenames.append(trainingSetFilename)
t3 = time.time()
print("time spent: {0:.2f}".format(t3 - t2))
dash/create_arrays.py: 7 additions & 3 deletions
@@ -244,7 +244,7 @@ def over_sample_arrays(self, **kwargs):


class CreateArrays(object):
- def __init__(self, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, redshiftPrecision, hostTypes=None, nHostTypes=None):
+ def __init__(self, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, numOfRedshifts, hostTypes=None, nHostTypes=None):
self.w0 = w0
self.w1 = w1
self.nw = nw
@@ -255,7 +255,7 @@ def __init__(self, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, min
self.typeList = typeList
self.minZ = minZ
self.maxZ = maxZ
- self.numOfRedshifts = int((maxZ - minZ) * 1./redshiftPrecision)
+ self.numOfRedshifts = numOfRedshifts
self.ageBinning = AgeBinning(minAge, maxAge, ageBinSize)
self.numOfAgeBins = self.ageBinning.age_bin(maxAge-0.1) + 1
self.nLabels = nTypes * self.numOfAgeBins * nHostTypes
@@ -279,7 +279,11 @@ def combined_sn_gal_templates_to_arrays(self, snTemplateLocation, snTempList, ga
break
for snCoeff in snFractions:
galCoeff = 1 - snCoeff
- for z in np.linspace(self.minZ, self.maxZ, self.numOfRedshifts + 1):
+ if self.numOfRedshifts == 1:
+     redshifts = [self.minZ]
+ else:
+     redshifts = np.random.uniform(low=self.minZ, high=self.maxZ, size=self.numOfRedshifts)
+ for z in redshifts:
tempWave, tempFlux, nCols, ages, tType, tMinIndex, tMaxIndex = readSpectra.sn_plus_gal_template(ageidx, snCoeff, galCoeff, z)
agesList.append(ages[ageidx])
if tMinIndex == tMaxIndex or not tempFlux.any():
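As the commit message says, the draw now happens inside the per-template loops, so every combination of host galaxy, SN template, age and snCoeff sees a different redshift set. Roughly (a simplified sketch; the loop variables and the process_template call are placeholders, not the actual code):

# Each pass through the inner loops makes an independent uniform draw
for ageidx in range(numOfAges):
    for snCoeff in snFractions:
        galCoeff = 1 - snCoeff
        redshifts = np.random.uniform(low=minZ, high=maxZ, size=numOfRedshifts)
        for z in redshifts:
            process_template(ageidx, snCoeff, galCoeff, z)  # placeholder for sn_plus_gal_template(...)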
dash/create_training_set.py: 7 additions & 7 deletions
@@ -8,7 +8,7 @@

class CreateTrainingSet(object):

- def __init__(self, snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, redshiftPrecision, galTemplateLocation, galTempFileList, hostTypes, nHostTypes):
+ def __init__(self, snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, numOfRedshifts, galTemplateLocation, galTempFileList, hostTypes, nHostTypes):
self.snidTemplateLocation = snidTemplateLocation
self.snidTempFileList = snidTempFileList
self.galTemplateLocation = galTemplateLocation
@@ -24,7 +24,7 @@ def __init__(self, snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, m
self.ageBinning = AgeBinning(self.minAge, self.maxAge, self.ageBinSize)
self.numOfAgeBins = self.ageBinning.age_bin(self.maxAge-0.1) + 1
self.nLabels = self.nTypes * self.numOfAgeBins * nHostTypes
- self.createArrays = CreateArrays(w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, redshiftPrecision, hostTypes, nHostTypes)
+ self.createArrays = CreateArrays(w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, numOfRedshifts, hostTypes, nHostTypes)
self.arrayTools = ArrayTools(self.nLabels, self.nw)

def type_amounts(self, labels):
@@ -74,7 +74,7 @@ def sort_data(self):


class SaveTrainingSet(object):
- def __init__(self, snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, redshiftPrecision, galTemplateLocation=None, galTempFileList=None, hostTypes=None, nHostTypes=1):
+ def __init__(self, snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, numOfRedshifts, galTemplateLocation=None, galTempFileList=None, hostTypes=None, nHostTypes=1):
self.snidTemplateLocation = snidTemplateLocation
self.snidTempFileList = snidTempFileList
self.w0 = w0
@@ -87,7 +87,7 @@ def __init__(self, snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, m
self.typeList = typeList
self.createLabels = CreateLabels(nTypes, minAge, maxAge, ageBinSize, typeList, hostTypes, nHostTypes)

- self.createTrainingSet = CreateTrainingSet(snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, redshiftPrecision, galTemplateLocation, galTempFileList, hostTypes, nHostTypes)
+ self.createTrainingSet = CreateTrainingSet(snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, numOfRedshifts, galTemplateLocation, galTempFileList, hostTypes, nHostTypes)
self.sortData = self.createTrainingSet.sort_data()
self.trainImages = self.sortData[0][0]
self.trainLabels = self.sortData[0][1]
@@ -141,7 +141,7 @@ def save_arrays(self, saveFilename):
os.remove(filename)


- def create_training_set_files(dataDirName, minZ=0, maxZ=0, redshiftPrecision=0.01, trainWithHost=True, classifyHost=False):
+ def create_training_set_files(dataDirName, minZ=0, maxZ=0, numOfRedshifts=80, trainWithHost=True, classifyHost=False):
with open(dataDirName + 'training_params.pickle', 'rb') as f1:
pars = pickle.load(f1)
nTypes, w0, w1, nw, minAge, maxAge, ageBinSize, typeList = pars['nTypes'], pars['w0'], pars['w1'], \
@@ -162,7 +162,7 @@ def create_training_set_files(dataDirName, minZ=0, maxZ=0, redshiftPrecision=0.0
else:
galTemplateLocation, galTempFileList = None, None

- saveTrainingSet = SaveTrainingSet(snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, redshiftPrecision, galTemplateLocation, galTempFileList, hostList, nHostTypes)
+ saveTrainingSet = SaveTrainingSet(snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, numOfRedshifts, galTemplateLocation, galTempFileList, hostList, nHostTypes)
typeNamesList, typeAmounts = saveTrainingSet.type_amounts()

saveFilename = dataDirName + 'training_set.zip'
@@ -172,4 +172,4 @@ def create_training_set_files(dataDirName, minZ=0, maxZ=0, redshiftPrecision=0.0


if __name__ == '__main__':
- trainingSetFilename = create_training_set_files('data_files/', minZ=0, maxZ=0, redshiftPrecision=0.01, trainWithHost=False, classifyHost=False)
+ trainingSetFilename = create_training_set_files('data_files/', minZ=0, maxZ=0, numOfRedshifts=80, trainWithHost=False, classifyHost=False)
dash/gui_main.py: 2 additions & 2 deletions
@@ -534,10 +534,10 @@ def __del__(self):

def _input_spectrum_single_redshift(self):
trainParams = get_training_parameters()
- loadInputSpectraUnRedshifted = LoadInputSpectra(self.inputFilename, 0, 0, self.smooth, trainParams, self.minWave, self.maxWave, self.classifyHost)
+ loadInputSpectraUnRedshifted = LoadInputSpectra(self.inputFilename, 0, self.smooth, trainParams, self.minWave, self.maxWave, self.classifyHost)
inputImageUnRedshifted, inputRedshift, typeNamesList, nw, nBins, minMaxIndexUnRedshifted = loadInputSpectraUnRedshifted.input_spectra()

- loadInputSpectra = LoadInputSpectra(self.inputFilename, self.knownZ, self.knownZ, self.smooth, trainParams, self.minWave, self.maxWave, self.classifyHost)
+ loadInputSpectra = LoadInputSpectra(self.inputFilename, self.knownZ, self.smooth, trainParams, self.minWave, self.maxWave, self.classifyHost)
inputImage, inputRedshift, typeNamesList, nw, nBins, minMaxIndex = loadInputSpectra.input_spectra()
bestTypesList = BestTypesListSingleRedshift(self.modelFilename, inputImage, typeNamesList, nw, nBins)
bestTypes = bestTypesList.bestTypes[0]
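With this change LoadInputSpectra takes a single redshift rather than a (minZ, maxZ) pair, so callers pass the known redshift once. A rough usage sketch (the filename and all argument values are illustrative, not defaults confirmed by this commit):

trainParams = get_training_parameters()
loadInputSpectra = LoadInputSpectra('my_spectrum.dat', 0.05, 6, trainParams, 3500, 10000, False)
inputImage, inputRedshift, typeNamesList, nw, nBins, minMaxIndex = loadInputSpectra.input_spectra()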
dash/input_spectra.py: 18 additions & 22 deletions
@@ -4,10 +4,9 @@


class InputSpectra(object):
- def __init__(self, filename, minZ, maxZ, nTypes, minAge, maxAge, ageBinSize, w0, w1, nw, typeList, smooth, minWave, maxWave, hostList, nHostTypes):
+ def __init__(self, filename, z, nTypes, minAge, maxAge, ageBinSize, w0, w1, nw, typeList, smooth, minWave, maxWave, hostList, nHostTypes):
self.filename = filename
- self.minZ = minZ
- self.maxZ = maxZ
+ self.z = z
self.w0 = w0
self.w1 = w1
self.nw = nw
@@ -19,8 +18,6 @@ def __init__(self, filename, minZ, maxZ, nTypes, minAge, maxAge, ageBinSize, w0,
self.ageBinning = AgeBinning(self.minAge, self.maxAge, self.ageBinSize)
self.numOfAgeBins = self.ageBinning.age_bin(self.maxAge) + 1
self.nLabels = self.nTypes * self.numOfAgeBins * nHostTypes
- self.redshiftPrecision = 1000
- self.numOfRedshifts = (self.maxZ - self.minZ) * self.redshiftPrecision
self.createLabels = CreateLabels(self.nTypes, self.minAge, self.maxAge, self.ageBinSize, self.typeList, hostList, nHostTypes)
self.fileType = 'fits or twocolumn etc.' #Will use later on
self.typeNamesList = self.createLabels.type_names_list()
@@ -38,23 +35,22 @@ def redshifting(self):
readSpectra = ReadSpectra(self.w0, self.w1, self.nw, self.filename)

#Undo it's previous redshift)
- for z in np.linspace(self.minZ, self.maxZ, self.numOfRedshifts + 1):
-     wave, flux, minIndex, maxIndex, z = readSpectra.input_spectrum(z, self.smooth, self.minWave, self.maxWave)
-     nonzeroflux = flux[minIndex:maxIndex + 1]
-     newflux = normalise_spectrum(nonzeroflux)
-     newflux2 = np.concatenate((flux[0:minIndex], newflux, flux[maxIndex + 1:]))
-     images = np.append(images, np.array([newflux2]), axis=0) # images.append(newflux2)
-     filenames.append(str(self.filename) + "_" + str(-z))
-     redshifts.append(-z)
-     minMaxIndexes.append((minIndex, maxIndex))
-     # # Add white noise to regions outside minIndex to maxIndex
-     # noise = np.zeros(self.nw)
-     # noise[0:minIndex] = np.random.uniform(0.0, 1.0, minIndex)
-     # noise[maxIndex:] = np.random.uniform(0.0, 1.0, self.nw - maxIndex)
-     #
-     # augmentedFlux = flux + noise
-     # augmentedFlux = normalise_spectrum(augmentedFlux)
-     # augmentedFlux = zero_non_overlap_part(augmentedFlux, minIndex, maxIndex)
+ wave, flux, minIndex, maxIndex, z = readSpectra.input_spectrum(self.z, self.smooth, self.minWave, self.maxWave)
+ nonzeroflux = flux[minIndex:maxIndex + 1]
+ newflux = normalise_spectrum(nonzeroflux)
+ newflux2 = np.concatenate((flux[0:minIndex], newflux, flux[maxIndex + 1:]))
+ images = np.append(images, np.array([newflux2]), axis=0) # images.append(newflux2)
+ filenames.append(str(self.filename) + "_" + str(-z))
+ redshifts.append(-z)
+ minMaxIndexes.append((minIndex, maxIndex))
+ # # Add white noise to regions outside minIndex to maxIndex
+ # noise = np.zeros(self.nw)
+ # noise[0:minIndex] = np.random.uniform(0.0, 1.0, minIndex)
+ # noise[maxIndex:] = np.random.uniform(0.0, 1.0, self.nw - maxIndex)
+ #
+ # augmentedFlux = flux + noise
+ # augmentedFlux = normalise_spectrum(augmentedFlux)
+ # augmentedFlux = zero_non_overlap_part(augmentedFlux, minIndex, maxIndex)


inputImages = np.array(images)
dash/restore_model.py: 2 additions & 2 deletions
@@ -13,7 +13,7 @@ def get_training_parameters(data_files='models_v04'):


class LoadInputSpectra(object):
- def __init__(self, inputFilename, minZ, maxZ, smooth, pars, minWave, maxWave, classifyHost):
+ def __init__(self, inputFilename, z, smooth, pars, minWave, maxWave, classifyHost):
self.nw = pars['nw']
nTypes, w0, w1, minAge, maxAge, ageBinSize, typeList = pars['nTypes'], pars['w0'], pars['w1'], pars['minAge'], \
pars['maxAge'], pars['ageBinSize'], pars['typeList']
@@ -24,7 +24,7 @@ def __init__(self, inputFilename, minZ, maxZ, smooth, pars, minWave, maxWave, cl
else:
hostList, nHostTypes = None, 1

- self.inputSpectra = InputSpectra(inputFilename, minZ, maxZ, nTypes, minAge, maxAge, ageBinSize, w0, w1, self.nw, typeList, smooth, minWave, maxWave, hostList, nHostTypes)
+ self.inputSpectra = InputSpectra(inputFilename, z, nTypes, minAge, maxAge, ageBinSize, w0, w1, self.nw, typeList, smooth, minWave, maxWave, hostList, nHostTypes)

self.inputImages, self.inputFilenames, self.inputRedshifts, self.typeNamesList, self.inputMinMaxIndexes = self.inputSpectra.redshifting()
self.nBins = len(self.typeNamesList)
