Sampling redshifts from a random uniform distribution before training, instead of just choosing the same redshift intervals for each. Every host, SN, age, and snCoeff has a different set of random redshifts. Hopefully this makes the trained set more agnostic to redshift.
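In essence (a minimal sketch, not the repository's exact code; the bounds and count are taken from the driver script below), the change swaps a fixed redshift grid for a fresh uniform draw per template:

import numpy as np

minZ, maxZ, numOfRedshifts = 0.0, 0.8, 20

# Old behaviour: every template was redshifted onto the same evenly spaced grid
redshifts_old = np.linspace(minZ, maxZ, numOfRedshifts + 1)

# New behaviour: each (host, SN, age, snCoeff) combination gets its own random set
redshifts_new = np.random.uniform(low=minZ, high=maxZ, size=numOfRedshifts)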
daniel-muthukrishna committed Apr 15, 2018
1 parent a202fb1 commit f6a7afd
Showing 7 changed files with 39 additions and 39 deletions.
dash/classify.py: 1 addition & 1 deletion
@@ -56,7 +56,7 @@ def __init__(self, filenames=[], redshifts=[], smooth=6, minWave=3500, maxWave=1
def _get_images(self, filename, redshift):
if redshift in list(catalogDict.keys()):
redshift = 0
- loadInputSpectra = LoadInputSpectra(filename, redshift, redshift, self.smooth, self.pars, self.minWave, self.maxWave, self.classifyHost)
+ loadInputSpectra = LoadInputSpectra(filename, redshift, self.smooth, self.pars, self.minWave, self.maxWave, self.classifyHost)
inputImage, inputRedshift, typeNamesList, nw, nBins, inputMinMaxIndex = loadInputSpectra.input_spectra()

return inputImage, typeNamesList, nw, nBins, inputMinMaxIndex, inputRedshift
dash/create_and_save_all_data_files.py: 2 additions & 2 deletions
@@ -24,7 +24,7 @@
f.write("Classify Host: False\n")
f.write("Redshift: Zero\n")
f.write("Redshift Range: 0 to 0.8\n")
f.write("Redshift Precision: 0.05\n")
f.write("Num of Redshifts: 20\n")
f.write("Fraction of Training Set Used: 0.8\n")
f.write("Training Amount: 50 x 500000\n")
f.write("Changed wavelength range to 3000 to 10000A\n")
@@ -39,7 +39,7 @@
print("time spent: {0:.2f}".format(t2 - t1))

# CREATE TRAINING SET FILES
- trainingSetFilename = create_training_set_files(dataDirName, minZ=0, maxZ=0.8, redshiftPrecision=0.05, trainWithHost=True, classifyHost=False)
+ trainingSetFilename = create_training_set_files(dataDirName, minZ=0., maxZ=0.8, numOfRedshifts=20, trainWithHost=True, classifyHost=False)
dataFilenames.append(trainingSetFilename)
t3 = time.time()
print("time spent: {0:.2f}".format(t3 - t2))
dash/create_arrays.py: 7 additions & 3 deletions
@@ -244,7 +244,7 @@ def over_sample_arrays(self, **kwargs):


class CreateArrays(object):
- def __init__(self, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, redshiftPrecision, hostTypes=None, nHostTypes=None):
+ def __init__(self, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, numOfRedshifts, hostTypes=None, nHostTypes=None):
self.w0 = w0
self.w1 = w1
self.nw = nw
@@ -255,7 +255,7 @@ def __init__(self, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, min
self.typeList = typeList
self.minZ = minZ
self.maxZ = maxZ
- self.numOfRedshifts = int((maxZ - minZ) * 1./redshiftPrecision)
+ self.numOfRedshifts = numOfRedshifts
self.ageBinning = AgeBinning(minAge, maxAge, ageBinSize)
self.numOfAgeBins = self.ageBinning.age_bin(maxAge-0.1) + 1
self.nLabels = nTypes * self.numOfAgeBins * nHostTypes
@@ -279,7 +279,11 @@ def combined_sn_gal_templates_to_arrays(self, snTemplateLocation, snTempList, ga
break
for snCoeff in snFractions:
galCoeff = 1 - snCoeff
- for z in np.linspace(self.minZ, self.maxZ, self.numOfRedshifts + 1):
+ if self.numOfRedshifts == 1:
+     redshifts = [self.minZ]
+ else:
+     redshifts = np.random.uniform(low=self.minZ, high=self.maxZ, size=self.numOfRedshifts)
+ for z in redshifts:
tempWave, tempFlux, nCols, ages, tType, tMinIndex, tMaxIndex = readSpectra.sn_plus_gal_template(ageidx, snCoeff, galCoeff, z)
agesList.append(ages[ageidx])
if tMinIndex == tMaxIndex or not tempFlux.any():
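As the commit message says, the draw now happens inside the per-template loops, so every combination of host galaxy, SN template, age and snCoeff sees a different redshift set. Roughly (a simplified sketch; the loop variables and the process_template call are placeholders, not the actual code):

# Each pass through the inner loops makes an independent uniform draw
for ageidx in range(numOfAges):
    for snCoeff in snFractions:
        galCoeff = 1 - snCoeff
        redshifts = np.random.uniform(low=minZ, high=maxZ, size=numOfRedshifts)
        for z in redshifts:
            process_template(ageidx, snCoeff, galCoeff, z)  # placeholder for sn_plus_gal_template(...)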
dash/create_training_set.py: 7 additions & 7 deletions
@@ -8,7 +8,7 @@

class CreateTrainingSet(object):

- def __init__(self, snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, redshiftPrecision, galTemplateLocation, galTempFileList, hostTypes, nHostTypes):
+ def __init__(self, snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, numOfRedshifts, galTemplateLocation, galTempFileList, hostTypes, nHostTypes):
self.snidTemplateLocation = snidTemplateLocation
self.snidTempFileList = snidTempFileList
self.galTemplateLocation = galTemplateLocation
@@ -24,7 +24,7 @@ def __init__(self, snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, m
self.ageBinning = AgeBinning(self.minAge, self.maxAge, self.ageBinSize)
self.numOfAgeBins = self.ageBinning.age_bin(self.maxAge-0.1) + 1
self.nLabels = self.nTypes * self.numOfAgeBins * nHostTypes
- self.createArrays = CreateArrays(w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, redshiftPrecision, hostTypes, nHostTypes)
+ self.createArrays = CreateArrays(w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, numOfRedshifts, hostTypes, nHostTypes)
self.arrayTools = ArrayTools(self.nLabels, self.nw)

def type_amounts(self, labels):
@@ -74,7 +74,7 @@ def sort_data(self):


class SaveTrainingSet(object):
- def __init__(self, snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, redshiftPrecision, galTemplateLocation=None, galTempFileList=None, hostTypes=None, nHostTypes=1):
+ def __init__(self, snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, numOfRedshifts, galTemplateLocation=None, galTempFileList=None, hostTypes=None, nHostTypes=1):
self.snidTemplateLocation = snidTemplateLocation
self.snidTempFileList = snidTempFileList
self.w0 = w0
@@ -87,7 +87,7 @@ def __init__(self, snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, m
self.typeList = typeList
self.createLabels = CreateLabels(nTypes, minAge, maxAge, ageBinSize, typeList, hostTypes, nHostTypes)

- self.createTrainingSet = CreateTrainingSet(snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, redshiftPrecision, galTemplateLocation, galTempFileList, hostTypes, nHostTypes)
+ self.createTrainingSet = CreateTrainingSet(snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, numOfRedshifts, galTemplateLocation, galTempFileList, hostTypes, nHostTypes)
self.sortData = self.createTrainingSet.sort_data()
self.trainImages = self.sortData[0][0]
self.trainLabels = self.sortData[0][1]
@@ -141,7 +141,7 @@ def save_arrays(self, saveFilename):
os.remove(filename)


- def create_training_set_files(dataDirName, minZ=0, maxZ=0, redshiftPrecision=0.01, trainWithHost=True, classifyHost=False):
+ def create_training_set_files(dataDirName, minZ=0, maxZ=0, numOfRedshifts=80, trainWithHost=True, classifyHost=False):
with open(dataDirName + 'training_params.pickle', 'rb') as f1:
pars = pickle.load(f1)
nTypes, w0, w1, nw, minAge, maxAge, ageBinSize, typeList = pars['nTypes'], pars['w0'], pars['w1'], \
@@ -162,7 +162,7 @@ def create_training_set_files(dataDirName, minZ=0, maxZ=0, redshiftPrecision=0.0
else:
galTemplateLocation, galTempFileList = None, None

- saveTrainingSet = SaveTrainingSet(snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, redshiftPrecision, galTemplateLocation, galTempFileList, hostList, nHostTypes)
+ saveTrainingSet = SaveTrainingSet(snidTemplateLocation, snidTempFileList, w0, w1, nw, nTypes, minAge, maxAge, ageBinSize, typeList, minZ, maxZ, numOfRedshifts, galTemplateLocation, galTempFileList, hostList, nHostTypes)
typeNamesList, typeAmounts = saveTrainingSet.type_amounts()

saveFilename = dataDirName + 'training_set.zip'
@@ -172,4 +172,4 @@ def create_training_set_files(dataDirName, minZ=0, maxZ=0, redshiftPrecision=0.0


if __name__ == '__main__':
- trainingSetFilename = create_training_set_files('data_files/', minZ=0, maxZ=0, redshiftPrecision=0.01, trainWithHost=False, classifyHost=False)
+ trainingSetFilename = create_training_set_files('data_files/', minZ=0, maxZ=0, numOfRedshifts=80, trainWithHost=False, classifyHost=False)
dash/gui_main.py: 2 additions & 2 deletions
@@ -534,10 +534,10 @@ def __del__(self):

def _input_spectrum_single_redshift(self):
trainParams = get_training_parameters()
- loadInputSpectraUnRedshifted = LoadInputSpectra(self.inputFilename, 0, 0, self.smooth, trainParams, self.minWave, self.maxWave, self.classifyHost)
+ loadInputSpectraUnRedshifted = LoadInputSpectra(self.inputFilename, 0, self.smooth, trainParams, self.minWave, self.maxWave, self.classifyHost)
inputImageUnRedshifted, inputRedshift, typeNamesList, nw, nBins, minMaxIndexUnRedshifted = loadInputSpectraUnRedshifted.input_spectra()

- loadInputSpectra = LoadInputSpectra(self.inputFilename, self.knownZ, self.knownZ, self.smooth, trainParams, self.minWave, self.maxWave, self.classifyHost)
+ loadInputSpectra = LoadInputSpectra(self.inputFilename, self.knownZ, self.smooth, trainParams, self.minWave, self.maxWave, self.classifyHost)
inputImage, inputRedshift, typeNamesList, nw, nBins, minMaxIndex = loadInputSpectra.input_spectra()
bestTypesList = BestTypesListSingleRedshift(self.modelFilename, inputImage, typeNamesList, nw, nBins)
bestTypes = bestTypesList.bestTypes[0]
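With this change LoadInputSpectra takes a single redshift rather than a (minZ, maxZ) pair, so callers pass the known redshift once. A rough usage sketch (the filename and all argument values are illustrative, not defaults confirmed by this commit):

trainParams = get_training_parameters()
loadInputSpectra = LoadInputSpectra('my_spectrum.dat', 0.05, 6, trainParams, 3500, 10000, False)
inputImage, inputRedshift, typeNamesList, nw, nBins, minMaxIndex = loadInputSpectra.input_spectra()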
dash/input_spectra.py: 18 additions & 22 deletions
@@ -4,10 +4,9 @@


class InputSpectra(object):
- def __init__(self, filename, minZ, maxZ, nTypes, minAge, maxAge, ageBinSize, w0, w1, nw, typeList, smooth, minWave, maxWave, hostList, nHostTypes):
+ def __init__(self, filename, z, nTypes, minAge, maxAge, ageBinSize, w0, w1, nw, typeList, smooth, minWave, maxWave, hostList, nHostTypes):
self.filename = filename
- self.minZ = minZ
- self.maxZ = maxZ
+ self.z = z
self.w0 = w0
self.w1 = w1
self.nw = nw
@@ -19,8 +18,6 @@ def __init__(self, filename, minZ, maxZ, nTypes, minAge, maxAge, ageBinSize, w0,
self.ageBinning = AgeBinning(self.minAge, self.maxAge, self.ageBinSize)
self.numOfAgeBins = self.ageBinning.age_bin(self.maxAge) + 1
self.nLabels = self.nTypes * self.numOfAgeBins * nHostTypes
- self.redshiftPrecision = 1000
- self.numOfRedshifts = (self.maxZ - self.minZ) * self.redshiftPrecision
self.createLabels = CreateLabels(self.nTypes, self.minAge, self.maxAge, self.ageBinSize, self.typeList, hostList, nHostTypes)
self.fileType = 'fits or twocolumn etc.' #Will use later on
self.typeNamesList = self.createLabels.type_names_list()
@@ -38,23 +35,22 @@ def redshifting(self):
readSpectra = ReadSpectra(self.w0, self.w1, self.nw, self.filename)

#Undo it's previous redshift)
- for z in np.linspace(self.minZ, self.maxZ, self.numOfRedshifts + 1):
-     wave, flux, minIndex, maxIndex, z = readSpectra.input_spectrum(z, self.smooth, self.minWave, self.maxWave)
-     nonzeroflux = flux[minIndex:maxIndex + 1]
-     newflux = normalise_spectrum(nonzeroflux)
-     newflux2 = np.concatenate((flux[0:minIndex], newflux, flux[maxIndex + 1:]))
-     images = np.append(images, np.array([newflux2]), axis=0) # images.append(newflux2)
-     filenames.append(str(self.filename) + "_" + str(-z))
-     redshifts.append(-z)
-     minMaxIndexes.append((minIndex, maxIndex))
-     # # Add white noise to regions outside minIndex to maxIndex
-     # noise = np.zeros(self.nw)
-     # noise[0:minIndex] = np.random.uniform(0.0, 1.0, minIndex)
-     # noise[maxIndex:] = np.random.uniform(0.0, 1.0, self.nw - maxIndex)
-     #
-     # augmentedFlux = flux + noise
-     # augmentedFlux = normalise_spectrum(augmentedFlux)
-     # augmentedFlux = zero_non_overlap_part(augmentedFlux, minIndex, maxIndex)
+ wave, flux, minIndex, maxIndex, z = readSpectra.input_spectrum(self.z, self.smooth, self.minWave, self.maxWave)
+ nonzeroflux = flux[minIndex:maxIndex + 1]
+ newflux = normalise_spectrum(nonzeroflux)
+ newflux2 = np.concatenate((flux[0:minIndex], newflux, flux[maxIndex + 1:]))
+ images = np.append(images, np.array([newflux2]), axis=0) # images.append(newflux2)
+ filenames.append(str(self.filename) + "_" + str(-z))
+ redshifts.append(-z)
+ minMaxIndexes.append((minIndex, maxIndex))
+ # # Add white noise to regions outside minIndex to maxIndex
+ # noise = np.zeros(self.nw)
+ # noise[0:minIndex] = np.random.uniform(0.0, 1.0, minIndex)
+ # noise[maxIndex:] = np.random.uniform(0.0, 1.0, self.nw - maxIndex)
+ #
+ # augmentedFlux = flux + noise
+ # augmentedFlux = normalise_spectrum(augmentedFlux)
+ # augmentedFlux = zero_non_overlap_part(augmentedFlux, minIndex, maxIndex)


inputImages = np.array(images)
dash/restore_model.py: 2 additions & 2 deletions
@@ -13,7 +13,7 @@ def get_training_parameters(data_files='models_v04'):


class LoadInputSpectra(object):
- def __init__(self, inputFilename, minZ, maxZ, smooth, pars, minWave, maxWave, classifyHost):
+ def __init__(self, inputFilename, z, smooth, pars, minWave, maxWave, classifyHost):
self.nw = pars['nw']
nTypes, w0, w1, minAge, maxAge, ageBinSize, typeList = pars['nTypes'], pars['w0'], pars['w1'], pars['minAge'], \
pars['maxAge'], pars['ageBinSize'], pars['typeList']
@@ -24,7 +24,7 @@ def __init__(self, inputFilename, minZ, maxZ, smooth, pars, minWave, maxWave, cl
else:
hostList, nHostTypes = None, 1

- self.inputSpectra = InputSpectra(inputFilename, minZ, maxZ, nTypes, minAge, maxAge, ageBinSize, w0, w1, self.nw, typeList, smooth, minWave, maxWave, hostList, nHostTypes)
+ self.inputSpectra = InputSpectra(inputFilename, z, nTypes, minAge, maxAge, ageBinSize, w0, w1, self.nw, typeList, smooth, minWave, maxWave, hostList, nHostTypes)

self.inputImages, self.inputFilenames, self.inputRedshifts, self.typeNamesList, self.inputMinMaxIndexes = self.inputSpectra.redshifting()
self.nBins = len(self.typeNamesList)
