In [1]:
from desispec.io import read_spectra
from desitrip.preproc import rebin_flux, rescale_flux

from glob import glob

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import math


from astropy.table import Table

import os
import platform

In [2]:
# Use a default font size of 14 for every matplotlib figure in this notebook.
mpl.rcParams['font.size'] = 14
# Optional NumPy floating-point error handling (disabled):
# np.seterr(all='raise')
# np.seterr(all='warn')

In [3]:
def condition_spectra(coadd_files, truth_files, img_size=79):
    """Read DESI spectra and fold each one into a min-max normalized square image.

    For every coadd file the 'brz' flux array is read. The first
    ``img_size**2`` samples of each spectrum are rescaled to the range
    [0, 1] and reshaped to ``(img_size, img_size)``. Spectra that cannot be
    normalized (NaN/inf values, or constant flux) are dropped.

    Parameters
    ----------
    coadd_files : list or ndarray
        List of FITS files on disk with DESI spectra.
    truth_files : list or ndarray
        Truth files. Currently only used to pair with ``coadd_files``: the
        two lists are consumed pairwise, so iteration stops at the shorter one.
    img_size : int, optional
        Side length of the output images. The default of 79 uses the first
        6241 flux samples of each spectrum.

    Returns
    -------
    final_flux : list of ndarray
        One ``(img_size, img_size)`` array per accepted spectrum. Empty if no
        files were given or no spectrum survived the quality cuts.

    Raises
    ------
    ValueError
        If the spectra contain fewer than ``img_size**2`` flux samples.
    """
    # Gather the per-file flux arrays and concatenate once at the end, rather
    # than re-copying the growing array on every iteration.
    # NOTE: the truth-based rebinning (rebin_flux / rescale_flux) is currently
    # disabled, so `tf` is not read.
    flux_blocks = []
    for cf, tf in zip(coadd_files, truth_files):
        spectra = read_spectra(cf)
        flux_blocks.append(spectra.flux['brz'])

    # No input files: nothing to condition.
    if not flux_blocks:
        return []

    fluxes = np.concatenate(flux_blocks)
    npix = img_size * img_size

    if len(fluxes) and fluxes.shape[-1] < npix:
        raise ValueError(
            'spectra have {} flux samples but {} are needed for a {}x{} image'.format(
                fluxes.shape[-1], npix, img_size, img_size))

    final_flux = []
    for spectrum in fluxes:
        trial = spectrum[:npix]
        lo = np.min(trial)
        hi = np.max(trial)

        # A spectrum with NaN/inf values or with constant flux has no usable
        # min-max scaling; skip it before dividing so no 0/0 is evaluated.
        if not (np.isfinite(lo) and np.isfinite(hi)) or hi == lo:
            continue

        image = ((trial - lo) / (hi - lo)).reshape(img_size, img_size)

        # Safety net: reject anything that still ended up empty or NaN
        # (e.g. after floating-point overflow in the rescaling).
        total = np.sum(image)
        if total == 0 or math.isnan(total):
            continue

        final_flux.append(image)

    return final_flux

In [4]:
# Host-galaxy files: truth and coadd lists are sorted by filename so they can be consumed pairwise.
host_truth = glob('/scratch/sbenzvi_lab/desi/time-domain/bgs/150s/hosts/*truth.fits')
host_coadd = glob('/scratch/sbenzvi_lab/desi/time-domain/bgs/150s/hosts/*coadd.fits')
host_truth.sort()
host_coadd.sort()

host_flux = condition_spectra(host_coadd, host_truth)

In [5]:
# SN Ia (hsiao) files: truth and coadd lists are sorted by filename so they can be consumed pairwise.
snia_truth = glob('/scratch/sbenzvi_lab/desi/time-domain/bgs/150s/sn_ia/hsiao/*truth.fits')
snia_files = glob('/scratch/sbenzvi_lab/desi/time-domain/bgs/150s/sn_ia/hsiao/*coadd.fits')
snia_truth.sort()
snia_files.sort()

snia_flux = condition_spectra(snia_files, snia_truth)

In [6]:
# SN Ib files (all sub-directories): truth and coadd lists are sorted by path so they can be consumed pairwise.
snib_truth = glob('/scratch/sbenzvi_lab/desi/time-domain/bgs/150s/sn_ib/*/*truth.fits')
snib_files = glob('/scratch/sbenzvi_lab/desi/time-domain/bgs/150s/sn_ib/*/*coadd.fits')
snib_truth.sort()
snib_files.sort()

snib_flux = condition_spectra(snib_files, snib_truth)

In [7]:
# SN Ic files (all sub-directories): truth and coadd lists are sorted by path so they can be consumed pairwise.
snic_truth = glob('/scratch/sbenzvi_lab/desi/time-domain/bgs/150s/sn_ic/*/*truth.fits')
snic_files = glob('/scratch/sbenzvi_lab/desi/time-domain/bgs/150s/sn_ic/*/*coadd.fits')
snic_truth.sort()
snic_files.sort()

snic_flux = condition_spectra(snic_files, snic_truth)

In [8]:
# SN IIn files (all sub-directories): truth and coadd lists are sorted by path so they can be consumed pairwise.
sniin_truth = glob('/scratch/sbenzvi_lab/desi/time-domain/bgs/150s/sn_iin/*/*truth.fits')
sniin_files = glob('/scratch/sbenzvi_lab/desi/time-domain/bgs/150s/sn_iin/*/*coadd.fits')
sniin_truth.sort()
sniin_files.sort()

sniin_flux = condition_spectra(sniin_files, sniin_truth)

In [9]:
# SN IIP files (all sub-directories): truth and coadd lists are sorted by path so they can be consumed pairwise.
sniip_truth = glob('/scratch/sbenzvi_lab/desi/time-domain/bgs/150s/sn_iip/*/*truth.fits')
sniip_files = glob('/scratch/sbenzvi_lab/desi/time-domain/bgs/150s/sn_iip/*/*coadd.fits')
sniip_truth.sort()
sniip_files.sort()

sniip_flux = condition_spectra(sniip_files, sniip_truth)

In [10]:
# Turn each class's list of images into one 3-D array and record its size.
# Every unpack requires a (n_samples, n_rows, n_cols) shape; nylen and nxlen
# are overwritten each time and end up holding the SN IIP image dimensions.
host_flux = np.asarray(host_flux)
nhost, nylen, nxlen = host_flux.shape

snia_flux = np.asarray(snia_flux)
nsnia, nylen, nxlen = snia_flux.shape

snib_flux = np.asarray(snib_flux)
nsnib, nylen, nxlen = snib_flux.shape

snic_flux = np.asarray(snic_flux)
nsnic, nylen, nxlen = snic_flux.shape

sniin_flux = np.asarray(sniin_flux)
nsniin, nylen, nxlen = sniin_flux.shape

sniip_flux = np.asarray(sniip_flux)
nsniip, nylen, nxlen = sniip_flux.shape

# Display the per-class sample counts followed by the image dimensions.
nhost, nsnia, nsnib, nsnic, nsniin, nsniip, nylen, nxlen

(9969, 9964, 9958, 8269, 9949, 9962, 79, 79)

In [11]:
# Stack all classes along the sample axis and append a trailing channel axis.
x = np.concatenate(
    (host_flux, snia_flux, snib_flux, snic_flux, sniin_flux, sniip_flux)
).reshape(-1, nylen, nxlen, 1)

# Class names; the position in this list is the integer class label used for y.
labels = ['Host', 'SN Ia', 'SN Ib', 'SN Ic', 'SN IIn', 'SN IIP']
ntypes = len(labels)

# Convert y-label array to appropriate categorical array
from tensorflow.keras.utils import to_categorical

# One integer label per sample, repeated in the same class order as x,
# then one-hot encoded.
y = to_categorical(
    np.repeat(np.arange(6), (nhost, nsnia, nsnib, nsnic, nsniin, nsniip)))


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  return f(*args, **kwds)


In [12]:
from sklearn.model_selection import train_test_split

def train_test_validate(x, y, train_size=0.75, test_size=0.125, val_size=0.125, random_state=None):
    """Split x and y into training, testing, and validation samples.

    The three fractions are renormalized to sum to one if they do not
    already (within floating-point tolerance). The data are then split in
    two stages: first the training sample is separated, then the remainder
    is divided into testing and validation samples.

    Parameters
    ----------
    x, y : array-like
        Features and labels with the same number of samples.
    train_size, test_size, val_size : float
        Relative sizes of the three samples; each must be strictly positive.
    random_state : int, RandomState instance or None, optional
        Forwarded to both ``train_test_split`` calls. The default (None)
        gives a different random split on every call; pass an int for a
        reproducible split.

    Returns
    -------
    x_train, x_test, x_val, y_train, y_test, y_val
        The split features followed by the split labels.

    Raises
    ------
    ValueError
        If any of the three fractions is not strictly positive.
    """
    # Reject meaningless fractions up front (this also catches NaN) instead of
    # failing later with a ZeroDivisionError or an opaque splitter error.
    if not all(size > 0 for size in (train_size, test_size, val_size)):
        raise ValueError(
            'train_size, test_size and val_size must all be positive; got {}, {}, {}'.format(
                train_size, test_size, val_size))

    # Ensure proper normalization. Compare with a tolerance: sums such as
    # 0.7 + 0.2 + 0.1 are not exactly 1 in floating point.
    norm = train_size + test_size + val_size
    if not math.isclose(norm, 1.0):
        train_size = train_size/norm
        test_size = test_size/norm
        val_size = val_size/norm
        print('Renormalized to train {:g}, test {:g}, and validate {:g}'.format(train_size, test_size, val_size))

    # Split into training and held-out (test + validation) samples.
    x_train, x_rest, y_train, y_rest = train_test_split(
        x, y, test_size=1-train_size, random_state=random_state)

    # Split the held-out sample: the "test" part of this second split is the
    # final test sample, the remainder is the validation sample.
    x_val, x_test, y_val, y_test = train_test_split(
        x_rest, y_rest, test_size=test_size/(test_size+val_size), random_state=random_state)

    return x_train, x_test, x_val, y_train, y_test, y_val

In [13]:
img_x, img_y = 79, 79
x_train, x_test, x_val, y_train, y_test, y_val = train_test_validate(x, y, 0.6, 0.2, 0.2)

# Give every split the explicit (samples, rows, cols, channel) layout that
# matches input_shape.
x_train = x_train.reshape(x_train.shape[0], img_x, img_y, 1)
x_test = x_test.reshape(x_test.shape[0], img_x, img_y, 1)
x_val = x_val.reshape(x_val.shape[0], img_x, img_y, 1)
input_shape = (img_x, img_y, 1)

# Convert all three splits to float32 so they are preprocessed identically.
# The images were already min-max scaled to [0, 1] when they were built, so
# the division by 255 (a convention for 8-bit images) is intentionally not
# applied: it would squash the inputs to [0, ~0.004].
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_val = x_val.astype('float32')
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')


x_train.shape, x_test.shape, x_val.shape

x_train shape: (34842, 79, 79, 1)
34842 train samples
11615 test samples


((34842, 79, 79, 1), (11615, 79, 79, 1), (11614, 79, 79, 1))

In [14]:
from __future__ import print_function
import tensorflow.keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
import matplotlib.pylab as plt


In [15]:
# Training hyperparameters.
batch_size = 128    # passed to model.fit as the batch size
num_classes = 6     # width of the final softmax layer
epochs = 30         # number of training epochs

# Input image dimensions (rows, columns).
img_x = img_y = 79

In [16]:
# Shape of one network input: 79 rows, 79 columns, 1 channel.
input_shape = (79, 79, 1)

In [17]:

# Two convolution + max-pooling stages followed by a dense classifier head.
# (An earlier, smaller variant used 8 and 16 filters with 2x2 kernels.)
model = Sequential([
    Conv2D(32, kernel_size=(5, 5), strides=(1, 1),
           activation='relu',
           input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Conv2D(64, (5, 5), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(1000, activation='relu'),
    # One softmax output per class.
    Dense(num_classes, activation='softmax'),
])

model.compile(
    loss=tensorflow.keras.losses.categorical_crossentropy,
    optimizer=tensorflow.keras.optimizers.Adam(),
    metrics=['accuracy'],
)





In [None]:
class AccuracyHistory(tensorflow.keras.callbacks.Callback):
    """Keras callback that records the training accuracy after every epoch.

    After training, ``self.acc`` holds one entry per completed epoch.
    """

    def on_train_begin(self, logs=None):
        """Reset the recorded history at the start of each training run."""
        self.acc = []

    def on_epoch_end(self, batch, logs=None):
        """Append this epoch's training accuracy to ``self.acc``.

        Parameters
        ----------
        batch : int
            Index passed by Keras at the end of an epoch (the epoch number,
            despite the parameter name, which is kept for compatibility).
        logs : dict, optional
            Metric results for the epoch.
        """
        # Avoid a shared mutable default argument.
        logs = logs or {}
        # Depending on the Keras/TensorFlow version the accuracy metric is
        # logged under 'acc' or 'accuracy'; accept either so the history is
        # not silently filled with None.
        self.acc.append(logs.get('acc', logs.get('accuracy')))

# Callback instance that collects the per-epoch training accuracy.
history = AccuracyHistory()

# NOTE(review): the test split is passed as validation_data and is also the
# data scored by model.evaluate below, so the reported "test" numbers come
# from data already monitored during training. x_val / y_val look intended
# for the validation role — confirm, and make sure they are preprocessed the
# same way as x_train before switching.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test),
          callbacks=[history])
# score[0] is printed as the loss and score[1] as the accuracy.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])


Train on 34842 samples, validate on 11615 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
 1152/34842 [..............................] - ETA: 15:27 - loss: 1.7903 - acc: 0.1875