# HyperParameter Optimizer Notebook

This notebook tries to find the best hyperparameter to use for the CNN multilabel classifier

In [1]:
from glob import glob

import pickle 
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

import os
import platform

In [2]:
mpl.rc('font', size=14)

## Input Spectra

Input DESI spectra generated from the pickle notebook. Then divide them into training and test sets for the classifier.

In [3]:
with open('host_flux.data', 'rb') as filehandle:
    host_flux = pickle.load(filehandle)

In [4]:
with open('snia_flux.data', 'rb') as filehandle:
    # read the data as binary data stream
    snia_flux = pickle.load(filehandle)

In [5]:
with open('snib_flux.data', 'rb') as filehandle:
    # read the data as binary data stream
    snib_flux = pickle.load(filehandle)

In [6]:
with open('snic_flux.data', 'rb') as filehandle:
    snic_flux = pickle.load(filehandle)

In [7]:
with open('sniin_flux.data', 'rb') as filehandle:
    sniin_flux = pickle.load(filehandle)

In [8]:
with open('sniip_flux.data', 'rb') as filehandle:
    sniip_flux = pickle.load(filehandle)

In [9]:
nhost, nbins  = host_flux.shape
nsnia, nbins  = snia_flux.shape
nsnib, nbins  = snib_flux.shape
# nsnibc, nbins = snibc_flux.shape
nsnic, nbins  = snic_flux.shape
nsniin, nbins = sniin_flux.shape
# nsniilp, nbins = sniilp_flux.shape
nsniip, nbins = sniip_flux.shape

# nhost, nsnia, nsnib, nsnibc, nsnic, nsniin, nsniilp, nsniip, nbins
nhost, nsnia, nsnib, nsnic, nsniin, nsniip, nbins

(9969, 9964, 9958, 8269, 9949, 9962, 150)

### Set up Training Sets and Labels

0. "host" spectra based only on BGS templates
0. "snia" spectra based on BGS + SN Ia templates
0. "snib" spectra based on BGS + SN Ib templates
0. "snic" spectra based on BGS + SN Ic templates
0. "sniin" spectra based on BGS + SN IIn templates
0. "sniip" spectra based on BGS + SN IIP templates

In [10]:
x = np.concatenate([host_flux, 
                    snia_flux,
                    snib_flux,
                    snic_flux,
                    sniin_flux,
                    sniip_flux
                   ]).reshape(-1, nbins, 1)

labels = ['Host',
          'SN Ia',
          'SN Ib',
          'SN Ic',
          'SN IIn',
          'SN IIP']
ntypes = len(labels)

# Convert y-label array to appropriate categorical array
from tensorflow.keras.utils import to_categorical

y = to_categorical(
        np.concatenate([np.full(nhost, 0), 
                        np.full(nsnia, 1),
                        np.full(nsnib, 2),
                        np.full(nsnic, 3),
                        np.full(nsniin, 4),
                        np.full(nsniip, 5)
                       ]))

In [11]:
x.shape, y.shape

((58071, 150, 1), (58071, 6))

## HyperModel setup

Build a hypermodel which for tuner search

In [12]:
import tensorflow as tf
from tensorflow.keras import utils, regularizers, callbacks, Sequential
from tensorflow.keras.layers import Input, Dense, Activation, ZeroPadding1D, BatchNormalization, Flatten, Reshape, Conv1D, MaxPooling1D, Dropout, Add, GlobalAvgPool1D,AvgPool1D
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.initializers import glorot_normal, glorot_uniform

In [13]:
def model_builder(hp):
    
  seed=None 
  inputs =Input(shape=(nbins,1))
  x = inputs
    
  for i in range(hp.Int('conv_blocks', 5, 6, default=5)):
    filters = hp.Int('filters_' + str(i), 32, 256, step=32)
    for _ in range(2):
      x = Conv1D(filters=16, kernel_size=5, padding='same',kernel_regularizer=regularizers.l2(0.05))(x)
      x = BatchNormalization(axis=2)(x)
      x = Activation('relu')(x)
      x = Conv1D(filters=16, kernel_size=5, padding='same',kernel_regularizer=regularizers.l2(0.05))(x)
    if hp.Choice('pooling_' + str(i), ['avg', 'max']) == 'max':
      x = MaxPooling1D()(x)
    else:
      x = AvgPool1D()(x)
  x = GlobalAvgPool1D()(x)
  x = Dense(
      hp.Int('hidden_size', 30, 100, step=10, default=50),
      activation='relu')(x)
  x = Dropout(
      hp.Float('dropout', 0, 0.5, step=0.1, default=0.5))(x)
  outputs = Dense(6, activation='softmax')(x)

  model = Model(inputs, outputs)
  model.compile(
    optimizer=Adam(
      hp.Float('learning_rate', 1e-3, 15e-2, sampling='log')),
    loss='categorical_crossentropy', 
    metrics=['accuracy'])
  return model


### Organizing the training Data
Split the data into training and testing (+ validation) samples and fit the network weights.

In [14]:
from sklearn.model_selection import train_test_split

def train_test_validate(x, y, train_size=0.75, test_size=0.125, val_size=0.125):
    # Ensure proper normalization.
    if train_size + test_size + val_size != 1:
        norm = train_size + test_size + val_size
        train_size = train_size/norm
        test_size = test_size/norm
        val_size = val_size/norm
        print('Renormalized to train {:g}, test {:g}, and validate {:g}'.format(train_size, test_size, val_size))
        
    # Split into training and testing samples.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=1-train_size)
    
    # Split off the validation sample from the test sample.
    x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=test_size/(test_size+val_size))
    
    return x_train, x_test, x_val, y_train, y_test, y_val

In [15]:
x_train, x_test, x_val, y_train, y_test, y_val = train_test_validate(x, y, 0.6, 0.2, 0.2)

x_train.shape, x_test.shape, x_val.shape

((34842, 150, 1), (11615, 150, 1), (11614, 150, 1))

### HyperParamater Optimization

In [16]:
import kerastuner as kt
from  kerastuner import Hyperband
from kerastuner.applications import HyperResNet


tuner = Hyperband(model_builder,objective='val_accuracy', max_epochs=30, project_name='Optimal_CNN3')
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2)

INFO:tensorflow:Reloading Oracle from existing project ./Optimal_CNN3/oracle.json
INFO:tensorflow:Reloading Tuner from ./Optimal_CNN3/tuner0.json


In [17]:
tuner.search(x_train, y_train,epochs=30,validation_data=(x_test, y_test),
             callbacks=[stop_early])

Trial 5 Complete [00h 01m 08s]
val_accuracy: 0.4124838709831238

Best val_accuracy So Far: 0.7185536026954651
Total elapsed time: 00h 11m 40s

Search: Running Trial #6

Hyperparameter    |Value             |Best Value So Far 
conv_blocks       |4                 |5                 
filters_0         |192               |96                
pooling_0         |avg               |max               
filters_1         |96                |96                
pooling_1         |avg               |avg               
filters_2         |128               |192               
pooling_2         |avg               |avg               
hidden_size       |80                |40                
dropout           |0.4               |0.1               
learning_rate     |0.00016749        |0.00034474        
filters_3         |160               |256               
pooling_3         |max               |max               
filters_4         |96                |192               
pooling_4         |avg           

KeyboardInterrupt: 

### Optimal hyperparameters

In [None]:
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first densely-connected
layer is and the optimal learning rate for the optimizer
is {best_hps.get('learning_rate')}.
""")