In [246]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import datetime
import os
import skimage
import random
from tensorflow.python.keras.utils.data_utils import Sequence

In [247]:
#https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
class DataGenerator(Sequence):
    """Keras ``Sequence`` that builds image batches from ``.npy`` files on disk.

    Every sample is read from ``<data_dir>/<ID>.npy``. When ``augment`` is
    enabled the sample is flipped along axis 0 and along axis 1 (each with
    probability 0.5) and then rotated by a random angle in [0, 360) degrees
    using reflect padding.

    Parameters
    ----------
    list_IDs : sequence of str
        Sample identifiers, i.e. file names without the ``.npy`` extension.
    labels : mapping
        Looked up as ``labels[ID]`` to obtain the class label of a sample.
    batch_size : int
        Number of samples per batch.
    dim : tuple of int
        Shape of one sample without its trailing channel axis.
    n_channels : int
        Size of the trailing channel axis.
    n_classes : int
        Stored on the instance; not used by the generator itself.
    shuffle : bool
        Shuffle the sample order at construction and after every epoch.
    data_dir : str
        Directory that holds the ``.npy`` files. Defaults to the directory
        that used to be hard-coded (``image_arrays``).
    augment : bool
        Apply the random flips and rotation. Defaults to True (the previous,
        unconditional behaviour); pass False for evaluation data.
    """

    def __init__(self, list_IDs, labels, batch_size=32, dim=(64,64), n_channels=5,
                 n_classes=2, shuffle=True, data_dir='image_arrays', augment=True):
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.data_dir = data_dir
        self.augment = augment
        # Build the initial (possibly shuffled) sample order.
        self.on_epoch_end()

    def on_epoch_end(self):
        """Rebuild the index array and reshuffle it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load (and optionally augment) the samples named in ``list_IDs_temp``.

        Returns
        -------
        X : ndarray of shape (len(list_IDs_temp), *dim, n_channels)
        y : ndarray of int, shape (len(list_IDs_temp),)
            Integer class labels (not one-hot encoded).
        """
        # Imported here because a bare `import skimage` is not guaranteed to
        # load the `transform` submodule on every scikit-image version.
        from skimage.transform import rotate

        # Size the buffers by the IDs actually received so that a short list
        # can never leave uninitialised rows from np.empty in the batch.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim, self.n_channels))
        y = np.empty(n_samples, dtype=int)

        for i, ID in enumerate(list_IDs_temp):
            # Store the sample first so every later step works on the
            # buffer's float representation, exactly as before.
            X[i,] = np.load(os.path.join(self.data_dir, ID + '.npy'))

            if self.augment:
                # Random flips along the first two axes of the sample.
                if random.random() > 0.5:
                    X[i,] = np.flip(X[i,], 0)
                if random.random() > 0.5:
                    X[i,] = np.flip(X[i,], 1)
                # Random rotation; the borders are filled by reflection.
                angle = 360 * random.random()
                X[i,] = rotate(X[i,], angle=angle, mode='reflect')

            y[i] = self.labels[ID]

        return X, y

    def __len__(self):
        """Number of full batches per epoch; leftover samples are dropped."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` tuple."""
        batch_indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        batch_ids = [self.list_IDs[k] for k in batch_indexes]
        return self.__data_generation(batch_ids)

In [248]:
#input variables
# os.path.join inserts the separator of the running platform, so the paths
# are no longer tied to Windows. `path` keeps its trailing separator so that
# plain string concatenation onto it keeps working.
path = os.path.join('image_arrays_new', '')
validation_path = os.path.join(path, 'validation')
training_path = os.path.join(path, 'training')
test_path = os.path.join(path, 'test')
#model variables
batch_size = 32 #32 == best, run 156079264
epoch_number = 50
learning_rate = 1e-3 #1e-4 stable, overtraining started after step 30, #1e-3 best, OT @ epoch 15

# Keyword arguments shared by the training and validation DataGenerator.
params = {'dim': (64,64),
          'batch_size': batch_size,
          'n_classes': 2,
          'n_channels': 5,
          'shuffle': True}


#architecture variables
"""
CNL1_filters = 32
CNL1_kernal_size = 5
MPL1_pool_size= (2,2)
MPL1_strides = 2
CNL2_filters = 64
CNL2_kernal_size = 5
MPL2_pool_size = (2,2)
MPL2_strides = 2
"""
# The architecture values are defined inside the model function instead,
# because we are modeling after a paper.

'\nCNL1_filters = 32\nCNL1_kernal_size = 5\nMPL1_pool_size= (2,2)\nMPL1_strides = 2\nCNL2_filters = 64\nCNL2_kernal_size = 5\nMPL2_pool_size = (2,2)\nMPL2_strides = 2\n'

In [249]:
# Read the first 10000 rows of column 8 of the catalogue; that column is
# accessed by its header name "gz2class" below.
galaxyzoo = pd.read_csv("zoo2MainSpecz.csv", usecols=[8], nrows=10000)
Class = galaxyzoo["gz2class"].values

# Numeric code for each sample, keyed by the first character of its class
# string. Plain ints are used: assigning a one-element array into a scalar
# slot is deprecated in recent NumPy releases.
# NOTE(review): code 2 ('A') does not fit the two-unit softmax output of the
# model defined in this notebook -- confirm such samples never reach training.
dictionary = {'A': 2, 'E': 0, 'S': 1}

# Encode every class string; an unknown first character raises KeyError.
# The array stays float64, matching the previous np.empty allocation.
target = np.array([dictionary[gz2_class[0]] for gz2_class in Class], dtype=float)
#target = target.astype(int)

In [250]:
# Sample IDs are the file names up to their first '.'.
train_list = [file_name.split('.')[0] for file_name in os.listdir(training_path)]
val_list = [file_name.split('.')[0] for file_name in os.listdir(validation_path)]

partition = {'train':train_list,'validation':val_list}

# Map 'array_number_<row>' to the encoded class of that catalogue row.
# Iterating over `target` itself (instead of a fixed range(10000)) avoids an
# IndexError when the catalogue yields fewer rows than expected.
labels = {'array_number_{}'.format(row): label for row, label in enumerate(target)}

In [251]:
# Build one generator per split, in the order train -> validation, both
# sharing the same label lookup and generator settings.
training_generator, validation_generator = (
    DataGenerator(partition[split_name], labels, **params)
    for split_name in ('train', 'validation')
)

In [252]:
"""
directory = os.listdir('image_arrays')
data = np.zeros((len(directory),64,64,5))
inbetween_target = np.zeros((len(directory)))
for i in range(len(directory)):
    ith = directory[i].split('_')[-1]
    ith = ith.split('.')[0]
    ith = int(ith)
    inbetween_target[i] = target[ith]
target = inbetween_target
"""

"\ndirectory = os.listdir('image_arrays')\ndata = np.zeros((len(directory),64,64,5))\ninbetween_target = np.zeros((len(directory)))\nfor i in range(len(directory)):\n    ith = directory[i].split('_')[-1]\n    ith = ith.split('.')[0]\n    ith = int(ith)\n    inbetween_target[i] = target[ith]\ntarget = inbetween_target\n"

In [253]:
#so this is pretty neat, you can create a keras callback to display on tensorboard using a simplified summary tf api

#and also this is an example of how to change the lr on the fly, which is pretty handy
#https://keras.io/callbacks/

# Each run logs into its own directory, named after the current POSIX timestamp.
logdir = "summaries/scalars/{}".format(datetime.datetime.now().timestamp())

# TensorBoard callback: histograms every epoch, no graph dump, gradients requested.
tensorboard_callback = keras.callbacks.TensorBoard(
    log_dir=logdir,
    histogram_freq=1,
    write_graph=False,
    write_grads=True,
)
#write_images=True)
"""
    file_writer = tf.summary.create_file_writer(logdir + "/metrics")
    file_writer.set_as_default()

    def lr_schedule(epoch):
"""
#Returns a custom learning rate that decreases as epochs progress.
"""
    learning_rate = 0.2
    if epoch > 10:
        learning_rate = 0.02
    if epoch > 20:
        learning_rate = 0.01
    if epoch > 50:
        learning_rate = 0.005

    tf.summary.scalar('learning rate', data=learning_rate, step=epoch)
    return learning_rate

    lr_callback = keras.callbacks.LearningRateScheduler(lr_schedule)
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
"""
#will it still print stuff

"\n    learning_rate = 0.2\n    if epoch > 10:\n        learning_rate = 0.02\n    if epoch > 20:\n        learning_rate = 0.01\n    if epoch > 50:\n        learning_rate = 0.005\n\n    tf.summary.scalar('learning rate', data=learning_rate, step=epoch)\n    return learning_rate\n\n    lr_callback = keras.callbacks.LearningRateScheduler(lr_schedule)\n    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)\n"

In [254]:
def create_model(learning_rate=learning_rate, input_shape=(64, 64, 5), n_classes=2):
    """Build and compile the convolutional classifier.

    The network is four Conv2D blocks (32, 64, 128 and 128 filters) with
    dropout after each and max-pooling after the second and third, followed
    by a 64-unit dense layer and a softmax output.

    Parameters
    ----------
    learning_rate : float
        Step size for the Adam optimizer. The default is the value the
        module-level ``learning_rate`` had when this function was defined.
    input_shape : tuple of int
        Shape of one input sample (height, width, channels).
    n_classes : int
        Number of units in the softmax output layer.

    Returns
    -------
    keras.Sequential
        Model compiled with sparse categorical cross-entropy (integer class
        labels) and an accuracy metric.
    """
    layers = keras.layers
    relu = tf.nn.relu

    model = keras.Sequential([])

    # Block 1: wide kernel on the raw input, heavy dropout.
    model.add(layers.Conv2D(input_shape=input_shape, filters=32, kernel_size=6,
                            padding='same', activation=relu))
    model.add(layers.Dropout(rate=0.5))

    # Block 2
    model.add(layers.Conv2D(filters=64, kernel_size=5, padding='same', activation=relu))
    model.add(layers.MaxPool2D(pool_size=2))
    model.add(layers.Dropout(rate=0.25)) #best = 0.25

    # Block 3
    model.add(layers.Conv2D(filters=128, kernel_size=2, padding='same', activation=relu))
    model.add(layers.MaxPool2D(pool_size=2))
    model.add(layers.Dropout(rate=0.25)) #best = 0.25

    # Block 4 (no pooling)
    model.add(layers.Conv2D(filters=128, kernel_size=3, padding='same', activation=relu))
    model.add(layers.Dropout(rate=0.25)) #best = 0.35

    # Classifier head
    model.add(layers.Flatten())
    model.add(layers.Dense(units=64, activation=relu))
    model.add(layers.Dropout(rate=0.5))
    model.add(layers.Dense(units=n_classes, activation=tf.nn.softmax))

    # `learning_rate=` replaces the deprecated `lr=` keyword of the optimizer.
    adam = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=adam, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [255]:
# Drop any Keras state left over from earlier runs of this cell, then build
# a fresh model with the configured learning rate.
keras.backend.clear_session()
model = create_model(learning_rate=learning_rate)

In [256]:
# Print the layer-by-layer table (layer type, output shape, parameter count).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 64, 64, 32)        5792      
_________________________________________________________________
dropout (Dropout)            (None, 64, 64, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 64, 64, 64)        51264     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 32, 32, 64)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 32, 32, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 128)       32896     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 16, 16, 128)       0         
__________

In [None]:
# Batches per epoch for each split. These were previously swapped: the
# training steps were derived from the validation directory and vice versa.
# Taking the length of each generator keeps the counts tied to the data the
# generator will actually serve (full batches only).
steps_to_take = len(training_generator)
val_steps_to_take = len(validation_generator)
                #typically be equal to the number of unique samples of your dataset
                #divided by the batch size.

# NOTE(review): fit_generator is deprecated in later TensorFlow 2.x releases
# in favour of model.fit -- confirm against the installed version.
hist = model.fit_generator(generator=training_generator,
                    steps_per_epoch=steps_to_take, 
                    epochs=epoch_number,
                    validation_data=validation_generator,
                    validation_steps=val_steps_to_take,
                    verbose=2,
                    callbacks=[tensorboard_callback])

Exception in thread Thread-117:
Traceback (most recent call last):
  File "C:\Users\awe2\AppData\Local\Continuum\anaconda3\lib\threading.py", line 917, in _bootstrap_inner
    self.run()
  File "C:\Users\awe2\AppData\Local\Continuum\anaconda3\lib\threading.py", line 865, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\awe2\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\keras\utils\data_utils.py", line 619, in _run
    with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor:
  File "C:\Users\awe2\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\keras\utils\data_utils.py", line 600, in pool_fn
    workers, initializer=init_pool_generator, initargs=(seqs, None))
  File "C:\Users\awe2\AppData\Local\Continuum\anaconda3\lib\multiprocessing\context.py", line 119, in Pool
    context=self.get_context())
  File "C:\Users\awe2\AppData\Local\Continuum\anaconda3\lib\multiprocessing\pool.py", line 176, in __init__
    self._rep

Epoch 1/50


In [None]:
#test_loss, test_acc = model.evaluate(test_images, test_target)
#print('Test accuracy:', test_acc)
#print('Test loss:', test_loss)

In [None]:
#all above are models using a different architecture. all below are comments from the current architecture

In [None]:
#source list
"""
https://fizzylogic.nl/2017/05/08/monitor-progress-of-your-keras-based-neural-network-using-tensorboard/

https://stackoverflow.com/questions/41032551/how-to-compute-receiving-operating-characteristic-roc-and-auc-in-keras

https://astrobites.org/2018/07/16/creating-a-more-general-deep-learning-algorithm-for-galaxies/

https://github.com/jameslawlor/kaggle_galaxy_zoo/blob/master/galaxy_zoo_keras.ipynb
"""