In [66]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import datetime
import os
import skimage
import random
from tensorflow.python.keras.utils.data_utils import Sequence
from scipy.ndimage import zoom
from scipy.ndimage import shift
from keras.applications import Xception

In [100]:
#input variables
# Paths are built with os.path.join so the separator matches the host OS; the
# previous hard-coded backslash only produced valid paths on Windows.
# `path` keeps its trailing separator so code doing `path + name` still works.
path = os.path.join('image_arrays_new_new', '')
validation_path = os.path.join(path, 'validation')
training_path = os.path.join(path, 'training')
test_path = os.path.join(path, 'test')

#model variables
batch_size = 30        # samples per generated batch
epoch_number = 50      # intended number of training epochs
learning_rate = 1e-3   # intended optimizer step size

# Keyword arguments shared by every DataGenerator built in this notebook.
params = {
    'dim': (72, 72),
    'batch_size': batch_size,
    'n_classes': 2,
    'n_channels': 3,
    'shuffle': True,
}


#More parameters make a model more prone to overfitting, and I have 5/3 times as many parameters as the paper this is
#based on (5 bands instead of 3). I need to find ways to add more regularization, or otherwise try reducing the number
#of layers to cut the number of parameters.

In [68]:
#https://stackoverflow.com/questions/37119071/scipy-rotate-and-zoom-an-image-without-changing-its-dimensions/48097478
def clipped_zoom(img, zoom_factor, **kwargs):
    """Zoom ``img`` about its centre and return an array of the same shape.

    Parameters
    ----------
    img : numpy.ndarray
        Image whose first two axes are (height, width). Any trailing axes
        (e.g. colour channels) are left unscaled.
    zoom_factor : float
        ``< 1`` shrinks the image and zero-pads the border, ``> 1`` magnifies
        the central region, ``== 1`` returns ``img`` itself (not a copy).
    **kwargs
        Forwarded unchanged to ``scipy.ndimage.zoom`` (e.g. ``order``).

    Returns
    -------
    numpy.ndarray
        Array with exactly the same shape as ``img``.
    """
    h, w = img.shape[:2]

    # For multichannel images we don't want to apply the zoom factor to the RGB
    # dimension, so instead we create a tuple of zoom factors, one per array
    # dimension, with 1's for any trailing dimensions after the width and height.
    zoom_tuple = (zoom_factor,) * 2 + (1,) * (img.ndim - 2)

    # Zooming out
    if zoom_factor < 1:

        # Bounding box of the zoomed-out image within the output array
        zh = int(np.round(h * zoom_factor))
        zw = int(np.round(w * zoom_factor))
        top = (h - zh) // 2
        left = (w - zw) // 2

        # Zero-padding
        out = np.zeros_like(img)
        out[top:top+zh, left:left+zw] = zoom(img, zoom_tuple, **kwargs)

    # Zooming in
    elif zoom_factor > 1:

        # Bounding box of the zoomed-in region within the input array.
        # At least one pixel is kept so extreme factors never yield an empty crop.
        zh = max(int(np.round(h / zoom_factor)), 1)
        zw = max(int(np.round(w / zoom_factor)), 1)
        top = (h - zh) // 2
        left = (w - zw) // 2

        out = zoom(img[top:top+zh, left:left+zw], zoom_tuple, **kwargs)

        # Rounding the crop size can leave `out` a pixel SMALLER than `img`.
        # The trim below would then use a negative offset and slice out an
        # almost empty array, so grow `out` back first by repeating its edges.
        short_h = max(h - out.shape[0], 0)
        short_w = max(w - out.shape[1], 0)
        if short_h or short_w:
            pad_width = [(short_h // 2, short_h - short_h // 2),
                         (short_w // 2, short_w - short_w // 2)]
            pad_width += [(0, 0)] * (img.ndim - 2)
            out = np.pad(out, pad_width, mode='edge')

        # `out` might still be slightly larger than `img` due to rounding, so
        # trim off any extra pixels at the edges
        trim_top = ((out.shape[0] - h) // 2)
        trim_left = ((out.shape[1] - w) // 2)
        out = out[trim_top:trim_top+h, trim_left:trim_left+w]

    # If zoom_factor == 1, just return the input array
    else:
        out = img
    return out

In [69]:
#https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
class DataGenerator(Sequence):
    """Keras ``Sequence`` that loads ``.npy`` image arrays and augments them per batch.

    Each sample is read from ``<data_dir>/<ID>.npy``, reduced to channels 1-3
    of the stored array, pasted centred onto a zero-filled canvas of size
    ``dim`` and then (optionally) randomly flipped, shifted, zoomed and rotated.

    Parameters
    ----------
    list_IDs : sequence of str
        Sample identifiers. Each must be a key of ``labels`` and name a file
        ``<ID>.npy`` inside ``data_dir``.
    labels : mapping
        Maps every ID to its class label.
    batch_size : int
        Number of samples per batch; a trailing partial batch is dropped.
    dim : tuple of int
        (height, width) of the generated images. Must be at least as large as
        the stored arrays; the margin is what the shift/zoom steps work in.
    n_channels : int
        Channel count of the generated images. The loader always keeps three
        stored channels (``[:, :, 1:4]``), so this is expected to be 3.
    n_classes : int
        When greater than 2 the labels are returned one-hot encoded.
    shuffle : bool
        Reshuffle the sample order at the end of every epoch.
    data_dir : str
        Directory holding the ``.npy`` files (default keeps the original
        hard-coded ``'image_arrays/'``).
    augment : bool
        Apply the random augmentations. Pass ``False`` to get deterministic
        images, e.g. for validation.
    """

    def __init__(self, list_IDs, labels, batch_size=32, dim=(64,64), n_channels=3,
                 n_classes=2, shuffle=True, data_dir='image_arrays/', augment=True):
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.data_dir = data_dir
        self.augment = augment
        self.on_epoch_end()

    def on_epoch_end(self):
        """Rebuild the sample order, reshuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def _load_sample(self, ID, canvas):
        """Load sample ``ID`` and paste it, centred, into ``canvas`` (in place)."""
        # Keep channels 1..3 of the stored array.
        image = np.load(os.path.join(self.data_dir, ID + '.npy'))[:, :, 1:4]
        height, width = image.shape[:2]

        # Centre the image instead of assuming a fixed 4-pixel margin, so any
        # `dim` that is at least as large as the stored array works.
        top = (self.dim[0] - height) // 2
        left = (self.dim[1] - width) // 2
        if top < 0 or left < 0:
            raise ValueError(
                'dim {} is smaller than the stored image {} for ID {!r}'.format(
                    self.dim, (height, width), ID))
        canvas[top:top + height, left:left + width, :] = image

    def _augment(self, sample):
        """Randomly flip, shift, zoom and rotate ``sample`` in place."""
        # Imported here because a bare `import skimage` does not load the
        # `transform` submodule on older scikit-image releases.
        from skimage.transform import rotate

        # Mirror along each image axis with probability 1/2.
        if random.random() > 0.5:
            sample[...] = np.flip(sample, 0)
        if random.random() > 0.5:
            sample[...] = np.flip(sample, 1)

        # Shift by 4 pixels along rows, then along columns. As written the
        # odds per axis are 1/2 forward, 1/4 backward, 1/4 no shift.
        for offset in ((4, 0, 0), (0, 4, 0)):
            if random.random() > 0.5:
                sample[...] = shift(sample, offset, mode='nearest')
            elif random.random() > 0.5:
                sample[...] = shift(sample, tuple(-step for step in offset),
                                    mode='nearest')

        # Zoom in/out about the centre.
        sample[...] = clipped_zoom(sample, random.uniform(0.75, 1.3))

        # Rotate by a random angle in [0, 45) degrees.
        sample[...] = rotate(sample, angle=45 * random.random(), mode='reflect')

    def __data_generation(self, list_IDs_temp):
        """Build one batch ``(X, y)`` for the given IDs.

        ``X`` has shape ``(n_samples, *dim, n_channels)``. ``y`` holds the
        integer labels, one-hot encoded when ``n_classes > 2``.
        """
        n_samples = len(list_IDs_temp)
        X = np.zeros((n_samples, *self.dim, self.n_channels))
        y = np.empty((n_samples), dtype=int)

        for i, ID in enumerate(list_IDs_temp):
            self._load_sample(ID, X[i])
            if self.augment:
                self._augment(X[i])
            y[i] = self.labels[ID]

        if self.n_classes > 2:
            return X, keras.utils.to_categorical(y, num_classes=self.n_classes)
        return X, y

    def __len__(self):
        """Number of full batches per epoch."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Generate batch number ``index`` of the current epoch."""
        batch_slice = slice(index * self.batch_size, (index + 1) * self.batch_size)
        list_IDs_temp = [self.list_IDs[k] for k in self.indexes[batch_slice]]
        return self.__data_generation(list_IDs_temp)

In [70]:
# Read the gz2class column (column index 8) for the first 10000 catalogue rows.
galaxyzoo = pd.read_csv("zoo2MainSpecz.csv", usecols=[8], nrows=10000)
Class = galaxyzoo["gz2class"].values

# Map the leading letter of each gz2class string to a numeric label.
# Plain ints are used: writing 1-element arrays into scalar slots relies on a
# size-1 array-to-scalar conversion that newer NumPy releases deprecate.
# Presumably E = elliptical, S = spiral, A = artifact - TODO confirm.
dictionary = {'A': 2, 'E': 0, 'S': 1}

#resave using my dictionary
target = np.array([dictionary[name[0]] for name in Class], dtype=float)

# NOTE(review): rows labelled 2 ('A') are not counted here, yet they stay in
# `target`; confirm they never reach the binary (sigmoid) classifier.
count_0 = int(np.sum(target == 0))
count_1 = int(np.sum(target == 1))

print(count_0)
print(count_1)

4099
5887


In [71]:
def _list_array_ids(directory):
    """Return the IDs (file names minus ``.npy``) of the arrays in ``directory``.

    Entries that are not ``.npy`` files (e.g. ``.ipynb_checkpoints``) are
    skipped; they would otherwise become IDs with no label and no array.
    """
    return [os.path.splitext(file_name)[0]
            for file_name in os.listdir(directory)
            if file_name.endswith('.npy')]


train_list = _list_array_ids(training_path)
val_list = _list_array_ids(validation_path)

partition = {'train': train_list, 'validation': val_list}

# One label per loaded catalogue row, keyed by the array file's base name.
# Sized from `target` so it cannot run past the rows actually read.
labels = {'array_number_{}'.format(row): label for row, label in enumerate(target)}

In [72]:
# One generator per split; both share the label lookup and the settings in `params`.
training_generator, validation_generator = [
    DataGenerator(partition[split_name], labels, **params)
    for split_name in ('train', 'validation')
]

In [73]:
#so this is pretty neat, you can create a keras callback to display on tensorboard using a simplified summary tf api

#and also this is an example of how to change the lr on the fly, which is pretty handy
#https://keras.io/callbacks/


"""
    file_writer = tf.summary.create_file_writer(logdir + "/metrics")
    file_writer.set_as_default()
"""
def lr_schedule(epoch,lr):
    """Return the learning rate to use for ``epoch``.

    The incoming ``lr`` is kept through epoch 15, replaced by 1e-4 for
    epochs 16-30 and by 1e-5 afterwards. The chosen value is also passed to
    ``tf.summary.scalar`` under the name ``'learning_rate'``.
    NOTE(review): no summary writer is set up in the visible code (the
    ``create_file_writer`` snippet above sits inside a string) - confirm the
    scalar is actually written anywhere.
    """
    # Check the later threshold first so each epoch matches exactly one branch.
    if epoch > 30:
        lr = 1e-5
    elif epoch > 15:
        lr = 1e-4

    tf.summary.scalar('learning_rate', tensor=lr)
    return lr

#lr_callback = keras.callbacks.LearningRateScheduler(lr_schedule)

# Every run logs into its own directory, named after the current timestamp.
run_started = datetime.datetime.now()
logdir = "summaries/scalars/" + str(run_started.timestamp())
del run_started  # keep the notebook namespace unchanged

# TensorBoard callback: histograms every epoch, gradients on, graph off.
tensorboard_callback = keras.callbacks.TensorBoard(
    log_dir=logdir,
    write_graph=False,
    write_grads=True,
    histogram_freq=1,
)
                                                   #write_images=True)
#will it still print stuff

In [74]:
"""
base_model = Xception(input_shape=(72,72,3), weights='imagenet', include_top=False)
x = base_model.output
x = keras.layers.GlobalAveragePooling2D()(x)
x = keras.layers.Dense(1024, activation='relu')(x)
x = keras.layers.Dropout(0.7)(x)
x = keras.layers.Dense(1024, activation="relu", name='second_last_layer')(x)
predictions = keras.layers.Dense(1, activation="sigmoid")(x)

model_final = Model(inputs=base_model.input, outputs=predictions)

for layer in base_model.layers:
    layer.trainable = False

model_final.compile(loss = "binary_crossentropy", optimizer = optimizers.Adam(lr=learning_rate), metrics=["accuracy"])
"""

'\nbase_model = Xception(input_shape=(72,72,3), weights=\'imagenet\', include_top=False)\nx = base_model.output\nx = keras.layers.GlobalAveragePooling2D()(x)\nx = keras.layers.Dense(1024, activation=\'relu\')(x)\nx = keras.layers.Dropout(0.7)(x)\nx = keras.layers.Dense(1024, activation="relu", name=\'second_last_layer\')(x)\npredictions = keras.layers.Dense(1, activation="sigmoid")(x)\n\nmodel_final = Model(inputs=base_model.input, outputs=predictions)\n\nfor layer in base_model.layers:\n    layer.trainable = False\n\nmodel_final.compile(loss = "binary_crossentropy", optimizer = optimizers.Adam(lr=learning_rate), metrics=["accuracy"])\n'

In [75]:
# Batches per epoch: the number of samples divided by the batch size, rounded
# down. Asking the generators directly keeps these counts in step with what
# they will really yield, instead of re-listing the directories.
steps_to_take = len(training_generator)
val_steps_to_take = len(validation_generator)

print(steps_to_take)
print(val_steps_to_take)

208
37


In [110]:
# Reset Keras/TensorFlow global state before (re)building the model in the
# cells below - presumably so that re-running them starts from an empty graph.
# NOTE(review): tf.reset_default_graph is used as a top-level tf function,
# which suggests TensorFlow 1.x - confirm against the installed version.
keras.backend.clear_session()
tf.reset_default_graph()

In [111]:
# Build the network the same way the earlier VGG16 experiment did: an explicit
# Input layer fed to the pretrained backbone, with a small head on top.
# The VGG16 version is kept in the string below for reference only.
"""
Input_layer = layers.Input(shape=(32,32,3))
base_model = vgg16.VGG16(include_top=False, weights='imagenet',input_tensor=Input_layer)

x=base_model.output
x=layers.GlobalAveragePooling2D()(x)
x=layers.Dropout(dropout)(x)
x=layers.Dense(1024,activation='relu')(x)
"""
# ImageNet-pretrained Xception without its classifier, on 72x72 3-channel input.
Input_layer = keras.layers.Input(shape=(72, 72, 3))
base_model = keras.applications.Xception(
    include_top=False,
    weights='imagenet',
    input_tensor=Input_layer,
)

# Head: global pooling -> Dense(1024) -> Dropout(0.7) -> Dense(1024).
x = base_model.output
for head_layer in (
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(1024, activation=tf.nn.relu),
    keras.layers.Dropout(0.7),
    keras.layers.Dense(1024, activation=tf.nn.relu),
):
    x = head_layer(x)

# A single sigmoid unit gives the binary prediction.
preds = keras.layers.Dense(1, activation=tf.nn.sigmoid)(x)

In [112]:
from tensorflow.keras import Model

# Join the backbone's input to the sigmoid head so both form one trainable network.
model = Model(
    outputs=preds,
    inputs=base_model.input,
)

In [96]:
#for i,layer in enumerate(model.layers):
#    print(i,layer.name)

In [113]:
# Freeze the pretrained backbone and train only the layers added on top.
# The boundary comes from the backbone itself rather than the previous
# hard-coded index 132, so it stays correct if the backbone is swapped.
for layer in base_model.layers:
    layer.trainable = False
for layer in model.layers[len(base_model.layers):]:
    layer.trainable = True

In [84]:
# Abandoned attempt at the same model using keras.Sequential with the
# pretrained Xception nested as a single layer. The author noted that it does
# not work; it is kept inside a string so it never executes.
"""
keras.backend.clear_session()
tf.reset_default_graph()

base_model = keras.applications.Xception(input_shape=(72,72,3),weights='imagenet',include_top=False)
#base_model = keras.applications.Xception(weights='imagenet',include_top=False)
for layer in base_model.layers:
    layer.trainable = False

model = keras.Sequential([])
model.add(keras.layers.InputLayer((72,72,3), name='input'))
model.add(base_model)
model.add(keras.layers.GlobalAveragePooling2D(name='global_after_transfer'))
model.add(keras.layers.Dense(1024, activation=tf.nn.relu,name='first_dense'))
model.add(keras.layers.Dropout(0.7))
model.add(keras.layers.Dense(1024, activation=tf.nn.relu,name='second_dense'))
model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid,name='predictor'))

model.compile(loss=keras.losses.binary_crossentropy, optimizer = keras.optimizers.Adam(lr=learning_rate), metrics=["accuracy"])
"""

'\nkeras.backend.clear_session()\ntf.reset_default_graph()\n\nbase_model = keras.applications.Xception(input_shape=(72,72,3),weights=\'imagenet\',include_top=False)\n#base_model = keras.applications.Xception(weights=\'imagenet\',include_top=False)\nfor layer in base_model.layers:\n    layer.trainable = False\n\nmodel = keras.Sequential([])\nmodel.add(keras.layers.InputLayer((72,72,3), name=\'input\'))\nmodel.add(base_model)\nmodel.add(keras.layers.GlobalAveragePooling2D(name=\'global_after_transfer\'))\nmodel.add(keras.layers.Dense(1024, activation=tf.nn.relu,name=\'first_dense\'))\nmodel.add(keras.layers.Dropout(0.7))\nmodel.add(keras.layers.Dense(1024, activation=tf.nn.relu,name=\'second_dense\'))\nmodel.add(keras.layers.Dense(1, activation=tf.nn.sigmoid,name=\'predictor\'))\n\nmodel.compile(loss=keras.losses.binary_crossentropy, optimizer = keras.optimizers.Adam(lr=learning_rate), metrics=["accuracy"])\n'

In [114]:
# Define the optimizer, loss and accuracy metric.
# The step size comes from the `learning_rate` config constant (1e-3) instead
# of a second hard-coded copy, so tuning the constant actually takes effect.
adam = tf.keras.optimizers.Adam(learning_rate)
model.compile(
    optimizer=adam,
    loss=keras.losses.binary_crossentropy,
    metrics=['accuracy'],
)

In [None]:
# Train from the augmenting generator, validate after each epoch and log to
# TensorBoard; `hist` keeps whatever fit_generator returns.
# NOTE(review): epochs is fixed at 1 here although `epoch_number` is defined
# in the config cell - confirm whether this is a deliberate smoke test.
hist = model.fit_generator(
    generator=training_generator,
    validation_data=validation_generator,
    steps_per_epoch=steps_to_take,
    validation_steps=val_steps_to_take,
    epochs=1,
    callbacks=[tensorboard_callback],
    verbose=2,
)


In [None]:
#test_loss, test_acc = model.evaluate(test_images, test_target)
#print('Test accuracy:', test_acc)
#print('Test loss:', test_loss)

In [None]:
#y_prob = model.predict(X)

In [None]:
#source list
"""
https://fizzylogic.nl/2017/05/08/monitor-progress-of-your-keras-based-neural-network-using-tensorboard/

https://stackoverflow.com/questions/41032551/how-to-compute-receiving-operating-characteristic-roc-and-auc-in-keras

https://arxiv.org/pdf/1711.05744.pdf

https://arxiv.org/pdf/1807.00807.pdf

https://github.com/jameslawlor/kaggle_galaxy_zoo/blob/master/galaxy_zoo_keras.ipynb

https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly

#https://stackoverflow.com/questions/37119071/scipy-rotate-and-zoom-an-image-without-changing-its-dimensions/48097478

https://distill.pub/2018/building-blocks/ what I want to do with this after it is working.

https://github.com/khanx169/DL_DES/blob/master/deeplearning/Xception_final.ipynb
"""