<h3>Importing Libraries</h3>

In [None]:
from __future__ import print_function
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.utils import plot_model

import cv2
import csv
import pickle
import os
from matplotlib import pyplot as plt
import numpy as np
import imageio
import glob
from tqdm import tqdm
import dictionary
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix


<h3>Loading Dictionary and Generating Histogram</h3>

In [2]:

#################################
#    Loading Dictionary
#################################

dic_file = 'dic.p'
galaxy_dic = {}
galaxy_dic = dictionary.load_obj(dic_file)
#print(galaxy_dic)


#################################
#    Generating Histogram
#################################

histogram = dictionary.dataset_class_histogram(galaxy_dic)
#print(histogram)

#################################
#    Reducing Classes to

#    Sx : Spiral
   
#    S0 : Lenticular
   
#    Ix: Irregular
   
#    Ex : Elliptical
#################################


#defining morfology
for key,value in galaxy_dic.items():
    if value == "S0" or value == "S0-a":
        galaxy_dic.update({key: 'L'})

for key,value in galaxy_dic.items():
    if value[0] == "S" and value[1] == "B":
        galaxy_dic.update({key: 'SB'})
        
for key,value in galaxy_dic.items():
    if value[0] == "S" and value[1] != "B":
        galaxy_dic.update({key: 'S'})
        
for key,value in galaxy_dic.items():
    if value[0] == "I":
        galaxy_dic.update({key: 'I'})
        
for key,value in galaxy_dic.items():
    if value[0] == "E":
        galaxy_dic.update({key: 'E'}) 

for key,value in galaxy_dic.items():
    if value[0] == "|":
        galaxy_dic = dictionary.removekey(galaxy_dic,key)
    
histogram = dictionary.dataset_class_histogram(galaxy_dic)

print(histogram)



{'S': 2272, 'L': 675, 'SB': 741, 'I': 336, 'E': 422}


<h3>Creating Labels dictionary and Creating Train, Test and Validation Sets</h3>

In [3]:
#################################
#     Creating Labels dictionary
#################################

i = 0
label_dic = {}
for k in galaxy_dic.values():
  if k not in label_dic.keys():
    label_dic.update({k : i})
    i = i + 1

number_of_labels = len(label_dic.keys())


#########
#################################
#    Creating Train, Test and Validation Sets
#################################
#########


#########
#Separating all set into single type subset
#Separating in train, validation and test
#########

L_galaxy_dic = []
for key,value in galaxy_dic.items():
    if value == "L":
        L_galaxy_dic.append(key)

L_x_train_names, L_x_test_names = train_test_split(L_galaxy_dic, test_size=0.3)

SB_galaxy_dic= []
for key,value in galaxy_dic.items():
    if value == "SB":
        SB_galaxy_dic.append(key)

SB_x_train_names, SB_x_test_names = train_test_split(SB_galaxy_dic, test_size=0.3)

S_galaxy_dic= []
for key,value in galaxy_dic.items():
    if value == "S":
        S_galaxy_dic.append(key)

S_x_train_names, S_x_test_names = train_test_split(S_galaxy_dic, test_size=0.3)


I_galaxy_dic= []
for key,value in galaxy_dic.items():
    if value == "I":
        I_galaxy_dic.append(key)

I_x_train_names, I_x_test_names = train_test_split(I_galaxy_dic, test_size=0.4)

E_galaxy_dic= []
for key,value in galaxy_dic.items():
    if value == "E":
        E_galaxy_dic.append(key)

E_x_train_names, E_x_test_names = train_test_split(E_galaxy_dic, test_size=0.3)


L_x_validation_names, L_x_test_names = train_test_split(L_x_test_names, test_size=0.5)
SB_x_validation_names, SB_x_test_names = train_test_split(SB_x_test_names, test_size=0.5)
S_x_validation_names, S_x_test_names = train_test_split(S_x_test_names, test_size=0.5)
I_x_validation_names, I_x_test_names = train_test_split(I_x_test_names, test_size=0.5)
E_x_validation_names, E_x_test_names = train_test_split(E_x_test_names, test_size=0.5)


x_train_names = L_x_train_names + SB_x_train_names + S_x_train_names  + E_x_train_names + I_x_train_names
x_test_names = L_x_test_names + SB_x_test_names + S_x_test_names  + E_x_test_names + I_x_test_names
x_validation_names = L_x_validation_names + SB_x_validation_names + S_x_validation_names + E_x_validation_names + I_x_validation_names


<h3>Gathering Train, Test and Validation Features</h3>

In [15]:

#########
#Gathering Train, Test and Validation Features
#########

i = 0
features_train = []
features_test = []
features_validation = []
labels_train = []
labels_test = []
labels_validation = []



for path_to_image in tqdm(glob.glob("./images/png-grey/*.png")):
    # print(path_to_image[18:-4])
    name = path_to_image[18:-4]
    name_to_open = './images/png-grey/' + path_to_image[18:]

    if name in x_train_names:
    	im = cv2.resize(cv2.imread(name_to_open), (224, 224)).astype(np.float32)
    	#im = im.transpose((2,0,1))
    	# im = np.expand_dims(im, axis=0)
    	features_train.append(im)
    	idx = label_dic[galaxy_dic[name]]
    	label_train = idx
    	labels_train.append(label_train)


    if name in x_test_names:
    	im = cv2.resize(cv2.imread(name_to_open), (224, 224)).astype(np.float32)
    	#im = im.transpose((2,0,1))
    	# im = np.expand_dims(im, axis=0)
    	features_test.append(im)
    	idx = label_dic[galaxy_dic[name]]
    	label_test = idx
    	labels_test.append(label_test)

    if name in x_validation_names:
    	im = cv2.resize(cv2.imread(name_to_open), (224, 224)).astype(np.float32)
    	#im = im.transpose((2,0,1))
    	# im = np.expand_dims(im, axis=0)
    	features_validation.append(im)
    	idx = label_dic[galaxy_dic[name]]
    	label_validation = idx
    	labels_validation.append(label_validation)



features_train = np.array(features_train)
features_test = np.array(features_test)
features_validation = np.array(features_validation)
labels_train = np.array(labels_train)
labels_test = np.array(labels_test)


x_train = features_train
x_validation = features_validation
x_test = features_test
y_train = labels_train
y_validation = labels_validation
y_test = labels_test



100%|██████████| 4446/4446 [00:27<00:00, 163.83it/s]


<h3>Defining Training parameters</h3>

In [16]:

#########
#Defining Training parameters
#########

#O numero de passos vai ser len(x_train)/(batch_size)
batch_size = 64 #numero de amostras por gradiente
num_classes = number_of_labels # numero de classes do cifar10
epochs = 1 #numero de epocas para treinar o modelo
data_augmentation = True


print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
y_validation = keras.utils.to_categorical(y_validation, num_classes)


print('y_train shape:', y_train.shape)
# print ('size of first dimention ', y_train[0][0])
print(y_train.shape[0], 'train samples')
print(y_test.shape[0], 'test samples')



x_train shape: (63, 255, 255, 3)
63 train samples
687 test samples
y_train shape: (63, 5)
63 train samples
687 test samples


<h3>Defining Network Topology (VGG16z)</h3>

In [17]:

#########
#Defining Network Topology(VGG16z)
#########


model = Sequential()
model.add(ZeroPadding2D((1,1), input_shape=x_train.shape[1:],data_format='channels_last'))
model.add(Convolution2D(64, (3,3), activation='relu',data_format='channels_last'))
model.add(ZeroPadding2D((1,1),data_format='channels_last'))
model.add(Convolution2D(64, (3,3), activation='relu',data_format='channels_last'))
model.add(MaxPooling2D((2,2), strides=(2,2),data_format='channels_last'))

model.add(ZeroPadding2D((1,1),data_format='channels_last'))
model.add(Convolution2D(128, (3,3), activation='relu',data_format='channels_last'))
model.add(ZeroPadding2D((1,1),data_format='channels_last'))
model.add(Convolution2D(128, (3,3), activation='relu',data_format='channels_last'))
model.add(MaxPooling2D((2,2), strides=(2,2),data_format='channels_last'))

model.add(ZeroPadding2D((1,1),data_format='channels_last'))
model.add(Convolution2D(256, (3,3), activation='relu',data_format='channels_last'))
model.add(ZeroPadding2D((1,1),data_format='channels_last'))
model.add(Convolution2D(256, (3,3), activation='relu',data_format='channels_last'))
model.add(ZeroPadding2D((1,1),data_format='channels_last'))
model.add(Convolution2D(256, (3,3), activation='relu',data_format='channels_last'))
model.add(MaxPooling2D((2,2), strides=(2,2),data_format='channels_last'))

model.add(ZeroPadding2D((1,1),data_format='channels_last'))
model.add(Convolution2D(512, (3,3), activation='relu',data_format='channels_last'))
model.add(ZeroPadding2D((1,1),data_format='channels_last'))
model.add(Convolution2D(512, (3,3), activation='relu',data_format='channels_last'))
model.add(ZeroPadding2D((1,1),data_format='channels_last'))
model.add(Convolution2D(512, (3,3), activation='relu',data_format='channels_last'))
model.add(MaxPooling2D((2,2), strides=(2,2),data_format='channels_last'))

model.add(ZeroPadding2D((1,1),data_format='channels_last'))
model.add(Convolution2D(512, (3,3), activation='relu',data_format='channels_last'))
model.add(ZeroPadding2D((1,1),data_format='channels_last'))
model.add(Convolution2D(512, (3,3), activation='relu',data_format='channels_last'))
model.add(ZeroPadding2D((1,1),data_format='channels_last'))
model.add(Convolution2D(512, (3,3), activation='relu',data_format='channels_last'))
model.add(MaxPooling2D((2,2), strides=(2,2),data_format='channels_last'))

model.add(Flatten())
model.add(Dense(4096, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4096, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))



# initiate RMSprop optimizer
# RMS usa back propagation [RMS(w) eh em funcao de w-1] quadratico

opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])


plot_model(model, to_file='model.png')





<h3>Training CNN</h3>

In [18]:

#########
#Training the CNN
#########

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# ao invez de pegar todos os treinos e testes, o programa pega apenas uma porcao
x_train /= 255
x_test /= 255

if not data_augmentation:
    print('Not using data augmentation.')
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_validation, y_validation),
              shuffle=True)
else:
    print('Using real-time data augmentation.')
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=180,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=True)  # randomly flip images

    print ("augmenting data")
    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    print ("data augmented")

    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size), 
                        steps_per_epoch=x_train.shape[0] // batch_size, 
                        epochs=epochs, validation_data=(x_validation, y_validation))



Not using data augmentation.
Train on 63 samples, validate on 683 samples
Epoch 1/1


<h3>Getting Results</h3>

In [2]:
#########
#Getting Results
#########


score = model.evaluate(x_test, y_test, verbose=1)
print("Resultado do teste final de acerto da rede")
print(score)




#keras get_layer: retorna a layer: podemos usar para ver o estado final dos filtros usados (ja que sao 5x5)
#visualize single neuron ->> output para apresentação





Y_pred = model.predict(x_test, batch_size=batch_size, verbose=1)


NameError: name 'model' is not defined

In [23]:
y_pred = np.argmax(Y_pred)

for i in range(len(Y_pred)):
    
print (Y_pred)
print (y_pred)
print(y_test)
# print('Confusion Matrix')
# print(confusion_matrix(y_test, y_pred))


[[  9.99969125e-01   1.64386511e-05   1.26375317e-05   1.65746911e-07
    1.61980063e-06]
 [  9.99903798e-01   4.96908979e-05   3.93847185e-05   8.84367182e-07
    6.31536204e-06]
 [  9.99992132e-01   4.31862827e-06   3.27619682e-06   2.69725025e-08
    3.17913930e-07]
 ..., 
 [  9.99999046e-01   5.72607689e-07   4.00848251e-07   1.51866197e-09
    2.82330799e-08]
 [  9.99998927e-01   5.93250832e-07   4.32683464e-07   1.70635750e-09
    2.92510531e-08]
 [  9.84149635e-01   6.72753248e-03   6.16854057e-03   7.64608267e-04
    2.18970468e-03]]
145
[[ 0.  1.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  0.]
 ..., 
 [ 0.  1.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.]]
