In [1]:
import numpy as np
import pandas as pd
import keras.backend as K
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Reshape, Flatten, Dropout, Activation
from keras.models import Model, Sequential, load_model
from keras.layers.normalization import BatchNormalization
from keras import optimizers, callbacks
from skimage.transform import resize
from imageio import imread

  return f(*args, **kwds)
  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
path_data = '../all/'

In [3]:
def preprocess_input(np_img):
    np_img = np_img / 255
    return np_img

In [4]:
def generator(path_images, labels, batch_size):
    count = 0
    while True:
        batch_features = []
        batch_labels = []
        for i in range(batch_size):
            # Read image
            name_img = labels[count][0] + '.jpg'
            img = imread(path_images + '/' + name_img)
            
            # Image preprocessing
            img = preprocess_input(img)

            label = labels[count][1:]

            batch_features.append(img)
            batch_labels.append(label)
            count += 1
            
            # Restart counter when it has reached the size 
            # of the data set
            if count == labels.shape[0] - 1:
                count = 0
                break
            
        yield np.array(batch_features), np.array(batch_labels)
            

In [5]:
def generator_predictions(path_images, list_names, batch_size):
    size_list = list_names.shape[0] - 1
    count = 0
    while True:
        batch_features = []
        for i in range(batch_size):
            # Read image
            name_img = list_names[count]
            img = imread(path_images + '/' + name_img)
            
            # Image preprocessing
            img = preprocess_input(img)

            batch_features.append(img)
            
            # Restart counter when it has reached the size 
            # of the data set
            if count == size_list:
                count = 0
                yield np.array(batch_features)
            count += 1
            
        yield np.array(batch_features)

In [6]:
def galaxy_cnn(input_size, output_size, drop_out=False, batch_norm=False, dense_size=1024):
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3),activation='relu',input_shape=input_size))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, kernel_size=(3, 3)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    if batch_norm:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(128, kernel_size=(3, 3)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    if batch_norm:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(256, kernel_size=(3, 3)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    if batch_norm:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Conv2D(512, kernel_size=(3, 3)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    if batch_norm:
        model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dense(dense_size, activation='relu'))
    if drop_out:
        model.add(Dropout(0.5))
    model.add(Dense(dense_size, activation='relu'))
    if drop_out:
        model.add(Dropout(0.5))
    model.add(Dense(output_size, activation='sigmoid'))
    return model

In [7]:
def rmse (y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true)))

In [8]:
labels_train = np.load(path_data + 'y_train.npy')
labels_test = np.load(path_data + 'y_test.npy')
labels_val = np.load(path_data + 'y_val.npy')

In [8]:
labels_val.shape

(9237, 38)

In [10]:
dense_size = [512, 1024, 2048]
drop_o = [False, True]
batch_n = [False, True]

In [11]:
batch_size = 32
steps = 43104 // batch_size 
steps_va = 9237 // batch_size
epochs = 20

In [16]:
for d_s in dense_size:
    for d_o in drop_o:
        for b_n in batch_n:
            model_name = 'model' + '_Dense_'+ str(d_s) + '_Drop_out_' + str(d_o) + '_Batch_n_' + str(b_n)
            print('Training model: ', model_name)
            # Set architecture
            opt = optimizers.Adam(lr=0.0001)
            model = galaxy_cnn((160, 160, 3), 37, d_o, b_n, d_s)
            model.compile(loss='mean_squared_error', optimizer=opt, metrics=[rmse])
#             tbCallBack = callbacks.TensorBoard(log_dir=path_data + './Graph', histogram_freq=0, write_graph=True, write_images=True)
            # Train the model
            model.fit_generator(generator(path_data + 'preprocess_img_train', labels_train, batch_size=batch_size), 
                    steps_per_epoch=steps, 
                    epochs=epochs, 
                    validation_data= generator(path_data + 'preprocess_img_val', labels_val, batch_size=batch_size),
                    validation_steps=steps_va)
            # Evaluation over validation test
            evaluation = model.evaluate_generator(generator(path_data + 'preprocess_img_test', labels_test, batch_size=batch_size), steps=steps)
            print(model_name)
            print('Evaluation:', evaluation)
            # Save model after training
            model.save(path_data + 'weights/' + model_name + '.h5')
            # Clear session to free memory
            K.clear_session()

Training model:  model_Dense_2048_Drop_out_False_Batch_n_True
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
model_Dense_2048_Drop_out_False_Batch_n_True
Evaluation: [0.010969073897548122, 0.10437506831011246]


# Evaluate models

In [13]:
model_names = []
evaluations = []
for d_s in dense_size:
    for d_o in drop_o:
        for b_n in batch_n:
            model_name = 'model' + '_Dense_'+ str(d_s) + '_Drop_out_' + str(d_o) + '_Batch_n_' + str(b_n)
            
            print('Evaluating model: ', model_name)
            
            # Load the model
            # Set architecture
            opt = optimizers.Adam(lr=0.0001)
            model = galaxy_cnn((160, 160, 3), 37, d_o, b_n, d_s)
            model.load_weights(path_data + 'weights/' + model_name +'.h5')
            model.compile(loss='mean_squared_error', optimizer=opt, metrics=[rmse])
            
            # Evaluate model
            evaluation = model.evaluate_generator(generator(path_data + 'preprocess_img_test', labels_test, batch_size=batch_size), steps=steps)

            model_names.append(model_name)
            evaluations.append(evaluation)
            
            K.clear_session()

Evaluating model:  model_Dense_512_Drop_out_False_Batch_n_False
Evaluating model:  model_Dense_512_Drop_out_False_Batch_n_True
Evaluating model:  model_Dense_512_Drop_out_True_Batch_n_False
Evaluating model:  model_Dense_512_Drop_out_True_Batch_n_True
Evaluating model:  model_Dense_1024_Drop_out_False_Batch_n_False
Evaluating model:  model_Dense_1024_Drop_out_False_Batch_n_True
Evaluating model:  model_Dense_1024_Drop_out_True_Batch_n_False
Evaluating model:  model_Dense_1024_Drop_out_True_Batch_n_True
Evaluating model:  model_Dense_2048_Drop_out_False_Batch_n_False
Evaluating model:  model_Dense_2048_Drop_out_False_Batch_n_True
Evaluating model:  model_Dense_2048_Drop_out_True_Batch_n_False
Evaluating model:  model_Dense_2048_Drop_out_True_Batch_n_True


In [14]:
evaluations = np.array(evaluations)

In [15]:
rmse_list = evaluations[:, 1]

In [16]:
list_index = rmse_list.argsort()

## Best models

In [18]:
for item in list_index[:3]:
    print(model_names[item], rmse_list[item])

model_Dense_2048_Drop_out_True_Batch_n_False 0.09997407958847312
model_Dense_2048_Drop_out_True_Batch_n_True 0.1013503158260836
model_Dense_1024_Drop_out_True_Batch_n_True 0.10138449899295766


In [200]:
test_sub = pd.read_csv(path_data + 'all_zeros_benchmark.csv')

In [71]:
name_test_sub = test_sub['GalaxyID'].apply(lambda x: str(x) + '.jpg').values

## Set best model for submission

In [77]:
opt = optimizers.Adam(lr=0.0001)
model = galaxy_cnn((160, 160, 3), 37, True, False, 2048)
model.load_weights(path_data + 'weights/' + 'model_Dense_2048_Drop_out_True_Batch_n_False.h5')
model.compile(loss='mean_squared_error', optimizer=opt, metrics=[rmse])

In [126]:
batch_size_sub = 32
steps_sub = np.ceil(79975 // batch_size_sub + 1) 

In [127]:
predictions = model.predict_generator(generator_predictions(path_data + 'sub_test_img', name_test_sub, batch_size=batch_size_sub), steps=steps_sub, verbose=1)



In [250]:
results = test_sub.copy()
results.head()
columns = prueba.columns[1:]

In [251]:
for count in range(predictions.shape[1]):
    column = columns[count]
    results[column] = predictions[:, count]

In [252]:
results.head()

Unnamed: 0,GalaxyID,Class1.1,Class1.2,Class1.3,Class2.1,Class2.2,Class3.1,Class3.2,Class4.1,Class4.2,...,Class9.3,Class10.1,Class10.2,Class10.3,Class11.1,Class11.2,Class11.3,Class11.4,Class11.5,Class11.6
0,100018,0.535169,0.448261,0.019502,0.03022,0.419044,0.078234,0.371073,0.100051,0.34907,...,0.002883,0.053527,0.031145,0.012281,0.00458,0.025267,0.001130747,0.000689,0.001644,0.064111
1,100037,0.451488,0.546781,0.005816,0.485883,0.070427,0.009963,0.056207,0.017074,0.05189,...,0.271759,0.010485,0.005015,0.001607,0.00015,0.004047,2.939095e-05,5e-05,0.000145,0.012347
2,100042,0.665638,0.309743,0.012054,0.247905,0.072442,0.010673,0.05261,0.013125,0.053177,...,0.117793,0.004995,0.002808,0.002289,0.000205,0.003236,9.213272e-06,1.5e-05,6.5e-05,0.006813
3,100052,0.565562,0.406442,0.029627,0.005225,0.394116,0.014635,0.386293,0.068376,0.337812,...,0.000367,0.035641,0.018003,0.008099,0.005975,0.010004,0.001036999,0.000562,0.001518,0.043148
4,100056,0.127365,0.869175,0.001181,0.85843,0.005488,0.001954,0.003685,0.001851,0.003085,...,0.519266,0.000607,0.000419,0.000284,6e-06,0.000977,8.200367e-07,2e-06,3e-06,0.000624


In [253]:
# Save results
results.to_csv(path_data + 'results.csv', index=False)