# Single Resnet50 model

In [None]:
import pandas as pd
import numpy as np
import bcolz
import random
from importlib import reload
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Input
from keras.models import Model
from keras import backend as K

In [None]:
# ResNet50 without its top layers (include_top=False); the functions below call
# predict_generator on it to turn images into vectors.
# NOTE(review): input_shape puts the 3-channel axis first — presumably the Keras
# backend is configured for channels-first images; confirm.
resnet_model = ResNet50(
    include_top=False,
    input_shape=(3, 224, 224),
)

Computing the last-layer vectors for all galaxy images takes a lot of time, so let's process them once and save the results on disk.

In [None]:
def save_array(fname, arr):
    """Write ``arr`` to a bcolz carray rooted at ``fname`` and flush it.

    The carray is created with ``mode='w'``.
    """
    bcolz.carray(arr, rootdir=fname, mode='w').flush()
def load_array(fname):
    """Open the bcolz store at ``fname`` and return its complete ``[:]`` slice."""
    stored = bcolz.open(fname)
    return stored[:]

def save_resnet_vecs(vec_fname, img_name_fname, folder):
    """Run every image under ``folder`` through ``resnet_model`` and cache the result.

    Images are read in directory order (``shuffle=False``) so that row ``i`` of
    the saved vectors corresponds to entry ``i`` of the saved file names.

    Args:
        vec_fname: bcolz directory that receives the predicted vectors.
        img_name_fname: bcolz directory that receives the image file names.
        folder: root directory passed to ``flow_from_directory``.
    """
    gen = ImageDataGenerator(preprocessing_function=preprocess_input)
    batches = gen.flow_from_directory(folder,
                                      target_size=(224, 224),
                                      batch_size=64,
                                      class_mode=None,
                                      shuffle=False)
    # Ceiling division: just enough steps to see each image once. The previous
    # (samples + batch_size) // batch_size asked for one step too many whenever
    # the image count was an exact multiple of the batch size, which would make
    # the saved vectors longer than the saved file-name list.
    steps = (batches.samples + batches.batch_size - 1) // batches.batch_size
    vecs = resnet_model.predict_generator(batches, steps=steps)
    save_array(vec_fname, vecs)
    save_array(img_name_fname, batches.filenames)
    
def load_resnet_vecs(vec_fname, img_name_fname):
    """Load a cached pair and return it as ``(image names, vectors)``.

    Note the return order is the reverse of the argument order.
    """
    img_names = load_array(img_name_fname)
    vecs = load_array(vec_fname)
    return img_names, vecs
    
# Cache vectors and file names for each data split: train, then test, then valid.
for _cache_args in (
    ('train_vecs', 'train_img_name', '../data/train'),
    ('test_vecs', 'test_img_name', '../data/test'),
    ('valid_vecs', 'valid_img_name', '../data/valid'),
):
    save_resnet_vecs(*_cache_args)

Add two dense layers on top of ResNet50, fine-tune, and predict on the test data.

In [None]:
def generator(vecs, labels, batch_size=64):
    """Yield shuffled ``(vectors, labels)`` batches forever.

    Each pass over the data visits every index once in a freshly shuffled
    order. A partial batch left over at the end of a pass is completed with
    samples from the next pass, so every yielded batch holds exactly
    ``batch_size`` samples. The shuffle uses a private RNG seeded with 123, so
    the batch sequence is reproducible.

    Args:
        vecs: indexable collection of input vectors.
        labels: indexable collection of labels, aligned with ``vecs``.
        batch_size: number of samples per yielded batch.

    Yields:
        Tuples ``(np.ndarray of vectors, np.ndarray of labels)``.

    Raises:
        ValueError: if ``vecs`` is empty or ``batch_size`` is not positive.
            Without this check the loop would spin forever without yielding.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    if len(vecs) == 0:
        raise ValueError('vecs must contain at least one sample')

    batch_vecs = []
    batch_labels = []
    rnd = random.Random(123)
    while True:
        # Start from the identity order each pass so the shuffle sequence
        # depends only on the seed and the pass number.
        index_shuf = list(range(len(vecs)))
        rnd.shuffle(index_shuf)
        for i in index_shuf:
            batch_vecs.append(vecs[i])
            batch_labels.append(labels[i])
            if len(batch_vecs) == batch_size:
                yield np.array(batch_vecs), np.array(batch_labels)
                batch_vecs = []
                batch_labels = []
                
                
def test_generator(vecs, batch_size=64):
    """Yield ``vecs`` once, in order, as arrays of ``batch_size`` items.

    After the input is exhausted, one final array holding the remaining items
    is always yielded; it is empty when the input length is an exact multiple
    of ``batch_size`` (including when the input is empty).
    """
    pending = []
    for item in vecs:
        pending.append(item)
        if len(pending) != batch_size:
            continue
        yield np.array(pending)
        pending = []
    # Trailing remainder, yielded unconditionally.
    yield np.array(pending)
    
                
def root_mean_squared_error(y_true, y_pred):
    """Keras loss: square root of the mean squared error over the last axis."""
    squared_error = K.square(y_pred - y_true)
    mean_squared_error = K.mean(squared_error, axis=-1)
    return K.sqrt(mean_squared_error)
        
def _ceil_div(count, size):
    """Return ``count / size`` rounded up to the next integer."""
    return (count + size - 1) // size


def _galaxy_id(fname):
    """Return the galaxy id substring (characters 5..10) of a cached image file name."""
    return fname[5:11]


def _labels_for(solutions, fnames):
    """Return the label rows of ``solutions`` for ``fnames``, in the same order.

    ``solutions`` must have a ``GalaxyID`` column; all other columns are
    treated as labels. When an id occurs more than once the first row wins.
    """
    # Index once instead of rescanning the whole frame for every image.
    by_id = solutions.drop_duplicates('GalaxyID').set_index('GalaxyID')
    ids = [int(_galaxy_id(name)) for name in fnames]
    missing = sorted(set(ids) - set(by_id.index))
    if missing:
        raise KeyError('GalaxyID(s) not found in solutions: {}'.format(missing[:5]))
    # .values replaces DataFrame.as_matrix(), which newer pandas no longer has.
    return by_id.loc[ids].values


def train_predict_classes(classes, epochs=5):
    """Train a small dense head on cached ResNet50 vectors and predict the test set.

    The head is Flatten -> Dense(200, relu) -> Dense(len(classes), sigmoid),
    trained with RMSprop on ``root_mean_squared_error``. Training, validation
    and test vectors are read from the bcolz caches written by
    ``save_resnet_vecs``; labels come from ``../data/training_solutions_rev1.csv``.

    Args:
        classes: names of the solution columns to predict.
        epochs: number of training epochs.

    Returns:
        A DataFrame with one column per entry of ``classes`` plus a
        ``GalaxyID`` column (as strings), one row per test image.
    """
    classes = list(classes)

    inp = Input(shape=(2048, 1, 1))
    x = Flatten()(inp)
    x = Dense(200, activation='relu')(x)
    x = Dense(len(classes), activation='sigmoid')(x)
    model = Model(inputs=inp, outputs=x)

    fnames, vecs = load_resnet_vecs('train_vecs', 'train_img_name')
    val_fnames, val_vecs = load_resnet_vecs('valid_vecs', 'valid_img_name')
    solutions = pd.read_csv('../data/training_solutions_rev1.csv')[['GalaxyID'] + classes]
    labels = _labels_for(solutions, fnames)
    val_labels = _labels_for(solutions, val_fnames)

    batch_size = 64
    model.compile(optimizer='rmsprop', loss=root_mean_squared_error, metrics=["accuracy"])
    # Validate on the held-out split. Previously the training vectors were
    # passed here, so the reported validation loss was just training loss.
    model.fit_generator(generator(vecs, labels, batch_size=batch_size),
                        _ceil_div(len(fnames), batch_size),
                        validation_data=generator(val_vecs, val_labels, batch_size=batch_size),
                        validation_steps=max(1, _ceil_div(len(val_fnames), batch_size)),
                        epochs=epochs)

    fnames, vecs = load_resnet_vecs('test_vecs', 'test_img_name')

    # Ceiling division yields exactly one prediction row per test image; the
    # old (n + batch) // batch requested an extra step on exact multiples,
    # which would be fed the empty trailing batch of test_generator.
    predictions = model.predict_generator(test_generator(vecs, batch_size),
                                          steps=_ceil_div(len(fnames), batch_size),
                                          max_queue_size=1)
    df = pd.DataFrame(predictions, columns=classes)

    df['GalaxyID'] = [_galaxy_id(name) for name in fnames]
    return df

Train on all classes at once for 3 epochs.

In [None]:
# Seed NumPy's global RNG before the model is built and trained.
np.random.seed(123)
# One model for all 37 listed answer columns (Class1.1 .. Class11.6), 3 epochs.
classes = train_predict_classes(
    [
        'Class1.1', 'Class1.2', 'Class1.3',
        'Class2.1', 'Class2.2',
        'Class3.1', 'Class3.2',
        'Class4.1', 'Class4.2',
        'Class5.1', 'Class5.2', 'Class5.3', 'Class5.4',
        'Class6.1', 'Class6.2',
        'Class7.1', 'Class7.2', 'Class7.3',
        'Class8.1', 'Class8.2', 'Class8.3', 'Class8.4',
        'Class8.5', 'Class8.6', 'Class8.7',
        'Class9.1', 'Class9.2', 'Class9.3',
        'Class10.1', 'Class10.2', 'Class10.3',
        'Class11.1', 'Class11.2', 'Class11.3',
        'Class11.4', 'Class11.5', 'Class11.6',
    ],
    epochs=3,
)
# Write the predictions (one row per test image) without the DataFrame index.
classes.to_csv('output.csv', index=False)