In [1]:
import keras
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm
from sklearn import metrics
import time

Using TensorFlow backend.


In [2]:
from keras.layers import Input, Dense, Convolution2D, \
    MaxPooling2D, UpSampling2D, BatchNormalization, Dropout, \
    UpSampling2D, Layer, Flatten, Activation
from keras.models import Model, Sequential
from keras import backend as K
from keras.layers.advanced_activations import ELU
from keras.callbacks import TensorBoard

In [3]:
RAND_SEED = 777
np.random.seed(RAND_SEED)

In [13]:
IMG_SHAPE = (96, 2048, 16)

# axis 0 is batch sample index
AXIS_FREQ = 1
AXIS_TIME = 2
AXIS_CH = 3

VAL_DATA_FILE = 'out/val-msgs.mem'
VAL_YS_FILE = 'out/val-ys.mem'

N_TRAIN_FILES = 1
TRAIN_DATA_FILE = 'out/train-msgs-%d.mem'
TRAIN_YS_FILE = 'out/train-ys-%d.npy'

MODEL_PATH = 'out/models/ab-f%d-e%d-auc%.5f'

In [14]:
# load validation data
# X_val = np.memmap(VAL_DATA_FILE, dtype=np.float32, mode='r+').reshape((-1,) + IMG_SHAPE)
# y_val = np.fromfile(VAL_YS_FILE, dtype=np.float32).reshape(-1, 2)

In [15]:
scores = []

In [16]:
def score_auc():
    s = 0
    n = X_val.shape[0]
    
    print '\n'
    y_p = model.predict(X_val[s:s+n], verbose=True, batch_size=32)
    y_p = np.nan_to_num(y_p)
    return metrics.roc_auc_score(y_val[s:s+n].T[0], y_p.T[0])

In [17]:
class MyCallback(keras.callbacks.Callback):
    def _validate(self, epoch):
        s = score_auc()
        scores.append(s)
        np.save('out/scores.npy', scores)
        print "\n\n AUC = %.5f\n"%(s)
        if True or len(scores) == 0 or s >= max(scores):
            f = MODEL_PATH%(train_file_n, epoch, s)
            print 'Saving to: ', f, '\n'
            model.save(f)
#     def on_train_begin(self, epoch, logs={}):
#         self._validate(0)
    def on_epoch_end(self, epoch, logs={}):
        self._validate(1 + epoch)
    def on_epoch_begin(self, epoch, logs={}):
        print '\n'

In [18]:
model = Sequential()

model.add(BatchNormalization(axis=AXIS_TIME, input_shape=IMG_SHAPE))

# conv 1
model.add(Convolution2D(32, 3, 3, border_mode='same'))
model.add(BatchNormalization(axis=AXIS_CH, mode=0))
model.add(ELU())
model.add(MaxPooling2D(pool_size=(2,4)))
model.add(Dropout(1.))

# conv 2
model.add(Convolution2D(128, 3, 3, border_mode='same'))
model.add(BatchNormalization(axis=AXIS_CH, mode=0))
model.add(ELU())
model.add(MaxPooling2D(pool_size=(2,4)))
model.add(Dropout(1.))

# conv 3
model.add(Convolution2D(128, 3, 3, border_mode='same'))
model.add(BatchNormalization(axis=AXIS_CH, mode=0))
model.add(ELU())
model.add(MaxPooling2D(pool_size=(2,4)))
model.add(Dropout(1.))

# conv 4
model.add(Convolution2D(192, 3, 3, border_mode='same'))
model.add(BatchNormalization(axis=AXIS_CH, mode=0))
model.add(ELU())
model.add(MaxPooling2D(pool_size=(2,4)))
model.add(Dropout(1.))

# conv 5
model.add(Convolution2D(256, 3, 3, border_mode='same'))
model.add(BatchNormalization(axis=AXIS_CH, mode=0))
model.add(ELU())
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Flatten())
model.add(Dropout(0.5))

model.add(Dense(2, activation='sigmoid'))
model.add(Activation('softmax'))

In [19]:
# model.output_shape

In [20]:
model.compile(optimizer='adagrad', loss='binary_crossentropy')

In [21]:
for train_file_n in range(N_TRAIN_FILES):

    X_file = TRAIN_DATA_FILE%(1 + train_file_n)
    y_file = TRAIN_YS_FILE%(1 + train_file_n)
    
    print 'Using file', X_file
    
    X_train = np.memmap(X_file, dtype=np.float32, mode='r+')
    X_train = X_train.reshape((-1,) + IMG_SHAPE)
    y_train = np.fromfile(y_file, dtype=np.float32)
    y_train = y_train.reshape(-1, 2)
    
    X_train = X_train[:100]
    y_train = y_train[:100]
    
    model.fit(
        X_train,
        y_train,
        batch_size=32,
        nb_epoch=75,
#         validation_data=(X_val, y_val),
        shuffle=True,
        callbacks=[
            keras.callbacks.ModelCheckpoint(
                'out/models/b-e{epoch:02d}-vl{loss:.5f}.hdf5', monitor='loss', verbose=1,
                save_best_only=False, save_weights_only=False, mode='auto')
            TensorBoard(log_dir='/tmp/tf-mls-b')
        ]
    )

    del X_train
    del y_train

Using file out/train-msgs-1.mem
Train on 5042 samples, validate on 1000 samples


Epoch 1/75
 128/5042 [..............................] - ETA: 1044s - loss: 0.4726

KeyboardInterrupt: 

In [None]:
keras.