## Load data

In [1]:
import os, sys, time
import numpy as np
import pandas as pd
from tqdm import tqdm
import cPickle as pk
np.random.seed(1) # to be reproductive

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, Flatten, Dropout
from keras.layers import Conv1D, MaxPooling1D, Embedding
from keras.layers import Conv2D, MaxPooling2D, Reshape
from keras.models import Sequential
from keras.models import load_model
from keras.utils.np_utils import to_categorical
from keras.callbacks import TensorBoard
from keras.callbacks import Callback, EarlyStopping
import keras.backend as K

Using TensorFlow backend.


In [2]:
# paths
# NOTE(review): the absolute /local/XW/... paths are specific to one machine; adjust before running elsewhere.
NOTE_DATA_DIR = '/local/XW/DATA/MIMIC/noteevents_by_sid/'
ICD_FPATH = 'data/subject_diag_icds.txt'
PK_FPATH = 'data/diag_processed_data.pk' # pickled, pre-processed data loaded below (alternative: './processed_data_small.pk')
MODEL_PATH = './models/' # trained models are saved here as .h5 files
LOG_PATH = './logs/' # TensorBoard log directories are created here
# constants
N_LABELS = 50 # number of output units of every model
K_ICDS_TOKEEP = N_LABELS - 1 # predict only on top K frequent icd codes (presumably the remaining column is an "other" label -- TODO confirm)
N_SUBJECTS = 41886
# word embedding (GloVe) configurations
GLOVE_DIR = '/local/XW/DATA/glove.6B/'
MAX_SEQ_LEN = 1000 # max length of input sequence (pad/truncate to fixed length)
MAX_NB_WORDS = 20000 # top 20k most frequent words
EMBEDDING_DIM = 100 # output dimension of the Embedding layers
# learning configurations
VALIDATION_SPLIT = 0.2
N_EPOCHS = 20 # upper bound on epochs; training also uses early stopping on val_loss
SZ_BATCH = 512 # mini-batch size used for the full training run

In [3]:
# load pickled data
# NOTE: unpickling can execute arbitrary code -- only load PK_FPATH when it comes from a trusted source.
with open(PK_FPATH, 'rb') as pk_file: # context manager closes the handle (the bare open() never did)
    pk_data = pk.load(pk_file)
embedding_matrix = pk_data['embedding_matrix']
X_train, Y_train = pk_data['X_train'], pk_data['Y_train']
X_val, Y_val = pk_data['X_val'], pk_data['Y_val']
nb_words = MAX_NB_WORDS # forgot to pickle this number...

In [4]:
# found one row that is ALL 0 (strange?)
print np.min( np.sum(Y_train, axis=1) ), np.min( np.sum(Y_val, axis=1) )
print np.argmin( np.sum(Y_train, axis=1) )
Y_train[11730]
Y_train = np.delete(Y_train, 11730, axis=0)
X_train = np.delete(X_train, 11730, axis=0)
print X_train.shape, Y_train.shape

0.0 1.0
11730
(36916, 1000) (36916, 50)


### Modify sample weight, and use larger batch size

In [20]:
Y_train_noother = Y_train[::-1]
inv_freq = 1e6*Y_train_noother.sum(axis=0)**(-1.5)
sample_weight = (inv_freq * Y_train_noother).sum(axis=1)
print sample_weight.shape
sample_weight
# y_n_poslabels = Y_train_noother.sum(axis=1)

(36916,)


## Define evaluation metrics

**NB:** these metrics are a continuous relaxation of what we really want, so the accuracy reported during training is not exact.

In [5]:
def multlabel_prec(y_true, y_pred):
    """Sample-averaged precision, ignoring the last label column.

    Predictions are clipped to [0, 1] and rounded to 0/1. For every sample the
    number of true positives is divided by the number of predicted positives,
    and the mean over samples is returned.
    """
    y_pred, y_true = y_pred[:,:-1], y_true[:,:-1] # test without last column considered
    y_pred = K.round(K.clip(y_pred, 0, 1)) # turn to 0/1
    tp = K.sum(y_true * y_pred, axis=-1)
    sum_pred = K.sum(y_pred, axis=-1)
    # the epsilon avoids NaN precision for samples with no predicted positive (they score 0)
    return K.mean(tp/(sum_pred+1e-10))
    
def multlabel_recall(y_true, y_pred):
    """Sample-averaged recall, ignoring the last label column.

    Predictions are clipped to [0, 1] and rounded to 0/1. For every sample the
    number of true positives is divided by the number of actual positives,
    and the mean over samples is returned.
    """
    y_pred, y_true = y_pred[:,:-1], y_true[:,:-1] # test without last column considered
    y_pred = K.round(K.clip(y_pred, 0, 1)) # turn to 0/1
    tp = K.sum(y_true * y_pred, axis=-1)
    sum_true = K.sum(y_true, axis=-1)
    # the epsilon avoids a division by zero for samples without any true label
    return K.mean(tp/(sum_true+1e-10))

def multlabel_F1(y_true, y_pred):
    """Sample-averaged F1 score, ignoring the last label column.

    Per sample this is 2 * TP / (#true + #predicted), computed on predictions
    that were clipped to [0, 1] and rounded to 0/1.
    """
    truth = y_true[:, :-1]                              # last column is left out
    hard_pred = K.round(K.clip(y_pred[:, :-1], 0, 1))   # 0/1 decisions
    n_hits = K.sum(truth * hard_pred, axis=-1)
    n_true = K.sum(truth, axis=-1)
    n_pred = K.sum(hard_pred, axis=-1)
    per_sample = n_hits / (n_true + n_pred + 1e-10)     # epsilon keeps empty rows finite
    return 2 * K.mean(per_sample)

def multlabel_acc(y_true, y_pred):
    """Sample-averaged intersection-over-union of true and predicted label sets.

    The last label column is ignored; predictions are clipped to [0, 1] and
    rounded to 0/1 before the comparison.
    """
    truth = y_true[:, :-1]
    hard_pred = K.round(K.clip(y_pred[:, :-1], 0, 1))
    # labels that are both true and predicted
    n_common = K.sum(truth * hard_pred, axis=-1)
    # labels that are true or predicted (a sum of 2 is clipped back to 1)
    n_either = K.sum(K.clip(truth + hard_pred, 0, 1), axis=-1)
    return K.mean(n_common / (n_either + 1e-10))

In [6]:
def evaluate_model(model):
    print 'evaluation on training set:'
    print model.evaluate(X_train, Y_train, batch_size=128)
    print 'evaluation on validation set:'
    print model.evaluate(X_val, Y_val, batch_size=128)

In [25]:
# wraps up operations on models
def compile_fit_evaluate(model, quick_test=False, print_summary=True,
                         save_log=True, save_model=True, del_model=False):
    
    model.compile(loss='binary_crossentropy',
             optimizer='rmsprop',
             metrics=[multlabel_prec, multlabel_recall, multlabel_F1, multlabel_acc])
    if print_summary:
        print model.summary()
        
    if quick_test: # use tiny data for quick test
        print '(quick test mode)'
        model.fit(X_train[:100], Y_train[:100], nb_epoch=1)
        return  
    
    _callbacks = [EarlyStopping(monitor='val_loss', patience=2)] #[RelaxAccHistory()]
    if save_log:
        logdir = os.path.join( LOG_PATH, time.strftime('%m%d')+'_'+str(model.name) )
        if not os.path.exists(logdir):
            os.makedirs(logdir)
        _callbacks.append(TensorBoard(log_dir=logdir))
        print 'run "tensorboard --logdir=%s" to launch tensorboard'%logdir
    
    model.fit( X_train, Y_train, 
              validation_data=(X_val, Y_val), 
              nb_epoch=N_EPOCHS, batch_size=SZ_BATCH, 
              sample_weight = sample_weight, 
              callbacks=_callbacks )
    
    print 'evaluating model...'
    evaluate_model(model)
    
    if save_model: 
        model_fpath = os.path.join( MODEL_PATH, time.strftime('%m%d')+'_%s.h5'% str(model.name) )
        model.save(model_fpath)
    
    if del_model:
        del model # delete the model to save memory

In [8]:
# ''' ***NOTE***
# To load a saved model from file, the custom `multlabel_XXX` metric functions had to be
# added to keras' own metrics.py, located at:
# `/local/XW/SOFT/anaconda2/envs/thesis_nb/lib/python2.7/site-packages/keras`
# otherwise `load_model` throws an exception!
#
# cf. issue: https://github.com/fchollet/keras/issues/3911
# '''
# m = load_model(os.path.sep.join([MODEL_PATH, 'model_1conv1d.h5']))

## Best model: 2 conv layers and 2 FC 

In [26]:
# Frozen embedding -> 2 x (Conv1D + MaxPooling1D) -> Dropout -> Dense(500) -> Dropout -> sigmoid output.
# NOTE: `flag_quick_test` is assigned in the "Try different models" section further down;
# that cell has to be run before this one.
model_2conv1d_2FC = Sequential(name='model_2conv1d_2FC')
model_2conv1d_2FC.add(Embedding(input_dim=nb_words+1, output_dim=EMBEDDING_DIM,
                                weights=[embedding_matrix], input_length=MAX_SEQ_LEN,
                                trainable=False))
model_2conv1d_2FC.add(Conv1D(128, 5, activation='relu'))
model_2conv1d_2FC.add(MaxPooling1D(5))
model_2conv1d_2FC.add(Conv1D(128, 5, activation='relu'))
model_2conv1d_2FC.add(MaxPooling1D(5))
model_2conv1d_2FC.add(Flatten())
model_2conv1d_2FC.add(Dropout(p=0.5))
model_2conv1d_2FC.add(Dense(500, activation='relu'))
model_2conv1d_2FC.add(Dropout(p=0.5))
model_2conv1d_2FC.add(Dense(N_LABELS, activation='sigmoid'))
compile_fit_evaluate(model_2conv1d_2FC, quick_test=flag_quick_test)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_7 (Embedding)          (None, 1000, 100)     0           embedding_input_7[0][0]          
____________________________________________________________________________________________________
convolution1d_11 (Convolution1D) (None, 996, 128)      64128       embedding_7[0][0]                
____________________________________________________________________________________________________
maxpooling1d_11 (MaxPooling1D)   (None, 199, 128)      0           convolution1d_11[0][0]           
____________________________________________________________________________________________________
convolution1d_12 (Convolution1D) (None, 195, 128)      82048       maxpooling1d_11[0][0]            
___________________________________________________________________________________________

## Extract embedding vector, and use SVM to predict

In [36]:
# Show the input tensor of the first layer (the Embedding) and the output tensor of
# layers[7], the Dense(500) layer whose activations are used as feature vectors below.
print model_2conv1d_2FC.layers[0].input
print model_2conv1d_2FC.layers[7].output

Tensor("embedding_input_7:0", shape=(?, 1000), dtype=int32)
Tensor("Relu_15:0", shape=(?, 500), dtype=float32)


In [41]:
from keras import backend as K

# Backend function: (token-id batch, learning phase) -> activations of layers[7], the Dense(500) layer.
_embed_fn_inputs = [model_2conv1d_2FC.layers[0].input, K.learning_phase()]
_embed_fn_outputs = [model_2conv1d_2FC.layers[7].output]
get_embedvec = K.function(_embed_fn_inputs, _embed_fn_outputs)

def embedvec(X):
    """Return the layers[7] activations for the batch X, evaluated with learning phase 0."""
    return get_embedvec([X, 0])[0]

In [42]:
# Sanity check on the first 10 training rows; embedvec evaluates with learning phase 0 (test mode).
layer_output = embedvec(X_train[:10])
print layer_output.shape

(10, 500)


In [49]:
def to_embedvec(X):
    """Apply `embedvec` to X in chunks of 128 rows and stack the results row-wise.

    A tqdm progress bar is shown over the chunks.
    """
    BATCH_SZ = 128
    chunk_starts = xrange(0, X.shape[0], BATCH_SZ)
    # a slice that runs past the end is clamped, so the last, shorter chunk needs no special case
    return np.vstack([embedvec(X[start:start+BATCH_SZ]) for start in tqdm(chunk_starts)])

In [50]:
# Feature vectors (layers[7] activations of model_2conv1d_2FC) for the whole training set.
Xembed_train = to_embedvec(X_train)
print Xembed_train.shape

100%|██████████| 289/289 [03:43<00:00,  1.60it/s]

(36916, 500)





In [51]:
Xembed_val = to_embedvecedvec(X_val)
print Xembed_val.shape

100%|██████████| 73/73 [00:53<00:00,  1.37it/s]

(9229, 500)





In [52]:
def multilabel_evaluate(y_pred, y_true=Y_val):
    y_pred, y_true = y_pred[:,:-1], y_true[:,:-1] # test without last column considered
    tp = np.sum(y_true * y_pred, axis=-1) 
    sum_true = np.sum(y_true, axis=-1)
    sum_pred = np.sum(y_pred, axis=-1)
    union = np.sum(np.clip(y_true+y_pred, 0, 1), axis=-1)
    print 'precision =', np.mean(tp/(sum_pred+1e-10))
    print 'recall = ', np.mean(tp/(sum_true+1e-10))
    print 'F1 = ', 2*np.mean(tp/(sum_true+sum_pred+1e-10))
    print 'acc = ', np.mean(tp/(union+1e-10))

In [65]:
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV
parameters = {
              'alpha': [1, 1e-1, 3e-1, 1e-2, 3e-2, 1e-3, 3e-3, 1e-4],
              'n_iter': [10, 50, 200]}

for i in tqdm(range(N_LABELS)):
    sgd =  SGDClassifier(loss='hinge', penalty='l2', random_state=1, class_weight='balanced')
    clf = GridSearchCV(sgd, parameters, n_jobs=-1)
    clf.fit(Xembed_train, Y_train[:,i]) 
    clfs.append(clf)

preds = [clfs[i].predict(Xembed_val) for i in xrange(N_LABELS)]    
pred_svm = np.vstack(preds).T
print pred_svm.shape
print map(int, pred_svm.sum(axis=0))
multilabel_evaluate(y_pred=pred_svm, y_true=Y_val)

100%|██████████| 50/50 [49:49<00:00, 55.67s/it]


(9229, 50)
[6557, 2955, 4408, 3886, 4165, 3979, 5597, 4060, 5141, 1509, 1502, 4272, 5689, 5523, 4434, 5904, 4437, 5646, 5089, 3257, 1506, 3581, 4412, 3627, 6208, 5112, 3402, 3360, 3155, 4566, 4587, 3683, 1519, 3817, 5082, 4642, 3108, 3850, 2912, 3297, 5426, 3361, 803, 3801, 3872, 5761, 4437, 4002, 1511, 8399]
precision = 0.216178695678
recall =  0.773221777101
F1 =  0.310358096711
acc =  0.207232309888


In [55]:
clfs = []
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
for alpha in [1e-1, 1e-2, 1e-3]:
    print '=='*4, ' alpha=%f' % alpha, '=='*4 
    for i in tqdm(range(N_LABELS)):
        clf =  SGDClassifier(loss='hinge', penalty='l2', 
                       class_weight='balanced',
                       alpha=alpha, n_iter=200, random_state=1)
        clf.fit(Xembed_train, Y_train[:,i]) 
        clfs.append(clf)
    preds = [clfs[i].predict(Xembed_val) for i in xrange(N_LABELS)]    
    pred_svm_dp = np.vstack(preds).T
    print pred_svm_dp.shape
    print map(int, pred_svm_dp.sum(axis=0))
    multilabel_evaluate(y_pred=pred_svm_dp,y_true=Y_val)

100%|██████████| 50/50 [05:34<00:00,  5.94s/it]


(9229, 50)
[5727, 2743, 3909, 3443, 3934, 3653, 5096, 3669, 4665, 1522, 1518, 3676, 4428, 4431, 4042, 4023, 3755, 4248, 3932, 2726, 1523, 3002, 3545, 3041, 3907, 3812, 2792, 2939, 2718, 3821, 3583, 2994, 1521, 3022, 3682, 3431, 2486, 2700, 1963, 2760, 3935, 2873, 796, 3105, 3086, 3839, 3322, 3160, 1520, 8182]
precision = 0.229993005192
recall =  0.703589020873
F1 =  0.320225607346
acc =  0.215284325998


In [56]:
clfs = []
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
for i in tqdm(range(N_LABELS)):
#     clf = SVC(class_weight='balanced',random_state=1,C=0.2)
    clf =  SGDClassifier(loss='hinge', penalty='l2', 
                   class_weight='balanced',
                   alpha=1e-2, n_iter=200, random_state=1)
    clf.fit(Xembed_train, Y_train[:,i]) 
    clfs.append(clf)
preds = [clfs[i].predict(Xembed_val) for i in xrange(N_LABELS)]    
pred_svm_dp = np.vstack(preds).T
print pred_svm_dp.shape
print map(int, pred_svm_dp.sum(axis=0))
multilabel_evaluate(y_pred=pred_svm_dp,y_true=Y_val)

100%|██████████| 50/50 [05:42<00:00,  6.09s/it]


(9229, 50)
[5911, 2817, 3869, 3491, 3977, 3697, 5271, 3739, 4877, 1525, 1516, 4133, 4953, 4859, 4223, 4862, 4150, 4999, 4543, 2860, 1525, 3191, 3932, 3190, 4733, 4395, 3024, 3006, 2759, 4059, 4066, 3380, 1529, 3368, 4253, 4132, 2695, 3179, 2258, 2921, 4660, 3083, 806, 3433, 3434, 4488, 3800, 3552, 1525, 8254]
precision = 0.225381778819
recall =  0.735914801374
F1 =  0.317281640609
acc =  0.212802657313


In [57]:
clfs = []
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
for i in tqdm(range(N_LABELS)):
#     clf = SVC(class_weight='balanced',random_state=1,C=0.2)
    clf =  SGDClassifier(loss='hinge', penalty='l2', 
                   class_weight='balanced',
                   alpha=1e-1, n_iter=200, random_state=1)
    clf.fit(Xembed_train, Y_train[:,i]) 
    clfs.append(clf)
preds = [clfs[i].predict(Xembed_val) for i in xrange(N_LABELS)]    
pred_svm_dp = np.vstack(preds).T
print pred_svm_dp.shape
print map(int, pred_svm_dp.sum(axis=0))
multilabel_evaluate(y_pred=pred_svm_dp,y_true=Y_val)

100%|██████████| 50/50 [05:59<00:00,  6.51s/it]


(9229, 50)
[6557, 2955, 4408, 3886, 4165, 3979, 5597, 4060, 5141, 1509, 1502, 4272, 5689, 5523, 4434, 5904, 4437, 5646, 5089, 3257, 1506, 3581, 4412, 3627, 6208, 5112, 3402, 3360, 3155, 4566, 4587, 3683, 1519, 3817, 5082, 4642, 3108, 3850, 2912, 3297, 5426, 3361, 803, 3801, 3872, 5761, 4437, 4002, 1511, 8399]
precision = 0.216178695678
recall =  0.773221777101
F1 =  0.310358096711
acc =  0.207232309888


----

## Try different models

In [9]:
flag_quick_test = 0 # passed as `quick_test` to compile_fit_evaluate: truthy -> 1 epoch on the first 100 training rows; False/0 -> run on whole data

In [10]:
# Frozen embedding -> Conv1D + MaxPooling1D -> Dropout(0.2) -> sigmoid output.
model_1conv1d_dropout = Sequential(name='model_1conv1d_dropout')
model_1conv1d_dropout.add(Embedding(input_dim=nb_words+1, output_dim=EMBEDDING_DIM,
                                    weights=[embedding_matrix], input_length=MAX_SEQ_LEN,
                                    trainable=False))  # keep the embeddings fixed
model_1conv1d_dropout.add(Conv1D(128, 5, activation='relu'))
model_1conv1d_dropout.add(MaxPooling1D(5))
model_1conv1d_dropout.add(Flatten())
model_1conv1d_dropout.add(Dropout(p=0.2))
model_1conv1d_dropout.add(Dense(N_LABELS, activation='sigmoid'))
compile_fit_evaluate(model_1conv1d_dropout, quick_test=flag_quick_test)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_2 (Embedding)          (None, 1000, 100)     0           embedding_input_2[0][0]          
____________________________________________________________________________________________________
convolution1d_2 (Convolution1D)  (None, 996, 128)      64128       embedding_2[0][0]                
____________________________________________________________________________________________________
maxpooling1d_2 (MaxPooling1D)    (None, 199, 128)      0           convolution1d_2[0][0]            
____________________________________________________________________________________________________
flatten_2 (Flatten)              (None, 25472)         0           maxpooling1d_2[0][0]             
___________________________________________________________________________________________

KeyboardInterrupt: 

In [11]:
# 2 conv1d layers, followed by Dropout(0.2) before the sigmoid output
model_2conv1d_dropout = Sequential(name='model_2conv1d_dropout')
model_2conv1d_dropout.add(Embedding(input_dim=nb_words+1, output_dim=EMBEDDING_DIM,
                                    weights=[embedding_matrix], input_length=MAX_SEQ_LEN,
                                    trainable=False))
model_2conv1d_dropout.add(Conv1D(128, 5, activation='relu'))
model_2conv1d_dropout.add(MaxPooling1D(5))
model_2conv1d_dropout.add(Conv1D(128, 5, activation='relu'))
model_2conv1d_dropout.add(MaxPooling1D(5))
model_2conv1d_dropout.add(Flatten())
model_2conv1d_dropout.add(Dropout(p=0.2))
model_2conv1d_dropout.add(Dense(N_LABELS, activation='sigmoid'))
compile_fit_evaluate(model_2conv1d_dropout, quick_test=flag_quick_test)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_3 (Embedding)          (None, 1000, 100)     0           embedding_input_3[0][0]          
____________________________________________________________________________________________________
convolution1d_3 (Convolution1D)  (None, 996, 128)      64128       embedding_3[0][0]                
____________________________________________________________________________________________________
maxpooling1d_3 (MaxPooling1D)    (None, 199, 128)      0           convolution1d_3[0][0]            
____________________________________________________________________________________________________
convolution1d_4 (Convolution1D)  (None, 195, 128)      82048       maxpooling1d_3[0][0]             
___________________________________________________________________________________________

KeyboardInterrupt: 

In [None]:
# 3 conv1d layers, followed by Dropout(0.5) before the sigmoid output
model_3conv1d_dropout = Sequential(name='model_3conv1d_dropout')
model_3conv1d_dropout.add(Embedding(input_dim=nb_words+1, output_dim=EMBEDDING_DIM,
                                    weights=[embedding_matrix], input_length=MAX_SEQ_LEN,
                                    trainable=False))
model_3conv1d_dropout.add(Conv1D(128, 5, activation='relu'))
model_3conv1d_dropout.add(MaxPooling1D(5))
model_3conv1d_dropout.add(Conv1D(128, 5, activation='relu'))
model_3conv1d_dropout.add(MaxPooling1D(5))
model_3conv1d_dropout.add(Conv1D(128, 5, activation='relu'))
model_3conv1d_dropout.add(MaxPooling1D(5))
model_3conv1d_dropout.add(Flatten())
model_3conv1d_dropout.add(Dropout(p=0.5))
model_3conv1d_dropout.add(Dense(N_LABELS, activation='sigmoid'))

compile_fit_evaluate(model_3conv1d_dropout, quick_test=flag_quick_test)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_5 (Embedding)          (None, 1000, 100)     0           embedding_input_5[0][0]          
____________________________________________________________________________________________________
convolution1d_7 (Convolution1D)  (None, 996, 128)      64128       embedding_5[0][0]                
____________________________________________________________________________________________________
maxpooling1d_7 (MaxPooling1D)    (None, 199, 128)      0           convolution1d_7[0][0]            
____________________________________________________________________________________________________
convolution1d_8 (Convolution1D)  (None, 195, 128)      82048       maxpooling1d_7[0][0]             
___________________________________________________________________________________________

In [10]:
# with only 1 conv1d layer (no dropout)
model_1conv1d = Sequential(name='model_1conv1d')
model_1conv1d.add(Embedding(input_dim=nb_words+1, output_dim=EMBEDDING_DIM,
                            weights=[embedding_matrix], input_length=MAX_SEQ_LEN,
                            trainable=False))  # keep the embeddings fixed
model_1conv1d.add(Conv1D(128, 5, activation='relu'))
model_1conv1d.add(MaxPooling1D(5))
model_1conv1d.add(Flatten())
model_1conv1d.add(Dense(N_LABELS, activation='sigmoid'))
compile_fit_evaluate(model_1conv1d, quick_test=flag_quick_test)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_1 (Embedding)          (None, 1000, 100)     0           embedding_input_1[0][0]          
____________________________________________________________________________________________________
convolution1d_1 (Convolution1D)  (None, 996, 128)      64128       embedding_1[0][0]                
____________________________________________________________________________________________________
maxpooling1d_1 (MaxPooling1D)    (None, 199, 128)      0           convolution1d_1[0][0]            
____________________________________________________________________________________________________
flatten_1 (Flatten)              (None, 25472)         0           maxpooling1d_1[0][0]             
___________________________________________________________________________________________

In [11]:
# 2 conv1d layers (no dropout)
model_2conv1d = Sequential(name='model_2conv1d')
model_2conv1d.add(Embedding(input_dim=nb_words+1, output_dim=EMBEDDING_DIM,
                            weights=[embedding_matrix], input_length=MAX_SEQ_LEN,
                            trainable=False))
model_2conv1d.add(Conv1D(128, 5, activation='relu'))
model_2conv1d.add(MaxPooling1D(5))
model_2conv1d.add(Conv1D(128, 5, activation='relu'))
model_2conv1d.add(MaxPooling1D(5))
model_2conv1d.add(Flatten())
model_2conv1d.add(Dense(N_LABELS, activation='sigmoid'))
compile_fit_evaluate(model_2conv1d, quick_test=flag_quick_test)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_2 (Embedding)          (None, 1000, 100)     0           embedding_input_2[0][0]          
____________________________________________________________________________________________________
convolution1d_2 (Convolution1D)  (None, 996, 128)      64128       embedding_2[0][0]                
____________________________________________________________________________________________________
maxpooling1d_2 (MaxPooling1D)    (None, 199, 128)      0           convolution1d_2[0][0]            
____________________________________________________________________________________________________
convolution1d_3 (Convolution1D)  (None, 195, 128)      82048       maxpooling1d_2[0][0]             
___________________________________________________________________________________________

In [12]:
# 3 conv1d layers (no dropout)
model_3conv1d = Sequential(name='model_3conv1d')
model_3conv1d.add(Embedding(input_dim=nb_words+1, output_dim=EMBEDDING_DIM,
                            weights=[embedding_matrix], input_length=MAX_SEQ_LEN,
                            trainable=False))
model_3conv1d.add(Conv1D(128, 5, activation='relu'))
model_3conv1d.add(MaxPooling1D(5))
model_3conv1d.add(Conv1D(128, 5, activation='relu'))
model_3conv1d.add(MaxPooling1D(5))
model_3conv1d.add(Conv1D(128, 5, activation='relu'))
model_3conv1d.add(MaxPooling1D(5))
model_3conv1d.add(Flatten())
model_3conv1d.add(Dense(N_LABELS, activation='sigmoid'))

compile_fit_evaluate(model_3conv1d, quick_test=flag_quick_test)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_3 (Embedding)          (None, 1000, 100)     0           embedding_input_3[0][0]          
____________________________________________________________________________________________________
convolution1d_4 (Convolution1D)  (None, 996, 128)      64128       embedding_3[0][0]                
____________________________________________________________________________________________________
maxpooling1d_4 (MaxPooling1D)    (None, 199, 128)      0           convolution1d_4[0][0]            
____________________________________________________________________________________________________
convolution1d_5 (Convolution1D)  (None, 195, 128)      82048       maxpooling1d_4[0][0]             
___________________________________________________________________________________________

In [13]:
# 2d conv models
'''for 2d conv, the nb_filters cann't be too big: 
   128*MAX_SEQ_LEN*EMBEDDING_DIM is too much memory
   nb_filter = 64 is fine for 1 conv2d layer
'''
# Frozen embedding, reshaped to a single-channel "image", then one Conv2D + MaxPooling2D stage.
model_1conv2d = Sequential(name='model_1conv2d')
model_1conv2d.add(Embedding(input_dim=nb_words+1, output_dim=EMBEDDING_DIM,
                            weights=[embedding_matrix], input_length=MAX_SEQ_LEN,
                            trainable=False))
# **need to manually reshape and add a channel**
model_1conv2d.add(Reshape((MAX_SEQ_LEN, EMBEDDING_DIM, 1)))
model_1conv2d.add(Conv2D(8, 5, 5, activation='relu'))  # , input_shape=(MAX_SEQ_LEN, EMBEDDING_DIM, 1)
# need to downsample heavily to reduce parameters...
model_1conv2d.add(MaxPooling2D((10,10)))
model_1conv2d.add(Flatten())
model_1conv2d.add(Dense(N_LABELS, activation='sigmoid'))
# model_1conv2d.summary()
compile_fit_evaluate(model_1conv2d, quick_test=flag_quick_test)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_4 (Embedding)          (None, 1000, 100)     0           embedding_input_4[0][0]          
____________________________________________________________________________________________________
reshape_1 (Reshape)              (None, 1000, 100, 1)  0           embedding_4[0][0]                
____________________________________________________________________________________________________
convolution2d_1 (Convolution2D)  (None, 996, 96, 8)    208         reshape_1[0][0]                  
____________________________________________________________________________________________________
maxpooling2d_1 (MaxPooling2D)    (None, 99, 9, 8)      0           convolution2d_1[0][0]            
___________________________________________________________________________________________

In [14]:
# Frozen embedding, reshaped to a single-channel "image", then two Conv2D + MaxPooling2D stages.
model_2conv2d = Sequential(name='model_2conv2d')
model_2conv2d.add(Embedding(input_dim=nb_words+1, output_dim=EMBEDDING_DIM,
                            weights=[embedding_matrix], input_length=MAX_SEQ_LEN,
                            trainable=False))
# **need to manually reshape and add a channel**
model_2conv2d.add(Reshape((MAX_SEQ_LEN, EMBEDDING_DIM, 1)))
model_2conv2d.add(Conv2D(32, 5, 5, activation='relu'))  # , input_shape=(MAX_SEQ_LEN, EMBEDDING_DIM, 1)
model_2conv2d.add(MaxPooling2D((5,5)))
model_2conv2d.add(Conv2D(8, 5, 5, activation='relu'))
model_2conv2d.add(MaxPooling2D((2,2)))
model_2conv2d.add(Flatten())
model_2conv2d.add(Dense(N_LABELS, activation='sigmoid'))
compile_fit_evaluate(model_2conv2d, quick_test=flag_quick_test)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_5 (Embedding)          (None, 1000, 100)     0           embedding_input_5[0][0]          
____________________________________________________________________________________________________
reshape_2 (Reshape)              (None, 1000, 100, 1)  0           embedding_5[0][0]                
____________________________________________________________________________________________________
convolution2d_2 (Convolution2D)  (None, 996, 96, 32)   832         reshape_2[0][0]                  
____________________________________________________________________________________________________
maxpooling2d_2 (MaxPooling2D)    (None, 199, 19, 32)   0           convolution2d_2[0][0]            
___________________________________________________________________________________________

In [15]:
# Frozen embedding, reshaped to a single-channel "image", then three Conv2D + MaxPooling2D stages.
model_3conv2d = Sequential(
        [ Embedding(input_dim=nb_words+1,output_dim=EMBEDDING_DIM, weights=[embedding_matrix],
              input_length=MAX_SEQ_LEN, trainable=False),
            Reshape( (MAX_SEQ_LEN, EMBEDDING_DIM, 1) ), # **need to manually reshape and add a channel**
            Conv2D(64, 5, 5, activation='relu' ), # , input_shape=(MAX_SEQ_LEN, EMBEDDING_DIM, 1)
            MaxPooling2D((5,5)),
            Conv2D(32, 5, 5, activation='relu' ),
            MaxPooling2D((2,2)),
            Conv2D(8, 5, 5, activation='relu' ),
            MaxPooling2D((2,2)),
            Flatten(),
            # FIX: sigmoid instead of softmax. The task is multi-label (independent 0/1 targets,
            # binary_crossentropy loss, every other model here ends in a sigmoid); softmax would
            # force the N_LABELS outputs to sum to 1.
            Dense(N_LABELS, activation='sigmoid') ],
        name='model_3conv2d')
model_3conv2d.summary() # summary() prints by itself; wrapping it in `print` added a stray "None"
# maybe this is too slow to compute? estimated time: 100 * 30 * (N_EPOCH+1) ~= 9hours ...
# compile_fit_evaluate(model_3conv2d, flag_quick_test) 

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_6 (Embedding)          (None, 1000, 100)     0           embedding_input_6[0][0]          
____________________________________________________________________________________________________
reshape_3 (Reshape)              (None, 1000, 100, 1)  0           embedding_6[0][0]                
____________________________________________________________________________________________________
convolution2d_4 (Convolution2D)  (None, 996, 96, 64)   1664        reshape_3[0][0]                  
____________________________________________________________________________________________________
maxpooling2d_4 (MaxPooling2D)    (None, 199, 19, 64)   0           convolution2d_4[0][0]            
___________________________________________________________________________________________