In [1]:
import os,sys
import numpy as np
import pandas as pd
from tqdm import tqdm
import cPickle as pk
np.random.seed(1) # to be reproductive

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, Flatten
from keras.layers import Conv1D, MaxPooling1D, Embedding
from keras.models import Model, Sequential

Using TensorFlow backend.


In [2]:
# paths
NOTE_DATA_DIR = '/local/XW/DATA/MIMIC/noteevents_by_sid/'  # one file per subject, read with pd.read_csv below
ICD_FPATH = './subject_diag_icds.txt'  # one line per subject: "<subject_id>,<space-separated icd codes>"
PK_FPATH = './diag_processed_data.pk'  # pickle written at the end of section 1 and read back in section 2
# constants
N_LABELS = 50  # width of the k-hot label vector: top-K codes plus one 'other' slot
K_ICDS_TOKEEP = N_LABELS - 1 # predict only on top K frequent icd codes
N_SUBJECTS = 46146  # only used as the tqdm `total` while reading ICD_FPATH
# word2vec configurations
GLOVE_DIR = '/local/XW/DATA/glove.6B/'
MAX_SEQ_LEN = 1000 # max length of input sequence (pad/truncate to fix length)
MAX_NB_WORDS = 20000 # top 20k most freq words
EMBEDDING_DIM = 100
# learning configurations
# NOTE: the three values below are not used by the cells of section 1; section 2 declares its own.
VALIDATION_SPLIT = 0.2  # fraction of the samples held out as validation set
N_EPOCHS = 2
SZ_BATCH = 128

# 1. Data preparation

**If this step has already been run once, go directly to [step 2](#2.-Model-training) for training.**

## prepare label: k-hot encoding of the (diagnosis) icd codes

In [5]:
# Read the per-subject diagnosis codes and count how often each code occurs.
from collections import Counter

sid2icds = {}        # subject_id (int) ---> set of icd codes (str) of this patient
icd_ctr = Counter()  # icd code (str) ---> number of occurrences over the whole file
with open(ICD_FPATH) as icd_file:
    for record in tqdm(icd_file, total=N_SUBJECTS):
        # each record looks like "<subject_id>,<code> <code> ..."
        sid_field, icd_field = record.split(',')
        subject_id = int(sid_field)
        codes = icd_field.split()
        icd_ctr.update(codes)
        sid2icds[subject_id] = set(codes)

100%|██████████| 46146/46146 [00:00<00:00, 83835.65it/s]


In [6]:
# (code, count) pairs of the K most frequent icd codes
top_icd_counts = icd_ctr.most_common(K_ICDS_TOKEEP)
print(top_icd_counts)

[('4019', 17510), ('41401', 10736), ('42731', 10193), ('4280', 9802), ('5849', 7634), ('2724', 7421), ('25000', 7332), ('51881', 6632), ('5990', 5746), ('V053', 5678), ('V290', 5440), ('2720', 5320), ('53081', 5246), ('2859', 4967), ('486', 4391), ('2851', 4231), ('2762', 4120), ('2449', 3789), ('496', 3572), ('99592', 3504), ('V3000', 3503), ('0389', 3387), ('5070', 3362), ('V5861', 3184), ('3051', 2982), ('311', 2907), ('41071', 2902), ('5859', 2889), ('40390', 2814), ('2761', 2789), ('2875', 2783), ('412', 2775), ('V3001', 2707), ('4240', 2643), ('5119', 2554), ('V1582', 2534), ('78552', 2376), ('V4581', 2318), ('4241', 2302), ('9971', 2299), ('42789', 2297), ('V4582', 2247), ('7742', 2241), ('5845', 2154), ('2760', 2077), ('5180', 2072), ('45829', 2055), ('V5867', 2009), ('V502', 1978)]


In [7]:
# Labels to predict: the K most frequent icd codes followed by a catch-all 'other' slot.
# A generator expression is used instead of `zip(*pairs)[0]`, which raises IndexError when
# the counter is empty and only works while zip() returns a list.
icds = tuple(code for code, _count in icd_ctr.most_common(K_ICDS_TOKEEP)) + ('other',)
print(icds) # these are icds to predict

('4019', '41401', '42731', '4280', '5849', '2724', '25000', '51881', '5990', 'V053', 'V290', '2720', '53081', '2859', '486', '2851', '2762', '2449', '496', '99592', 'V3000', '0389', '5070', 'V5861', '3051', '311', '41071', '5859', '40390', '2761', '2875', '412', 'V3001', '4240', '5119', 'V1582', '78552', 'V4581', '4241', '9971', '42789', 'V4582', '7742', '5845', '2760', '5180', '45829', 'V5867', 'V502', 'other')


In [8]:
# now turn each subject into a k-hot vector

# Position of every kept label inside the k-hot vector, computed once so that the inner
# loop does a single dict lookup instead of scanning the `icds` tuple twice per code.
icd2idx = dict((icd, idx) for idx, icd in enumerate(icds))

sid2khot = {} # map subject_id to k-hot vector
for sid, subject_icds in sid2icds.items():
    _khot = np.zeros(N_LABELS)
    for _icd in subject_icds:
        # codes outside the kept set fall back to the last slot (label 'other icds')
        _khot[icd2idx.get(_icd, -1)] = 1
    sid2khot[sid] = _khot

# sanity check on two subjects
print(np.array([sid2khot[i] for i in [2, 3]]))


[[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  1.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]]


## prepare `X`: each note to be a (fixed-length) sequence of word ids

goal: generate a sid2seq dictionary

In [9]:
# Read every subject's notes into memory (the data is 3.7G in size, can hold in memory...).
# `sids[i]` is the subject whose concatenated notes are stored in `texts[i]`.
sids, texts = [], []
for fname in tqdm(os.listdir(NOTE_DATA_DIR)):
    # the subject id is the file name without its 4-character extension
    sids.append(int(fname[:-4]))
    notes = pd.read_csv(os.path.join(NOTE_DATA_DIR, fname))
    # all notes of one subject are glued together, separated by a ruler line
    texts.append('\n=======\n\n\n'.join(notes['text']))
print('found %d texts' % len(texts))

100%|██████████| 46146/46146 [01:45<00:00, 437.51it/s]

found 46146 texts





**Now vectorize the notes**

In [10]:
# Characters dropped while tokenizing: punctuation plus digits (otherwise the
# vocabulary contains lots of numbers).
_filtered_chars = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n' + '0123456789'
tokenizer = Tokenizer(nb_words=MAX_NB_WORDS, filters=_filtered_chars)
# fitting is required before `texts_to_sequences` can be used, might take time
tokenizer.fit_on_texts(texts)
# every article becomes a list of word ids
seqs = tokenizer.texts_to_sequences(texts)
# dictionary mapping words (str) to their index (int)
word_index = tokenizer.word_index

In [12]:
# vocabulary size seen by the tokenizer vs. the number of words actually kept
n_unique_tokens = len(word_index)
print('found %s unique tokens, use most frequent %d of them' % (n_unique_tokens, MAX_NB_WORDS))

found 371491 unique tokens, use most frequent 20000 of them


In [14]:
# the 100 words with the smallest index
# TODO: remove stopwords
words_by_index = sorted(word_index.items(), key=lambda item: item[1])
print(words_by_index[:100])

[('and', 1), ('to', 2), ('the', 3), ('of', 4), ('with', 5), ('for', 6), ('in', 7), ('no', 8), ('is', 9), ('on', 10), ('a', 11), ('was', 12), ('pt', 13), ('at', 14), ('am', 15), ('name', 16), ('mg', 17), ('s', 18), ('o', 19), ('left', 20), ('this', 21), ('as', 22), ('right', 23), ('ml', 24), ('patient', 25), ('pm', 26), ('p', 27), ('l', 28), ('there', 29), ('or', 30), ('not', 31), ('are', 32), ('last', 33), ('from', 34), ('w', 35), ('c', 36), ('chest', 37), ('plan', 38), ('normal', 39), ('po', 40), ('t', 41), ('reason', 42), ('hr', 43), ('clip', 44), ('pain', 45), ('be', 46), ('dl', 47), ('ct', 48), ('has', 49), ('d', 50), ('he', 51), ('continue', 52), ('blood', 53), ('cc', 54), ('assessment', 55), ('x', 56), ('location', 57), ('status', 58), ('by', 59), ('stable', 60), ('day', 61), ('but', 62), ('she', 63), ('year', 64), ('history', 65), ('noted', 66), ('hospital', 67), ('well', 68), ('contrast', 69), ('tube', 70), ('given', 71), ('will', 72), ('old', 73), ('iv', 74), ('examination', 7

In [19]:
# pad/truncate every id sequence to MAX_SEQ_LEN, then key the rows by subject id
seqs_padded = pad_sequences(seqs, maxlen=MAX_SEQ_LEN)
sid2seq = dict(zip(sids, seqs_padded))

In [26]:
# Align inputs and labels: entry i of `data` and of `labels` belong to the same subject.
ordered_sids = list(sid2seq.keys())
data = [sid2seq[subject_id] for subject_id in ordered_sids]
labels = [sid2khot[subject_id] for subject_id in ordered_sids]

In [30]:
# stack the per-subject rows into two 2-D arrays
data, labels = np.array(data), np.array(labels)

In [32]:
# sanity check: both arrays are expected to have one row per subject
data.shape, labels.shape

((46146, 1000), (46146, 50))

## Split training and testing data

In [33]:
# split data
# Shuffle the samples (seeded at the top of the notebook), then hold out the last
# VALIDATION_SPLIT fraction as validation set.
indices = np.arange(data.shape[0])
np.random.shuffle(indices)
data, labels = data[indices], labels[indices]
validset_sz = int(VALIDATION_SPLIT*data.shape[0])

# Slice with an explicit boundary index: `data[:-validset_sz]` would return an EMPTY
# training set whenever validset_sz is 0 (tiny dataset or VALIDATION_SPLIT == 0).
n_train = data.shape[0] - validset_sz
X_train, Y_train = data[:n_train], labels[:n_train]
X_val, Y_val = data[n_train:], labels[n_train:]

## Prepare embedding matrix (vector of each word in dictionary)

In [24]:
# build index mapping: map word to its vector

# The file name is derived from EMBEDDING_DIM so that the loaded vectors always have the
# width of the embedding matrix that is filled from them.
glove_fpath = os.path.join(GLOVE_DIR, 'glove.6B.%dd.txt' % EMBEDDING_DIM)

word2vec = {} # maps word ---> embedding vector
with open(glove_fpath) as f:
    for line in tqdm(f, total=400000):
        # each line is "<word> <v1> <v2> ..."
        vals = line.split()
        word2vec[vals[0]] = np.asarray(vals[1:], dtype='float')
print('found %d word vectors.' % len(word2vec))

100%|██████████| 400000/400000 [00:08<00:00, 48848.56it/s]

found 400000 word vectors.





In [25]:
# Row i of the matrix holds the pre-trained vector of the word whose id is i.
nb_words = min(MAX_NB_WORDS, len(word_index))
# one extra row because the word ids in `word_index` start from 1, so row 0 is never filled here
embedding_matrix = np.zeros((nb_words + 1, EMBEDDING_DIM))
for word, wd_id in tqdm(word_index.items()):
    # ids above MAX_NB_WORDS are not in the generated sequences and are discarded;
    # words without a pre-trained vector keep their all-zero row
    if wd_id <= MAX_NB_WORDS and word in word2vec:
        embedding_matrix[wd_id, :] = word2vec[word]

100%|██████████| 371491/371491 [00:00<00:00, 2127266.58it/s]


## Dump useful data to a file

In [34]:
# Gather everything worth keeping in one dict: it is pickled for reuse, so that the
# memory-hungry preprocessing above does not have to be repeated.
data_to_pickle = dict(
    embedding_matrix=embedding_matrix,
    X_train=X_train,
    Y_train=Y_train,
    X_val=X_val,
    Y_val=Y_val,
    sid2khot=sid2khot,
    sid2seq=sid2seq,
)

In [35]:
# Write through a context manager so the file is flushed and closed as soon as the dump
# is done, instead of leaving an open handle behind.
with open(PK_FPATH, 'wb') as pk_file:
    pk.dump(data_to_pickle, pk_file, pk.HIGHEST_PROTOCOL)

In [36]:
# also dump a small version for quick check
# (same embedding matrix, but only the first 1024 training and 128 validation samples)
data_to_pickle = {
    'embedding_matrix': embedding_matrix,
    'X_train': X_train[:1024],
    'Y_train': Y_train[:1024],
    'X_val': X_val[:128],
    'Y_val': Y_val[:128],
}
# context manager: the file is flushed and closed as soon as the dump is done
with open('./diag_processed_data_small.pk', 'wb') as pk_file:
    pk.dump(data_to_pickle, pk_file, pk.HIGHEST_PROTOCOL)

In [34]:
# reset ipython env, to clear out useless objects in memory
%reset -f 

---

# 2. Model training

## Load data

In [1]:
import os,sys
import numpy as np
import pandas as pd
from tqdm import tqdm
import cPickle as pk
np.random.seed(1) # to be reproductive

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, Flatten
from keras.layers import Conv1D, MaxPooling1D, Embedding
from keras.layers import Conv2D, MaxPooling2D, Reshape
from keras.models import Sequential
from keras.models import load_model
from keras.utils.np_utils import to_categorical
from keras.callbacks import TensorBoard

Using TensorFlow backend.


In [2]:
# paths
NOTE_DATA_DIR = '/local/XW/DATA/MIMIC/noteevents_by_sid/'
ICD_FPATH = './subject_diag_icds.txt'
PK_FPATH = './diag_processed_data.pk' # for a quick check use the small pickle written by section 1: './diag_processed_data_small.pk'
MODEL_PATH = './models/'  # trained models are saved here as '<model name>.h5'
LOG_PATH = './logs/'  # TensorBoard logs go to '<LOG_PATH>/<model name>'
# constants
N_LABELS = 50  # width of the k-hot label vector and of the softmax output layer
K_ICDS_TOKEEP = N_LABELS - 1 # predict only on top K frequent icd codes
# NOTE(review): section 1 uses 46146 and the training log reports 36917 + 9229 = 46146
# samples, so this value looks stale -- confirm. It is not referenced by the cells of section 2.
N_SUBJECTS = 41886
# word2vec configurations
GLOVE_DIR = '/local/XW/DATA/glove.6B/'
MAX_SEQ_LEN = 1000 # max length of input sequence (pad/truncate to fix length)
MAX_NB_WORDS = 20000 # top 20k most freq words
EMBEDDING_DIM = 100
# learning configurations
VALIDATION_SPLIT = 0.2
N_EPOCHS = 6  # upper bound on epochs; compile_fit_evaluate also adds early stopping on val_loss
SZ_BATCH = 128

In [3]:
# load pickled data
# NOTE: unpickling can execute arbitrary code -- only load files written by section 1 of
# this notebook. The context manager closes the file handle once loading is done.
with open(PK_FPATH, 'rb') as pk_file:
    pk_data = pk.load(pk_file)
embedding_matrix = pk_data['embedding_matrix']
X_train, Y_train = pk_data['X_train'], pk_data['Y_train']
X_val, Y_val = pk_data['X_val'], pk_data['Y_val']
# `nb_words` itself was not pickled. The matrix was built with nb_words + 1 rows, so it is
# recovered from the shape instead of assuming it equals MAX_NB_WORDS (the two differ when
# the vocabulary is smaller than MAX_NB_WORDS).
nb_words = embedding_matrix.shape[0] - 1

## Define evaluation metrics

**NB:** this metric is a continuous relaxation of what we really want, so the acc output during training is not precise.

In [4]:
import keras.backend as K

def relax_acc(y_true, y_pred): # shape: (None,N_LABELS)
    '''Relaxed accuracy for the case when y_true is K-hot.

    The idea: a prediction is good if the predicted icd code is among the
    patient's icds. For that `y_pred` (the softmax output) would have to be
    turned into a 1-hot encoding first, but both attempts failed:
    * K.round(y_pred) -- doesn't work well, lots of 0s
    * building the 1-hot array by hand (np.zeros_like + K.eval(K.argmax(...))) --
      doesn't work either:
      > InvalidArgumentError: You must feed a value for placeholder tensor 'embedding_input_4' with dtype int32

    ==> the value returned is therefore not the accuracy as defined above but a
    *continuous relaxation* of it: the element-wise product keeps only the
    predicted scores of the true labels, these are summed per sample and the
    sums are averaged.
    '''
    score_on_true_labels = K.sum(y_pred * y_true, axis=-1)
    return K.mean(score_on_true_labels)


In [5]:
# TODO: custom loss function suitable for multi-label scenarios ??
# example: element-wise mul of softmax and y_true ? 

As the output metrics during training is not precise, define an `evaluate` function to calculate the metric on training and validation data. 

In [6]:
def evaluate_manual(model,X,Y):
    '''Exact version of the relaxed accuracy, computed outside the training graph.

    A sample counts as correct when the class returned by
    `model.predict_classes` is one of the labels set to 1 in its row of `Y`.

    model: object exposing `predict_classes(X)`, returning one class index per sample
    X: input array, one row per sample
    Y: k-hot label array, one row per sample

    Returns a summary string with the number of cases, the number of correct
    ones and the accuracy. For an empty `X` the accuracy is reported as 0
    instead of raising ZeroDivisionError.
    '''
    n_cases = X.shape[0]
    if n_cases == 0:
        return '%d cases checked, %d correct, accuracy=%.9f' % (0, 0, 0.0)
    y_pred = np.asarray(model.predict_classes(X)).ravel()
    # look up, for every sample at once, the label value at its predicted class
    hits = Y[np.arange(len(y_pred)), y_pred] == 1
    n_correct = int(np.sum(hits))
    return '%d cases checked, %d correct, accuracy=%.9f' % (n_cases, n_correct, float(n_correct)/n_cases )

def eval_using_relax_acc(model, X, Y):
    '''Evaluate `relax_acc` on hard (1-hot) predictions instead of softmax scores.

    The classes predicted for `X` are one-hot encoded and compared with the
    k-hot labels `Y` through `relax_acc`; the result is returned as a string
    with 9 decimals.
    '''
    # The number of classes is passed explicitly: letting to_categorical infer it from
    # the predictions gives a matrix narrower than Y whenever the highest-index label
    # is never predicted, which breaks the element-wise product in relax_acc.
    y_onehot = to_categorical(model.predict_classes(X), Y.shape[1])
    return '%.9f' % K.eval( relax_acc( K.variable(Y), K.variable(y_onehot) ) )

def evaluate_model(model):
    '''Print the result of `evaluate_manual` for the training and the validation set.

    Reads the module-level arrays X_train/Y_train and X_val/Y_val; returns nothing.
    '''
    print('relaxed accuracy on training set: %s' % evaluate_manual(model, X_train, Y_train))
    print('relaxed accuracy on validation set: %s' % evaluate_manual(model, X_val, Y_val))
    
    

In [7]:
from keras.callbacks import Callback, EarlyStopping

class RelaxAccHistory(Callback): 
    '''Callback that prints the exact accuracy at the end of each epoch.

    The `relax_acc` metric shown during training is only a relaxation, so
    `evaluate_model` is run on the model being trained to get the true
    performance on the training and validation sets.
    '''
    def on_epoch_end(self, batch, logs=None):
        # Keras passes the epoch index as first argument; the parameter name `batch` is
        # kept so that existing keyword callers keep working. `logs` is unused and
        # defaults to None rather than a shared mutable dict.
        print('')
        evaluate_model(self.model)

In [8]:
# wraps up operations on models
def compile_fit_evaluate(model, quick_test=False, print_summary=True,
                         save_log=True, save_model=True, del_model=False):
    '''Compile `model`, train it on the module-level data, evaluate and save it.

    model: Keras model to train (compiled here with categorical cross-entropy,
        rmsprop and the `relax_acc` metric)
    quick_test: if truthy, train 1 epoch on the first 100 training samples and
        return immediately (no validation, logging, evaluation or saving)
    print_summary: print the layer summary after compiling
    save_log: write TensorBoard logs to LOG_PATH/<model name>
    save_model: save the trained model to MODEL_PATH/<model name>.h5
    del_model: drop this function's reference to the model at the end; the
        caller's own variable still keeps the model alive

    Returns None.
    '''
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=[relax_acc])
    if print_summary:
        # summary() prints the table itself; printing its return value only adds "None"
        model.summary()

    if quick_test: # use tiny data for quick test
        print('(quick test mode)')
        model.fit(X_train[:100], Y_train[:100], nb_epoch=1)
        return

    # stop when the validation loss has not improved for 2 epochs
    _callbacks = [EarlyStopping(monitor='val_loss', patience=2)] #[RelaxAccHistory()]
    if save_log:
        logdir = os.path.join( LOG_PATH, str(model.name) )
        if not os.path.exists(logdir):
            os.makedirs(logdir)
        _callbacks.append(TensorBoard(log_dir=logdir))
        print('run "tensorboard --logdir=%s" to launch tensorboard' % logdir)

    # Create the output directory BEFORE training, so that a missing directory cannot
    # make model.save() fail after the model has been trained.
    if save_model and not os.path.exists(MODEL_PATH):
        os.makedirs(MODEL_PATH)

    model.fit( X_train, Y_train,
               validation_data=(X_val, Y_val),
               nb_epoch=N_EPOCHS, batch_size=SZ_BATCH,
               callbacks=_callbacks )

    print('evaluating model...')
    evaluate_model(model)

    if save_model:
        model_fpath = os.path.join( MODEL_PATH, '%s.h5'% str(model.name) )
        model.save(model_fpath)

    if del_model:
        # only removes the local name; memory is released once the caller drops its reference too
        del model

In [9]:
''' ***NOTE***
To load models from file, we have to modify metrics.py at: 
`/local/XW/SOFT/anaconda2/envs/thesis_nb/lib/python2.7/site-packages/keras` 
to add the `relax_acc` function, otherwise throws exception ! 

cf issue: https://github.com/fchollet/keras/issues/3911
'''
# m = load_model(os.path.sep.join([MODEL_PATH, 'model_1conv1d.h5']))

' ***NOTE***\nTo load models from file, we have to modify metrics.py at: \n`/local/XW/SOFT/anaconda2/envs/thesis_nb/lib/python2.7/site-packages/keras` \nto add the `relax_acc` function, otherwise throws exception ! \n\ncf issue: https://github.com/fchollet/keras/issues/3911\n'

## Try different models

In [10]:
# Forwarded as `quick_test` to compile_fit_evaluate by the model cells below: a truthy
# value trains a single epoch on the first 100 training samples only.
flag_quick_test = 0 # set to False/0 to run on whole data

In [11]:
# Baseline with only 1 conv1d layer: fixed pre-trained embedding -> conv/pool -> softmax.
layers_1conv1d = [
    # embedding layer; trainable=False keeps the embeddings fixed
    Embedding(input_dim=nb_words + 1, output_dim=EMBEDDING_DIM,
              weights=[embedding_matrix], input_length=MAX_SEQ_LEN,
              trainable=False),
    Conv1D(128, 5, activation='relu'),
    MaxPooling1D(5),
    Flatten(),
    Dense(N_LABELS, activation='softmax'),  # candidate: sigmoid/tanh?
]
model_1conv1d = Sequential(layers_1conv1d, name='model_1conv1d')
compile_fit_evaluate(model_1conv1d, quick_test=flag_quick_test)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_1 (Embedding)          (None, 1000, 100)     0           embedding_input_1[0][0]          
____________________________________________________________________________________________________
convolution1d_1 (Convolution1D)  (None, 996, 128)      64128       embedding_1[0][0]                
____________________________________________________________________________________________________
maxpooling1d_1 (MaxPooling1D)    (None, 199, 128)      0           convolution1d_1[0][0]            
____________________________________________________________________________________________________
flatten_1 (Flatten)              (None, 25472)         0           maxpooling1d_1[0][0]             
___________________________________________________________________________________________

In [12]:
# 2 conv1d layers: the conv/pool stage of the baseline is applied twice.
layers_2conv1d = [
    Embedding(input_dim=nb_words + 1, output_dim=EMBEDDING_DIM,
              weights=[embedding_matrix], input_length=MAX_SEQ_LEN,
              trainable=False),
]
for _stage in range(2):
    layers_2conv1d += [Conv1D(128, 5, activation='relu'), MaxPooling1D(5)]
layers_2conv1d += [Flatten(), Dense(N_LABELS, activation='softmax')]

model_2conv1d = Sequential(layers_2conv1d, name='model_2conv1d')
compile_fit_evaluate(model_2conv1d, quick_test=flag_quick_test)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_2 (Embedding)          (None, 1000, 100)     0           embedding_input_2[0][0]          
____________________________________________________________________________________________________
convolution1d_2 (Convolution1D)  (None, 996, 128)      64128       embedding_2[0][0]                
____________________________________________________________________________________________________
maxpooling1d_2 (MaxPooling1D)    (None, 199, 128)      0           convolution1d_2[0][0]            
____________________________________________________________________________________________________
convolution1d_3 (Convolution1D)  (None, 195, 128)      82048       maxpooling1d_2[0][0]             
___________________________________________________________________________________________

In [13]:
# 3 conv1d layers: the conv/pool stage of the baseline is applied three times.
layers_3conv1d = [
    Embedding(input_dim=nb_words + 1, output_dim=EMBEDDING_DIM,
              weights=[embedding_matrix], input_length=MAX_SEQ_LEN,
              trainable=False),
]
for _stage in range(3):
    layers_3conv1d += [Conv1D(128, 5, activation='relu'), MaxPooling1D(5)]
layers_3conv1d += [Flatten(), Dense(N_LABELS, activation='softmax')]

model_3conv1d = Sequential(layers_3conv1d, name='model_3conv1d')
compile_fit_evaluate(model_3conv1d, quick_test=flag_quick_test)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_3 (Embedding)          (None, 1000, 100)     0           embedding_input_3[0][0]          
____________________________________________________________________________________________________
convolution1d_4 (Convolution1D)  (None, 996, 128)      64128       embedding_3[0][0]                
____________________________________________________________________________________________________
maxpooling1d_4 (MaxPooling1D)    (None, 199, 128)      0           convolution1d_4[0][0]            
____________________________________________________________________________________________________
convolution1d_5 (Convolution1D)  (None, 195, 128)      82048       maxpooling1d_4[0][0]             
___________________________________________________________________________________________

In [14]:
# 2d conv models
# For 2d conv the nb_filters can't be too big:
#   128*MAX_SEQ_LEN*EMBEDDING_DIM is too much memory,
#   nb_filter = 64 is fine for 1 conv2d layer.
layers_1conv2d = [
    Embedding(input_dim=nb_words + 1, output_dim=EMBEDDING_DIM,
              weights=[embedding_matrix], input_length=MAX_SEQ_LEN,
              trainable=False),
    # the embedding output has to be reshaped by hand to add a channel axis
    Reshape((MAX_SEQ_LEN, EMBEDDING_DIM, 1)),
    Conv2D(8, 5, 5, activation='relu'),
    # need to downsample heavily to reduce parameters...
    MaxPooling2D((10, 10)),
    Flatten(),
    Dense(N_LABELS, activation='softmax'),
]
model_1conv2d = Sequential(layers_1conv2d, name='model_1conv2d')
compile_fit_evaluate(model_1conv2d, quick_test=flag_quick_test)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_4 (Embedding)          (None, 1000, 100)     0           embedding_input_4[0][0]          
____________________________________________________________________________________________________
reshape_1 (Reshape)              (None, 1000, 100, 1)  0           embedding_4[0][0]                
____________________________________________________________________________________________________
convolution2d_1 (Convolution2D)  (None, 996, 96, 8)    208         reshape_1[0][0]                  
____________________________________________________________________________________________________
maxpooling2d_1 (MaxPooling2D)    (None, 99, 9, 8)      0           convolution2d_1[0][0]            
___________________________________________________________________________________________

In [15]:
# 2 conv2d layers: (number of filters, pooling window) of each conv/pool stage
stages_2conv2d = ((32, (5, 5)), (8, (2, 2)))

layers_2conv2d = [
    Embedding(input_dim=nb_words + 1, output_dim=EMBEDDING_DIM,
              weights=[embedding_matrix], input_length=MAX_SEQ_LEN,
              trainable=False),
    # the embedding output has to be reshaped by hand to add a channel axis
    Reshape((MAX_SEQ_LEN, EMBEDDING_DIM, 1)),
]
for _n_filters, _pool_size in stages_2conv2d:
    layers_2conv2d += [Conv2D(_n_filters, 5, 5, activation='relu'),
                       MaxPooling2D(_pool_size)]
layers_2conv2d += [Flatten(), Dense(N_LABELS, activation='softmax')]

model_2conv2d = Sequential(layers_2conv2d, name='model_2conv2d')
compile_fit_evaluate(model_2conv2d, quick_test=flag_quick_test)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_5 (Embedding)          (None, 1000, 100)     0           embedding_input_5[0][0]          
____________________________________________________________________________________________________
reshape_2 (Reshape)              (None, 1000, 100, 1)  0           embedding_5[0][0]                
____________________________________________________________________________________________________
convolution2d_2 (Convolution2D)  (None, 996, 96, 32)   832         reshape_2[0][0]                  
____________________________________________________________________________________________________
maxpooling2d_2 (MaxPooling2D)    (None, 199, 19, 32)   0           convolution2d_2[0][0]            
___________________________________________________________________________________________

Continue training the model of 2 conv2d layers..

In [16]:
# Reload the saved 2-conv2d model. The custom metric is handed over through
# `custom_objects` so that loading does not depend on a locally patched keras/metrics.py,
# and the path is built from MODEL_PATH like the one used when saving.
model_2conv2d = load_model(os.path.join(MODEL_PATH, 'model_2conv2d.h5'),
                           custom_objects={'relax_acc': relax_acc})

In [17]:
# accuracy of the reloaded model on the validation set (the run recorded below was interrupted)
evaluate_manual(model_2conv2d, X_val, Y_val)

1312/9229 [===>..........................] - ETA: 135s

KeyboardInterrupt: 

In [18]:
# Resume training of the reloaded model, logging to the same TensorBoard directory.
tensorboard_cb = TensorBoard(log_dir='logs/model_2conv2d/')
model_2conv2d.fit(X_train, Y_train,
                  batch_size=SZ_BATCH,
                  nb_epoch=N_EPOCHS,
                  validation_data=(X_val, Y_val),
                  callbacks=[tensorboard_cb])

Train on 36917 samples, validate on 9229 samples
Epoch 1/6
  384/36917 [..............................] - ETA: 1729s - loss: 17.1296 - relax_acc: 0.3771

KeyboardInterrupt: 

---

In [19]:
# 3 conv2d layers; the model is only built and summarised here, not trained.
model_3conv2d = Sequential(
        [ Embedding(input_dim=nb_words+1,output_dim=EMBEDDING_DIM, weights=[embedding_matrix],
              input_length=MAX_SEQ_LEN, trainable=False),
            Reshape( (MAX_SEQ_LEN, EMBEDDING_DIM, 1) ), # **need to manually reshape and add a channel**
            Conv2D(64, 5, 5, activation='relu' ), # , input_shape=(MAX_SEQ_LEN, EMBEDDING_DIM, 1)
            MaxPooling2D((5,5)),
            Conv2D(32, 5, 5, activation='relu' ), 
            MaxPooling2D((2,2)),
            Conv2D(8, 5, 5, activation='relu' ), 
            MaxPooling2D((2,2)),
            Flatten(),
            Dense(N_LABELS, activation='softmax') ],
        name='model_3conv2d')
# summary() prints the table itself; printing its return value only adds a stray "None"
model_3conv2d.summary()
# maybe this is too slow to compute? estimated time: 100 * 30 * (N_EPOCH+1) ~= 9hours ...
# compile_fit_evaluate(model_3conv2d, flag_quick_test) 

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
embedding_6 (Embedding)          (None, 1000, 100)     0           embedding_input_7[0][0]          
____________________________________________________________________________________________________
reshape_3 (Reshape)              (None, 1000, 100, 1)  0           embedding_6[0][0]                
____________________________________________________________________________________________________
convolution2d_4 (Convolution2D)  (None, 996, 96, 64)   1664        reshape_3[0][0]                  
____________________________________________________________________________________________________
maxpooling2d_4 (MaxPooling2D)    (None, 199, 19, 64)   0           convolution2d_4[0][0]            
___________________________________________________________________________________________