In [1]:
# Clear the interactive namespace without prompting, so the notebook starts from a clean state
%reset -f
import tensorflow as tf
# Create the TensorFlow session first so it can be handed to Keras below
sess = tf.Session()
import keras
from keras import backend as K
# Make the Keras backend use the session created above
K.set_session(sess)

from tensorflow.python.client import device_lib
# Print the names of the devices TensorFlow reports as locally available
print('Available devices',[dvc.name for dvc in device_lib.list_local_devices()])

Using TensorFlow backend.


Available devices ['/cpu:0', '/gpu:0']


## Preprocess the Dataset

In [2]:
import numpy as np
import utils as data_utils

# read_dataset / get_vocab / encode_dataset come from the local `utils` module (not shown here)
X,Y=data_utils.read_dataset('processed.csv')
X_vocab,Y_vocab=data_utils.get_vocab(X,Y)

# Reverse lookups (index -> string) used later to decode model input and output
X_inv={index:token for token,index in X_vocab.items()}
Y_inv={index:label for label,index in Y_vocab.items()}

# Encode the sentences and label sequences with the two vocabularies
newX,newY=data_utils.encode_dataset(X,Y,X_vocab,Y_vocab)
print('Sample sentence \n',newX[0])
print('Sample output \n',newY[0])

# Bring every input and label sequence to a fixed length of 50
padded_X=keras.preprocessing.sequence.pad_sequences(newX,maxlen=50)
padded_Y=keras.preprocessing.sequence.pad_sequences(newY,maxlen=50)

# One-hot encode the labels by picking rows of an identity matrix
padded_Y=np.eye(len(Y_vocab))[padded_Y]

print('X shape',padded_X.shape)
print('Y shape',padded_Y.shape)

Sample sentence 
 [7641, 3026, 1943, 8199, 11619, 5306, 3595, 972, 4611, 1, 1, 1211, 1, 7267, 5499, 7626, 4375, 1122, 3595, 8534, 6681, 6775, 4034, 16122, 464, 12445, 1420, 4375, 3595, 2888, 2101, 8244, 6681, 13281, 2789, 3791, 1, 13189, 3026, 15585, 5306, 1, 1211, 15271, 4611]
Sample output 
 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
X shape (40163, 50)
Y shape (40163, 50, 3)


## Prepare Train and Test Splits

In [3]:
# Shuffle the dataset order.
# The shuffle is seeded so that Restart & Run All always yields the same train/test split.
from sklearn.utils import shuffle
SPLIT_SEED=42
padded_X,padded_Y= shuffle(padded_X,padded_Y,random_state=SPLIT_SEED)

# Use 10% of the dataset for test samples
datasize=padded_X.shape[0]
trainsize=(datasize//10)*9

# Split the dataset: the first `trainsize` rows train the model, the remainder is held out for testing
X_train,Y_train=padded_X[:trainsize],padded_Y[:trainsize]
X_test,Y_test=padded_X[trainsize:],padded_Y[trainsize:]
print('X shape',X_train.shape)
print('Y shape',Y_train.shape)
print(X_train[10])

X shape (36144, 50)
Y shape (36144, 50, 3)
[    0     0     0     0     0     0     0     0     0     0     0  2264
  3026 10407 11974  7315  6572  9112  3595 15452  6681 14643  4375  7007
  8686  2981 15415  7276  1420  6350 13677 11829  2981   266 11785  9671
   549  1420  2981    12  9385  2981     1  9112  6775  2139 10422  6681
  7974  4611]


## Define a Single Layer RNN Sequence Labeler

In [4]:
#Import relevant modules from Keras
from keras.models import Sequential
from keras.layers import TimeDistributed,Bidirectional
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Embedding
from keras.layers import LSTM,GRU,Activation

# Define the model: embedding -> LSTM -> ReLU -> batch norm -> dropout -> per-timestep softmax
model=Sequential()
# mask_zero=True treats input index 0 as padding to be masked out by later layers
model.add(Embedding(len(X_vocab),200,mask_zero=True))
model.add(LSTM(100,return_sequences=True)) # Can be changed to Bidirectional(LSTM())
model.add(Activation('relu')) 
model.add(keras.layers.BatchNormalization())
model.add(Dropout(0.25))
# One softmax over the label vocabulary for every timestep
model.add(TimeDistributed(Dense(len(Y_vocab),activation='softmax')))
adagrad_optimizer=keras.optimizers.Adagrad(lr=0.1)
model.compile(loss='categorical_crossentropy',optimizer=adagrad_optimizer,metrics=['accuracy'])
# summary() prints the table itself and returns None, so it must not be wrapped in print()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 200)         3259400   
_________________________________________________________________
lstm_1 (LSTM)                (None, None, 100)         120400    
_________________________________________________________________
activation_1 (Activation)    (None, None, 100)         0         
_________________________________________________________________
batch_normalization_1 (Batch (None, None, 100)         400       
_________________________________________________________________
dropout_1 (Dropout)          (None, None, 100)         0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, None, 3)           303       
Total params: 3,380,503
Trainable params: 3,380,303
Non-trainable params: 200
________________________________________________________________

## Start the Training
One epoch is one full pass over the training data, i.e. (dataset size)/(batch size) mini-batch iterations.

In [5]:
# Train for a single epoch in mini-batches of 128; validation_split=0.1 holds out
# 10% of the training samples for validation, and verbose=2 logs one line per epoch.
model.fit(X_train,Y_train,epochs=1,batch_size=128,verbose=2,validation_split=0.1)

# Early Stopping (disabled alternative): train for up to 10 epochs and stop once
# val_loss has not improved for 3 consecutive epochs.
#mycallback=keras.callbacks.EarlyStopping(monitor='val_loss', patience =3)
#model.fit(X_train,Y_train,epochs=10,batch_size=128,verbose=2,validation_split=0.1,callbacks =[mycallback])

Train on 32529 samples, validate on 3615 samples
Epoch 1/1
38s - loss: 0.2827 - acc: 0.8966 - val_loss: 0.3097 - val_acc: 0.8985


<keras.callbacks.History at 0x7f9c3ec44a58>

## Evaluate Accuracy

In [6]:
# evaluate() returns the loss followed by the metrics requested at compile time;
# here that is (loss, accuracy) on the held-out test split. Only the accuracy is reported.
score,accuracy=model.evaluate(X_test,Y_test)
print("Accuracy",accuracy)

Accuracy 0.896558438376


## Examine predictions

In [7]:
# Run the model on every test sentence; outp[i] holds the per-timestep class scores for X_test[i]
outp=model.predict(X_test)

In [8]:
def get_predictions(x,y,X_inv,Y_inv):
    """Print (token, predicted label) pairs for one encoded sentence.

    Args:
        x: Iterable of token ids for one sentence; every id must be a key of X_inv.
        y: Array of class scores for the same sentence, indexed as
            [timestep, class]; the highest-scoring class per timestep is
            taken as the predicted label id.
        X_inv: Mapping from token id to token string.
        Y_inv: Mapping from label id to label string.

    Returns:
        None. The pairs are printed as a list of (token, label) tuples.
    """
    input_words=[X_inv[word] for word in x]
    # Decode the scores passed in as `y` (not the notebook-level `outp`), so the
    # function reports the sample it was actually given.
    output_labels=np.argmax(y,axis=1)
    output_labels=[Y_inv[label] for label in output_labels.tolist()]
    print(list(zip(input_words,output_labels)))
    
# Show the token/label pairs for the test sentence at index 1
get_predictions(X_test[1],outp[1],X_inv,Y_inv)

[('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('__MASK__', 'clnd'), ('In', 'Outside'), ('brachial', 'Outside'), ('plexus', 'dsyn'), ('neuritis', 'dsyn'), (',', 'Outside'), ('conservative', 'Outside'), ('management', 'Outside'), ('m

## CNN-LSTM Model definition

In [None]:
from keras.layers import Conv1D
# CNN-GRU variant: embedding -> 1D convolution -> ReLU -> GRU -> ReLU -> batch norm -> dropout -> per-timestep softmax.
# NOTE: this rebinds `model`, replacing the model defined earlier in the notebook.
model=Sequential()
model.add(Embedding(len(X_vocab),200)) # Cannot use Mask because of CNN layer. An explicit mask can be passed to one of the later layers.
# padding='same' with stride 1 keeps the sequence length unchanged, so there is still one output per input token
model.add(Conv1D(100,5,strides=1,padding='same'))
model.add(Activation('relu'))
model.add(GRU(50,return_sequences=True))
model.add(Activation('relu'))
model.add(keras.layers.BatchNormalization())
model.add(Dropout(0.25))
# One softmax over the label vocabulary for every timestep
model.add(TimeDistributed(Dense(len(Y_vocab),activation='softmax')))
adagrad_optimizer=keras.optimizers.Adagrad(lr=0.1)
model.compile(loss='categorical_crossentropy',optimizer=adagrad_optimizer,metrics=['accuracy'])
# summary() prints the table itself and returns None, so it must not be wrapped in print()
model.summary()