In [1]:
from __future__ import print_function
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding
import numpy as np
from data_utils import Utility

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
batch_size = 128  # Batch size for training.
epochs = 100  # Number of epochs to train for.
latent_dim = 256  # Latent dimensionality of the encoding space.
num_samples = 10000  # Number of samples to train on.


In [9]:
# VQA_MED
val_images, val_questions, val_answers = Utility.read_dataset("Valid")
train_images, train_questions, train_answers = Utility.read_dataset("Train")

Valid data size= 500
Train data size= 5413


In [15]:
import pandas as pd
lines = pd.DataFrame({'eng':train_questions, 'fr':train_answers})
lines.sample(10)

Unnamed: 0,eng,fr
368,what shows fluid filled appendix with peri cae...,ct image
2547,what does the mass show ?,combination of fat calcification and soft tissue
2598,what shows the involvement of the right iliac ...,axial mri
1154,what is the post operative ct scan of the head...,the complete resection of the metastatic lesio...
2667,what show contrast enhancement in both joint s...,post contrast images
727,what shows diffuse ground glass opacities ?,ct thorax
4335,what shows a lesion with peripheral ossificati...,ct of the thigh
5179,what does multiplanar ct reconstruction and su...,vessels
3542,what does axial mri image show intense enhance...,of the mass
1022,what does sagittal mri image show the severe c...,of the cord


In [18]:
# Data Clean
#import string
#exclude = set(string.punctuation)
#lines.eng=lines.eng.apply(lambda x: ''.join(ch for ch in x if ch not in exclude))
#lines.fr=lines.fr.apply(lambda x: ''.join(ch for ch in x if ch not in exclude))

lines.fr = lines.fr.apply(lambda x : 'START_ '+ x + ' _END')


In [19]:
lines.head()

Unnamed: 0,eng,fr
0,what does mri show ?,START_ lesion at tail of pancreas _END
1,where does axial section mri abdomen show hypo...,START_ in distal pancreas _END
2,what do the arrows denote in the noncontrast c...,START_ complex fluid collection with layering ...
3,what was normal ?,START_ blood supply to the brain _END
4,what shows evidence of a contained rupture ?,START_ repeat ct scan of the abdomen _END


In [20]:
all_eng_words=set()
for eng in lines.eng:
    for word in eng.split():
        if word not in all_eng_words:
            all_eng_words.add(word)
    
all_french_words=set()
for fr in lines.fr:
    for word in fr.split():
        if word not in all_french_words:
            all_french_words.add(word)

In [21]:
len(all_eng_words), len(all_french_words)

(3332, 3270)

In [22]:
# Answers
lenght_list=[]
for l in lines.fr:
    lenght_list.append(len(l.split(' ')))
np.max(lenght_list)

28

In [23]:
# Questions
lenght_list=[]
for l in lines.eng:
    lenght_list.append(len(l.split(' ')))
np.max(lenght_list)

29

In [24]:
input_words = sorted(list(all_eng_words))
target_words = sorted(list(all_french_words))
num_encoder_tokens = len(all_eng_words)
num_decoder_tokens = len(all_french_words)
# del all_eng_words, all_french_words

In [28]:
input_token_index = dict(
    [(word, i) for i, word in enumerate(input_words)])
target_token_index = dict(
    [(word, i) for i, word in enumerate(target_words)])

In [29]:
len(lines.fr)*16*num_decoder_tokens

283208160

In [30]:
encoder_input_data = np.zeros(
    (len(lines.eng), 29),
    dtype='float32')
decoder_input_data = np.zeros(
    (len(lines.fr), 28),
    dtype='float32')
decoder_target_data = np.zeros(
    (len(lines.fr), 28, num_decoder_tokens),
    dtype='float32')

In [31]:
for i, (input_text, target_text) in enumerate(zip(lines.eng, lines.fr)):
    for t, word in enumerate(input_text.split()):
        encoder_input_data[i, t] = input_token_index[word]
    for t, word in enumerate(target_text.split()):
        # decoder_target_data is ahead of decoder_input_data by one timestep
        decoder_input_data[i, t] = target_token_index[word]
        if t > 0:
            # decoder_target_data will be ahead by one timestep
            # and will not include the start character.
            decoder_target_data[i, t - 1, target_token_index[word]] = 1.

In [32]:
embedding_size = 50
from keras.layers import Input, LSTM, Embedding, Dense
from keras.models import Model
from keras.utils import plot_model

In [33]:
# Encoder model
encoder_inputs = Input(shape=(None,))
en_x=  Embedding(num_encoder_tokens, embedding_size)(encoder_inputs)
encoder = LSTM(50, return_state=True)
encoder_outputs, state_h, state_c = encoder(en_x)
# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]

In [34]:
# Decoder model
decoder_inputs = Input(shape=(None,))

dex=  Embedding(num_decoder_tokens, embedding_size)

final_dex= dex(decoder_inputs)


decoder_lstm = LSTM(50, return_sequences=True, return_state=True)

decoder_outputs, _, _ = decoder_lstm(final_dex,
                                     initial_state=encoder_states)

decoder_dense = Dense(num_decoder_tokens, activation='softmax')

decoder_outputs = decoder_dense(decoder_outputs)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])

In [35]:
model.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None)         0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, None)         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, None, 50)     166600      input_1[0][0]                    
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, None, 50)     163500      input_2[0][0]                    
__________________________________________________________________________________________________
lstm_1 (LS

In [36]:
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=128,
          epochs=5,
          validation_split=0)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f907398bf60>

In [37]:
encoder_model = Model(encoder_inputs, encoder_states)
encoder_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None)              0         
_________________________________________________________________
embedding_1 (Embedding)      (None, None, 50)          166600    
_________________________________________________________________
lstm_1 (LSTM)                [(None, 50), (None, 50),  20200     
Total params: 186,800
Trainable params: 186,800
Non-trainable params: 0
_________________________________________________________________


In [38]:
decoder_state_input_h = Input(shape=(50,))
decoder_state_input_c = Input(shape=(50,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

final_dex2= dex(decoder_inputs)

decoder_outputs2, state_h2, state_c2 = decoder_lstm(final_dex2, initial_state=decoder_states_inputs)
decoder_states2 = [state_h2, state_c2]
decoder_outputs2 = decoder_dense(decoder_outputs2)
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs2] + decoder_states2)

# Reverse-lookup token index to decode sequences back to
# something readable.
reverse_input_char_index = dict(
    (i, char) for char, i in input_token_index.items())
reverse_target_char_index = dict(
    (i, char) for char, i in target_token_index.items())

In [39]:
def decode_sequence(input_seq):
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq)
    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1,1))
    # Populate the first character of target sequence with the start character.
    target_seq[0, 0] = target_token_index['START_']

    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict(
            [target_seq] + states_value)

        # Sample a token
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += ' '+sampled_char

        # Exit condition: either hit max length
        # or find stop character.
        if (sampled_char == '_END' or
           len(decoded_sentence) > 52):
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1,1))
        target_seq[0, 0] = sampled_token_index

        # Update states
        states_value = [h, c]

    return decoded_sentence

In [44]:
print(lines.eng[0:1])

0    what does mri show ?
Name: eng, dtype: object


In [45]:
for seq_index in range(10):
    input_seq = encoder_input_data[seq_index: seq_index + 1]
    decoded_sentence = decode_sequence(input_seq)
    print('-')
    print('Input sentence:', lines.eng[seq_index])
    print('Actual sentence:', lines.fr[seq_index])
    print('Decoded sentence:', decoded_sentence)
    
    

-
Input sentence: what does mri show ?
Actual sentence: START_ lesion at tail of pancreas _END
Decoded sentence:  the the _END
-
Input sentence: where does axial section mri abdomen show hypoechoic mass ?
Actual sentence: START_ in distal pancreas _END
Decoded sentence:  the _END
-
Input sentence: what do the arrows denote in the noncontrast ct scan image of pelvis ?
Actual sentence: START_ complex fluid collection with layering consistent with hematoma _END
Decoded sentence:  the the _END
-
Input sentence: what was normal ?
Actual sentence: START_ blood supply to the brain _END
Decoded sentence:  the the the _END
-
Input sentence: what shows evidence of a contained rupture ?
Actual sentence: START_ repeat ct scan of the abdomen _END
Decoded sentence:  the the the _END
-
Input sentence: what does preoperative ct demonstrate ?
Actual sentence: START_ severe loosening of the pedicle screws at l3 and l5 _END
Decoded sentence:  the the the _END
-
Input sentence: what does the axial contrac

In [3]:

questions_vocab = set()
answers_vocab = set()

for i in range(len(train_answers)):
    train_answers[i] = "<go> "+train_answers[i]+" <eos>"
    
for q,a in zip(val_questions, val_answers):
    for w in q.split():
        if w not in questions_vocab:
            questions_vocab.add(w)
    for w in a.split():
        if w not in answers_vocab:
            answers_vocab.add(w)
            

for q,a in zip(train_questions, train_answers):
    for w in q.split():
        if w not in questions_vocab:
            questions_vocab.add(w)
    for w in a.split():
        if w not in answers_vocab:
            answers_vocab.add(w)
            


Valid data size= 500
Train data size= 5413


In [6]:
train_answers[2]

'<go> complex fluid collection with layering consistent with hematoma <eos>'

In [7]:
#VQA-MED

input_characters = sorted(list(questions_vocab))
target_characters = sorted(list(answers_vocab))
num_encoder_tokens = len(questions_vocab)
num_decoder_tokens = len(answers_vocab)
max_encoder_seq_length = max([len(txt.split()) for txt in train_questions])
max_decoder_seq_length = max([len(txt.split()) for txt in train_answers])

In [8]:
# input_words = sorted(list(input_characters))
# target_words = sorted(list(target_characters))
# num_encoder_tokens = len(input_characters)
# num_decoder_tokens = len(target_characters)
# max_encoder_seq_length = max([len(txt) for txt in input_texts])
# max_decoder_seq_length = max([len(txt) for txt in target_texts])

In [9]:
print('Number of samples:', len(train_answers))
print('Number of unique input tokens:', num_encoder_tokens)
print('Number of unique output tokens:', num_decoder_tokens)
print('Max sequence length for inputs:', max_encoder_seq_length)
print('Max sequence length for outputs:', max_decoder_seq_length)


Number of samples: 5413
Number of unique input tokens: 3400
Number of unique output tokens: 3381
Max sequence length for inputs: 29
Max sequence length for outputs: 28


In [99]:
input_token_index = dict(
    [(char, i) for i, char in enumerate(questions_vocab)])
target_token_index = dict(
    [(char, i) for i, char in enumerate(answers_vocab)])


In [11]:
ques_wor2tok = dict(
    [(char, i) for i, char in enumerate(questions_vocab)])
ans_wor2tok = dict(
    [(char, i) for i, char in enumerate(answers_vocab)])


ques_tok2wor = dict(
    (i, char) for char, i in ques_wor2tok.items())
ans_tok2wor = dict(
    (i, char) for char, i in ans_wor2tok.items())


In [44]:
ques_tok2wor[3400] = '<pad>'
ans_tok2wor[3381] = '<pad>'

ques_wor2tok['<pad>'] = 3400
ans_wor2tok['<pad>'] = 3381

In [43]:
ques_wor2tok["<pad>"]

KeyError: '<pad>'

In [97]:
encoder_input_data = np.zeros(
    (len(train_answers), max_encoder_seq_length, num_encoder_tokens),
    dtype='float32')
decoder_input_data = np.zeros(
    (len(train_answers), max_decoder_seq_length, num_decoder_tokens),
    dtype='float32')
decoder_target_data = np.zeros(
    (len(train_answers), max_decoder_seq_length, num_decoder_tokens),
    dtype='float32')

In [16]:
encoder_input_data = np.zeros(
    (len(train_answers), max_encoder_seq_length),
    dtype='float32')
decoder_input_data = np.zeros(
    (len(train_answers), max_decoder_seq_length),
    dtype='float32')
decoder_target_data = np.zeros(
    (len(train_answers), max_decoder_seq_length),
    dtype='float32')

In [74]:
ei = []
di = []
dt = []
for ques, ans in zip(train_questions, train_answers):
    ques_token = [ques_wor2tok[word] for word in ques.strip().split()]
    ei.append(ques_token + [ques_wor2tok["<pad>"]]* (max_encoder_seq_length - len(ques_token)))
    
    ans_token = [ans_wor2tok[word] for word in ans.strip().split()]
    di.append(ans_token + [ans_wor2tok["<pad>"]]* (max_decoder_seq_length - len(ans_token)))
    
    ans_token_dt = ans_token[1:]
    dt.append(ans_token_dt + [ans_wor2tok["<pad>"]]* (max_decoder_seq_length - len(ans_token_dt)))
    
    

In [80]:
ans_tok2wor[di[0][1]]

'lesion'

In [None]:
# for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
#     for t, char in enumerate(input_text):
#         encoder_input_data[i, t, input_token_index[char]] = 1.
#     for t, char in enumerate(target_text):
#         # decoder_target_data is ahead of decoder_input_data by one timestep
#         decoder_input_data[i, t, target_token_index[char]] = 1.
#         if t > 0:
#             # decoder_target_data will be ahead by one timestep
#             # and will not include the start character.
#             decoder_target_data[i, t - 1, target_token_index[char]] = 1.

In [100]:
for i, (ques, ans) in enumerate(zip(train_questions, train_answers)):
    for t, char in enumerate(ques.split()):
        encoder_input_data[i, t, input_token_index[char]] = 1.
    for t, char in enumerate(ans.split()):
        # decoder_target_data is ahead of decoder_input_data by one timestep
        decoder_input_data[i, t, target_token_index[char]] = 1.
        if t > 0:
            # decoder_target_data will be ahead by one timestep
            # and will not include the start character.
            decoder_target_data[i, t - 1, target_token_index[char]] = 1.

In [93]:
len(ei[0])

TypeError: object of type 'int' has no len()

In [84]:
#with tf.device("/gpu:0"):
# Define an input sequence and process it.
encoder_inputs = Input(shape=(None,))
x = Embedding(num_encoder_tokens, latent_dim)(encoder_inputs)
x, state_h, state_c = LSTM(latent_dim,
                           return_state=True)(x)
encoder_states = [state_h, state_c]

#encoder = LSTM(latent_dim, return_state=True)
#    encoder_outputs, state_h, state_c = encoder(encoder_inputs)
# We discard `encoder_outputs` and only keep the states.
#encoder_states = [state_h, state_c]

In [105]:
# Define an input sequence and process it.
encoder_inputs = Input(shape=(None,))
x = Embedding(num_encoder_tokens, latent_dim)(encoder_inputs)
x, state_h, state_c = LSTM(latent_dim,
                           return_state=True)(x)
encoder_states = [state_h, state_c]

# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None,))
x = Embedding(num_decoder_tokens, latent_dim)(decoder_inputs)
x = LSTM(latent_dim, return_sequences=True)(x, initial_state=encoder_states)
decoder_outputs = Dense(num_decoder_tokens, activation='softmax')(x)

# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)



In [85]:
# with tf.device("/gpu:1"):
# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None,))
# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
x = Embedding(num_decoder_tokens, latent_dim)(decoder_inputs)
x = LSTM(latent_dim, return_sequences=True)(x, initial_state=encoder_states)
decoder_outputs = Dense(num_decoder_tokens, activation='softmax')(x)




# decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
# decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
#                                  initial_state=encoder_states)
# decoder_dense = Dense(num_decoder_tokens, activation='softmax')
# decoder_outputs = decoder_dense(decoder_outputs)

In [86]:
# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
from keras.utils.training_utils import multi_gpu_model

import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0,1"

In [87]:
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['mae'])
model.fit([np.array(ei), np.array(di)], np.array(dt), batch_size=batch_size, epochs=1)

ValueError: Error when checking target: expected dense_6 to have 3 dimensions, but got array with shape (5413, 28)

In [117]:
x

<tf.Tensor 'lstm_25/transpose_1:0' shape=(?, ?, 256) dtype=float32>

In [None]:
model.save('s2s.h5')

In [119]:
# Next: inference mode (sampling).
# Here's the drill:
# 1) encode input and retrieve initial decoder state
# 2) run one step of decoder with this initial state
# and a "start of sequence" token as target.
# Output will be the next target token
# 3) Repeat with the current target token and current states

# Define sampling models
encoder_model = Model(encoder_inputs, encoder_states)

decoder_state_input_h = Input(shape=(None, None, 256))
decoder_state_input_c = Input(shape=(None, None, 256))

decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs] + decoder_states)

ValueError: Layer lstm_2 expects 19 inputs, but it received 3 input tensors. Input received: [<tf.Tensor 'input_32:0' shape=(?, ?) dtype=float32>, <tf.Tensor 'input_45:0' shape=(?, ?, ?, 256) dtype=float32>, <tf.Tensor 'input_46:0' shape=(?, ?, ?, 256) dtype=float32>]

In [None]:

# Reverse-lookup token index to decode sequences back to
# something readable.
reverse_input_char_index = dict(
    (i, char) for char, i in input_token_index.items())
reverse_target_char_index = dict(
    (i, char) for char, i in target_token_index.items())


In [69]:
def decode_sequence(input_seq):
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq)

    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1, num_decoder_tokens))
    # Populate the first character of target sequence with the start character.
    target_seq[0, 0, target_token_index['<go>']] = 1.

    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict(
            [target_seq] + states_value)

        # Sample a token
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += sampled_char+" "

        # Exit condition: either hit max length
        # or find stop character.
        if (sampled_char == '<eos>' or
           len(decoded_sentence.split()) > max_decoder_seq_length):
            stop_condition = True

        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.

        # Update states
        states_value = [h, c]

    return decoded_sentence



In [70]:
from bleu import moses_multi_bleu
bleus = moses_multi_bleu(["Hello world Hello world"], ["Hello world Hello world"])

/tmp/tmp624pcnsr


In [81]:
epochs = 10
# Run training
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['mae'])

for i in range(epochs):
    print("Epoch",str(i),", out of", str(epochs))
    model.fit([ei, di], dt,
          batch_size=batch_size,
          epochs=1)
    
    if (i+1)%5 ==0:
        print("Testing ...")
        ref = []
        pred = []
        for seq_index in range(10):
            
            input_seq = encoder_input_data[seq_index: seq_index + 1]
            decoded_sentence = decode_sequence(input_seq)
            
            reference = train_answers[seq_index].replace("<eos>", "")
            reference = reference.replace("<go>", "")
            ref.append(reference.strip())
            
            predicted = decoded_sentence.replace("<eos>", "")
            predicted = predicted.replace("<go>", "")
            pred.append(predicted.strip())
            
            if seq_index <=10:
                print('\n-')
                print('Input Question  :', train_questions[seq_index])
                print('Traget Answer   :', reference)
                print('Predicted Answer:', predicted)
                
            
        bleus = moses_multi_bleu(ref, pred)
        print("Bleu:", str(bleus))

Epoch 0 , out of 10


AttributeError: 'list' object has no attribute 'ndim'

In [None]:
max_decoder_seq_length