# Setup

In [1]:
import json
import matplotlib.pyplot  as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, Model
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Input, Softmax, RNN, Dense, Embedding, LSTM, Layer, Dropout 

In [3]:
tf.config.list_physical_devices()


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [4]:
# tf.debugging.set_log_device_placement(True)
gpus = tf.config.list_physical_devices('GPU')
tf.config.set_visible_devices(gpus[0], 'GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)

# Attention model

In [7]:
image_model =  tf.keras.applications.DenseNet121(weights='CheXNet_weights.h5', classes = 14, input_shape=(256,256,3))

In [8]:
base_chest_x_net = Model(image_model.input, image_model.layers[-2].output)

In [9]:
merged_df = np.load('df_new.npy',allow_pickle=True)

train_split = merged_df[:3000] 
test_split = merged_df[3000:]

In [10]:
train_split.shape

(3000, 6)

In [11]:
train_split = pd.DataFrame(train_split, columns = ['front_view', 'lateral_view', 'findings','dec_ip','dec_op','extracted_feature'])
test_split = pd.DataFrame(test_split, columns = ['front_view', 'lateral_view', 'findings','dec_ip','dec_op','extracted_feature'])

Tokennizing the finding columns to frequency space to easly compute

In [12]:

# Initialize Tokenizer with specified filters
tokn_obj = Tokenizer(filters='!"#$%&()*+,-/:;=?@[\\]^_`{|}~\t\n')

# Fit Tokenizer on the 'findings' column of the 'train' DataFrame
tokn_obj.fit_on_texts(train_split['findings'])

# Add padding token to the word index
padd_obje = '<pad>'
tokn_obj.word_index[padd_obje] = 0
tokn_obj.index_word[0] = padd_obje

# Calculate the vocabulary size
len_all_words = len(tokn_obj.word_index) + 1

# Print the vocabulary size
print('Vocab size -', len_all_words)

Vocab size - 1399


In [13]:
# Sequence in train and validation
decoder_tokn_inp = tokn_obj.texts_to_sequences(train_split.dec_ip)
decoder_tokn_opt = tokn_obj.texts_to_sequences(train_split.dec_op)

test_decoder_tokn_inp = tokn_obj.texts_to_sequences(test_split.dec_ip)
test_decoder_tokn_opt = tokn_obj.texts_to_sequences(test_split.dec_op)

# Padding in the train and validation
max_len = 100
final_decoder_input = keras.preprocessing.sequence.pad_sequences(decoder_tokn_inp, maxlen=max_len, padding='post')
final_decoder_output = keras.preprocessing.sequence.pad_sequences(decoder_tokn_opt, maxlen=max_len, padding='post') 

final_test_decoder_input = keras.preprocessing.sequence.pad_sequences(test_decoder_tokn_inp, maxlen=max_len, padding='post')
final_test_decoder_output = keras.preprocessing.sequence.pad_sequences(test_decoder_tokn_opt, maxlen=max_len, padding='post') 

print(final_decoder_input[100])

# Create two inverse dictionalries of token_index in list and index in token object for easy search queries
obj_place = {obj: place for obj, place in tokn_obj.word_index.items()}
place_obj = {place: obj for obj, place in obj_place.items()}



[  5  16   4  20  74  13   3   2  18  24   9  14  10 104  29  25   3  19
   7  21   4  34   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0]


In [14]:
type(train_split.extracted_feature)

pandas.core.series.Series

# Text encoder decoder model

In [15]:
len_all_words

1399

In [16]:
block_lstm = 256
batch = 64
Buffer_words = 500
latent_dim = 300
attentionblock = 64

In [17]:
class primary_E_block(Model):
    def __init__(self,block_lstm):
        super().__init__()
        self.layer1  = Dense(block_lstm, kernel_initializer="glorot_uniform",)
        
    def call(self,x):
      encoder_output = self.layer1(x)
      return encoder_output  

In [18]:
class Primary_D_block(Model):
    def __init__(self, len_all_words, latent_dim, block_lstm, attentionblock):
          
          super().__init__()

          self.block_lstm = block_lstm
          self.len_all_words = len_all_words
          self.latent_dim = latent_dim
          self.attentionblock = attentionblock
          
          self.jump_decode = jump_decode(self.len_all_words, self.latent_dim, self.block_lstm, self.attentionblock)

    # @tf.function
    def call(self, x, training=None):
        
        io_decode, e_block_op, decoder_hidden = x
        batch_op = tf.TensorArray(tf.float32,size = io_decode.shape[1])
        batch_list = []
        for window in tf.range(io_decode.shape[1]):
            y=io_decode[:,window:window+1]
            output,middle_st,weights,sentance_vector = self.jump_decode((y,e_block_op,decoder_hidden))
            # output = tf.round(output)
            # output = tf.cast(output, tf.int32)
            batch_op = batch_op.write(window,output)
            # batch_list.append((window,output))
            # batch_list.append(output)
        
        # batch_op = tf.unstack(batch_op)
        # batch_op = tf.stack(batch_op)
        # tensors_temp = tf.unstack(batch_op)
        # print(batch_op)
        
        transposed_tensors = [batch_op.read(i) for i in range(io_decode.shape[1])]
        batch_op = tf.stack(transposed_tensors)
        batch_op = tf.transpose(batch_op, perm=[1, 0, 2])
        
        return batch_op

In [19]:
class Attention_block_class(Model):
    def __init__(self,attentionblock):
        super().__init__()
        self.attentionblock = attentionblock
        # self.multi_headed_attention = tf.keras.layers.MultiHeadAttention(2,key_dim= attentionblock//2)
        self.layyer1=  Dense(self.attentionblock, kernel_initializer="glorot_uniform", name='atn1')
        self.layyer2 =  Dense(self.attentionblock, kernel_initializer="glorot_uniform", name='atn2')
        self.layyer3 = Dense(1, kernel_initializer="glorot_uniform", name = 'atn3')
    
    
    @tf.function
    def call(self, x):
        decoder_hidden, e_block_op = x
        decoder_hidden = tf.expand_dims(decoder_hidden,axis = 1)

        # e_block_op = tf.reshape(e_block_op, (tf.shape(e_block_op)[0], -1, 2, self.attentionblock // 2))
        
        sam_OP = self.layyer3(tf.nn.tanh(self.layyer1(decoder_hidden) + self.layyer2(e_block_op)))
        
        A_weights    = tf.nn.softmax(sam_OP, axis=1)
        word_vector = A_weights * e_block_op
        word_vector = tf.reduce_sum(word_vector, axis=1)
        
        return word_vector, A_weights

In [20]:
class jump_decode(Model):
  def __init__(self, len_all_words, latent_dim, block_lstm, attentionblock):
      super().__init__()
      
      self.len_all_words = len_all_words
      self.latent_dim = latent_dim
      self.block_lstm = block_lstm
      self.attentionblock = attentionblock
      
      self.layerd = Dense(self.len_all_words, kernel_initializer="glorot_uniform")
      self.attentio_block = Attention_block_class( self.attentionblock)
      self.dec_emb = Embedding(self.len_all_words, self.latent_dim, trainable = True , name = 'embedding')           
      self.dec_gru = GRU(self.block_lstm, return_state=True, return_sequences=True, name="DLSTM") 
      
      
      self.dp1 = Dropout(0.2,name = 'd1')
      self.dp2 = Dropout(0.2,name = 'd2')
      self.dp3 = Dropout(0.2,name = 'd3')
  
  @tf.function
  def call(self,x,training=None):
    
    io_decode, e_block_op, dec_hidden = x

    embedded_output = self.dec_emb(io_decode)
    embedded_output = self.dp1(embedded_output)
    
   
    word_vector, layer_weights = self.attentio_block([dec_hidden,e_block_op])

    comp_dec_ip = tf.concat([tf.expand_dims(word_vector, 1),embedded_output], -1)
    comp_dec_ip = self.dp2(comp_dec_ip)

    gru_op, gru_hidden = self.dec_gru(comp_dec_ip, initial_state=dec_hidden)
    
    gru_op = tf.reshape(gru_op, (-1, gru_op.shape[2]))
    gru_op = self.dp3(gru_op)

    output = self.layerd(gru_op)

    return output,gru_hidden,layer_weights,word_vector

In [49]:
class Combine_model(Model):

  def __init__(self, len_all_words, latent_dim, block_lstm, attentionblock, batch):
        super().__init__()

        self.len_all_words = len_all_words
        self.batch = batch
        self.block_lstm = block_lstm
        self.latent_dim = latent_dim
        self.attentionblock = attentionblock

        self.encoder = primary_E_block(self.block_lstm)
        self.decoder = Primary_D_block(len_all_words, latent_dim, block_lstm, attentionblock)
        self.layerd   = Dense(self.len_all_words, kernel_initializer="glorot_uniform", name = 'last_layer_dense')


  
  def call(self,X):
    
    inbase, outbase = X[0], X[1]

    encoder_middle = tf.zeros((self.batch, self.block_lstm))
      
    enc_output = self.encoder(inbase)
    print(enc_output.shape)
    comb_x = [outbase,enc_output,encoder_middle]
    output = self.decoder(comb_x)
    
    return output

In [50]:

hloss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

def Custom_loss(orig,pred):
  print(orig.shape,pred.shape)  
  flter = tf.math.logical_not(tf.math.equal(orig, 0))
  hloss_ans = hloss(orig, pred)
  flter = tf.cast(flter, dtype=hloss_ans.dtype)
  hloss_ans *= flter

  return tf.reduce_mean(hloss_ans)

In [51]:
#Reshaping the Image tensors for training
train_image_features = np.vstack(train_split.extracted_feature)
test_image_features = np.vstack(test_split.extracted_feature)

In [52]:
train_df = ((train_image_features, final_decoder_input), final_decoder_output)
test_df = ((test_image_features,final_test_decoder_input),final_test_decoder_output)

# problem area in the 

In [53]:
train_df = tf.data.Dataset.from_tensor_slices(((train_image_features, final_decoder_input), final_decoder_output))
train_df = train_df.shuffle(Buffer_words).batch(batch).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

test_df = tf.data.Dataset.from_tensor_slices(((test_image_features,final_test_decoder_input),final_test_decoder_output))
test_df = test_df.shuffle(Buffer_words).batch(batch).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

In [54]:
# len(train_df[0][1][0])

In [55]:
Attention_model = Combine_model(attentionblock=attentionblock,len_all_words =len_all_words, latent_dim=latent_dim,  batch=batch, block_lstm=block_lstm)

In [56]:
Attention_model.compile(optimizer=tf.keras.optimizers.Adam(0.001),loss=Custom_loss)

In [57]:
early_measure = tf.keras.callbacks.EarlyStopping(monitor='val_loss',  patience = 5, baseline=None, verbose = 1, restore_best_weights=True)
adaptive_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', min_lr=0.000001, factor=0.1, patience=5, mode = 'min',verbose = 1)


In [58]:
md_history = Attention_model.fit(train_df, validation_data=0.2, batch_size=64,epochs=100, callbacks=[early_measure,adaptive_lr], shuffle=True)

Epoch 1/100
(None, 256)
(None, 100) (64, 100, 1399)




(None, 256)
(None, 100) (64, 100, 1399)

InvalidArgumentError: Graph execution error:

Detected at node 'Custom_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert' defined at (most recent call last):
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\traitlets\config\application.py", line 1046, in launch_instance
      app.start()
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelapp.py", line 736, in start
      self.io_loop.start()
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\tornado\platform\asyncio.py", line 195, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 600, in run_forever
      self._run_once()
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 1896, in _run_once
      handle._run()
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 516, in dispatch_queue
      await self.process_one()
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 505, in process_one
      await dispatch(*args)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 412, in dispatch_shell
      await result
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 740, in execute_request
      reply_content = await reply_content
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\zmqshell.py", line 546, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3024, in run_cell
      result = self._run_cell(
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3079, in _run_cell
      result = runner(coro)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3284, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3466, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3526, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\k2lea\AppData\Local\Temp\ipykernel_15988\101232946.py", line 1, in <module>
      md_history = Attention_model.fit(train_df, validation_data=0.2, batch_size=64,epochs=100, callbacks=[early_measure,adaptive_lr], shuffle=True)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 994, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1052, in compute_loss
      return self.compiled_loss(
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\compile_utils.py", line 265, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\losses.py", line 152, in __call__
      losses = call_fn(y_true, y_pred)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\losses.py", line 272, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\k2lea\AppData\Local\Temp\ipykernel_15988\2361301951.py", line 6, in Custom_loss
      hloss_ans = hloss(orig, pred)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\losses.py", line 152, in __call__
      losses = call_fn(y_true, y_pred)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\losses.py", line 272, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\losses.py", line 2084, in sparse_categorical_crossentropy
      return backend.sparse_categorical_crossentropy(
    File "C:\Users\k2lea\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\backend.py", line 5630, in sparse_categorical_crossentropy
      res = tf.nn.sparse_softmax_cross_entropy_with_logits(
Node: 'Custom_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert'
assertion failed: [Condition x == y did not hold element-wise:] [x (Custom_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [56 100] [y (Custom_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [64 100]
	 [[{{node Custom_loss/sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert}}]] [Op:__inference_train_function_21075]