In [1]:
import tensorflow as tf
from tensorflow.keras.applications import densenet
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.layers import Dense, LSTM, Input, Embedding, Conv2D, Concatenate, Flatten, Add, Dropout, GRU
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import os
import cv2
import tensorflow as tf
import re
import pickle
from PIL import Image
from skimage.transform import resize
import warnings

In [2]:
def load_image(img_name):
    """Read an image file and turn it into a single-image batch for the CNN.

    Parameters
    ----------
    img_name : str or path-like or file object
        Anything accepted by ``PIL.Image.open``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, 224, 224, 3)``: the RGB pixels passed through the
        DenseNet ``preprocess_input`` and then resized to 224x224.
    """
    # The context manager guarantees the underlying file handle is released,
    # including for formats where PIL keeps the file open after decoding.
    with Image.open(img_name) as image:
        pixels = np.asarray(image.convert("RGB"))

    # Order (normalise first, resize second) is kept exactly as before so the
    # extracted features stay comparable with previously computed ones.
    pixels = preprocess_input(pixels)
    pixels = resize(pixels, (224, 224, 3))

    # Add the leading batch axis expected by Model.predict.
    return np.expand_dims(pixels, axis=0)

In [3]:
# DenseNet121 backbone without its classification top, randomly initialised
# (weights=None), with global average pooling applied to the final feature map.
chex = densenet.DenseNet121(
    include_top=False,
    weights=None,
    input_shape=(224, 224, 3),
    pooling="avg",
)

In [4]:
# Put a 14-unit sigmoid head named "predictions" on top of the pooled backbone
# output, then restore the weights stored in ./utils/Image_features_enc.h5.
X = Dense(14, activation="sigmoid", name="predictions")(chex.output)
model = Model(inputs=chex.input, outputs=X)
model.load_weights(r'./utils/Image_features_enc.h5')

# Feature extractor: same input, but stops at the second-to-last layer
# (presumably the pooled feature vector feeding "predictions" -- TODO confirm).
imgModel = Model(inputs=model.input, outputs=model.layers[-2].output)

In [5]:
# Load the tokenizer object stored in tokenizer.pickle.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open pickle files that come from a trusted source.
with open('tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

In [6]:
# Sanity check: display the class of the object that was just unpickled.
type(tokenizer)

keras_preprocessing.text.Tokenizer

In [7]:
# tokenizer.index_word

In [8]:
# One index per word counted by the tokenizer, plus one extra slot
# (presumably for the padding index 0 used by the masked embedding -- TODO confirm).
vocab_size = len(tokenizer.word_counts) + 1

# `with` closes the file even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('Image_features_attention.pickle', 'rb') as f:  # contains the features from chexNet
    Xnet_Features = pickle.load(f)

In [9]:
# Load the embedding matrix stored in embedding_matrix.pickle.
# NOTE(review): the handle is named `output_file` although it is opened for reading.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(r"embedding_matrix.pickle", "rb") as output_file:
    embedding_matrix = pickle.load(output_file)

In [10]:
# ---- Image branch: project the 2048-d image feature vector down to 256 units.
# `shape` must be a tuple; `(2048)` is just the int 2048, so use a 1-tuple
# (consistent with Input(shape=(256,)) used for the inference decoder).
input1 = Input(shape=(2048,), name='Image_1')
dense1 = Dense(256, kernel_initializer=tf.keras.initializers.glorot_uniform(seed=56),
               name='dense_encoder')(input1)

# ---- Text branch: frozen embedding (initialised from embedding_matrix, index 0
# masked) followed by two stacked LSTMs over the 153-token partial report.
input2 = Input(shape=(153,), name='Text_Input')
emb_layer = Embedding(input_dim=vocab_size, output_dim=300, input_length=153, mask_zero=True,
                      trainable=False, weights=[embedding_matrix], name="Embedding_layer")
emb = emb_layer(input2)

# NOTE: `LSTM1` is the output *tensor* of the first LSTM (full sequence),
# whereas `LSTM2` below is the layer object and `LSTM2_output` its tensor.
LSTM1 = LSTM(units=256, activation='tanh', recurrent_activation='sigmoid', use_bias=True,
             kernel_initializer=tf.keras.initializers.glorot_uniform(seed=23),
             recurrent_initializer=tf.keras.initializers.orthogonal(seed=7),
             bias_initializer=tf.keras.initializers.zeros(), return_sequences=True,
             name="LSTM1")(emb)

LSTM2 = LSTM(units=256, activation='tanh', recurrent_activation='sigmoid', use_bias=True,
             kernel_initializer=tf.keras.initializers.glorot_uniform(seed=23),
             recurrent_initializer=tf.keras.initializers.orthogonal(seed=7),
             bias_initializer=tf.keras.initializers.zeros(), name="LSTM2")
LSTM2_output = LSTM2(LSTM1)

dropout1 = Dropout(0.5, name='dropout1')(LSTM2_output)

# ---- Merge: element-wise sum of the image projection and the text state.
dec = tf.keras.layers.Add()([dense1, dropout1])

# `fc1` and `output_layer` are kept as layer objects so they can be re-applied
# when a separate inference decoder is assembled later.
fc1 = Dense(256, activation='relu', kernel_initializer=tf.keras.initializers.he_normal(seed=63),
            name='fc1')
fc1_output = fc1(dec)
dropout2 = Dropout(0.4, name='dropout2')(fc1_output)
output_layer = Dense(vocab_size, activation='softmax', name='Output_layer')
output = output_layer(dropout2)

encoder_decoder = Model(inputs=[input1, input2], outputs=output)
encoder_decoder.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Text_Input (InputLayer)         [(None, 153)]        0                                            
__________________________________________________________________________________________________
Embedding_layer (Embedding)     (None, 153, 300)     393900      Text_Input[0][0]                 
__________________________________________________________________________________________________
LSTM1 (LSTM)                    (None, 153, 256)     570368      Embedding_layer[0][0]            
__________________________________________________________________________________________________
Image_1 (InputLayer)            [(None, 2048)]       0                                            
____________________________________________________________________________________________

In [11]:
# Restore the weights saved in ./Weights_re/encoder_decoder_epoch_20.h5 into the model.
encoder_decoder.load_weights(r"./Weights_re/encoder_decoder_epoch_20.h5")

In [12]:
# Split the trained network into two inference models that share its layers.
encoder_input, text_input = encoder_decoder.input

# Encoder: image features -> output of the 'dense_encoder' projection.
encoder_output = encoder_decoder.get_layer('dense_encoder').output
encoder_model = Model(inputs=encoder_input, outputs=encoder_output)

# Decoder: (partial report tokens, encoder output) -> next-word distribution.
# The encoder output is fed through a fresh 256-d placeholder, and the existing
# LSTM2 output, fc1 and output_layer are re-used; the Dropout layers are left out.
enc_output = Input(shape=(256,), name='Enc_Output')
text_output = encoder_decoder.get_layer('LSTM2').output
add1 = Add()([text_output, enc_output])
fc_1 = fc1(add1)
decoder_output = output_layer(fc_1)
decoder_model = Model(inputs=[text_input, enc_output], outputs=decoder_output)

In [13]:
def beamsearch(image, beam_width, max_len=153):
    """Generate a report with beam search over the decoder's word distribution.

    Parameters
    ----------
    image : array-like
        Image features passed unchanged to ``encoder_model.predict``.
    beam_width : int
        Number of hypotheses kept alive per step; must be >= 1.
    max_len : int, optional
        Maximum number of decoding steps and the length partial sequences are
        padded to. It has to match the decoder's text input length
        (153 for the model built in this notebook).

    Returns
    -------
    (str, float)
        The decoded report (without the start/end markers) and its
        length-normalised negative log-likelihood (lower is better).

    Raises
    ------
    ValueError
        If ``beam_width`` is smaller than 1.
    """
    if beam_width < 1:
        # `[-0:]` would silently select the *whole* vocabulary.
        raise ValueError("beam_width must be a positive integer")

    start_id = tokenizer.word_index['startseq']
    end_id = tokenizer.word_index['endseq']

    img_features = encoder_model.predict(image)

    # Each hypothesis is [token ids, accumulated negative log-probability].
    sequences = [[[start_id], 0.0]]
    finished_seq = []

    for _ in range(max_len):
        all_candidates = []
        for seq, score in sequences:
            text_input = pad_sequences([seq], max_len, padding='post')
            probs = decoder_model.predict([text_input, img_features])[0]
            for t in np.argsort(probs)[-beam_width:]:
                # Clamp so a probability of exactly 0 cannot yield an infinite score.
                prob = max(float(probs[t]), 1e-12)
                all_candidates.append([seq + [int(t)], score - float(np.log(prob))])

        all_candidates.sort(key=lambda cand: cand[1])

        # Hypotheses that just produced 'endseq' leave the beam.
        count = 0
        sequences = []
        for seq, score in all_candidates[:beam_width]:
            if seq[-1] == end_id:
                finished_seq.append([seq, score / len(seq)])  # length-normalised
                count += 1
            else:
                sequences.append([seq, score])
        beam_width -= count

        # Every hypothesis reached its end before max_len timesteps.
        if not sequences:
            break

    if finished_seq:
        # Pick the best finished hypothesis, not merely the last one to finish.
        best_seq, best_score = min(finished_seq, key=lambda cand: cand[1])
    else:
        # Nothing emitted 'endseq' within max_len steps: fall back to the best
        # unfinished hypothesis instead of failing on an empty list.
        best_seq, best_score = min(
            ([seq, score / len(seq)] for seq, score in sequences),
            key=lambda cand: cand[1],
        )

    words = []
    for token in best_seq[1:]:  # skip the start marker
        if token == end_id:
            break
        words.append(tokenizer.index_word[token])

    return ' '.join(words), best_score

In [14]:
def greedysearch(img, max_len=153):
    """Generate a report by always taking the most probable next word.

    Parameters
    ----------
    img : array-like
        Image features passed unchanged to ``encoder_model.predict``.
    max_len : int, optional
        Maximum number of generated words and the length the partial report is
        padded to. It has to match the decoder's text input length
        (153 for the model built in this notebook).

    Returns
    -------
    str
        The generated words joined by single spaces, without the
        'startseq'/'endseq' markers.
    """
    end_id = tokenizer.word_index['endseq']
    image_features = encoder_model.predict(img)  # encoder output

    # Keep the partial report as token ids so it does not have to be split and
    # re-tokenised from a growing string on every step.
    tokens = [tokenizer.word_index['startseq']]
    result = []
    for _ in range(max_len):
        input_padded = pad_sequences([tokens], max_len, padding='post')
        predictions = decoder_model.predict([input_padded, image_features])
        arg = int(np.argmax(predictions))
        if arg == end_id:
            break
        result.append(tokenizer.index_word[arg])
        tokens.append(arg)

    return ' '.join(result)

In [15]:
def predict(img1, img2):
    """Print a beam-search report and a greedy report for a pair of images.

    Both arguments are handed to ``load_image``; each image is run through
    ``imgModel`` and the two feature arrays are concatenated along axis 1
    before decoding. The beam-search result (report, score) is printed first
    with a beam width of 5, followed by the greedy report. Returns ``None``.
    """
    features = [imgModel.predict(load_image(path)) for path in (img1, img2)]
    image_input = np.concatenate(features, axis=1)

    print(beamsearch(image_input, 5))
    print(greedysearch(image_input))

    

In [16]:
# Demo on the CXR152 image pair.
# NOTE(review): absolute, machine-specific Windows paths; this cell only runs where these files exist.
predict(r"C:\Users\prana\OneDrive\Desktop\ProjectsInProgress\MDP_Xray\imgs\CXR152_IM-0335-1001.png", r"C:\Users\prana\OneDrive\Desktop\ProjectsInProgress\MDP_Xray\imgs\CXR152_IM-0335-2001.png")

('the cardiomediastinal silhouette within normal limits for size and contour the lungs are normally inflated without evidence focal airspace disease pleural effusion pneumothorax osseous structures are within normal limits for patient age', 0.2372301875594156)
the heart normal size the mediastinum unremarkable the lungs are clear


In [19]:
# Demo on the CXR1_1 image pair.
# NOTE(review): absolute, machine-specific Windows paths; this cell only runs where these files exist.
predict(r"C:\Users\prana\OneDrive\Desktop\ProjectsInProgress\MDP_Xray\imgs\CXR1_1_IM-0001-4001.png", r"C:\Users\prana\OneDrive\Desktop\ProjectsInProgress\MDP_Xray\imgs\CXR1_1_IM-0001-3001.png")

('the cardiomediastinal silhouette within normal limits for size and contour the lungs are normally inflated without evidence focal airspace disease pleural effusion pneumothorax no acute bone abnormality', 0.23247014566895186)
the heart normal size the mediastinum unremarkable the lungs are clear


In [18]:
# Demo on the CXR8 image pair.
# NOTE(review): absolute, machine-specific Windows paths; this cell only runs where these files exist.
predict(r"C:\Users\prana\OneDrive\Desktop\ProjectsInProgress\MDP_Xray\imgs\CXR8_IM-2333-1001.png", r"C:\Users\prana\OneDrive\Desktop\ProjectsInProgress\MDP_Xray\imgs\CXR8_IM-2333-2001.png")

('the cardiomediastinal silhouette within normal limits for size and contour the lungs are normally inflated without evidence focal airspace disease pleural effusion pneumothorax no acute bone abnormality', 0.22209578860214418)
the lungs are clear there no pleural effusion the heart and mediastinum are normal the skeletal structures and soft tissues are normal
