# Mount from drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Import libraries

In [2]:
import gensim
import h5py
import copy
import sys
from random import shuffle, seed
import numpy as np
import nltk
from nltk.tokenize import word_tokenize
import re
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import os
import tensorflow as tf
from tensorflow.keras import Model

# load pre-trained models for image and word embedding

In [3]:
model_path = '/content/drive/MyDrive/ComputerVisionProject/used_models/GoogleNews-vectors-negative300.bin.gz'          
model_w2v = gensim.models.KeyedVectors.load_word2vec_format(model_path, binary=True) 

In [4]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [3]:
# model for extract feature from images
image_feature_extraction = tf.keras.applications.VGG16(include_top=False,weights='imagenet',input_shape=(448,448,3))
last_layer = image_feature_extraction.layers[-1].output   # Last layer has an output layer of (14,14,512)
image_feature_extraction = Model(image_feature_extraction.input,last_layer)
image_feature_extraction.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [6]:
'''  The below model will convert (196,512) to (21,300) (i.e same as the dimension of word embedding)  '''
dimen_red = tf.keras.Sequential()  # Use for converting (196,512) -> (21,300)
dimen_red.add(tf.keras.layers.Conv2D(300,kernel_size=(1,1),input_shape= (14,14,512)))
dimen_red.add(tf.keras.layers.Reshape((196,300)))
dimen_red.add(tf.keras.layers.Permute((2,1)))  # Reshaping about the axis, useful for applying the dense network
dimen_red.add(tf.keras.layers.Dense(11))
dimen_red.add(tf.keras.layers.Permute((2,1)))  # Reshaping about the axis, useful for applying the dense network

# load and prepare dataset

In [29]:
# important pathes
data = pd.read_csv('/content/drive/MyDrive/ComputerVisionProject/moreData/MedicalDataset/train/All_QA_Pairs_train.csv') 
image_path = '/content/drive/MyDrive/ComputerVisionProject/moreData/MedicalDataset/train/TrainImages'

In [30]:

number_of_answer = len(data["answer"].unique())
max_length = max([len(x.split()) for x in data["question"]]) # Max Length of the question
length_of_data = len(data)

In [31]:
data["image_name"] = data["image_name"].apply(lambda x : x + ".jpg")
data.head()

Unnamed: 0,image_name,question,answer
0,synpic41148.jpg,what kind of image is this?,cta - ct angiography
1,synpic43984.jpg,is this a t1 weighted image?,no
2,synpic38930.jpg,what type of imaging modality is used to acqui...,us - ultrasound
3,synpic52143.jpg,is this a noncontrast mri?,no
4,synpic20934.jpg,what type of image modality is this?,xr - plain film


In [10]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12792 entries, 0 to 12791
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   image_name  12792 non-null  object
 1   question    12792 non-null  object
 2   answer      12792 non-null  object
dtypes: object(3)
memory usage: 299.9+ KB


### Getting word embedding for question 

In [9]:
def tokenize(sentence):
    ''' Perform Tokenization '''
    return [i for i in re.split(r"([-.\"',:? !$#@~()*&\^%;/\\+<>\n=])", sentence) if i!='' and i!=' ' and i!='\n']

In [10]:
def get_text_embeding_matrix(text):
  embedding_matrix = np.zeros((max_length, 300))
  text = re.sub(r'\?$', '', text)
  tokenized_text = tokenize(text)
  for i, word in enumerate(tokenized_text):
    try:
      embedding_matrix[i] = model_w2v[word]
    except:
      pass
  return embedding_matrix

In [11]:
(get_text_embeding_matrix(data.loc[0]["question"])).shape

(11, 300)

### Getting embedding for images 

In [4]:
def read_images_as_array(image_name):
  image = cv2.imread(os.path.join(image_path, image_name) , cv2.IMREAD_COLOR) # Reading the image
  image = cv2.cvtColor(image , cv2.COLOR_BGR2RGB)
  image = cv2.resize(image , (448,448)) # Reshape.
  image = image / 225
  image = np.expand_dims(image, axis=0)
  return image

In [5]:
def get_image_embedding_matrix(image_name):
  image_array = read_images_as_array(image_name)
  image_feature_from_vgg = image_feature_extraction(image_array)
  print(image_feature_from_vgg.shape)
  image_reduced = dimen_red(image_feature_from_vgg) # TensorShape([1, 21, 300])
  image_reduced = image_reduced.numpy().reshape(-1, image_reduced.shape[-1])
  return image_reduced

In [98]:
image1 = get_image_embedding_matrix(data.loc[0]["image_name"])

(1, 14, 14, 512)


In [17]:
image1.shape

(11, 300)

### Apply pre-processing to whole data

In [18]:
data["question_vectors"] = data["question"].apply(get_text_embeding_matrix)

In [19]:
(data.loc[0]["question_vectors"]).shape

(11, 300)

In [20]:
image_data = pd.DataFrame(data["image_name"].unique(),columns=["image_name"])

In [21]:
len(image_data)

3200

In [22]:
image_data["image_vectors"] = image_data["image_name"].apply(get_image_embedding_matrix)

(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 512)
(1, 14, 14, 51

In [23]:
(image_data.loc[0]["image_vectors"]).shape

(11, 300)

In [24]:
(data.loc[0]["question_vectors"]).shape

(11, 300)

In [25]:
df_merged = pd.merge(image_data, data, on="image_name")

In [26]:
final_data = df_merged.filter(items=["qid","image_name","image_organ","question_vectors","image_vectors","answer"])

In [27]:
final_data.loc[0]["question_vectors"]

array([[ 0.13964844, -0.00616455,  0.21484375, ...,  0.05712891,
         0.09960938, -0.234375  ],
       [ 0.13476562,  0.0456543 ,  0.05541992, ...,  0.12060547,
         0.01275635,  0.02038574],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])