In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import os, json, pathlib, shutil, PIL
import itertools

In [2]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image

Dataset source: Brain Tumor MRI Dataset (Kaggle) — https://www.kaggle.com/datasets/masoudnickparvar/brain-tumor-mri-dataset

In [3]:
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.densenet import DenseNet121

In [4]:
# VGG16 convolutional base with ImageNet weights and without the dense
# classification head; it consumes 224x224 RGB inputs.
vgg_layer = VGG16(
  weights='imagenet',
  include_top=False,
  input_shape=(224, 224, 3),
)
# The string below is a disabled DenseNet121 alternative. Being the last
# expression of the cell, it is echoed as the cell output.
'''
densenet_layer = DenseNet121(weights= 'imagenet', include_top= False, input_shape=(200, 200, 3))
'''

"\ndensenet_layer = DenseNet121(weights= 'imagenet', include_top= False, input_shape=(200, 200, 3))\n"

In [5]:
# Dataset locations, kept both as plain strings and as pathlib.Path objects.
test_folder_dir, train_folder_dir = "brain_tumor/Testing/", "brain_tumor/Training/"
test_file_dir, train_file_dir = (
  pathlib.Path(folder) for folder in (test_folder_dir, train_folder_dir)
)
# Quick check that the training folder is reachable from the working directory.
print(train_file_dir.exists())

True


In [6]:
# Number of .jpg files found one level below each split folder
# (i.e. <split>/<class>/<image>.jpg).
test_img_cnt = sum(1 for _ in test_file_dir.glob("*/*.jpg"))
train_img_cnt = sum(1 for _ in train_file_dir.glob("*/*.jpg"))
print(test_img_cnt, train_img_cnt, sep="\n")

1311
5712


In [7]:
# Class names are the sub-directory names of the test folder.
# They are sorted explicitly because os.listdir() returns entries in arbitrary
# order, whereas Keras' flow_from_directory assigns class indices in
# alphanumeric order. Later cells build per-class data by iterating over
# class_names and join it positionally with generator output, so both
# orderings must agree.
class_names = sorted(
  name
  for name in os.listdir(test_folder_dir)
  if os.path.isdir(os.path.join(test_folder_dir, name))
)
print(class_names)

['glioma', 'meningioma', 'notumor', 'pituitary']


In [8]:
# Divide the training images into train / validation subsets, class by class.
# For every class an array of flags is built (0 = train, 1 = validation) with
# roughly 80% zeros, shuffled, and the per-class flag lists are concatenated
# into `merged_nums` (one flag per training image).
#
# A dedicated, seeded RNG makes the split reproducible across kernel restarts
# without touching NumPy's global random state.
split_rng = np.random.RandomState(42)

total_train_nums = []
# Iterate in sorted order so the concatenated flags follow the same class order
# as an unshuffled flow_from_directory generator (alphanumeric by class name).
for class_name in sorted(class_names):
  class_dir = pathlib.Path(train_folder_dir, class_name)
  cl_length = len(list(class_dir.glob("*.jpg")))
  # Despite their names these are image counts, not ratios.
  train_ratio = int(cl_length * 0.8)
  valid_ratio = cl_length - train_ratio

  nums = np.zeros(cl_length)
  nums[:valid_ratio] = 1
  split_rng.shuffle(nums)
  total_train_nums.append(list(nums))

merged_nums = list(itertools.chain.from_iterable(total_train_nums))

In [9]:
# Number of train/validation flags; there is one flag per counted training image.
len(merged_nums)

5712

In [10]:
# Image loader that scales pixel values by 1/255.
# NOTE(review): the ImageNet VGG16 weights are normally paired with
# keras.applications.vgg16.preprocess_input rather than 1/255 scaling —
# confirm that this preprocessing is intentional.
datagen = ImageDataGenerator(rescale=1./255)
# Batch size shared by the directory generators and by classifier training.
batch_size = 50

In [11]:
# Directory iterators over the test and training folders. shuffle=False keeps
# the files in a fixed order so that positions can be matched with file names.
test_generator = datagen.flow_from_directory(
  test_file_dir,
  target_size=(224, 224),
  batch_size=batch_size,
  class_mode='categorical',
  shuffle=False,
)
train_generator = datagen.flow_from_directory(
  train_file_dir,
  target_size=(224, 224),
  batch_size=batch_size,
  class_mode='categorical',
  shuffle=False,
)

# Plain-list copies of the paths / relative file names reported by each iterator.
test_filepaths = list(test_generator.filepaths)
test_filenames = list(test_generator.filenames)

train_filepaths = list(train_generator.filepaths)
train_filenames = list(train_generator.filenames)

Found 1311 images belonging to 4 classes.
Found 5712 images belonging to 4 classes.


In [44]:
def _split_class_and_file(rel_path):
  """Split a generator-relative path into (class directory, file name).

  Both backslash and forward-slash separators are accepted, so the same code
  works on Windows and on POSIX systems. Splitting on a backslash only would
  leave a POSIX path in one piece and fail when the file name is read.
  """
  parts = rel_path.replace("\\", "/").split("/")
  return parts[0], parts[-1]


# Class label (directory name) and bare file name for every training image.
train_path_parts = [_split_class_and_file(file) for file in train_filenames]
ground_truth_label = [label for label, _ in train_path_parts]
train_file_names = [name for _, name in train_path_parts]

# Same decomposition for the test images.
test_path_parts = [_split_class_and_file(file) for file in test_filenames]
test_ground_truth_label = [label for label, _ in test_path_parts]
test_file_names = [name for _, name in test_path_parts]

# Positional indices 0..N-1 for each split.
train_index_list = list(range(train_img_cnt))
test_index_list = list(range(test_img_cnt))

In [13]:
def extract_features(generator, data_num, class_num, feature_shape, pretrained_model):
  """Run batches from `generator` through `pretrained_model` and collect the results.

  Args:
    generator: iterable yielding `(inputs_batch, labels_batch)` pairs. It may be
      endless (Keras iterators loop forever); iteration stops once `data_num`
      samples have been collected.
    data_num: total number of samples to collect.
    class_num: width of each label row.
    feature_shape: shape of the returned feature array; its first dimension is
      expected to be `data_num`.
    pretrained_model: object exposing `predict(inputs_batch)`.

  Returns:
    `(features, labels)` NumPy arrays of shape `feature_shape` and
    `(data_num, class_num)`. Rows that were never filled (generator exhausted
    early) stay zero.
  """
  features = np.zeros(shape = feature_shape)
  labels = np.zeros(shape=(data_num, class_num))
  if data_num <= 0:
    return features, labels

  # Track the write position using the real size of each batch instead of a
  # global batch size, so the function works for any generator batch size and
  # for a shorter final batch.
  offset = 0
  for inputs_batch, labels_batch in generator:
    features_batch = pretrained_model.predict(inputs_batch)
    # Never write past data_num, even if the generator hands out more samples.
    take = min(len(features_batch), data_num - offset)
    features[offset : offset + take] = features_batch[:take]
    labels[offset : offset + take] = labels_batch[:take]
    offset += take
    if offset >= data_num:
      break

  return features, labels

In [14]:
# Output shape of the last layer of the VGG16 base, as a list (batch axis first).
vgg_final_layer = list(vgg_layer.layers[-1].output_shape)
print(f"final layer of VGG16 : {vgg_layer.layers[-1]} and its shape : {vgg_final_layer}")

# (name, output shape) of every layer whose class name contains 'Conv'.
vgg_conv_layers = [
  (layer.name, layer.output.shape)
  for layer in vgg_layer.layers
  if 'Conv' in layer.__class__.__name__
]

# Per-sample feature shape: the final output shape without its None entries.
vgg_conv_base_shape = [dim for dim in vgg_final_layer if dim is not None]
print("conv_base_shape : ", vgg_conv_base_shape)

# Full feature-array shapes for the two splits: (image count, *per-sample shape).
train_vgg_feat_shape = (train_img_cnt, *vgg_conv_base_shape)
print(train_vgg_feat_shape)

test_vgg_feat_shape = (test_img_cnt, *vgg_conv_base_shape)
print(test_vgg_feat_shape)

# Length of one flattened feature vector.
vgg_input_dimension = np.prod(vgg_conv_base_shape)
print(vgg_input_dimension)

final layer of VGG16 : <keras.layers.pooling.MaxPooling2D object at 0x00000215A3174448> and its shape : [None, 7, 7, 512]
conv_base_shape :  [7, 7, 512]
(5712, 7, 7, 512)
(1311, 7, 7, 512)
25088


In [15]:
# Pass the training images through the VGG16 base and collect the resulting
# feature maps together with the labels yielded by the generator.
train_vgg_features, train_vgg_labels = extract_features(train_generator, train_img_cnt, len(class_names), train_vgg_feat_shape, vgg_layer)

In [16]:
# Same feature extraction for the test images.
test_vgg_features, test_vgg_labels = extract_features(test_generator, test_img_cnt, len(class_names), test_vgg_feat_shape, vgg_layer)

In [17]:
from json import JSONEncoder
class NumpyArrayEncoder(JSONEncoder):
  """JSON encoder that understands NumPy arrays and NumPy scalar values.

  Arrays are emitted as (nested) lists. NumPy scalars such as np.int64,
  np.float32 or np.bool_ are converted to the matching built-in Python value.
  Any other unsupported object falls through to the base class, which raises
  TypeError.
  """
  def default(self, obj):
    if isinstance(obj, np.ndarray):
      return obj.tolist()
    # np.generic is the common base class of all NumPy scalar types.
    if isinstance(obj, np.generic):
      return obj.item()
    return super().default(obj)


In [18]:
# Integer class index of every training sample: position of the largest entry
# in its label row.
train_labels_int = [np.argmax(label_row) for label_row in train_vgg_labels]

# Disabled equivalent for the test labels; as the last expression of the cell
# the string is echoed as output.
'''test_labels_int = []
for idx in range(len(test_vgg_labels)):
  test_labels_int.append(np.argmax(test_vgg_labels[idx]))'''

'test_labels_int = []\nfor idx in range(len(test_vgg_labels)):\n  test_labels_int.append(np.argmax(test_vgg_labels[idx]))'

In [19]:
# Flatten each sample's feature map into a single vector:
# (image count, vgg_input_dimension).
train_vgg_features = train_vgg_features.reshape((train_img_cnt, vgg_input_dimension))
test_vgg_features = test_vgg_features.reshape((test_img_cnt, vgg_input_dimension))

In [20]:
def _round_positive_rows_inplace(matrix, decimals=4):
  """Round the strictly positive entries of a 2-D array in place.

  Entries that are zero, negative or NaN are left untouched, as in the
  original element-wise loop. Work is done one row at a time (each row is a
  view into `matrix`), so only a row-sized temporary is needed.

  This replaces a pure-Python loop over every element that formatted each
  value as a string. Results can differ from string formatting in the last
  unit for values lying exactly on a rounding boundary.
  """
  for row in matrix:
    positive = row > 0.0
    row[positive] = np.round(row[positive], decimals)


# Trim the extracted features to four decimal places.
_round_positive_rows_inplace(train_vgg_features)
_round_positive_rows_inplace(test_vgg_features)


In [21]:
# Bundle features and labels per split and write each bundle to its own JSON
# file; NumpyArrayEncoder converts the arrays to lists.
train_feas = {"features": train_vgg_features, "labels" : train_vgg_labels}
test_feas = {"features": test_vgg_features, "labels" : test_vgg_labels}

for json_path, payload in (
  ("vgg_extracted_brain_tumor_train.json", train_feas),
  ("vgg_extracted_brain_tumor_test.json", test_feas),
):
  with open(json_path, "w") as outfile:
    json.dump(payload, outfile, cls=NumpyArrayEncoder)

In [22]:
# Dense classifier trained on the flattened, pre-extracted VGG16 features:
# two 512-unit ReLU layers followed by a softmax over the classes.
vgg_model = keras.models.Sequential([
  keras.layers.Dense(512, activation='relu', input_dim = vgg_input_dimension),
  keras.layers.Dense(512, activation='relu'),
  keras.layers.Dense(len(class_names), activation = 'softmax'),
])
vgg_model.compile(
  optimizer=keras.optimizers.Adam(),
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

In [23]:
# Random permutation of the positions 0..train_img_cnt-1, used below to
# reorder the training rows.
# NOTE(review): no seed is set in the visible cells, so this order presumably
# changes on every kernel restart — confirm whether that is intended.
pmt_order = np.random.permutation(np.arange(train_img_cnt))

In [42]:
# One row per training image: position, file name, flattened feature vector,
# label row, integer class index and train(0)/validation(1) flag.
vgg_data_df = pd.DataFrame({
  'index': train_index_list,
  'file_names': train_file_names,
  'feature' : list(train_vgg_features),
  'label' : list(train_vgg_labels),
  'int_label' : train_labels_int,
  'assign' : merged_nums,
})


In [45]:
# One row per test image: file name, flattened feature vector and label row.
vgg_test_data_df = pd.DataFrame({
  'file_names': test_file_names,
  'feature' : list(test_vgg_features),
  'label' : list(test_vgg_labels),
})

In [26]:
# Display the training table for a visual check.
vgg_data_df

Unnamed: 0,index,file_names,feature,label,int_label,assign
0,0,Tr-glTr_0000.jpg,"[0.0831, 0.0, 0.0, 0.085, 0.1811, 0.0, 0.0, 0....","[1.0, 0.0, 0.0, 0.0]",0,0.0
1,1,Tr-glTr_0001.jpg,"[0.05, 0.0, 0.0, 0.0506, 0.2202, 0.0, 0.0, 0.0...","[1.0, 0.0, 0.0, 0.0]",0,0.0
2,2,Tr-glTr_0002.jpg,"[0.0387, 0.0, 0.0, 0.0575, 0.1709, 0.0, 0.0, 0...","[1.0, 0.0, 0.0, 0.0]",0,0.0
3,3,Tr-glTr_0003.jpg,"[0.14, 0.0, 0.0, 0.0653, 0.0042, 0.0, 0.0, 0.0...","[1.0, 0.0, 0.0, 0.0]",0,0.0
4,4,Tr-glTr_0004.jpg,"[0.2238, 0.0, 0.0, 0.0516, 0.0, 0.0, 0.0, 0.12...","[1.0, 0.0, 0.0, 0.0]",0,1.0
...,...,...,...,...,...,...
5707,5707,Tr-pi_1452.jpg,"[0.1222, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5258,...","[0.0, 0.0, 0.0, 1.0]",3,1.0
5708,5708,Tr-pi_1453.jpg,"[0.0884, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6972,...","[0.0, 0.0, 0.0, 1.0]",3,0.0
5709,5709,Tr-pi_1454.jpg,"[0.0974, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5389,...","[0.0, 0.0, 0.0, 1.0]",3,0.0
5710,5710,Tr-pi_1455.jpg,"[0.1804, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0975,...","[0.0, 0.0, 0.0, 1.0]",3,0.0


In [27]:
# Reorder the training rows according to the random permutation; the original
# row labels are kept, as the displayed index shows.
data_df_sf = vgg_data_df.iloc[pmt_order]

In [28]:
# First rows of the shuffled table.
data_df_sf.head()

Unnamed: 0,index,file_names,feature,label,int_label,assign
1303,1303,Tr-gl_1303.jpg,"[0.0385, 0.0, 0.0, 0.0287, 0.2284, 0.0, 0.0, 0...","[1.0, 0.0, 0.0, 0.0]",0,0.0
4804,4804,Tr-pi_0549.jpg,"[0.1716, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0....","[0.0, 0.0, 0.0, 1.0]",3,1.0
2487,2487,Tr-me_1166.jpg,"[0.0802, 0.0, 0.0, 0.0794, 0.1041, 0.0, 0.0, 0...","[0.0, 1.0, 0.0, 0.0]",1,0.0
2246,2246,Tr-me_0925.jpg,"[0.0502, 0.0, 0.0, 0.0563, 0.0915, 0.0, 0.0, 0...","[0.0, 1.0, 0.0, 0.0]",1,0.0
5703,5703,Tr-pi_1448.jpg,"[0.0, 0.0, 0.0, 0.0474, 0.0, 0.0, 0.0, 0.0, 0....","[0.0, 0.0, 0.0, 1.0]",3,0.0


In [29]:
# Split the shuffled table by its 'assign' flag: 0 -> training, 1 -> validation.
vgg_train_df = data_df_sf.loc[data_df_sf['assign'].eq(0)]
vgg_valid_df = data_df_sf.loc[data_df_sf['assign'].eq(1)]

In [32]:
def _column_to_matrix(frame, column, width):
  """Stack the per-row arrays stored in `column` into a (len(frame), width) array."""
  return np.reshape(list(frame[column]), (len(frame), width))


# Dense matrices for classifier training and validation.
vgg_train_features = _column_to_matrix(vgg_train_df, 'feature', vgg_input_dimension)
vgg_train_labels = _column_to_matrix(vgg_train_df, 'label', len(class_names))

vgg_valid_features = _column_to_matrix(vgg_valid_df, 'feature', vgg_input_dimension)
vgg_valid_labels = _column_to_matrix(vgg_valid_df, 'label', len(class_names))

In [33]:
# Train the dense classifier for 12 epochs, validating on the held-out rows
# after each epoch.
vgg_train_history = vgg_model.fit(
  vgg_train_features,
  vgg_train_labels,
  epochs = 12,
  batch_size = batch_size,
  validation_data = (vgg_valid_features, vgg_valid_labels),
)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [34]:
# Loss and accuracy of the trained classifier on the test features
# (the model was compiled with the 'accuracy' metric).
vgg_loss, vgg_acc = vgg_model.evaluate(test_vgg_features, test_vgg_labels)



In [36]:
# Softmax output of the classifier for every test sample.
vgg_test_prediction_score = vgg_model.predict(test_vgg_features)

In [37]:
# Predicted class index per test sample: position of the highest score along
# the last axis.
vgg_test_predicted_label = np.argmax(vgg_test_prediction_score, axis= -1)

In [47]:
# Display the test table for a visual check.
vgg_test_data_df

Unnamed: 0,file_names,feature,label
0,Te-glTr_0000.jpg,"[0.0767, 0.0, 0.0, 0.032, 0.0858, 0.0, 0.0, 0....","[1.0, 0.0, 0.0, 0.0]"
1,Te-glTr_0001.jpg,"[0.1021, 0.0, 0.0, 0.028, 0.0209, 0.0, 0.0, 0....","[1.0, 0.0, 0.0, 0.0]"
2,Te-glTr_0002.jpg,"[0.0859, 0.0, 0.0, 0.0506, 0.2151, 0.0, 0.0, 0...","[1.0, 0.0, 0.0, 0.0]"
3,Te-glTr_0003.jpg,"[0.1153, 0.0, 0.0, 0.0458, 0.0999, 0.0, 0.0, 0...","[1.0, 0.0, 0.0, 0.0]"
4,Te-glTr_0004.jpg,"[0.0729, 0.0, 0.0, 0.0156, 0.058, 0.0, 0.0, 0....","[1.0, 0.0, 0.0, 0.0]"
...,...,...,...
1306,Te-pi_0295.jpg,"[0.0489, 0.0, 0.0, 0.0487, 0.0, 0.0, 0.0, 0.0,...","[0.0, 0.0, 0.0, 1.0]"
1307,Te-pi_0296.jpg,"[0.0037, 0.0, 0.0, 0.0209, 0.0, 0.0, 0.0, 0.0,...","[0.0, 0.0, 0.0, 1.0]"
1308,Te-pi_0297.jpg,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0836, 0....","[0.0, 0.0, 0.0, 1.0]"
1309,Te-pi_0298.jpg,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 1.0]"


In [49]:
# Collect what gets exported: the test file names and the predicted class
# index for each of them (the raw scores are intentionally left out).
test_result = {
  'filename': list(vgg_test_data_df['file_names']),
  #'prediction_score': vgg_test_prediction_score,
  'predicted_label': vgg_test_predicted_label,
}

In [50]:
# Write the prediction summary as indented JSON; NumpyArrayEncoder turns the
# NumPy array of predicted labels into a plain list.
with open("vgg_result_brain_tumor.json", "w") as outfile:
  json.dump(test_result, outfile, indent=3, cls=NumpyArrayEncoder)