In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.callbacks import LearningRateScheduler
from sklearn.model_selection import train_test_split
# from skimage import io
import pandas as pd
import os
import matplotlib.pyplot as plt
import pathlib
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
import tensorflow_io as tfio

import PIL
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [2]:
PROJECT_PATH = os.getcwd()

os.chdir('data/Healthcare AI Datasets/Brain_MRI')
os.listdir()

['data.csv',
 'data_mask.csv',
 'resnet-50-MRI.json',
 'ResUNet-MRI.json',
 'TCGA_CS_4941_19960909',
 'TCGA_CS_4942_19970222',
 'TCGA_CS_4943_20000902',
 'TCGA_CS_4944_20010208',
 'TCGA_CS_5393_19990606',
 'TCGA_CS_5395_19981004',
 'TCGA_CS_5396_20010302',
 'TCGA_CS_5397_20010315',
 'TCGA_CS_6186_20000601',
 'TCGA_CS_6188_20010812',
 'TCGA_CS_6290_20000917',
 'TCGA_CS_6665_20010817',
 'TCGA_CS_6666_20011109',
 'TCGA_CS_6667_20011105',
 'TCGA_CS_6668_20011025',
 'TCGA_CS_6669_20020102',
 'TCGA_DU_5849_19950405',
 'TCGA_DU_5851_19950428',
 'TCGA_DU_5852_19950709',
 'TCGA_DU_5853_19950823',
 'TCGA_DU_5854_19951104',
 'TCGA_DU_5855_19951217',
 'TCGA_DU_5871_19941206',
 'TCGA_DU_5872_19950223',
 'TCGA_DU_5874_19950510',
 'TCGA_DU_6399_19830416',
 'TCGA_DU_6400_19830518',
 'TCGA_DU_6401_19831001',
 'TCGA_DU_6404_19850629',
 'TCGA_DU_6405_19851005',
 'TCGA_DU_6407_19860514',
 'TCGA_DU_6408_19860521',
 'TCGA_DU_7008_19830723',
 'TCGA_DU_7010_19860307',
 'TCGA_DU_7013_19860523',
 'TCGA_DU_7014_

In [3]:
def plot_loss(history):
    plt.plot(history.history['loss'], label='loss')
    if history.history.get('val_loss'):
        plt.plot(history.history['val_loss'], label='val_loss', alpha=0.5)
    plt.yscale('log')
    plt.xlabel('Epoch')
    plt.ylabel('Error')
    plt.legend()
    plt.grid(True)
    plt.show()

In [4]:
data = pd.read_csv('data_mask.csv')

In [5]:
data

Unnamed: 0,patient_id,image_path,mask_path,mask
0,TCGA_CS_5395_19981004,TCGA_CS_5395_19981004/TCGA_CS_5395_19981004_1.tif,TCGA_CS_5395_19981004/TCGA_CS_5395_19981004_1_...,0
1,TCGA_CS_5395_19981004,TCGA_CS_4944_20010208/TCGA_CS_4944_20010208_1.tif,TCGA_CS_4944_20010208/TCGA_CS_4944_20010208_1_...,0
2,TCGA_CS_5395_19981004,TCGA_CS_4941_19960909/TCGA_CS_4941_19960909_1.tif,TCGA_CS_4941_19960909/TCGA_CS_4941_19960909_1_...,0
3,TCGA_CS_5395_19981004,TCGA_CS_4943_20000902/TCGA_CS_4943_20000902_1.tif,TCGA_CS_4943_20000902/TCGA_CS_4943_20000902_1_...,0
4,TCGA_CS_5395_19981004,TCGA_CS_5396_20010302/TCGA_CS_5396_20010302_1.tif,TCGA_CS_5396_20010302/TCGA_CS_5396_20010302_1_...,0
...,...,...,...,...
3924,TCGA_DU_6401_19831001,TCGA_HT_A61B_19991127/TCGA_HT_A61B_19991127_86...,TCGA_HT_A61B_19991127/TCGA_HT_A61B_19991127_86...,0
3925,TCGA_DU_6401_19831001,TCGA_HT_A61A_20000127/TCGA_HT_A61A_20000127_87...,TCGA_HT_A61A_20000127/TCGA_HT_A61A_20000127_87...,0
3926,TCGA_DU_6401_19831001,TCGA_HT_A61B_19991127/TCGA_HT_A61B_19991127_87...,TCGA_HT_A61B_19991127/TCGA_HT_A61B_19991127_87...,0
3927,TCGA_DU_6401_19831001,TCGA_HT_A61A_20000127/TCGA_HT_A61A_20000127_88...,TCGA_HT_A61A_20000127/TCGA_HT_A61A_20000127_88...,0


In [6]:
neg, pos = np.bincount(data['mask'])
total = neg + pos
print('Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
    total, pos, 100 * pos / total))

Examples:
    Total: 3929
    Positive: 1373 (34.95% of total)



In [7]:
brain_df_train = data.drop(columns = ['patient_id'])
brain_df_train.shape

(3929, 3)

In [8]:
brain_df_train['mask'] = brain_df_train['mask'].apply(lambda x: str(x))
brain_df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3929 entries, 0 to 3928
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   image_path  3929 non-null   object
 1   mask_path   3929 non-null   object
 2   mask        3929 non-null   object
dtypes: object(3)
memory usage: 92.2+ KB


In [9]:
train, test = train_test_split(brain_df_train, test_size = 0.15)

In [10]:
from keras_preprocessing.image import ImageDataGenerator

In [11]:
datagen = ImageDataGenerator(rescale=1./255., validation_split = 0.15)
train_generator=datagen.flow_from_dataframe(
dataframe=train,
directory= './',
x_col='image_path',
y_col='mask',
subset="training",
batch_size=16,
shuffle=True,
class_mode="categorical",
target_size=(256,256))

valid_generator=datagen.flow_from_dataframe(
dataframe=train,
directory= './',
x_col='image_path',
y_col='mask',
subset="validation",
batch_size=16,
shuffle=True,
class_mode="categorical",
target_size=(256,256))

Found 2839 validated image filenames belonging to 2 classes.
Found 500 validated image filenames belonging to 2 classes.


In [12]:
test_datagen=ImageDataGenerator(rescale=1./255.)
test_generator=test_datagen.flow_from_dataframe(
dataframe=test,
directory= './',
x_col='image_path',
y_col='mask',
batch_size=16,
shuffle=False,
class_mode='categorical',
target_size=(256,256))

Found 590 validated image filenames belonging to 2 classes.


In [13]:
from keras import Input
from keras.applications import ResNet50

basemodel = ResNet50(weights = 'imagenet', include_top = False, input_tensor = Input(shape=(256,256, 3)))
basemodel.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 256, 256, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 262, 262, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 128, 128, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                           

In [14]:
for layer in basemodel.layers:
    layers.trainable = False

In [15]:
from keras import Model
from keras.layers import AveragePooling2D, Flatten, Dense, Dropout

headmodel = basemodel.output
headmodel = AveragePooling2D(pool_size = (4,4))(headmodel)
headmodel = Flatten(name= 'flatten')(headmodel)
headmodel = Dense(256, activation = "relu")(headmodel)
headmodel = Dropout(0.3)(headmodel)
headmodel = Dense(256, activation = "relu")(headmodel)
headmodel = Dropout(0.3)(headmodel)
#headmodel = Dense(256, activation = "relu")(headmodel)
#headmodel = Dropout(0.3)(headmodel)
headmodel = Dense(2, activation = 'softmax')(headmodel)
model = Model(inputs = basemodel.input, outputs = headmodel)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 256, 256, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 262, 262, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 128, 128, 64  9472        ['conv1_pad[0][0]']              
                                )                                                                 
                                                                                              

In [16]:
model.compile(loss = 'categorical_crossentropy', optimizer='adam', metrics= ["accuracy"])

In [17]:
from keras.callbacks import EarlyStopping

earlystopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20)

In [18]:
from keras.callbacks import ModelCheckpoint

checkpointer = ModelCheckpoint(filepath="classifier-resnet-weights.hdf5", verbose=1,
save_best_only=True)
history = model.fit(train_generator, steps_per_epoch= train_generator.n // 16, epochs = 1,
validation_data= valid_generator, validation_steps= valid_generator.n // 16,
callbacks=[checkpointer, earlystopping])

Epoch 1: val_loss improved from inf to 2.54168, saving model to classifier-resnet-weights.hdf5


In [19]:
model_json = model.to_json()
with open("classifier-resnet-model.json","w") as json_file:
    json_file.write(model_json)

In [20]:
#pred = model.evaluate(test)

def model300():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=(256, 256, 3)))
    model.add(tf.keras.layers.Dense(1000, activation=tf.nn.relu))
    model.add(tf.keras.layers.Dropout(.2))
    model.add(tf.keras.layers.Dense(250, activation=tf.nn.relu))
    model.add(tf.keras.layers.Dropout(.2))
    model.add(tf.keras.layers.Dense(100, activation=tf.nn.relu))
    return model


def nn():
    reduce_lr_cb = tf.keras.callbacks.ReduceLROnPlateau(patience=5)
    early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=10)

    model = model300()

    model.add(tf.keras.layers.Dense(2, activation=tf.nn.softmax))
    model.compile(optimizer=tf.optimizers.Adam(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    history = model.fit(train_ds, validation_data=val_ds, epochs=200, validation_split=0.2,
                          callbacks=[reduce_lr_cb, early_stopping_cb])

    val_loss, val_acc = model.evaluate(train_ds)
    plot_loss(history)
    print('Validation loss: ', val_loss)
    print('Accuracy: ', round(val_acc, 2))


nn()

In [21]:
# tf.math.confusion_matrix(val_ds, pred, dtype=tf.uint8)

In [22]:
for name, value in zip(model.metrics_names, pred):
  print(name, ': ', value)
print()

NameError: name 'pred' is not defined

In [None]:
# X_train.from sklearn.metrics import ConfusionMatrixDisplay
# pred=model.predict(val_ds).round(0).ravel().astype('int')

In [None]:
# from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
#
# model = ResNet50(input_shape=(256,256,3),include_top=False)
#
# model.compile(optimizer=tf.optimizers.Adam(),
#               loss='sparse_categorical_crossentropy',
#               metrics=['accuracy'])
#
# history = model.fit(X_train, y_train, epochs=5, validation_split=0.2)
#
# val_loss, val_acc = model.evaluate(X_test, y_test)