In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
import pandas as pd
import sklearn as sk
import tensorflow_addons as tfa


from data_preprocessing import *
from model import *
from train import *
from utils import *

In [2]:
# Step 1 - pre-process the raw images (run from a shell):
# python preprocess.py

# Step 2 - build the TFDS dataset from the processed images (run from a shell):
# python -m tensorflow_datasets.scripts.download_and_prepare --datasets=mri_dataset --module_import=datasets.mri_dataset --manual_dir=data/processed --data_dir=data/

In [3]:
# def hist_norm(img):
#     gray_img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
#     norm_gray_img = cv.equalizeHist(gray_img)
#     norm_img = cv.cvtColor(norm_gray_img, cv.COLOR_GRAY2RGB)
#     return norm_img


# def clahe(img, clipLimit=4, tileGridSize=(40, 40)):
#     clahe = cv.createCLAHE(clipLimit, tileGridSize)
#     gray_img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
#     cl_img = clahe.apply(gray_img)
#     cl_img = cv.cvtColor(cl_img, cv.COLOR_GRAY2RGB)
#     return cl_img


# data_folder = 'data'
# raw_folder = 'raw'
# processed_folder = 'processed'
# dataset = 'mri_dataset'
# train_folder = 'train'
# test_folder = 'test'
# train_label = 'train_label.csv'

# img_path = os.path.join(data_folder, raw_folder, train_folder, '851.png')
# img = cv.imread(img_path)
# n_img = hist_norm(img)
# clahe_img1 = clahe(img, clipLimit=8, tileGridSize=(32, 32))
# clahe_img2 = clahe(img, clipLimit=8, tileGridSize=(8, 8))

# # n_clahe_img = clahe(n_img, clipLimit=clip, tileGridSize=tile_size)
# combined_imgs = np.hstack((img, clahe_img1, clahe_img2))
# plt.figure(figsize=(20, 20))
# plt.imshow(combined_imgs)

## GPU and Mixed Precision Setup

In [4]:
# Configure the GPUs and the mixed-precision policy before any dataset or model is
# created. Both helpers arrive through the star-imports in the first cell.
# NOTE(review): the recorded output warns that GPUs with compute capability below 7.0
# (the two P100s) may run slowly under the mixed_float16 policy.
gpu_setup()
mixed_precision_setup()

Some of your GPUs may run slowly with dtype policy mixed_float16 because they do not all have compute capability of at least 7.0. Your GPUs:
  Tesla V100S-PCIE-32GB, compute capability 7.0
  Tesla V100-PCIE-32GB, compute capability 7.0
  Tesla P100-PCIE-16GB, compute capability 6.0 (x2)
See https://developer.nvidia.com/cuda-gpus for a list of GPUs and their compute capabilities.


## Load Dataset

In [5]:
import tensorflow_datasets as tfds
# NOTE(review): MriDataset is not referenced by name in this cell; the import is
# presumably kept so the custom builder is registered with TFDS before tfds.load()
# looks it up by name - confirm before removing.
from datasets.mri_dataset import MriDataset
from tensorflow.data import Dataset
from tensorflow.keras.callbacks import TensorBoard


# Directory layout and dataset identifiers.
data_folder = 'data'
raw_folder = 'raw'
processed_folder = 'processed'
dataset = 'mri_dataset'
train_folder = 'train'
test_folder = 'test'
train_label = 'train_label.csv'

SEED = 0

# Cross-validation layout, in percent of the TFDS 'train' split: a validation window
# of VALID_PCT percent starts every FOLD_STEP_PCT percent. With 20/10 this yields five
# folds whose validation windows are disjoint but together cover only half of 'train'.
FOLD_STEP_PCT = 20
VALID_PCT = 10

experiment_name = 'MobileNetV2'
# Model configuration handed to preprocess() and to the training helpers.
model_params = {
    'image_shape': (512, 512, 3),
    'num_classes': 3,
}
# Hyperparameters for the first (feature-extraction) training stage; the batch sizes
# are also used when the datasets are preprocessed below.
base_hyperparams = {
    'train_batch_size': 64,
    'valid_batch_size': 64,
    'test_batch_size': 64,
    'num_epochs': 1,
    'learning_rate': 1e-4,
    'dropout': 0.2
}
# Hyperparameters for the fine-tuning stage.
# NOTE(review): 'fine_tune_at' is presumably the base-model layer index from which
# layers are unfrozen (the training log reports 155 base layers) - confirm in the
# training helper.
fine_hyperparams = {
    'num_epochs': 1,
    'learning_rate': 1e-5,
    'fine_tune_at': 100,
}

# Seed TensorFlow, and also NumPy in case any helper draws from NumPy's global RNG.
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Arguments shared by every tfds.load() call: read the already-prepared dataset from
# data_folder without downloading, and keep the read order deterministic.
tfds_common = dict(
    name=dataset,
    download=False,
    shuffle_files=False,
    data_dir=data_folder,
)
fold_starts = range(0, 100, FOLD_STEP_PCT)

# Training part of each fold: everything in 'train' except that fold's validation window.
train_folds_raw = tfds.load(
    split=[f'train[:{k}%]+train[{k + VALID_PCT}%:]' for k in fold_starts],
    as_supervised=True,
    **tfds_common,
)
# Validation part of each fold: the VALID_PCT-wide window starting at k percent.
valid_folds_raw = tfds.load(
    split=[f'train[{k}%:{k + VALID_PCT}%]' for k in fold_starts],
    as_supervised=True,
    **tfds_common,
)
# The test split is loaded unsupervised (feature dicts) together with its DatasetInfo,
# both of which are needed later to build the submission file.
test_ds_raw, test_info_raw = tfds.load(
    split='test',
    as_supervised=False,
    with_info=True,
    **tfds_common,
)

# Run every raw dataset through the project's preprocess() helper with the batch size
# and ds_type matching its role.
train_folds = [
    preprocess(fold, model_params, batch_size=base_hyperparams['train_batch_size'], ds_type='train')
    for fold in train_folds_raw
]
valid_folds = [
    preprocess(fold, model_params, batch_size=base_hyperparams['valid_batch_size'], ds_type='valid')
    for fold in valid_folds_raw
]
test_ds = preprocess(test_ds_raw, model_params, batch_size=base_hyperparams['test_batch_size'], ds_type='test')

In [6]:
# Label table read from <data_folder>/<processed_folder>/<train_label>.
label_csv_path = os.path.join(data_folder, processed_folder, train_label)
train_valid_df = pd.read_csv(label_csv_path)

# The single-model run trains and validates on the first cross-validation fold.
train_ds, valid_ds = train_folds[0], valid_folds[0]

# Report the size of each preprocessed dataset (presumably counted in batches,
# since preprocess() was given a batch_size - confirm).
print(f'Number of train batches: {train_ds.cardinality()}')
print(f'Number of valid batches: {valid_ds.cardinality()}')
print(f'Number of test batches: {test_ds.cardinality()}')

Number of train batches: 17
Number of valid batches: 2
Number of test batches: 5


## Visualise Dataset

In [7]:
# # disable image resize and rescale in preprocess function ONLY for visualisation
# # Train Data
# plt.figure(figsize=(10, 10))
# for images, labels in train_ds.take(9):
#     for i in range(9):
#       ax = plt.subplot(3, 3, i + 1)
#       plt.imshow(images[i].numpy().astype("uint8"))
#       plt.title(labels[i].numpy())
#       plt.axis("off")

In [8]:
# # Test Data
# plt.figure(figsize=(10, 10))
# for i, ds in enumerate(test_ds_raw.take(9)):
#     ax = plt.subplot(3, 3, i + 1)
#     plt.imshow(ds['image'].numpy().astype("uint8"))
#     plt.title('ID: {}'.format(ds['id'].numpy()))
#     plt.axis("off")

In [9]:
# TODO: Fix visualisation
# data_augmentation = create_augmentation_layer()

# for image, _ in train_ds.take(1):
#   plt.figure(figsize=(10, 10))
#   first_image = image[0]
#   for i in range(9):
#     ax = plt.subplot(3, 3, i + 1)
#     augmented_image = data_augmentation(tf.expand_dims(first_image, 0))
#     plt.imshow(augmented_image[0] / 255)
#     plt.axis('off')

## Train and Validate

### Feature Extraction + Fine Tuning

In [10]:
# Reset TensorFlow's global seed so this run does not depend on random ops
# executed by earlier cells.
tf.random.set_seed(SEED)

# Single-model run on the selected fold (train_ds / valid_ds); the helper receives
# both the base and the fine-tuning hyperparameter sets.
model = feature_extract_and_fine_tune(
    experiment_name,
    train_ds,
    valid_ds,
    model_params,
    base_hyperparams,
    fine_hyperparams,
)

Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Number of layers in the base model:  155
Epoch 2/2


### K-Fold Cross Validation

In [11]:
# Reset TensorFlow's global seed so cross-validation starts from the same random
# state regardless of which cells ran before it.
tf.random.set_seed(SEED)

# Run the same experiment on every train/validation fold pair; the result is bound
# to `models` (the single-run `model` from the previous section is left untouched).
models = cross_validate(
    experiment_name,
    train_folds,
    valid_folds,
    model_params,
    base_hyperparams,
    fine_hyperparams,
)
#     print('Saving model\n')
#     filename = os.path.join('models', experiment_name, '.h5')
#     model.save(filename)

Loss: 0.9352759718894958 | Train Accuracy: 0.5658397078514099 | Validation Loss: 0.772611677646637 | Validation Accuracy: 0.681034505367279

# -------------------- MobileNetV2: 2-fold -------------------- #
Number of layers in the base model:  155
Epoch 2/2
MobileNetV2 | Train Loss: 0.8662073612213135 | Train Accuracy: 0.6345419883728027 | Validation Loss: 0.75861656665802 | Validation Accuracy: 0.6724137663841248

# -------------------- MobileNetV2: 3-fold -------------------- #
Number of layers in the base model:  155
Epoch 2/2
MobileNetV2 | Train Loss: 0.9137934446334839 | Train Accuracy: 0.6030534505844116 | Validation Loss: 0.7702316045761108 | Validation Accuracy: 0.7586206793785095

# -------------------- MobileNetV2: 4-fold -------------------- #
Number of layers in the base model:  155
Epoch 2/2
MobileNetV2 | Train Loss: 0.9814920425415039 | Train Accuracy: 0.5167143940925598 | Validation Loss: 0.8220709562301636 | Validation Accuracy: 0.6581196784973145

# -------------------

In [12]:
# tf.random.set_seed(SEED)
# hyperparams = {
#     'initial_epochs': 150,
#     'learning_rate': 1e-4,
#     'label_smoothing': 0.1,
# }
# experiment_name = f'Ensemble02'


# ensemble_model = create_ensemble_model()
# train_ds = ensemble_input(train_folds[3], ds_type='train')
# valid_ds = ensemble_input(valid_folds[3], ds_type='valid')
# ensemble_model = train_validate(ensemble_model, train_ds, valid_ds, hyperparams, experiment_name)


In [13]:
# test_ds = ensemble_input(test_ds, ds_type='train')
# predictions = ensemble_model.predict(test_ds)
# predicted_indices = tf.argmax(predictions, 1)
# predicted_labels = predicted_indices.numpy()
# img_ids = tfds.as_dataframe(test_ds_raw, test_info_raw)
# save_results('Ensemble02_submission.csv', img_ids, predicted_labels)
# print('done')

## Predict

In [14]:
# TODO: analyse predicted results
# image_batch, label_batch = valid_ds.as_numpy_iterator().next()
# predictions = model.predict_on_batch(image_batch)
# predicted_indices = tf.argmax(predictions, 1)
# predicted_labels = predicted_indices.numpy()


# plt.figure(figsize=(10, 10))
# for i in range(9):
#   ax = plt.subplot(3, 3, i + 1)
#   plt.imshow(image_batch[i].astype("uint8"))
#   plt.title(f'pred: {predicted_labels[i]} true: {label_batch[i]}')
#   plt.axis("off")

## Evaluate

In [15]:
# Predict labels for the test set.
# `model` is the single model produced by feature_extract_and_fine_tune() above,
# not one of the cross-validated `models`.
predicted_labels = evaluate(model, test_ds)
# Display the predictions; the recorded output is an array of class indices in {0, 1, 2}.
predicted_labels

array([2, 1, 1, 1, 2, 1, 1, 1, 0, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 2, 1, 1, 1, 2, 0, 1, 0, 2, 0, 1, 2, 1, 1, 2, 2,
       0, 1, 2, 2, 2, 0, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 0, 2, 1, 0, 2, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 0, 0, 1, 1, 2, 0, 0, 1, 1,
       2, 1, 0, 2, 2, 0, 1, 0, 2, 0, 2, 1, 0, 1, 2, 1, 0, 1, 0, 0, 2, 1,
       1, 1, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 0, 2, 1, 1, 1, 2, 1,
       1, 1, 2, 1, 1, 1, 2, 0, 2, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2,
       1, 0, 2, 0, 1, 2, 0, 1, 2, 1, 0, 1, 2, 1, 1, 0, 1, 2, 0, 2, 1, 1,
       1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 2, 0, 1, 1, 1,
       1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 0, 0, 0, 1, 1, 2, 1, 1, 1, 2,
       2, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 2, 2, 1, 2, 1,
       1, 1, 0, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 1, 0, 1, 1, 1, 2, 0, 1,
       0, 1, 0, 1, 1, 2, 1, 1, 0, 1, 2, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 2,
       1, 2, 1, 1, 1, 1])

In [16]:
# Submission file name: '<experiment_name>_submission.csv'.
result_save_path = f'{experiment_name}_submission.csv'

# Materialise the raw (unbatched, feature-dict) test split as a DataFrame so the
# image ids can be paired with the predictions.
img_ids = tfds.as_dataframe(test_ds_raw, test_info_raw)

# NOTE(review): the commented-out ensemble cell calls save_results(filename, ids, labels)
# with the file name first - confirm the argument order against the definition of
# save_results.
save_results(img_ids, predicted_labels, result_save_path)

# main.py

In [17]:
# enhance images
# create dataset
# load data
# train
# evaluate