# Task B: Data Preprocessing for Transfer Learning with VGG16

## 1. Apply one-hot encoding to the label files

In [1]:
# Import necessary libraries
import pickle
import numpy as np
import pandas as pd

from tensorflow.keras.utils import to_categorical

In [2]:
# Read the preprocessed label arrays back from their pickle files.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
def _read_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Labels of the image dataset.
y = _read_pickle('DataAfterProcess/label_AfterProcess.pickle')

# Labels of the test dataset.
y_test = _read_pickle('DataAfterProcess/test_label_AfterProcess.pickle')

# Check result.
print(y.shape, y_test.shape)

(3000,) (200,)


In [3]:
# Implement one-hot encoding to labels.
# Both label sets share one class count, so the two matrices always get the
# same number of columns even if one split lacks the highest class index
# (to_categorical otherwise infers the count from each array's own maximum).
# The ndim guard keeps this cell idempotent: re-running it would otherwise
# one-hot encode the already encoded matrices a second time.
# Assumes integer class indices starting at 0, as to_categorical requires.
if np.ndim(y) == 1 and np.ndim(y_test) == 1:
    num_classes = int(max(np.max(y), np.max(y_test))) + 1
    y = to_categorical(y, num_classes=num_classes)
    y_test = to_categorical(y_test, num_classes=num_classes)

# Check result.
print(y.shape, y_test.shape)

(3000, 4) (200, 4)


## 2. Preprocess MRI images for transfer learning

In [4]:
# Import necessary libraries
import cv2
from glob import glob

In [5]:
def _read_images(pattern):
    """Read every image file matching `pattern`, in sorted path order.

    glob() returns paths in arbitrary, file-system-dependent order, so the
    paths are sorted to give a reproducible sample order.
    NOTE(review): this assumes the label pickles were built in sorted-filename
    order -- confirm against the notebook that created them.

    Args:
        pattern: glob pattern selecting the image files.

    Returns:
        List of image arrays as returned by cv2.imread (its default flag
        loads every file as a 3-channel image, even gray-scale ones).

    Raises:
        FileNotFoundError: if no file matches `pattern`.
        OSError: if a matched file cannot be decoded (cv2.imread returns
            None instead of raising).
    """
    paths = sorted(glob(pattern))
    if not paths:
        raise FileNotFoundError(f'No image files match {pattern!r}')
    images = []
    for path in paths:
        image = cv2.imread(path)
        if image is None:
            raise OSError(f'Could not read image file {path!r}')
        images.append(image)
    return images


# Read the MRI images of the dataset (3000 images in the recorded run).
images_train = _read_images('dataset/image/*.jpg')

# Read the MRI images of the test set (200 images in the recorded run).
images_test = _read_images('test/image/*.jpg')

# Resize images into 256*256 pixel
resize = 256
images_train_resized = [cv2.resize(image, (resize, resize))
                        for image in images_train]
images_test_resized = [cv2.resize(image, (resize, resize))
                       for image in images_test]

# Normalize images: scale pixel values by 1/255.
X = np.array(images_train_resized)/255
X_test = np.array(images_test_resized)/255

# Check result.
print(X.shape, X_test.shape)

(3000, 256, 256, 3) (200, 256, 256, 3)


## 1. Formulate base model of transfer learning: VGG16

In [6]:
# Import necessary libraries
from tensorflow.keras.applications import vgg16

# The VGG16 input shape is derived from the image array X: axes 1 and 2 are
# used as height and width, the last axis as the channel count.
height = X.shape[1]
width = X.shape[2]
channel = X.shape[-1]

# Build VGG16 with include_top=False, loading its weights from a local file
# instead of a named pretrained weight set.
path_VGG16_weights = 'Model/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
vgg16_options = dict(
    weights=path_VGG16_weights,
    include_top=False,
    input_shape=(height, width, channel),
)
base_model = vgg16.VGG16(**vgg16_options)
base_model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 256, 256, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 256, 256, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 256, 256, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 128, 128, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 128, 128, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 128, 128, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 64, 64, 128)       0     

## 2. Compute output of convolutional layer in VGG16

In [7]:
# Feed the dataset images and the test images through base_model; its outputs
# become the new dataset and test set used for transfer learning.
X_VGG = base_model.predict(X)
X_test_VGG = base_model.predict(X_test)

# Check result.
for features in (X_VGG, X_test_VGG):
    print(features.shape)

(3000, 8, 8, 512)
(200, 8, 8, 512)


## 3. Save new dataset and test set used for transfer learning

In [8]:
# Flatten each sample's VGG16 output into one feature vector per row.
# The sample count is read from the array itself rather than hard-coded, and
# -1 lets NumPy derive the flattened length from all remaining axes, so the
# reshape stays correct for any number of images and for non-square outputs.
X_VGG_256 = X_VGG.reshape(X_VGG.shape[0], -1)
X_test_VGG_256 = X_test_VGG.reshape(X_test_VGG.shape[0], -1)


# Save the new dataset, the new test set and the one-hot encoded label arrays
# with pickle, one file per array (written in the order listed).
pickle_outputs = {
    'DataAfterProcess/X_VGG_256.pickle': X_VGG_256,
    'DataAfterProcess/X_test_VGG_256.pickle': X_test_VGG_256,
    'DataAfterProcess/y.pickle': y,
    'DataAfterProcess/y_test.pickle': y_test,
}
for output_path, array in pickle_outputs.items():
    with open(output_path, 'wb') as handle:
        pickle.dump(array, handle)