In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import glob
import cv2
import pickle

from keras.models import Model
import os
from keras.applications.vgg16 import VGG16
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

In [None]:
#Define a function to perform additional preprocessing after datagen.
#For example, scale images, convert masks to categorical, etc.
def preprocess_data(img, mask, num_class):
    """Scale an image batch and one-hot encode the matching mask batch.

    Parameters
    ----------
    img : array-like
        Image batch. Every value is divided by 255
        (presumably 8-bit pixel data -- TODO confirm against the caller).
    mask : numpy.ndarray
        Mask batch whose shape unpacks as ``(n, h, w)``.
    num_class : int
        Number of classes handed to ``to_categorical``.

    Returns
    -------
    tuple
        ``(img, mask)``: the scaled images and the one-hot encoded masks.

    Notes
    -----
    A fresh ``LabelEncoder`` is fitted on every call, so the integer assigned
    to each raw mask value depends on which values occur in ``mask``.
    NOTE(review): if a batch can miss a class, the encoding may differ between
    batches -- confirm this is acceptable for the caller.
    """
    #Scale images
    img = img / 255. #This can be done in ImageDataGenerator but showing it outside as an example

    #Convert mask to one-hot
    n, h, w = mask.shape
    labelencoder = LabelEncoder()
    #LabelEncoder expects a 1-D array; passing a (-1, 1) column vector
    #makes scikit-learn emit a DataConversionWarning on every call.
    encoded = labelencoder.fit_transform(mask.reshape(-1))
    mask = to_categorical(encoded.reshape(n, h, w), num_class)

    return img, mask



In [None]:
#Capture training images and their masks as arrays
def _read_tifs(directory_pattern, read_flag):
    """Read every ``*.tif`` file below the matching directories.

    Files are read in sorted path order and returned as a list of arrays.
    Raises ``IOError`` when OpenCV cannot decode a file.
    """
    loaded = []
    for directory_path in glob.glob(directory_pattern):
        #glob yields paths in arbitrary order, so sort them: images and masks
        #are listed by two separate calls and must line up index by index.
        #NOTE(review): assumes an image and its mask share a file name (or at
        #least sort into the same position) -- confirm against the data set.
        for tif_path in sorted(glob.glob(os.path.join(directory_path, "*.tif"))):
            pixels = cv2.imread(tif_path, read_flag)
            if pixels is None:
                #cv2.imread reports failure by returning None, not by raising
                raise IOError("Could not read image file: " + tif_path)
            loaded.append(pixels)
    return loaded


#Colour images: OpenCV loads them as BGR, so swap to RGB.
#(COLOR_BGR2RGB performs the same channel swap as COLOR_RGB2BGR; it just
#names the direction correctly.)
train_images = np.array([
    cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    for img in _read_tifs("../data/small_batch/train_images/", cv2.IMREAD_COLOR)
])

#Masks/labels: read as single-channel (flag 0 = grayscale)
train_masks = np.array(_read_tifs("../data/small_batch/train_masks/", 0))

#Every image needs exactly one mask, otherwise pixels and labels cannot be paired
if len(train_images) != len(train_masks):
    raise ValueError(
        "Found %d training images but %d training masks"
        % (len(train_images), len(train_masks))
    )

In [None]:
#Training inputs are the loaded images; the masks get a trailing axis
#inserted at position 3 so they become 4-D.
X_train = train_images
y_train = np.expand_dims(train_masks, axis=3)

In [None]:
#Show the raw mask values; a bare expression in the middle of a cell is
#silently discarded, so print it explicitly.
print(np.unique(y_train))

#Re-encode the raw mask values as consecutive integers starting at 0
labelencoder = LabelEncoder()
n, h, w, c = y_train.shape
#LabelEncoder expects a 1-D array; a (-1, 1) column vector makes
#scikit-learn emit a DataConversionWarning.
y_train = labelencoder.fit_transform(y_train.reshape(-1))
y_train = y_train.reshape(n, h, w, c)

#One-hot encoded copy of the labels.
#NOTE(review): the class count is hard-coded -- confirm it matches the data.
num_classes = 4
mask = to_categorical(y_train, num_classes)

In [None]:
#Load the VGG16 model without its classifier / fully connected layers.
#The ImageNet weights are loaded so the layers can serve as feature generators.
VGG_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(512, 512, 3),
)

In [None]:
#Freeze every loaded layer: the pre-trained weights are used as they are
#and must not be updated.
for vgg_layer in VGG_model.layers:
    vgg_layer.trainable = False

VGG_model.summary()  #Trainable parameters will be 0

In [None]:
#The image dimension changes after the first 2 convolutional layers.
#To keep the features easy to compare with Y (labels), build a new model
#that stops at the second convolutional layer and use it as the extractor.
feature_layer = VGG_model.get_layer('block1_conv2')
new_model = Model(inputs=VGG_model.input, outputs=feature_layer.output)
new_model.summary()

In [None]:
#Run the feature extractor over the training images
features = new_model.predict(x=X_train)

In [None]:
#Plot the first square*square feature maps of the first training image
square = 4

#Use explicit Axes objects instead of pyplot's implicit "current axes", and
#avoid `_` as a loop variable: assigning it in a notebook makes IPython stop
#updating its `_` last-output shortcut.
fig, axes = plt.subplots(square, square, figsize=(12, 12))
for ix, ax in enumerate(axes.flat, start=1):
    ax.imshow(features[0, :, :, ix - 1], cmap='gray')
    ax.axis('off')
plt.show()

In [None]:
#Call the features X from here on, flattened to 2-D: the last axis of
#'features' becomes the columns and all other axes are merged into rows.
X = features.reshape(-1, features.shape[3])

In [None]:
#Display the shape of the flattened 2-D feature matrix
X.shape

In [None]:
#Flatten the labels to 1-D so they line up with the rows of X
Y = y_train.ravel()
print(Y.shape)

#Put X and Y into one dataframe so rows can be dropped by label value.
#In our labels Y values 0 = unlabeled pixels.
#NOTE(review): y_train was label-encoded earlier, so 0 is the smallest raw
#mask value -- confirm that really is the "unlabeled" value.
dataset = pd.DataFrame(X)
dataset['Label'] = Y

label_column = dataset['Label']
print(label_column.unique())
print(label_column.value_counts())

In [None]:
##Leave out pixels whose label is 0
##e.g. Sometimes unlabeled pixels may be given a value 0.
dataset = dataset.loc[dataset['Label'] != 0]

#Split the remaining rows into feature and label arrays for the classifier
X_for_training = dataset.drop(columns='Label').to_numpy()
Y_for_training = dataset['Label'].to_numpy()

In [None]:
#Display the shape of the feature array left after dropping label-0 rows
X_for_training.shape

In [None]:

#XGBOOST
import xgboost as xgb
model = xgb.XGBClassifier()

# Train the model on training data
#NOTE(review): rows with label 0 were dropped earlier, so the remaining labels
#presumably start at 1. Recent XGBoost releases expect class labels to be
#consecutive integers starting at 0 -- confirm against the installed version.
model.fit(X_for_training, Y_for_training)

#Save model for future use
filename = 'model_XG.sav'
#Use a context manager so the file is flushed and closed even if pickling fails
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)
