In [1]:
import warnings
warnings.filterwarnings('ignore')
import sys

import keras
import numpy as np
from keras_preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Activation, Conv2D, Dense, Dropout, Flatten, MaxPooling2D, BatchNormalization
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.utils import class_weight

Using TensorFlow backend.


In [6]:
datagen = ImageDataGenerator(rotation_range=30, 
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             horizontal_flip=True,
                             vertical_flip=True)

if 'win32' in sys.platform:
    data_directory='F:\\Segmentation_Data\\'
    full_training_directory = data_directory + 'Labelled_imgs\\stratified_data\\training\\'
    full_validation_directory = data_directory +'Labelled_imgs\\stratified_data\\validation\\'

if 'darwin' in sys.platform:
    data_directory='/Volumes/Samsung_T5/Segmentation_Data/'
    full_training_directory = data_directory + 'Labelled_imgs/stratified_data/training/'
    full_validation_directory = data_directory +'Labelled_imgs/stratified_data/validation/'


In [7]:
batch_size = 32
train_generator = datagen.flow_from_directory(full_training_directory, 
                                              target_size=(128, 128), 
                                              color_mode="rgb", 
                                              batch_size=batch_size)
validation_generator = datagen.flow_from_directory(full_validation_directory,
                                                   target_size=(128, 128), 
                                                   color_mode='rgb',
                                                   batch_size=batch_size, 
                                                   shuffle=False)
num_classes = len(train_generator.class_indices)
num_test_samples = 8984

class_weights = class_weight.compute_class_weight(
           'balanced',
            np.unique(train_generator.classes), 
            train_generator.classes)

Found 20966 images belonging to 8 classes.
Found 8984 images belonging to 8 classes.


In [4]:
model = Sequential()
model.add(Conv2D(20, (5,5), activation='relu', input_shape=(128,128,3)))
model.add(BatchNormalization(momentum=0.9))
model.add(MaxPooling2D())
model.add(Conv2D(50, (5,5), activation='relu'))
model.add(BatchNormalization(momentum=0.9))
model.add(MaxPooling2D())
model.add(Conv2D(100, (4,4), activation='relu'))
model.add(MaxPooling2D())
model.add(Conv2D(200, (4,4), activation='relu'))
model.add(MaxPooling2D())
model.add(Dropout(rate=0.25))
model.add(Flatten())
model.add(Dense(500))
model.add(Dropout(rate=0.25))
model.add(Dense(num_classes))
model.add(Activation('softmax'))


W0829 19:02:29.079627  6536 deprecation_wrapper.py:119] From C:\Users\acsch\Anaconda3\envs\Tensorflow-GPU\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0829 19:02:29.104561  6536 deprecation_wrapper.py:119] From C:\Users\acsch\Anaconda3\envs\Tensorflow-GPU\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0829 19:02:29.112539  6536 deprecation_wrapper.py:119] From C:\Users\acsch\Anaconda3\envs\Tensorflow-GPU\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0829 19:02:29.136981  6536 deprecation_wrapper.py:119] From C:\Users\acsch\Anaconda3\envs\Tensorflow-GPU\lib\site-packages\keras\backend\tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_de

In [5]:
sgd = keras.optimizers.SGD(lr=0.01,
                          momentum=0.9,
                          decay=0.0001)

In [24]:

model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(train_generator,
                    verbose=1, 
                    steps_per_epoch=1000, 
                    epochs=1, 
                    validation_data=validation_generator,
                   validation_steps=300)

Epoch 1/1


<keras.callbacks.History at 0x195771bb630>

In [25]:
Y_pred = model.predict_generator(validation_generator, num_test_samples // batch_size+1)
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes, y_pred))
print('Classification Report')
target_names = ['0_colonies','1_colony', 
                '2_colonies', '3_colonies',
                '4_colonies', '5_colonies', 
                '6_colonies', 'outlier']
print(classification_report(validation_generator.classes, y_pred, target_names=target_names))


Confusion Matrix
[[1762 1148  977   18  222   68   46]
 [ 766   52  162    8  201  121  286]
 [1101   36    1    0    0    0    1]
 [ 554   10    3    0    0    0    1]
 [ 282    4    0    0    0    0    2]
 [ 300    4    0    0    0    0    0]
 [ 382    3    2    0    0    0    1]]
Classification Report
              precision    recall  f1-score   support

    1_colony       0.34      0.42      0.38      4241
  2_colonies       0.04      0.03      0.04      1596
  3_colonies       0.00      0.00      0.00      1139
  4_colonies       0.00      0.00      0.00       568
  5_colonies       0.00      0.00      0.00       288
  6_colonies       0.00      0.00      0.00       304
     outlier       0.00      0.00      0.00       388

    accuracy                           0.21      8524
   macro avg       0.06      0.06      0.06      8524
weighted avg       0.18      0.21      0.19      8524



In [15]:
# https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045

Y_pred = model.predict_generator(validation_generator, num_test_samples // batch_size+1)
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes, y_pred))
print('Classification Report')
target_names = ['1_colony', '2_colonies', '3_colonies', '4_colonies', '5_colonies', '6_colonies', 'outlier']
print(classification_report(validation_generator.classes, y_pred, target_names=target_names))


Confusion Matrix
[[4148   77    4    1    0    1   10]
 [ 126 1328  128    6    0    3    5]
 [  34  174  805  116    5    5    0]
 [  15   12  125  331   48   37    0]
 [   5    2   11   98   67  105    0]
 [   7    1    5   22   23  212   34]
 [  40    7    1    0    0    0  340]]
Classification Report
              precision    recall  f1-score   support

    1_colony       0.95      0.98      0.96      4241
  2_colonies       0.83      0.83      0.83      1596
  3_colonies       0.75      0.71      0.73      1139
  4_colonies       0.58      0.58      0.58       568
  5_colonies       0.47      0.23      0.31       288
  6_colonies       0.58      0.70      0.64       304
     outlier       0.87      0.88      0.88       388

    accuracy                           0.85      8524
   macro avg       0.72      0.70      0.70      8524
weighted avg       0.84      0.85      0.84      8524



## Balance Classes

In [6]:
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
balanced_history = model.fit_generator(train_generator,
                                       verbose=1, 
                                       steps_per_epoch=1000, 
                                       epochs=100 , 
                                       validation_data=validation_generator,
                                      validation_steps=300,
                                      class_weight=class_weights)
model.save('balanced_classes.hdf5')

W0822 18:06:15.504884 13624 deprecation_wrapper.py:119] From C:\Users\acsch\Anaconda3\envs\Tensorflow-GPU\lib\site-packages\keras\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0822 18:06:15.572340 13624 deprecation.py:323] From C:\Users\acsch\Anaconda3\envs\Tensorflow-GPU\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150


In [7]:
model = keras.models.load_model('balanced_classes.hdf5')
Y_pred = model.predict_generator(validation_generator, num_test_samples // batch_size+1)
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes, y_pred))
print('Classification Report')
target_names = ['1_colony', '2_colonies', '3_colonies', '4_colonies', '5_colonies', '6_colonies', 'outlier']
print(classification_report(validation_generator.classes, y_pred, target_names=target_names))


Confusion Matrix
[[4187   46    2    0    0    0    6]
 [ 141 1348   99    5    0    2    1]
 [  40  194  840   59    4    1    1]
 [  14   15  165  322   36   16    0]
 [   5    2   24  114   82   61    0]
 [   9    1    4   32   47  210    1]
 [  25    9    1    0    0    0  353]]
Classification Report
              precision    recall  f1-score   support

    1_colony       0.95      0.99      0.97      4241
  2_colonies       0.83      0.84      0.84      1596
  3_colonies       0.74      0.74      0.74      1139
  4_colonies       0.61      0.57      0.59       568
  5_colonies       0.49      0.28      0.36       288
  6_colonies       0.72      0.69      0.71       304
     outlier       0.98      0.91      0.94       388

    accuracy                           0.86      8524
   macro avg       0.76      0.72      0.73      8524
weighted avg       0.85      0.86      0.86      8524



In [17]:
model = keras.models.load_model('balanced_classes.hdf5')
Y_pred = model.predict_generator(validation_generator, num_test_samples // batch_size+1)
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes, y_pred))
print('Classification Report')
target_names = ['1_colony', '2_colonies', '3_colonies', '4_colonies', '5_colonies', '6_colonies', 'outlier']
print(classification_report(validation_generator.classes, y_pred, target_names=target_names))


Confusion Matrix
[[4243   36    3    0    0    0    3]
 [ 129 1443   59    1    0    0    0]
 [  23  163  845   55    1    2    0]
 [  12   10  164  316   34   14    0]
 [   7    2   18  116   88   55    0]
 [   5    2    4   25   56  209    0]
 [  23    2    0    0    0    1  352]]
Classification Report
              precision    recall  f1-score   support

    1_colony       0.96      0.99      0.97      4285
  2_colonies       0.87      0.88      0.88      1632
  3_colonies       0.77      0.78      0.77      1089
  4_colonies       0.62      0.57      0.59       550
  5_colonies       0.49      0.31      0.38       286
  6_colonies       0.74      0.69      0.72       301
     outlier       0.99      0.93      0.96       378

    accuracy                           0.88      8521
   macro avg       0.78      0.74      0.75      8521
weighted avg       0.87      0.88      0.88      8521



## Imbalanced Classes

In [22]:
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(train_generator,
                    verbose=1, 
                    steps_per_epoch=1000, 
                    epochs=1, 
                    validation_data=validation_generator,
                   validation_steps=300,
                   class_weight=class_weights)

Epoch 1/1


<keras.callbacks.History at 0x193ba8cf0b8>

In [23]:
Y_pred = model.predict_generator(validation_generator, num_test_samples // batch_size+1)
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes, y_pred))
print('Classification Report')
target_names = ['1_colony', '2_colonies', '3_colonies', '4_colonies', '5_colonies', '6_colonies', 'outlier']
print(classification_report(validation_generator.classes, y_pred, target_names=target_names))


Confusion Matrix
[[2740 1107  328   27    0   14   25]
 [ 221  446  550   94   13  257   15]
 [ 760   41   21    3    2   45  267]
 [ 552   15    0    0    0    0    1]
 [ 277   10    0    0    0    0    1]
 [ 290   13    0    0    0    0    1]
 [ 378   10    0    0    0    0    0]]
Classification Report
              precision    recall  f1-score   support

    1_colony       0.53      0.65      0.58      4241
  2_colonies       0.27      0.28      0.28      1596
  3_colonies       0.02      0.02      0.02      1139
  4_colonies       0.00      0.00      0.00       568
  5_colonies       0.00      0.00      0.00       288
  6_colonies       0.00      0.00      0.00       304
     outlier       0.00      0.00      0.00       388

    accuracy                           0.38      8524
   macro avg       0.12      0.13      0.13      8524
weighted avg       0.32      0.38      0.34      8524



## Grayscale

In [18]:
batch_size = 32
train_generator = datagen.flow_from_directory(full_training_directory, 
                                              target_size=(128, 128), 
                                              color_mode="grayscale", 
                                              batch_size=batch_size)
validation_generator = datagen.flow_from_directory(full_validation_directory,
                                                   target_size=(128, 128), 
                                                   color_mode='grayscale',
                                                   batch_size=batch_size, 
                                                   shuffle=False)
num_classes = len(train_generator.class_indices)
num_test_samples = 8521

class_weights = class_weight.compute_class_weight(
           'balanced',
            np.unique(train_generator.classes), 
            train_generator.classes)

Found 19886 images belonging to 7 classes.
Found 8521 images belonging to 7 classes.


In [7]:
model = Sequential()
model.add(Conv2D(20, (5,5), activation='relu', input_shape=(128,128,1)))
model.add(BatchNormalization(momentum=0.9))
model.add(MaxPooling2D())
model.add(Conv2D(50, (5,5), activation='relu'))
model.add(BatchNormalization(momentum=0.9))
model.add(MaxPooling2D())
model.add(Conv2D(100, (4,4), activation='relu'))
model.add(MaxPooling2D())
model.add(Conv2D(200, (4,4), activation='relu'))
model.add(MaxPooling2D())
model.add(Dropout(rate=0.25))
model.add(Flatten())
model.add(Dense(500))
model.add(Dropout(rate=0.25))
model.add(Dense(num_classes))
model.add(Activation('softmax'))


In [8]:
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
balanced_history = model.fit_generator(train_generator,
                                       verbose=1, 
                                       steps_per_epoch=1000, 
                                       epochs=100 , 
                                       validation_data=validation_generator,
                                      validation_steps=300,
                                      class_weight=class_weights)
model.save('balanced_gray_classes.hdf5')

W0829 19:07:58.671031  6536 deprecation_wrapper.py:119] From C:\Users\acsch\Anaconda3\envs\Tensorflow-GPU\lib\site-packages\keras\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0829 19:07:58.739823  6536 deprecation.py:323] From C:\Users\acsch\Anaconda3\envs\Tensorflow-GPU\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [19]:
model = keras.models.load_model('balanced_gray_classes.hdf5')
Y_pred = model.predict_generator(validation_generator, num_test_samples // batch_size+1)
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes, y_pred))
print('Classification Report')
target_names = ['1_colony', '2_colonies', '3_colonies', '4_colonies', '5_colonies', '6_colonies', 'outlier']
print(classification_report(validation_generator.classes, y_pred, target_names=target_names))


Confusion Matrix
[[4205   61    9    3    0    1    6]
 [ 160 1323  140    5    0    2    2]
 [  25  158  782  115    3    5    1]
 [  11   12  142  301   46   38    0]
 [   8    4   16  110   54   94    0]
 [   5    2    4   26   29  233    2]
 [  16    4    0    0    0    0  358]]
Classification Report
              precision    recall  f1-score   support

    1_colony       0.95      0.98      0.97      4285
  2_colonies       0.85      0.81      0.83      1632
  3_colonies       0.72      0.72      0.72      1089
  4_colonies       0.54      0.55      0.54       550
  5_colonies       0.41      0.19      0.26       286
  6_colonies       0.62      0.77      0.69       301
     outlier       0.97      0.95      0.96       378

    accuracy                           0.85      8521
   macro avg       0.72      0.71      0.71      8521
weighted avg       0.84      0.85      0.85      8521



In [10]:
balanced_history

<keras.callbacks.History at 0x1bff5b65d68>

In [14]:
full_validation_directory

'F:\\Segmentation_Data\\Labelled_imgs\\stratified_data\\validation\\'