# Setup
## Import Necessary Modules

In [14]:
import glob
import json
import os
import shutil
import sys
import warnings
warnings.filterwarnings('ignore')

import cv2
import numpy as np
import keras
from keras_preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Activation, Conv2D, Dense, Dropout, Flatten, MaxPooling2D, BatchNormalization
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.utils import class_weight
import tensorflow as tf

sys.path.append( '../src')
import counting_functions as mcf
import marsican_functions as msn
import augment_functions as maf

## Set up correct paths/folders

In [6]:
# Locations of the sample data, relative to the notebook's working directory.
# These used to be assigned only when running on macOS, which left every later
# cell failing with a NameError on any other platform. The relative paths are
# not platform-specific, so they are now always defined.
segment_json = "../sample_data/*.json"
data_directory = 'sample_data/'
full_data_directory = '../' + data_directory + 'labelled_imgs/'

In [15]:
# Create one sub-folder per class label inside the labelled-image directory.
#
# The folders are addressed by joined paths instead of changing the working
# directory, so a failure here can no longer leave the kernel stranded in the
# data folder (which would break every relative path used further down).
# exist_ok=True makes the cell safe to re-run.
msn_wd = os.getcwd()  # kept so the name stays defined for any cell that reads it
folders_to_make = ['0_colonies',
                   '1_colony',
                   '2_colonies',
                   '3_colonies',
                   '4_colonies',
                   '5_colonies',
                   '6_colonies',
                   'outlier']
for folder_name in folders_to_make:
    os.makedirs(os.path.join(full_data_directory, folder_name), exist_ok=True)

In [20]:
# Map each annotation label to the class sub-folder (with trailing slash)
# that images carrying that label are copied into.
data_tags = {'1 Colony': '1_colony/'}
data_tags.update({
    '{} Colonies'.format(count): '{}_colonies/'.format(count)
    for count in range(2, 7)
})
data_tags['Confluential'] = 'confluential/'

## Getting Blood agar Segment data into the correct folders

### Moving Images to correct folders

In [21]:
# Remember the notebook's working directory and locate the segment metadata.
msn_wd = os.getcwd()
# glob returns matches in arbitrary, OS-dependent order. Sorting makes the
# file that is later picked as element [0] the same on every run.
segment_metadata_loc = sorted(glob.glob(segment_json))

In [24]:
# Load the first metadata file found into a dict.
# Fail with an explicit message when the glob matched nothing, instead of the
# bare IndexError that indexing an empty list would give.
if not segment_metadata_loc:
    raise FileNotFoundError(
        'No segment metadata file matched {!r}'.format(segment_json))

with open(segment_metadata_loc[0], 'r') as f:
    segment_metadata_json = f.read()
# Parsing happens after the file handle has been released.
segment_pic_metadata = json.loads(segment_metadata_json)

In [28]:
# Copy every labelled segment image into the sub-folder of its class.
#
# Segments are still skipped on a best-effort basis, but each skip is now
# recorded with its reason and summarised at the end. The previous bare
# `except: continue` hid every failure -- including copies into a target
# folder that does not exist -- and also swallowed KeyboardInterrupt.
segment_source_dir = '../sample_data/blood_agar_imgs/'
segment_dest_root = '../sample_data/labelled_imgs/'
skipped_segments = []  # (metadata key, reason) for every segment not copied

for segment_key, segment_info in segment_pic_metadata.items():
    segment_label = msn.deep_find_dict(segment_info, 'text')
    segment_rel_path = msn.deep_find_dict(segment_info, 'Segment Relative Path')

    # Labels without an entry in data_tags have no class folder; leave them out.
    folder_transferred_to = (data_tags.get(segment_label)
                             if isinstance(segment_label, str) else None)
    if folder_transferred_to is None:
        skipped_segments.append(
            (segment_key, 'label {!r} has no target folder'.format(segment_label)))
        continue

    # The image file name is taken from the second component of the relative path.
    path_parts = segment_rel_path.split('/') if isinstance(segment_rel_path, str) else []
    if len(path_parts) < 2:
        skipped_segments.append(
            (segment_key, 'unusable relative path {!r}'.format(segment_rel_path)))
        continue
    segment_name = path_parts[1]

    try:
        shutil.copyfile(segment_source_dir + segment_name,
                        segment_dest_root + folder_transferred_to + segment_name)
    except OSError as copy_error:
        # Missing source image or missing destination folder: report, keep going.
        skipped_segments.append((segment_key, str(copy_error)))

if skipped_segments:
    print('Skipped {} of {} segments; first reasons:'.format(
        len(skipped_segments), len(segment_pic_metadata)))
    for skipped_key, skip_reason in skipped_segments[:10]:
        print('  {}: {}'.format(skipped_key, skip_reason))

### Checking our images are there

In [7]:
# Random augmentation applied to images as they are read from disk.
augmentation_settings = dict(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
datagen = ImageDataGenerator(**augmentation_settings)

# Sanity check: the cell output reports how many images and classes were found.
datagen.flow_from_directory(
    full_data_directory,
    target_size=(128, 128),
    color_mode="grayscale",
)

Found 349 images belonging to 8 classes.


<keras_preprocessing.image.directory_iterator.DirectoryIterator at 0x128392320>

Keras has found our data

## Augmenting clear agar and 0-colony data

In [2]:
# Collect the clear-agar / low-count source images, one list per class.
imgs_0_cols, imgs_1_col, imgs_2_cols = (
    glob.glob(jpg_pattern)
    for jpg_pattern in (
        '../sample_data/clear_agar_and_zero_colony_imgs/0_colonies/*.jpg',
        '../sample_data/clear_agar_and_zero_colony_imgs/1_colony/*.jpg',
        '../sample_data/clear_agar_and_zero_colony_imgs/2_colonies/*.jpg',
    )
)

In [3]:
# Augment each source image into the labelled folder of its class.
# The three classes are processed in the same order as before (0, 1, then 2).
augmentation_jobs = (
    (imgs_0_cols, "../sample_data/labelled_imgs/0_colonies"),
    (imgs_1_col, "../sample_data/labelled_imgs/1_colony"),
    (imgs_2_cols, "../sample_data/labelled_imgs/2_colonies"),
)

for source_images, target_folder in augmentation_jobs:
    for image_path in source_images:
        # A fresh `k` list is built per call, exactly as the original did.
        maf.augment_img_both(image_path, target_folder,
                             k=[0, 1, 2, 3], periods=3, slashes=4)

In [8]:
# Re-scan the labelled-image directory; the cell output reports the counts found.
datagen.flow_from_directory(
    full_data_directory,
    color_mode="grayscale",
    target_size=(128, 128),
)

Found 349 images belonging to 8 classes.


<keras_preprocessing.image.directory_iterator.DirectoryIterator at 0x12838b668>

Once again, our images are found.

## Splitting into train-validation sets.

### Make the necessary directories

In [9]:
# Build the stratified_data/{training,validation}/<class> folder tree.
#
# The working directory is changed while the folders are created, so the
# restore is placed in `finally`: if any step fails, the kernel is still put
# back where it started and later relative paths keep working.
# exist_ok=True lets the directory creation here be repeated safely; whether
# msn.make_folders tolerates existing folders is not visible from this notebook.
#
# NOTE(review): stratified_data lives inside labelled_imgs, so a later
# flow_from_directory() on labelled_imgs will presumably see it as one more
# class folder -- confirm the check cells are not re-run after this one.
msn_wd = os.getcwd()
folders_to_make = ['0_colonies',
                   '1_colony',
                   '2_colonies',
                   '3_colonies',
                   '4_colonies',
                   '5_colonies',
                   '6_colonies',
                   'outlier']
try:
    os.chdir('../sample_data/labelled_imgs')
    os.makedirs('stratified_data', exist_ok=True)
    os.chdir("stratified_data")

    os.makedirs('training', exist_ok=True)
    os.makedirs('validation', exist_ok=True)

    msn.make_folders(os.getcwd() + '/training', folders_to_make)
    msn.make_folders(os.getcwd() + '/validation', folders_to_make)
finally:
    os.chdir(msn_wd)

### Pool and split the data into the train-validation sets

In [10]:
# Split the images of every class folder into training and validation sets
# (70 % training), writing into the stratified_data tree.
#
# The helper is called from inside labelled_imgs because its output folders
# are given as relative paths. The original working directory is restored in
# `finally` so that a failure inside the helper cannot leave the kernel in
# the data folder.
msn_wd = os.getcwd()
folders_to_make = ['0_colonies',
                   '1_colony',
                   '2_colonies',
                   '3_colonies',
                   '4_colonies',
                   '5_colonies',
                   '6_colonies',
                   'outlier']
try:
    os.chdir('../sample_data/labelled_imgs')
    msn.train_validation_imgsplit(os.getcwd(),
                                  folder_names=folders_to_make,
                                  train_size=0.7,
                                  training_dir='stratified_data/training/',
                                  val_dir='stratified_data/validation/')
finally:
    os.chdir(msn_wd)

# Training the Model

Keras makes it very easy to set up a sequential neural network. The neural network will be set up according to [this paper](https://www.semanticscholar.org/paper/Bacterial-colony-counting-with-Convolutional-Neural-Ferrari-Lombardi/646cc8ef9bc7b41fb6297c45a092b5628d5da5d0), except using batch normalization instead of local response normalization.

In [5]:
# Random augmentation (rotation, shifts, flips) applied as images are loaded.
training_augmentation = {
    'rotation_range': 30,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'horizontal_flip': True,
    'vertical_flip': True,
}
datagen = ImageDataGenerator(**training_augmentation)

# Folders holding the stratified training / validation images.
# These used to be assigned only on macOS, so the generator cell below failed
# with a NameError everywhere else. The relative paths work on any platform.
data_directory = '../sample_data/labelled_imgs/'
full_training_directory = data_directory + 'stratified_data/training/'
full_validation_directory = data_directory + 'stratified_data/validation/'

In [30]:
# Build the training / validation iterators and the per-class loss weights.
batch_size = 32
train_generator = datagen.flow_from_directory(full_training_directory,
                                              target_size=(128, 128),
                                              color_mode="grayscale",
                                              batch_size=batch_size)

# Validation images are read through a generator WITHOUT augmentation.
# Reusing `datagen` applied random rotations/shifts/flips to the validation
# set, making the reported metrics noisy and different on every run.
validation_datagen = ImageDataGenerator()
validation_generator = validation_datagen.flow_from_directory(full_validation_directory,
                                                              target_size=(128, 128),
                                                              color_mode='grayscale',
                                                              batch_size=batch_size,
                                                              shuffle=False)
num_classes = len(train_generator.class_indices)

# Take the sample counts from the iterators instead of hard-coding them, so
# they stay correct when the data on disk changes.
NUM_TRAIN_SAMPLES = train_generator.samples
NUM_TEST_SAMPLES = validation_generator.samples

# 'balanced' weights, computed from the training labels. Arguments are passed
# by keyword because newer scikit-learn releases reject the positional form.
present_classes = np.unique(train_generator.classes)
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=train_generator.classes)

# Keras expects `class_weight` as a {class index: weight} dict; build it from
# the class indices that actually occur so each weight lines up with its class.
class_weights = {int(class_index): float(weight)
                 for class_index, weight in zip(present_classes, balanced_weights)}

Found 246 images belonging to 8 classes.
Found 106 images belonging to 8 classes.


In [18]:
# Convolutional classifier for 128x128 single-channel images:
# four conv/pool stages (batch-normalised after the first two convolutions),
# then a 500-unit dense layer and a softmax over the class folders.
model = Sequential([
    Conv2D(20, (5, 5), activation='relu', input_shape=(128, 128, 1)),
    BatchNormalization(momentum=0.9),
    MaxPooling2D(),

    Conv2D(50, (5, 5), activation='relu'),
    BatchNormalization(momentum=0.9),
    MaxPooling2D(),

    Conv2D(100, (4, 4), activation='relu'),
    MaxPooling2D(),

    Conv2D(200, (4, 4), activation='relu'),
    MaxPooling2D(),
    Dropout(rate=0.25),

    Flatten(),
    Dense(500, activation='relu'),
    Dropout(rate=0.25),

    Dense(num_classes),
    Activation('softmax'),
])

In [19]:
# Stochastic gradient descent with momentum and learning-rate decay.
sgd = keras.optimizers.SGD(lr=0.01, momentum=0.9, decay=0.0001)

In [26]:
# Compile the model and train it for 5 epochs with class-balanced loss weights.
model.compile(optimizer=sgd, loss='categorical_crossentropy',
              metrics=['accuracy'])

# Keras expects `class_weight` as a {class index: weight} dict. Accept either
# a ready-made dict or the array returned by compute_class_weight, pairing
# each weight with the class index it was computed for.
if isinstance(class_weights, dict):
    class_weight_by_index = class_weights
else:
    class_weight_by_index = {
        int(class_index): float(weight)
        for class_index, weight in zip(np.unique(train_generator.classes),
                                       class_weights)
    }

# len(generator) is the number of batches needed to cover the data once.
# This replaces `n // batch_size + 1`, which runs one batch too many whenever
# n is an exact multiple of batch_size, and removes the reference to the
# undefined name `num_test_samples` that broke the cell on a fresh kernel.
balanced_history = model.fit_generator(train_generator,
                                       verbose=1,
                                       steps_per_epoch=len(train_generator),
                                       epochs=5,
                                       validation_data=validation_generator,
                                       validation_steps=len(validation_generator),
                                       class_weight=class_weight_by_index)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


The code used below to compute the confusion matrix and classification report was adapted from a [post](https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045) by Ryan Akilos.

In [35]:
# Predict the validation set and summarise per-class performance.
validation_generator.reset()

# len(generator) covers every validation image exactly once.
# `n // batch_size + 1` asks for one batch too many when n is an exact
# multiple of batch_size, which would yield more predictions than labels.
Y_pred = model.predict_generator(validation_generator, len(validation_generator))
y_pred = np.argmax(Y_pred, axis=1)

# Take the class names from the generator, ordered by class index, so the
# report rows always match the label encoding used for the predictions.
class_indices = validation_generator.class_indices
target_names = sorted(class_indices, key=class_indices.get)
# Passing the label list explicitly keeps the matrix/report shape fixed even
# when a class has no validation images and is never predicted.
label_ids = [class_indices[class_name] for class_name in target_names]

print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes, y_pred, labels=label_ids))
print('Classification Report')
print(classification_report(validation_generator.classes, y_pred,
                            labels=label_ids, target_names=target_names))

Confusion Matrix
[[31  1  4  0  0  0  0  0]
 [ 3 29  0  0  0  0  0  0]
 [ 2 12 13  0  0  0  0  0]
 [ 0  0  2  0  0  0  0  0]
 [ 0  0  2  0  0  0  0  0]
 [ 0  0  2  0  0  0  0  0]
 [ 1  0  1  0  0  0  0  0]
 [ 1  0  2  0  0  0  0  0]]
Classification Report
              precision    recall  f1-score   support

  0_colonies       0.82      0.86      0.84        36
    1_colony       0.69      0.91      0.78        32
  2_colonies       0.50      0.48      0.49        27
  3_colonies       0.00      0.00      0.00         2
  4_colonies       0.00      0.00      0.00         2
  5_colonies       0.00      0.00      0.00         2
  6_colonies       0.00      0.00      0.00         2
     outlier       0.00      0.00      0.00         3

    accuracy                           0.69       106
   macro avg       0.25      0.28      0.26       106
weighted avg       0.61      0.69      0.65       106

