# images

In [None]:
import glob
from collections import Counter

In [None]:
# Labelled lice crops live four directory levels below lice_only; the
# declassified folder is flat. The parent directory name is later used as the
# class label.
images = (glob.glob("/root/data/lice-data/lice_only/*/*/*/*/*.jpg")
          + glob.glob('/root/data/lice-data/declassified/*'))

In [None]:
print("total number of images: {}".format(len(images)))

In [None]:
# Histogram of images per class, where the class is the parent directory name.
count = Counter(path.split('/')[-2] for path in images)
print(count)

# generator

In [None]:
import keras

In [None]:
import glob
import json
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import skimage.io as io
from albumentations import PadIfNeeded, HorizontalFlip, VerticalFlip, Compose, RandomRotate90
from keras.callbacks import Callback


# Class name -> integer index.
# NOTE(review): not referenced anywhere in the visible notebook; the generator
# hard-codes its one-hot labels and uses the key 'declassified' (not "GARBAGE")
# for the third class -- confirm whether this mapping is still needed.
CLASS_MAP = {"ADULT_FEMALE": 0,
             "FIXED_MOVING": 1,
             "GARBAGE": 2}

In [None]:
def get_data_dict(data):
    """Group image paths by class so each class can be sampled uniformly.

    The class of a path is the name of the directory that directly contains
    the file. Paths in "UNSURE" are dropped, and "MOVING" / "FIXED" are merged
    into a single "FIXED_MOVING" class.

    Args:
        data: iterable of '/'-separated image paths.

    Returns:
        dict mapping class name -> list of paths, in input order.
    """
    merged_classes = ("MOVING", "FIXED")
    grouped = {}
    for path in data:
        label = path.split('/')[-2]
        if label == "UNSURE":
            continue
        if label in merged_classes:
            label = "FIXED_MOVING"
        grouped.setdefault(label, []).append(path)
    return grouped

# load model

In [None]:
import os

In [None]:
!nvidia-smi

In [None]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
import random
from collections import Counter
from tqdm import tqdm

In [None]:
random.seed(258)

In [None]:
batch_size = 32
input_shape = [64, 64, 3]
# glob returns paths in arbitrary (filesystem-dependent) order, so the seeded
# shuffle only yields a reproducible train/validation split if the list is
# put into a deterministic order first.
images.sort()
random.shuffle(images)
# first 80% of the shuffled list is used for training, the rest for validation
cutoff = int(0.8*len(images))

In [None]:
def load_lice(img_path):
    """Load one crop, fit it to the network input size and augment it.

    The image is read with OpenCV, resized so that its longer side matches the
    target size while keeping the aspect ratio, padded with a constant border
    up to ``input_shape[0] x input_shape[1]`` and then randomly flipped
    horizontally and/or rotated by a multiple of 90 degrees.

    The longer-side logic and the 90-degree rotation assume a square target
    (``input_shape[0] == input_shape[1]``), which is how it is configured here.

    Args:
        img_path: path of the image file to load.

    Returns:
        The augmented image array returned by the albumentations pipeline.

    Raises:
        IOError: if OpenCV cannot read ``img_path``.
    """
    target_h, target_w = input_shape[0], input_shape[1]

    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising
        raise IOError("could not read image: {}".format(img_path))

    height, width = image.shape[:2]
    # cv2.resize takes dsize as (width, height); max(1, ...) keeps the short
    # side from collapsing to 0 pixels for very elongated crops.
    if height > width:
        ratio = target_h / float(height)
        new_size = (max(1, int(width * ratio)), target_h)
    else:
        ratio = target_w / float(width)
        new_size = (target_w, max(1, int(height * ratio)))
    resized = cv2.resize(image, new_size, interpolation=cv2.INTER_LINEAR)

    # border_mode=0 is cv2.BORDER_CONSTANT
    aug = Compose([PadIfNeeded(p=1.0, min_height=target_h, min_width=target_w, border_mode=0),
                   HorizontalFlip(p=0.5),
                   RandomRotate90(p=0.5)])
    return aug(image=resized)["image"]

In [None]:
# def load_random_patch(ppath):
#     '''crop to mimic the adult female lice and add some augmentation'''
#     patch = cv2.imread(ppath) #64x64 patch   
#     aug = Compose([PadIfNeeded(p=1.0, min_height=input_shape[0], min_width=input_shape[1], border_mode=0),
#                    HorizontalFlip(p=0.5),
#                    RandomRotate90(p=0.5)])
#     augmented = aug(image=patch)
#     x = augmented["image"]
#     return x 

In [None]:
class DataGenerator(keras.utils.Sequence):
    """Keras Sequence that yields class-balanced batches of lice crops.

    Samples are interleaved ADULT_FEMALE, FIXED_MOVING, declassified,
    ADULT_FEMALE, ... along one global sample index, so each class
    contributes the same number of images per epoch and every
    (class, image) pair is used at most once per epoch.

    Args:
        classdict: dict mapping class name -> list of image paths; must
            contain the keys 'ADULT_FEMALE', 'FIXED_MOVING' and
            'declassified'. The lists are shuffled in place.
        batch_size: number of samples per batch.
    """

    # (classdict key, one-hot label) in interleaving order. 'declassified' is
    # the directory name of the third class' images.
    _CLASS_ORDER = (('ADULT_FEMALE', [1, 0, 0]),
                    ('FIXED_MOVING', [0, 1, 0]),
                    ('declassified', [0, 0, 1]))

    def __init__(self, classdict, batch_size):
        """Store the per-class path lists and shuffle them once."""
        self.classdict = classdict
        self.batch_size = batch_size
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch.

        The epoch length is bounded by the smallest class so that indexing
        into any class list in ``__getitem__`` can never run out of range.
        """
        smallest = min(len(self.classdict[key]) for key, _ in self._CLASS_ORDER)
        return (smallest * len(self._CLASS_ORDER)) // self.batch_size

    def __getitem__(self, index):
        """Build batch number ``index``.

        Returns:
            Tuple ``(x, y)`` of numpy arrays: the stacked images from
            ``load_lice`` and the matching one-hot labels.
        """
        n_classes = len(self._CLASS_ORDER)
        xbatch = []
        ybatch = []
        start = index * self.batch_size
        for i in range(start, start + self.batch_size):
            # Both the class and the image index derive from the global sample
            # index, so they stay in phase across batch boundaries even when
            # batch_size is not a multiple of the number of classes.
            key, one_hot = self._CLASS_ORDER[i % n_classes]
            img_path = self.classdict[key][i // n_classes]
            xbatch.append(load_lice(img_path))
            ybatch.append(one_hot)
        return np.array(xbatch), np.array(ybatch)

    def on_epoch_end(self):
        """Reshuffle every class list in place."""
        for paths in self.classdict.values():
            random.shuffle(paths)

In [None]:
# training data: everything before the cutoff in the shuffled image list
train_data = images[:cutoff]
train_dict = get_data_dict(train_data)
train_generator = DataGenerator(train_dict, batch_size)
steps_per_epoch = len(train_generator)
# config["train_dataset_size"] = len(train_data)
print([(name, len(paths)) for name, paths in train_dict.items()])
print("Train dataset size: {}. Steps per epoch: {}".format(len(train_data), steps_per_epoch))

# validation data: the remainder of the shuffled image list
validation_data = images[cutoff:]
validation_dict = get_data_dict(validation_data)
validation_generator = DataGenerator(validation_dict, batch_size)
val_steps_per_epoch = len(validation_generator)
# config["val_dataset_size"] = len(validation_data)
print([(name, len(paths)) for name, paths in validation_dict.items()])
print("Val dataset size: {}. Steps per epoch: {}".format(len(validation_data), val_steps_per_epoch))

In [None]:
for i in range(train_generator.__len__()):
    x, y = train_generator[i]
#     print([(np.argmax(y[j]), a.shape) for (j,a) in enumerate(x)])

# PLOT

In [None]:
import matplotlib.pyplot as plt

In [None]:
X, Y = validation_generator[0]

In [None]:
# for i in range(batch_size):
#     plt.imshow(X[i, ...])
#     plt.show()
#     print(Y[i, ...])

# TRAIN

In [None]:
from keras.applications.resnet50 import ResNet50
# from keras.applications.mobilenet import _depthwise_conv_block
from utils import depthwise_conv_block, conv_block
from keras import layers
from keras.models import Model
from keras.callbacks import  ModelCheckpoint
from keras import Sequential
from keras.optimizers import Adam

In [None]:
# resnet50 = ResNet50(include_top=False, weights='imagenet', input_shape=input_shape)
# x = layers.GlobalAveragePooling2D(name='avg_pool')(resnet50.output)
# x = layers.Dense(2, activation='softmax', name='fc1000')(x)
# model = Model(inputs=[resnet50.input], outputs=[x])

In [None]:
# img_input = layers.Input(shape=input_shape)
# # Block 1
# x = layers.Conv2D(64, (3, 3),
#       activation='relu',
#       padding='same',
#       name='block1_conv1')(img_input)
# x = layers.Conv2D(64, (3, 3),
#       activation='relu',
#       padding='same',
#       name='block1_conv2')(x)
# x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

# # Block 2
# x = layers.Conv2D(128, (3, 3),
#       activation='relu',
#       padding='same',
#       name='block2_conv1')(x)
# x = layers.Conv2D(128, (3, 3),
#       activation='relu',
#       padding='same',
#       name='block2_conv2')(x)
# x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

# x = layers.Flatten(name='flatten')(x)
# x = layers.Dense(4096, activation='relu', name='fc1')(x)
# x = layers.Dense(3, activation='softmax', name='predictions')(x)

In [None]:
# Hyper-parameters of the classifier built in the next cell.
classes = 3  # number of output units (matches the 3-element one-hot labels)
alpha = 0.5  # passed to every conv block; presumably a width multiplier -- confirm in utils
dropout = 1e-3  # rate of the Dropout layer before the final 1x1 conv
depth_multiplier = 1  # passed through to depthwise_conv_block
# target shape for the Reshape after global average pooling;
# assumes the last block outputs int(512 * alpha) channels -- TODO confirm in utils
shape = (1, 1, int(512 * alpha))

In [None]:
# Build the classifier from the project's utils.conv_block /
# utils.depthwise_conv_block helpers.
# NOTE(review): layout looks like the first blocks of Keras' MobileNet
# (stopping after block 11) -- confirm against utils.
img_input = layers.Input(shape=input_shape)
x = conv_block(img_input, 32, alpha, strides=(2, 2))
x = depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)

# each group below starts with a stride-(2, 2) block
x = depthwise_conv_block(x, 128, alpha, depth_multiplier,
                          strides=(2, 2), block_id=2)
x = depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)

x = depthwise_conv_block(x, 256, alpha, depth_multiplier,
                          strides=(2, 2), block_id=4)
x = depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)

x = depthwise_conv_block(x, 512, alpha, depth_multiplier,
                          strides=(2, 2), block_id=6)
x = depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
x = depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
x = depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
x = depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
x = depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)

# Classification head: pool to one vector, reshape to `shape` so that a 1x1
# convolution can act as the dense layer, then softmax and flatten to (classes,).
x = layers.GlobalAveragePooling2D()(x)
x = layers.Reshape(shape, name='reshape_1')(x)
x = layers.Dropout(dropout, name='dropout')(x)
x = layers.Conv2D(classes, (1, 1),
                  padding='same',
                  name='conv_preds')(x)
x = layers.Activation('softmax', name='act_softmax')(x)
x = layers.Reshape((classes,), name='reshape_2')(x)
model = Model(inputs=[img_input], outputs=[x])

In [None]:
adam = Adam(lr=1e-3)
model.compile(adam, loss="categorical_crossentropy", metrics=["categorical_accuracy"])

In [None]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
import math

In [None]:
def step_decay(epoch):
    """Return the learning rate for ``epoch`` under a step schedule.

    The rate starts at 1e-3 and is halved each time ``epoch + 1`` reaches a
    multiple of 10.

    Args:
        epoch: zero-based epoch index.

    Returns:
        The learning rate as a float.
    """
    base_rate = 1e-3
    decay_factor = 0.5
    epochs_per_step = 10.0
    completed_steps = math.floor((1 + epoch) / epochs_per_step)
    return base_rate * math.pow(decay_factor, completed_steps)
lrate = LearningRateScheduler(step_decay)

In [None]:
# Write a full-model checkpoint after every epoch (save_best_only=False,
# save_weights_only=False, period=1); Keras fills in {epoch:02d} in the filename.
chkpt = ModelCheckpoint('/root/data/models/lice-classification/QAPQ/v1/model.{epoch:02d}.hdf5', 
                monitor='val_loss', 
                verbose=0, 
                save_best_only=False, 
                save_weights_only=False, 
                mode='auto', 
                period=1)


In [None]:
# Train for 70 epochs with the step learning-rate schedule and per-epoch
# checkpoints. steps_per_epoch / validation_steps are not passed, so Keras
# uses len() of the Sequence generators.
history = model.fit_generator(generator=train_generator,
                              # steps_per_epoch=steps_per_epoch // 10,
                              epochs=70,
                              verbose=1,
                              validation_data=validation_generator,
                              callbacks=[lrate, chkpt]
                              # validation_steps=val_steps_per_epoch // 10
                             )

In [None]:
h = history.history

In [None]:
# Convert every logged value to a built-in float, in place
# (presumably so the history can be dumped to JSON afterwards).
h.update({metric: [float(value) for value in values]
          for metric, values in h.items()})

In [None]:
with open('/root/data/models/lice-classification/QAPQ/v1/history.json', 'w') as f:
    json.dump(h, f)

In [None]:
plt.plot(h["loss"])
plt.plot(h["val_loss"])
plt.legend(["loss", "val_loss"])
# plt.ylim([0, 2])
plt.show()

In [None]:
plt.plot(h["categorical_accuracy"])
plt.plot(h["val_categorical_accuracy"])
plt.legend(["acc", "val_acc"])
plt.show()

In [None]:
np.argmin(h["val_loss"])

In [None]:
np.argmax(h["val_categorical_accuracy"])

# confusion matrix

In [None]:
# model = keras.models.load_model('/root/data/models/lice-classification/QAPQ/model.46.hdf5')

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Collect ground-truth and predicted class indices over the validation set.
y_true_all = []
y_pred_all = []
for i in tqdm(range(val_steps_per_epoch)):
    X, Y_true = validation_generator[i]
    Y_pred = model.predict_on_batch(X)
    # argmax over the class axis for the whole batch at once; using the
    # batch's own length avoids depending on the global batch_size
    y_true_all.extend(np.argmax(Y_true, axis=1).tolist())
    y_pred_all.extend(np.argmax(Y_pred, axis=1).tolist())

In [None]:
# CLASS_MAP = {"ADULT_FEMALE": 2,
#              "FIXED_MOVING": 1,
#              "UNSURE": 0}

In [None]:
confusion_matrix(y_true_all, y_pred_all)

# config

In [None]:
# Metadata saved next to the model. The label order follows the one-hot
# encoding used by DataGenerator: index 0 = ADULT_FEMALE, 1 = FIXED_MOVING,
# 2 = images from the 'declassified' folder (named NOT_LICE here).
config = {'input_size': (64, 64, 3),
          'output_size': [3],
          'description': 'lice classification',
          'labels': ['ADULT_FEMALE', 'FIXED_MOVING', 'NOT_LICE']}

In [None]:
with open('/root/data/models/lice-classification/QAPQ/v1/config.json', 'w') as f:
    json.dump(config, f)