# #0 Create dataset

In [None]:
import os 
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import glob

### Useful functions

In [None]:
# Gather size statistics (mean and maximum width/height) over the lice crops.
lices = glob.glob('/root/data/lice_detection/0_lice_classification/*/*.jpg')
print(len(lices))
average_size = np.zeros((1, 2))
max_height = 0
max_width = 0
for lice in lices:
    # Read the (width, height) pair once per file; the size is reused for
    # both the running maxima and the running sum instead of opening the
    # image a second time.
    width, height = Image.open(lice).size
    max_width = max(max_width, width)
    max_height = max(max_height, height)
    average_size += np.array((width, height))

# Turn the accumulated (1, 2) sum into a (mean_width, mean_height) tuple.
average_size = average_size[0]
average_size = average_size[0]/len(lices), average_size[1]/len(lices)
print(average_size)
print(max_width, max_height)

In [None]:
def IoU(boxA, boxB):
    """Return the intersection-over-union of two axis-aligned boxes.

    Each box is read as (min0, min1, max0, max1); both boxes must use the
    same axis order. Extents are treated as inclusive, hence the +1 in
    every side length.
    """
    def _area(box):
        # inclusive extent along each axis
        return (box[2] - box[0] + 1) * (box[3] - box[1] + 1)

    # side lengths of the overlap rectangle (non-positive when disjoint)
    overlap_side0 = min(boxA[2], boxB[2]) - max(boxA[0], boxB[0]) + 1
    overlap_side1 = min(boxA[3], boxB[3]) - max(boxA[1], boxB[1]) + 1
    overlap = max(0, overlap_side0) * max(0, overlap_side1)

    # union = sum of both areas minus the part counted twice
    union = float(_area(boxA) + _area(boxB) - overlap)
    return overlap / union

In [None]:
def chunks(img, template, bboxes, thresh):
    """Tile an image into template-sized chunks and label each of them.

    The image is zero-padded at the bottom/right up to the next multiple of
    the template size and then cut into a grid, visited row by row. A chunk
    gets class 1 (lice) when its IoU with at least one bbox is strictly
    greater than `thresh`, otherwise class 0 (not lice). Lice chunks are
    always returned; a not-lice chunk is returned only when a uniform random
    draw exceeds 0.8.

    - img: (H, W, C) numpy array
    - template: (Ht, Wt) tuple
    - bboxes: iterable of boxes, each passed to IoU against the chunk box
    - thresh: IoU threshold
    output:
    - chunks: list of N (Ht, Wt, C) numpy arrays
    - classes: list of N integers with class id (0 not lice, 1 lice)

    NOTE(review): the chunk box is built as [row0, col0, row1, col1]; the
    bboxes must use the same axis order for the IoU to be meaningful --
    confirm against the annotation file.
    """
    tile_h, tile_w = template[0], template[1]
    height, width, channels = img.shape

    # number of tiles per axis = ceil(size / tile size)
    n_rows = -(-height // tile_h)
    n_cols = -(-width // tile_w)

    # zero-padded canvas holding the original image in its top-left corner
    padded_img = np.zeros((n_rows * tile_h, n_cols * tile_w, channels), dtype=np.uint8)
    padded_img[:height, :width, :] = img

    kept_chunks, kept_classes = [], []
    for row in range(n_rows):
        top, bottom = row * tile_h, (row + 1) * tile_h
        for col in range(n_cols):
            left, right = col * tile_w, (col + 1) * tile_w
            tile_box = [top, left, bottom, right]
            # every bbox is evaluated, then the results are OR-ed together
            is_lice = any([IoU(bbox, tile_box) > thresh for bbox in bboxes])
            # negatives are randomly discarded; the draw only happens for them
            if not is_lice and np.random.rand() <= 0.8:
                continue
            kept_classes.append(1 if is_lice else 0)
            kept_chunks.append(padded_img[top:bottom, left:right, :])
    return kept_chunks, kept_classes
    
    

### Main code

In [None]:
import csv

In [None]:
# Output layout: <base_dir>/<train|val>/<lice|not_lice>/
base_dir = '/root/data/lice_detection/2_chunk_classification'
subdirs = ['train', 'val']
subsubdirs = ['lice', 'not_lice']

# The root comes first so it is created before any of its leaf folders.
wanted_dirs = [base_dir]
for split_name in subdirs:
    for class_name in subsubdirs:
        wanted_dirs.append(os.path.join(base_dir, split_name, class_name))

# Only create what is missing so the cell can be re-run safely.
for folder in wanted_dirs:
    if not os.path.isdir(folder):
        os.makedirs(folder)

In [None]:
# Read every row of the annotation CSV into memory; each row is kept as the
# list of strings produced by csv.reader.
dataset = []
with open('/root/data/lice_detection/lice_dataset_fish_only.csv', 'r') as annotation_file:
    dataset.extend(csv.reader(annotation_file, delimiter=',', quoting=csv.QUOTE_MINIMAL))

In [None]:
# sanity check: number of annotation rows read from the CSV
print(len(dataset))

In [None]:
# Regroup the annotation rows by their first column (later used as the image
# path): key -> list of 4-integer lists parsed from columns 1 to 4.
lice_dic = {}
for row in dataset:
    boxes_for_key = lice_dic.setdefault(row[0], [])
    boxes_for_key.append([int(value) for value in row[1:5]])

In [None]:
# chunk size (Ht, Wt) handed to chunks()
template = (100, 100)
# IoU threshold handed to chunks(): a chunk is labelled lice as soon as its
# IoU with a bbox is strictly greater than this value
thresh = 0.0

In [None]:
# Cut every annotated image into chunks and write them to
# <base_dir>/<train|val>/<lice|not_lice>/<code>_<index>.jpg
total = 0
# .items() replaces the Python-2-only .iteritems() so the cell runs unchanged
# on both Python 2 and Python 3.
for (path, bboxes) in lice_dic.items():
    print(path)
    img = np.array(Image.open(path))
    # last '_'-separated token of the file name (extension dropped); used as
    # the prefix of every chunk file written for this image
    code = path.split('/')[-1].split('.')[0].split('_')[-1]
    chunks_list, classes = chunks(img, template, bboxes, thresh)
    print(len(chunks_list), np.sum(classes))
    # index counts every returned chunk, including the ones skipped below, so
    # file names keep the chunk's position in chunks_list
    for index, (chunk, cl) in enumerate(zip(chunks_list, classes)):
        # roughly 10% of the chunks go to the validation split
        data_type = 'val' if np.random.rand() > 0.9 else 'train'
        if cl == 0:
            # not-lice chunks are subsampled once more: keep about one in two
            if np.random.rand() <= 0.5:
                continue
            class_dir = 'not_lice'
        else:
            class_dir = 'lice'
        chunk_path = os.path.join(base_dir, data_type, class_dir, code + '_' + str(index) + '.jpg')
        Image.fromarray(chunk).save(chunk_path)

In [None]:
# Count the chunk files present in each class folder of the training split.
train_lice_dir = '/root/data/lice_detection/2_chunk_classification/train/lice/'
train_not_lice_dir = '/root/data/lice_detection/2_chunk_classification/train/not_lice/'
n_train_lice = len(os.listdir(train_lice_dir))
n_train_not_lice = len(os.listdir(train_not_lice_dir))
print("Image in train: {} lices, {} not lices".format(n_train_lice, n_train_not_lice))

In [None]:
# Count the chunk files present in each class folder of the validation split.
val_lice_dir = '/root/data/lice_detection/2_chunk_classification/val/lice/'
val_not_lice_dir = '/root/data/lice_detection/2_chunk_classification/val/not_lice/'
n_val_lice = len(os.listdir(val_lice_dir))
n_val_not_lice = len(os.listdir(val_not_lice_dir))
print("Image in val: {} lices, {} not lices".format(n_val_lice, n_val_not_lice))

# #1 Look at the data

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import os
import glob

In [None]:
# collect the lice chunk images of every split folder (the first * matches
# any directory directly under 2_chunk_classification) and report how many
# there are
lice_chunks = glob.glob('/root/data/lice_detection/2_chunk_classification/*/lice/*.jpg')
print(len(lice_chunks))

In [None]:
# Visual spot check: draw 10 lice chunk paths at random and display each one
# in its own figure.
random_lice_chunks = np.random.choice(lice_chunks, 10)
for chunk_file in random_lice_chunks:
    pixels = np.array(Image.open(chunk_file))
    plt.imshow(pixels)
    plt.show()

# #2 Train a quick algorithm

In [None]:
import keras
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from keras.applications import vgg16
from keras.models import Model
from keras.optimizers import Adam
import numpy as np
from PIL import Image
import os
import glob

In [None]:
# Small VGG-style classifier: three blocks of two 3x3 'same' convolutions
# followed by a 2x2 max-pooling, then a dense head ending in a 2-way softmax.
# The input is one 100x100 chunk with 3 channels.
img_input = Input(shape=(100, 100, 3))

# Block 1: 32 filters
x = Conv2D(32, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = Conv2D(32, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

# Block 2: 64 filters
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

# Block 3: 128 filters
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

# Classification head: flatten, 100-unit hidden layer, 2-class softmax output
x = Flatten(name='flatten')(x)
x = Dense(100, activation='relu', name='fc1')(x)
x = Dense(2, activation='softmax', name='predictions')(x)

In [None]:
# wrap the graph built above: img_input in, 2-way softmax tensor x out
model = Model([img_input], [x])

In [None]:
# Adam optimizer with a 1e-3 learning rate.
adam = Adam(lr=1e-3)
# NOTE(review): top_k_categorical_accuracy is tracked on a 2-class output;
# with Keras' default k it is presumably not informative -- confirm whether
# it is still wanted.
model.compile(
    optimizer=adam,
    loss="categorical_crossentropy",
    metrics=['categorical_accuracy', 'top_k_categorical_accuracy'])

In [None]:
# number of samples in each batch produced by myGenerator
BATCH_SIZE = 16
# (height, width, channels) of one chunk; same values as the model's Input shape
INPUT_SHAPE = (100, 100, 3)
# passed as steps_per_epoch to the training generator and to fit_generator
train_steps = 100
# passed as steps_per_epoch to the validation generator
val_steps = 10

In [None]:
def myGenerator(data_type, steps_per_epoch, BATCH_SIZE, INPUT_SHAPE):
    """Yield an endless stream of (x_batch, y_batch) batches of chunks.

    Every sample is drawn from the 'not_lice' or the 'lice' folder of the
    requested split with equal probability, so batches are roughly balanced
    whatever the number of files in each folder.

    - data_type: name of the split sub-folder ('train' or 'val')
    - steps_per_epoch: kept for interface compatibility; samples are drawn
      at random, so it does not influence the produced batches
    - BATCH_SIZE: number of samples per batch
    - INPUT_SHAPE: (H, W, C) shape of one chunk
    yields:
    - x_batch: (BATCH_SIZE, H, W, C) float array of raw pixel values
    - y_batch: (BATCH_SIZE, 2) one-hot labels, column 0 = not_lice,
      column 1 = lice
    raises:
    - ValueError when one of the two class folders contains no .jpg file
    """
    pattern = '/root/data/lice_detection/2_chunk_classification/{}/{}/*.jpg'
    # List each class folder once instead of re-globbing for every sample.
    # Files added after the generator has started are therefore not seen.
    files_by_class = {}
    for class_id, class_type in enumerate(('not_lice', 'lice')):
        files = glob.glob(pattern.format(data_type, class_type))
        if not files:
            raise ValueError('no chunk found for {}/{}'.format(data_type, class_type))
        files_by_class[class_id] = files

    while True:
        x_batch = np.empty((BATCH_SIZE, INPUT_SHAPE[0], INPUT_SHAPE[1], INPUT_SHAPE[2]))
        # Start from zeros and set a single column per row: the softmax /
        # categorical_crossentropy pair expects one-hot targets, whereas
        # assigning the scalar label to the whole row produced [0, 0] or
        # [1, 1].
        y_batch = np.zeros((BATCH_SIZE, 2))
        for ind in range(BATCH_SIZE):
            # roll dice: class 0 (not_lice) or class 1 (lice), 50/50
            class_id = 0 if np.random.rand() > 0.5 else 1
            chunk = np.random.choice(files_by_class[class_id])
            x_batch[ind, ...] = np.array(Image.open(chunk))
            y_batch[ind, class_id] = 1
        # bunch of augmentation

        yield x_batch, y_batch

In [None]:
# create the generators: one endless batch stream per split, both built with
# the same batch size and chunk shape
train_generator = myGenerator("train", train_steps, BATCH_SIZE, INPUT_SHAPE)
validation_generator = myGenerator("val", val_steps, BATCH_SIZE, INPUT_SHAPE)

In [None]:
# Pull one batch to inspect it. The next() builtin works on both Python 2 and
# Python 3, unlike the Python-2-only generator.next() method.
X, Y = next(train_generator)

In [None]:
# inspect the shape of the label batch pulled from the training generator
Y.shape

In [None]:
# start training: 50 epochs of train_steps batches each, evaluating on
# val_steps validation batches after every epoch. validation_steps is
# required because the validation generator never terminates; the previous
# bare len() call raised a TypeError before training could start.
history = model.fit_generator(
        generator=train_generator,
        steps_per_epoch=train_steps,
        epochs=50,
        verbose=1,
        validation_data=validation_generator,
        validation_steps=val_steps)