<h1> MobileNet Model Training </h1>

<h3> In this notebook, we train a MobileNet binary classifier that labels each image as either "contains fish" or "does not contain fish". </h3>

In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('/root/alok/repos/cv_research/alok')

import glob
import json
import os
import numpy as np
from PIL import Image
from random import shuffle

import keras
from keras.models import Model
from keras.applications.mobilenet import MobileNet
from keras.layers import Input, Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist
from keras import backend as K
from keras.backend import tf as ktf

from matplotlib import pyplot as plt
from matplotlib import cm

from skimage.io import imread
from skimage.transform import resize
import time

%matplotlib inline

# Restrict CUDA to the device with index 1, then seed NumPy's global RNG.
# NOTE: this seeds NumPy only; Python's `random` module is not affected.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
np.random.seed(seed=0)

<h1> First step: Prepare the data </h1>

In [None]:
# get data ready

# define input locations
image_dir = '/root/data/alok/filtration_classifier_data/images_resized'
metadata_dir = '/root/data/alok/filtration_classifier_data/labels'
# glob returns paths in arbitrary order; sorting gives the seeded shuffle below
# a fixed starting point so the train/test split is the same on every run.
image_fs = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))
# The pattern needs the '*' wildcard; a bare '.json' only matches a file that
# is literally named ".json". (metadata_fs is not used further in this cell.)
metadata_fs = sorted(glob.glob(os.path.join(metadata_dir, '*.json')))

# create full dataset: one (image path, metadata path) pair per image
dataset = []
for image_f in image_fs:
    image_f_name = os.path.basename(image_f)
    # Swap only the extension, not every '.jpg' substring in the name.
    metadata_f_name = os.path.splitext(image_f_name)[0] + '.json'
    metadata_f = os.path.join(metadata_dir, metadata_f_name)
    if not os.path.exists(metadata_f):
        print('This file path does not exist: {}'.format(metadata_f))
        # Skip unlabeled images; keeping them would make the later
        # json.load(open(metadata_f)) fail in the middle of training.
        continue
    dataset.append((image_f, metadata_f))

# shuffle data
# Use a dedicated, explicitly seeded RNG: np.random.seed() does not seed
# Python's `random.shuffle`, and a local RandomState also makes this cell give
# the same order every time it is re-run.
np.random.RandomState(0).shuffle(dataset)

# define training, validation, and testing sets

n = len(dataset)
training_size = n // 2
# validation_size = int(n / 3)
training_dataset = dataset[:training_size]
# validation_dataset = dataset[training_size: training_size + validation_size]
# testing_dataset = dataset[training_size + validation_size:]
testing_dataset = dataset[training_size:]




In [None]:
# # create digits dataset
# import shutil
# for d in dataset:
#     image_f, metadata_f = d
#     metadata = json.load(open(metadata_f))
#     if metadata['model'] == 'contains_fish':
#         new_class = "fish"
#     else:
#         new_class = "no_fish"
        
#     new_path = "/root/data/alok/filtration_classifier_data/digits/{}/{}".format(new_class, os.path.basename(image_f))
#     shutil.copy(image_f, new_path)

<h1> Define The Model </h1>

In [None]:
# Generate a model with all layers (with top): a MobileNet built with its
# default arguments (apart from the input shape), followed by a single sigmoid
# unit that produces the "contains fish" score.
# NOTE(review): because the top is kept, the Dense layer is presumably fed the
# network's final classification output rather than pooled features; consider
# include_top=False with global pooling -- confirm against the intended design.
mobilenet = MobileNet(input_shape=(224, 224, 3))
x = Dense(1, activation='sigmoid')(mobilenet.layers[-1].output)
# Keras 2 names these arguments `inputs`/`outputs`; the singular Keras 1
# spellings are deprecated.
model = Model(inputs=mobilenet.input, outputs=x)
model.summary()

<h1> Define Generator </h1>

In [None]:
def data_generator(dset, steps_per_epoch, batch_size, input_shape):
    """Yield (x_batch, y_batch) arrays from `dset` forever, batch by batch.

    Args:
        dset: sequence of (image path, metadata JSON path) pairs.
        steps_per_epoch: number of batches after which the generator restarts
            from the first sample. May be larger than the number of batches
            in `dset` (e.g. `np.inf`); the generator then restarts as soon as
            `dset` has been covered once.
        batch_size: maximum number of samples per batch (must be >= 1).
        input_shape: shape of a single image array, e.g. (224, 224, 3).

    Yields:
        x_batch: float array of shape (k, *input_shape) with the image data.
        y_batch: float array of shape (k,), 1 where the metadata's 'model'
            field equals 'contains_fish' and 0 otherwise.
        k equals `batch_size` except for the final batch of a pass, which
        holds only the remaining samples.

    Raises:
        ValueError: on first iteration, if `dset` is empty or `batch_size`
            is smaller than 1.
    """
    N = len(dset)
    if N == 0:
        raise ValueError('data_generator received an empty dataset')
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    # Batches needed to cover dset once (the last one may be partial).
    n_batches = (N + batch_size - 1) // batch_size
    # Restart at whichever limit comes first, so the slice below is never
    # empty and no batch index beyond the data is ever produced.
    wrap_at = min(steps_per_epoch, n_batches)

    i = 0
    while True:
        batch = dset[batch_size * i : batch_size * (i + 1)]
        # Size the arrays to the actual batch: np.empty is uninitialised, so
        # any unused trailing rows would be fed to the model as garbage.
        x_batch = np.empty((len(batch), *input_shape))
        y_batch = np.empty(len(batch))
        for idx, (image_f, metadata_f) in enumerate(batch):
            # Context managers release the file handles promptly.
            with Image.open(image_f) as im:
                x_batch[idx] = np.asarray(im)
            with open(metadata_f) as f:
                metadata = json.load(f)
            y_batch[idx] = 1 if metadata['model'] == 'contains_fish' else 0

        i += 1
        # '>=' rather than '>': valid batch indices are 0 .. wrap_at - 1.
        if i >= wrap_at:
            i = 0
        yield x_batch, y_batch


<h1> Train Model </h1>

In [None]:
# optimizer = keras.optimizers.adam(lr=0.0001, decay=0.1)
# Binary cross-entropy pairs with the model's single sigmoid output; passing
# 'adam' by name selects that optimizer with its default settings.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
BATCH_SIZE = 25
# Ceiling division: the number of batches needed to cover the training set
# once. int(n / BATCH_SIZE) + 1 is one step too many whenever n is an exact
# multiple of BATCH_SIZE, and that extra step asks for a batch past the data.
steps_per_epoch = (len(training_dataset) + BATCH_SIZE - 1) // BATCH_SIZE
gen = data_generator(training_dataset, steps_per_epoch, BATCH_SIZE, (224, 224, 3))

In [None]:
# Train from the generator: 100 epochs of `steps_per_epoch` batches each,
# with progress output enabled.
model.fit_generator(
    gen,
    steps_per_epoch=steps_per_epoch,
    epochs=100,
    verbose=True,
)

In [None]:
t = time.time()
# Derive the step count from the test set instead of hard-coding 41.
# Floor division keeps only whole batches, so every evaluated row is a real
# sample; up to BATCH_SIZE - 1 trailing samples are left out.
eval_steps = len(testing_dataset) // BATCH_SIZE
eval_gen = data_generator(testing_dataset, np.inf, BATCH_SIZE, (224, 224, 3))
scores = model.evaluate_generator(eval_gen, eval_steps)
print(scores)
# Wall-clock seconds spent on the evaluation.
print(time.time() - t)

In [None]:
# Score the test set in order. The step count is derived from the data rather
# than hard-coded as 41; floor division keeps only whole batches so that every
# prediction corresponds to a real sample of testing_dataset.
pred_steps = len(testing_dataset) // BATCH_SIZE
eval_gen = data_generator(testing_dataset, np.inf, BATCH_SIZE, (224, 224, 3))
predictions = model.predict_generator(eval_gen, pred_steps)

In [None]:
# Ground-truth labels for exactly the samples that were scored, read here so
# this cell does not depend on a variable defined further down the notebook.
ground_truth_classes = []
for _, metadata_f in testing_dataset[:len(predictions)]:
    with open(metadata_f) as f:
        label = json.load(f)['model']
    ground_truth_classes.append(1 if label == 'contains_fish' else 0)

# Confusion-matrix tallies; the positive class (1) is "contains fish" and a
# score above 0.5 counts as a positive prediction.
false_positive_cnt = 0
false_negative_cnt = 0
true_positive_cnt = 0
true_negative_cnt = 0
for p, gt in zip(predictions, ground_truth_classes):
    predicted = 1 if p > 0.5 else 0
    if predicted == 1 and gt == 1:
        true_positive_cnt += 1
    elif predicted == 0 and gt == 0:
        true_negative_cnt += 1
    elif predicted == 1:
        # Predicted fish, but the label says there is none.
        false_positive_cnt += 1
    else:
        # Label says fish, but the model scored it at or below the threshold.
        false_negative_cnt += 1
        # Show the score of each missed fish image for inspection.
        print(p)
        

In [None]:
# Display the true_positive_cnt tally produced by the counting loop above.
true_positive_cnt

In [None]:
# Display the false_negative_cnt tally produced by the counting loop above.
false_negative_cnt

In [None]:
# Display the false_positive_cnt tally produced by the counting loop above.
false_positive_cnt

In [None]:
# Display the true_negative_cnt tally produced by the counting loop above.
true_negative_cnt

In [None]:
# Ground-truth class (1 = 'contains_fish', 0 = anything else) for each scored
# test sample. The slice follows len(predictions) instead of a hard-coded
# 1025, so it cannot index past the end of testing_dataset.
# NOTE(review): the tally cell earlier in the notebook also uses
# ground_truth_classes; consider moving this cell above it.
ground_truth_classes = []
for _, metadata_f in testing_dataset[:len(predictions)]:
    # Close each metadata file as soon as it has been parsed.
    with open(metadata_f) as f:
        c = json.load(f)['model']
    ground_truth_classes.append(1 if c == 'contains_fish' else 0)

<h1> Quick test </h1>

In [None]:
# Collect every .jpg under the fish_present/images_resized folder.
files = glob.glob(
    '/root/data/alok/filtration_classifier_data/fish_present/images_resized/*.jpg'
)

In [None]:
# data_generator expects a metadata path next to every image, so each ad-hoc
# image borrows one from `dataset` purely as a placeholder.
# NOTE(review): the borrowed labels are unrelated to these images; presumably
# only the predictions are used downstream -- confirm.
# The modulo cycles through `dataset` so that having more files than dataset
# entries no longer raises IndexError.
adhoc_dataset = [
    (f, dataset[i % len(dataset)][1])
    for i, f in enumerate(files)
]
    

In [None]:
# Use a dedicated constant rather than overwriting the global BATCH_SIZE:
# reassigning it here would silently change the training/evaluation cells if
# they were re-run after this one.
ADHOC_BATCH_SIZE = 1
adhoc_gen = data_generator(adhoc_dataset, np.inf, ADHOC_BATCH_SIZE, (224, 224, 3))
# One image per step, so the number of steps equals the number of images.
# NOTE(review): this replaces the `predictions` computed for the test set.
predictions = model.predict_generator(adhoc_gen, len(adhoc_dataset))

In [None]:
# Count how many predictions exceed the 0.5 decision threshold.
pass_cnt = sum(1 for p in predictions if p > 0.5)

        

In [None]:
# Display the number of predictions that scored above 0.5.
pass_cnt

In [None]:
# Display the total number of predictions, for comparison with pass_cnt.
len(predictions)

In [None]:
# NOTE(review): scratch arithmetic with unexplained literals. 25 matches the
# BATCH_SIZE used for training; 762 is presumably a sample count -- confirm
# or remove.
762 / 25

In [None]:
# NOTE(review): scratch arithmetic -- the fraction of 750 left after removing
# 476. What the two literals count is not recorded here; confirm or remove.
(750 - 476) / 750.

In [None]:
# Number of ad-hoc image files divided by 25 (fractional result).
# NOTE(review): 25 presumably stands for the training BATCH_SIZE -- confirm.
len(files) / 25

In [None]:
# NOTE(review): scratch arithmetic -- the fraction of 1500 left after removing
# 937. What the two literals count is not recorded here; confirm or remove.
(1500 - 937) / 1500