In [None]:
import cv2
import os
from pathlib import Path
import random as rd
import numpy as np
from PIL import Image

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import applications
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras import Model
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam

from hulp_functies import plot_model, plot_images, generate_metadata
# Seed every random number generator this notebook relies on, so that a
# "Restart kernel -> Run all" is as repeatable as possible.  Seeding only
# Python's `random` leaves the TensorFlow side (for example the initial
# weights of the new output layer) different on every run.
SEED = 42
rd.seed(SEED)             # Python `random`: shuffles the train/test split, samples preview images
np.random.seed(SEED)      # NumPy's global generator
tf.random.set_seed(SEED)  # TensorFlow's global seed

### Create directories where you can put the photos
The code in the next cell creates a directory `camera` containing two sub-directories: `objects` and `background`. In the sub-directory `objects`, create one sub-directory for each object that you are going to photograph (say 2-5 sub-directories). In each of these sub-directories, put about 35 different images of that object. In the sub-directory `background`, put about 60 images that do not contain any of the objects. Don't spend too much time taking the pictures: they don't have to be artistic, they just have to be clear and taken from different angles.

In [None]:
# Folder layout for the raw photos:
#   camera/objects/<object name>/   photos of one object per sub-folder
#   camera/background/              photos without any of the objects
camera_dir = Path('camera')
object_dir = camera_dir.joinpath('objects')
background_fn = 'background'

# Create the folders, parent first; folders that already exist are left alone.
for folder in (camera_dir, object_dir, camera_dir / background_fn):
    folder.mkdir(exist_ok=True)

### Resize the pictures
The captured images must match the input size of the neural network (224 x 224 x 3). The pictures of the objects are also split into a training set and a test set.

The code in the next two cells reads the images from the `camera` folder, resizes them to the correct format and splits them into a `train` folder and a `test` folder. The result will be in the folder `images_224`.

The images from `train` are used to train the neural network; the images from `test` are used to check how well the training worked.

In [None]:
# Input format of the network: square images with three colour channels.
size = 224
channels = 3
input_shape = (size, size, channels)

object_fn = Path('objects.txt')      # label file: one class name per line
resize_dir = Path(f'images_{size}')  # root folder for the resized images

# Every visible sub-directory of `camera/objects` is one class.
# - Plain files (e.g. `.DS_Store`, `Thumbs.db`) and hidden folders are skipped:
#   the following cells list the contents of every class as a directory and
#   would fail on them.
# - The names are sorted because the file system returns entries in arbitrary
#   order, and this order determines the class numbers used for training and
#   the line order of the label file.
classes = sorted(
    entry.name
    for entry in object_dir.iterdir()
    if entry.is_dir() and not entry.name.startswith('.')
)
n_classes = len(classes)
class_nums = {name: number for number, name in enumerate(classes)}

# Line k of the label file holds the name of class number k.
with open(object_fn, 'w') as f:
    f.write('\n'.join(classes))

In [None]:
def _clear_pngs(folder):
    """Delete the PNG files that an earlier run of this cell left in `folder`.

    Output files are named by their position in the shuffled list, so when the
    number of photos changes a stale file could otherwise stay behind and put
    the same photo in both the training and the test set.
    """
    for old_file in folder.glob('*.png'):
        old_file.unlink()


def _resize_to_png(src, dst):
    """Resize the image file `src` to `size` x `size` pixels and write it to `dst`.

    Returns True when the image was written and False when OpenCV could not
    read `src` (for example a system file or a corrupt photo).
    """
    image = cv2.imread(str(src))
    if image is None:  # cv2.imread signals failure by returning None
        print(f'Skipping unreadable file: {src}')
        return False
    cv2.imwrite(str(dst), cv2.resize(image, (size, size)))
    return True


def _visible_files(folder):
    """Return the sorted names in `folder`, without hidden entries.

    Sorting gives the seeded shuffle below the same input on every machine.
    """
    return sorted(name for name in os.listdir(folder) if not name.startswith('.'))


# Create the output folders and remove results of earlier runs.
(resize_dir / background_fn).mkdir(exist_ok=True, parents=True)
_clear_pngs(resize_dir / background_fn)
for c in classes:
    for split in ('train', 'test'):
        (resize_dir / split / c).mkdir(exist_ok=True, parents=True)
        _clear_pngs(resize_dir / split / c)

# Object photos: after shuffling, the first fifth of each class goes to `test`,
# the rest to `train`.
for c in classes:
    ims = _visible_files(object_dir / c)
    rd.shuffle(ims)
    # Local count only; the name `Ntest` is reserved for the total test-set
    # size that a later cell computes.
    n_test_images = len(ims) // 5
    for i, im in enumerate(ims):
        dir_name = 'test' if i < n_test_images else 'train'
        _resize_to_png(object_dir / c / im, resize_dir / dir_name / c / f'{i}.png')

# Background photos are only resized; they are not split.
for i, im in enumerate(_visible_files(camera_dir / background_fn)):
    _resize_to_png(camera_dir / background_fn / im, resize_dir / background_fn / f'{i}.png')

### Train the neural network

#### Make the directories
During the training we make a number of models and a number of graphs. These models and graphs are placed in separate directories. The code in the next cell creates those directories.

In [None]:
# Output folders used by the cells below:
#   models         - the model after the last epoch and the TensorFlow Lite file
#   saved_models   - the checkpoint written during training
#   export_models  - the exported model and its json metadata
#   plots          - the training graphs
model_dir = Path('models')
saved_model_dir = Path('saved_models')
export_model_dir = Path('export_models')
plot_dir = Path('plots')

for out_dir in (model_dir, saved_model_dir, export_model_dir, plot_dir):
    out_dir.mkdir(exist_ok=True)

# File names of the model in its different formats.
keras_name = 'model_mobilenet.h5'
lite_name = 'model_mobilenet.tflite'
json_name = 'model_mobilenet.json'

#### Read the data

The images you placed in the directory `camera` have been resized and put in the directory `images_224`. The next two cells read in these images and put them in an array in the working memory of your computer.

In [None]:
def import_objects(p):
    """Load the resized object photos below `p` into memory.

    `p` must contain one sub-directory per entry of the notebook-level
    `classes` list.  Every image is read with PIL and its pixel values are
    divided by 255.

    Returns a tuple `(data, labels, one_hot)`:
      data    - array of shape (number of images, size, size, channels),
                ordered class by class in the order of `classes`
      labels  - array with the class number (from `class_nums`) of each image
      one_hot - `labels` converted with `to_categorical` to `n_classes` columns
    """
    per_class = []
    for name in classes:
        folder = p / name
        file_names = os.listdir(folder)
        batch = np.zeros((len(file_names), size, size, channels))
        for row, file_name in enumerate(file_names):
            pixels = np.asarray(Image.open(folder / file_name))
            batch[row] = pixels / 255
        per_class.append(batch)

    data = np.concatenate(per_class, axis=0)

    # One label per image, in the same class-by-class order as `data`.
    labels = np.array([
        class_nums[name]
        for name, batch in zip(classes, per_class)
        for _ in range(len(batch))
    ])

    return data, labels, to_categorical(labels, n_classes)

def import_background(p, N=60):
    """Load at most `N` background photos from directory `p`.

    Every image is read with PIL and its pixel values are divided by 255.

    Returns a tuple `(images, targets)`:
      images  - array of shape (number of images, size, size, channels)
      targets - array of shape (number of images, n_classes) in which every
                entry is 1/n_classes, i.e. the same value for every class
    """
    file_names = os.listdir(p)
    count = min(N, len(file_names))

    images = np.zeros((count, size, size, channels))
    for row, file_name in enumerate(file_names[:count]):
        pixels = np.asarray(Image.open(p / file_name))
        images[row] = pixels / 255

    targets = np.full((count, n_classes), 1 / n_classes)
    return images, targets

In [None]:
# Object photos: pixel data, class numbers and one-hot targets for each split.
training_data, training_labels, class_train = import_objects(resize_dir / 'train')
test_data, test_labels, class_test = import_objects(resize_dir / 'test')

Ntrain, Ntest = len(training_data), len(test_data)

# Background photos: at most twice as many as there are training photos.
npd_train_images, class_npd_train = import_background(resize_dir / background_fn, N=2 * Ntrain)

# Extended training set: the object photos followed by the background photos.
training_data_ext = np.concatenate([training_data, npd_train_images])
class_train_ext = np.concatenate([class_train, class_npd_train])

#### Make a model
We take a standard model: Mobilenet. This model is designed especially for mobile devices such as smartphones. The only thing left to tell this model is how many types of objects it should be able to distinguish.

In [None]:
# MobileNet with ImageNet weights and without its own classification layers.
base_model = applications.MobileNet(
    input_shape=input_shape,
    include_top=False,
    weights='imagenet',
)

# New classification head: flatten the MobileNet output and add one softmax
# output per object class.
x = Flatten()(base_model.output)
predictions = Dense(n_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

#### A function to train the model

In [None]:
def train_model(m, tr_data, tr_class, bs, epochs, lr=1.0e-4, fn=None):
    """Compile and fit model `m`, optionally saving it to disk.

    Parameters
    ----------
    m : Keras model; it is compiled and trained in place.
    tr_data, tr_class : training inputs and their target vectors.
    bs : batch size.
    epochs : number of passes over the training data.
    lr : learning rate of the Adam optimizer.
    fn : file name for the saved models, or None (default) to save nothing.
        When given, the model with the best validation loss is written to
        `saved_model_dir / fn` during training and the model after the last
        epoch is written to `model_dir / fn`.

    Returns
    -------
    The object returned by `m.fit` (the training history).

    Notes
    -----
    Validation uses the notebook-level `test_data` and `class_test`.
    """
    # Build the checkpoint only when a file name is available: with the
    # default `fn=None`, `saved_model_dir / fn` would raise a TypeError.
    callbacks = []
    if fn is not None:
        callbacks.append(
            ModelCheckpoint(str(saved_model_dir / fn),
                            monitor='val_loss',
                            verbose=0,  # verbosity - 0 or 1
                            save_best_only=True,
                            mode='auto')
        )

    m.compile(loss='categorical_crossentropy',
              optimizer=Adam(learning_rate=lr),
              metrics=['accuracy'])

    details = m.fit(tr_data, tr_class,
                    batch_size=bs,
                    epochs=epochs,
                    shuffle=True,
                    validation_data=(test_data, class_test),
                    callbacks=callbacks,
                    verbose=1)

    if fn is not None:
        m.save(str(model_dir / fn))
    return details

#### Train the model
Now comes the real work: training the model. The batch size is 32, which means that the model looks at 32 images each time and adjusts its weights accordingly. The number of epochs is 6, which means that all images are viewed 6 times.

This step can take a long time, depending on the computing power of your computer.

In [None]:
%%time

# Training settings: images per weight update and passes over the training set.
batch_size, epochs = 32, 6

model_details = train_model(
    m=model,
    tr_data=training_data_ext,
    tr_class=class_train_ext,
    bs=batch_size,
    epochs=epochs,
    fn=keras_name,
)

#### Check the training
These graphs show how good the model was after each epoch (one epoch is one pass over all training images).

In [None]:
# Plot the training results held in `model_details` (the return value of
# `train_model`).  The second argument is a path inside the `plots` folder;
# presumably `plot_model` saves the figure there - confirm in `hulp_functies`.
plot_model(model_details, plot_dir / "model_details.png")

#### Check the result
We can see how good the model is on the test pictures.

In [None]:
# Predicted class scores for every test image; the class with the highest
# score is the predicted label.
class_pred = model.predict(test_data)
labels_pred = np.argmax(class_pred, axis=1)
print(f'accuracy on test set: {100*np.mean(labels_pred==test_labels):.2f}%')

# Show a random selection of test images with their true and predicted labels.
# `rd.sample` raises a ValueError when asked for more items than exist, so the
# selection is capped at the size of the test set.
# NOTE(review): assumes `plot_images` accepts fewer than 12 images - confirm.
n_preview = min(12, Ntest)
idx = rd.sample(range(Ntest), n_preview)
plot_images(test_data[idx], test_labels[idx], classes, labels_pred[idx])

#### Convert the model to a tensorflow lite model
In order to use the model in an Android app, it must be converted to a slightly different format. The weights in the model get slightly fewer bits, making the model a little smaller.

In [None]:
# Convert the trained Keras model to TensorFlow Lite and store the result next
# to the Keras model in the `models` folder.
# NOTE(review): no `converter.optimizations` are set here, so the conversion
# presumably keeps the original weight precision - confirm whether the
# reduced-precision model described in the text above was intended.
lite_path = model_dir / lite_name

converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

lite_path.write_bytes(tflite_model)

#### Add metadata 
The Android app still needs to know a few things about the model, such as what the input format is and how many types of objects it should be able to distinguish. This information comes in a separate json file. You can also enter your own name here as the author.

In [None]:
# Everything `generate_metadata` is given about the model: the author, the
# input size, the class names, and the locations of the label file, the
# TensorFlow Lite model, the exported model and the json file.
MODEL_INFO = dict(
    author='<your own name between quotes>',
    size=size,
    classes=classes,
    label_fn=str(object_fn),
    label_path=object_fn.resolve(),
    model_path=(model_dir / lite_name).resolve(),
    export_model_path=(export_model_dir / lite_name).resolve(),
    json_fn=export_model_dir / json_name,
)
generate_metadata(MODEL_INFO)