In [6]:
import os
import random
import shutil

import numpy as np
import tensorflow as tf
from keras_preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
%matplotlib inline


# Sign language digits with MobileNets

## Comparing MobileNets to other models
To give a quick size comparison: the full VGG16 network takes about 553 megabytes on disk, while one of the largest MobileNets currently available takes about 17 megabytes. That is a huge difference, especially when you think about deploying a model to a mobile app or running it in the browser.

| Model      | Size     | Parameters    |
| ---------- | -------- | ------------- |
| VGG16      | `553 MB` | `138,000,000` |
| MobileNet  | `17 MB`  | `4,200,000`   |

This vast size difference is due to the number of parameters within these networks. For example, VGG16 has 138 million parameters, while the 17 megabyte MobileNet we just mentioned has only 4.2 million.

### GPU setup


In [2]:
# Report the GPU situation and enable memory growth, so TensorFlow allocates
# GPU memory as needed instead of reserving it all up front.
print("Built with CUDA: ", tf.test.is_built_with_cuda())
gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPU available", len(gpus))
try:
    # Iterate instead of indexing gpus[0]: the list is empty on a CPU-only
    # machine, and memory growth has to be set consistently on every GPU.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Memory growth can only be changed before the GPUs are initialised, so
    # re-running this cell after a model was built ends up here.
    print(err)

Built with CUDA:  True
Num GPU available 1


## MobileNet model building

In [3]:
# Instantiate MobileNet with its default constructor arguments.
mn_model = tf.keras.applications.MobileNet()

## Image preparation


In [3]:

def prepare_image(file_path):
    """Load an image file and turn it into a one-image batch for MobileNet.

    The image is loaded with a target size of 224x224, converted to an array,
    given a leading batch axis and passed through MobileNet's
    `preprocess_input`.

    Args:
        file_path: Path of the image file to load.

    Returns:
        The value returned by `preprocess_input` for the one-image batch.
    """
    pil_image = image.load_img(file_path, target_size=(224, 224))
    pixels = image.img_to_array(pil_image)
    # Prepend a batch axis: the array goes from `shape` to `(1, *shape)`.
    single_image_batch = pixels[np.newaxis, ...]
    return tf.keras.applications.mobilenet.preprocess_input(single_image_batch)


In [31]:
# Dataset locations, all relative to the current working directory.
full_dir = 'data/full'    # original images, one sub-folder per class
split_dir = 'data/split'  # root of the train/valid/test split
train_dir, valid_dir, test_dir = (
    f'{split_dir}/{subset}' for subset in ('train', 'valid', 'test')
)

def split_images(source_dir=None, output_dir=None, n_valid=30, n_test=5, seed=None):
    """Copy the dataset and carve per-class validation and test subsets out of it.

    The source dataset (one sub-folder per class) is copied to the training
    folder; then, for every class, `n_valid` randomly chosen images are moved
    to the validation folder and `n_test` of the remaining ones to the test
    folder. The source dataset itself is never modified.

    If the training folder already exists the split is assumed to be done and
    nothing is touched. Delete the output folders to force a new split.

    Args:
        source_dir: Folder holding the original images. Defaults to the
            module-level `full_dir`.
        output_dir: Folder that will contain `train`, `valid` and `test`.
            Defaults to the module-level `train_dir`, `valid_dir` and
            `test_dir`.
        n_valid: Number of images per class moved to the validation set.
        n_test: Number of images per class moved to the test set.
        seed: Optional seed that makes the random selection reproducible.
            When None the global `random` state is used.

    Raises:
        ValueError: If a class folder holds fewer than `n_valid + n_test`
            images. The check runs before anything is copied.
    """
    if source_dir is None:
        source_dir = full_dir
    if output_dir is None:
        train_out, valid_out, test_out = train_dir, valid_dir, test_dir
    else:
        train_out, valid_out, test_out = (
            f'{output_dir}/{subset}' for subset in ('train', 'valid', 'test')
        )

    # os.getcwd() is what the relative paths are resolved against, and unlike
    # IPython's `_dh` it is also available outside a notebook kernel.
    print('Siamo nella cartella:', os.getcwd())

    # Check the folder this function actually creates; otherwise a second run
    # would not notice the existing split and would fail inside copytree.
    if os.path.isdir(train_out):
        print('Cartella con immagini suddivise trovata, salto la creazione')
        return

    print('Cartella con immagini suddivise non trovata, vengono create le immagini.')

    # Only sub-folders are classes; stray files in the dataset root are ignored.
    class_names = sorted(
        entry for entry in os.listdir(source_dir)
        if os.path.isdir(f'{source_dir}/{entry}')
    )

    # Validate before writing anything, so a class that is too small cannot
    # leave a half-built split behind.
    required = n_valid + n_test
    for class_name in class_names:
        available = len(os.listdir(f'{source_dir}/{class_name}'))
        if available < required:
            raise ValueError(
                f"Class '{class_name}' has {available} images, "
                f"at least {required} are required"
            )

    rng = random.Random(seed) if seed is not None else random

    # Work on a copy of the original images so the source dataset stays intact.
    shutil.copytree(source_dir, train_out)
    os.makedirs(valid_out, exist_ok=True)
    os.makedirs(test_out, exist_ok=True)

    for class_name in class_names:
        class_train_dir = f'{train_out}/{class_name}'
        # Validation images are drawn first; test images come from what is left.
        for subset_out, count in ((valid_out, n_valid), (test_out, n_test)):
            class_subset_dir = f'{subset_out}/{class_name}'
            os.makedirs(class_subset_dir, exist_ok=True)
            # Sort the listing so a given seed always selects the same files.
            candidates = sorted(os.listdir(class_train_dir))
            for sample in rng.sample(candidates, count):
                shutil.move(f'{class_train_dir}/{sample}', f'{class_subset_dir}/{sample}')

# Run the split defined above; it prints the working folder and whether the
# split images were created or already present.
split_images()

Siamo nella cartella: C:\Workspace\keras-deeplizard\sign-language
Cartella con immagini suddivise non trovata, vengono create le immagini.


In [37]:
# Settings shared by the three directory iterators below.
preprocessing_function = tf.keras.applications.mobilenet.preprocess_input
target_size = (224,224)
batch_size = 10


def _directory_batches(directory, **flow_kwargs):
    """Build a directory iterator over `directory` with the shared settings.

    Any extra keyword argument is forwarded unchanged to
    `flow_from_directory`.
    """
    generator = ImageDataGenerator(preprocessing_function=preprocessing_function)
    return generator.flow_from_directory(
        directory=directory,
        target_size=target_size,
        batch_size=batch_size,
        **flow_kwargs,
    )


train_batches = _directory_batches(train_dir)
valid_batches = _directory_batches(valid_dir)
# The test batches must not be shuffled: we need to access them in a known
# order when plotting the confusion matrix.
test_batches = _directory_batches(test_dir, shuffle=False)

# Sanity checks on the split: image count per subset and number of classes.
assert (train_batches.n, valid_batches.n, test_batches.n) == (1712, 300, 50)
assert all(
    batches.num_classes == 10
    for batches in (train_batches, valid_batches, test_batches)
)

Found 1712 images belonging to 10 classes.
Found 300 images belonging to 10 classes.
Found 50 images belonging to 10 classes.
