<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><span><a href="#Loading-the-data" data-toc-modified-id="Loading-the-data-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Loading the data</a></span><ul class="toc-item"><li><span><a href="#Copying-images-to-training,-validation,-and-test-directories" data-toc-modified-id="Copying-images-to-training,-validation,-and-test-directories-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Copying images to training, validation, and test directories</a></span></li></ul></li><li><span><a href="#Model" data-toc-modified-id="Model-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Model</a></span><ul class="toc-item"><li><span><a href="#Building-the-network" data-toc-modified-id="Building-the-network-2.1"><span class="toc-item-num">2.1&nbsp;&nbsp;</span>Building the network</a></span></li><li><span><a href="#Configure-the-model-for-training" data-toc-modified-id="Configure-the-model-for-training-2.2"><span class="toc-item-num">2.2&nbsp;&nbsp;</span>Configure the model for training</a></span></li></ul></li><li><span><a href="#Data-Preprocessing" data-toc-modified-id="Data-Preprocessing-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Data Preprocessing</a></span></li><li><span><a href="#Fitting-the-model-using-a-batch-generator" data-toc-modified-id="Fitting-the-model-using-a-batch-generator-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Fitting the model using a batch generator</a></span></li></ul></div>

# Loading the data

## Copying images to training, validation, and test directories

In [1]:
import os
import shutil

In [2]:
original_dataset_dir = '/home/abanihi/Documents/deep-data/kaggle/IEEE-camera-model/train'

In [3]:
base_dir = '/home/abanihi/Documents/deep-data/kaggle/IEEE-camera-model/camera_model_small'

In [4]:
# Check to see if base_dir exists, if not create it
if not os.path.exists(base_dir):
    os.mkdir(base_dir)

In [5]:
# Get camera list
cameras = os.listdir(original_dataset_dir)
cameras

['HTC-1-M7',
 'iPhone-6',
 'iPhone-4s',
 'Samsung-Galaxy-Note3',
 'Motorola-Nexus-6',
 'Motorola-Droid-Maxx',
 'LG-Nexus-5x',
 'Sony-NEX-7',
 'Samsung-Galaxy-S4',
 'Motorola-X']

In [6]:
train_dir = os.path.join(base_dir, 'train')
if not os.path.exists(train_dir):
    os.mkdir(train_dir)
validation_dir = os.path.join(base_dir, 'validation')
if not os.path.exists(validation_dir):
    os.mkdir(validation_dir)
test_dir = os.path.join(base_dir, 'test')
if not os.path.exists(test_dir):
    os.mkdir(test_dir)

In [7]:
for camera in cameras:
    train_camera_dir = os.path.join(train_dir, camera)
    if not os.path.exists(train_camera_dir):
        os.mkdir(train_camera_dir)

    validation_camera_dir = os.path.join(validation_dir, camera)
    if not os.path.exists(validation_camera_dir):
        os.mkdir(validation_camera_dir)

    test_camera_dir = os.path.join(test_dir, camera)
    if not os.path.exists(test_camera_dir):
        os.mkdir(test_camera_dir)

In [8]:
cameras[0]

'HTC-1-M7'

In [9]:
fnames = ['{}'.format(camera) for camera in cameras]
fnames

['HTC-1-M7',
 'iPhone-6',
 'iPhone-4s',
 'Samsung-Galaxy-Note3',
 'Motorola-Nexus-6',
 'Motorola-Droid-Maxx',
 'LG-Nexus-5x',
 'Sony-NEX-7',
 'Samsung-Galaxy-S4',
 'Motorola-X']

In [10]:
camera_photos_names_prefix = [('HTC-1-M7', 'HTC-1-M7'),
                              ('iPhone-4s', 'iP4s'),
                              ('iPhone-6', 'iP6'),
                              ('LG-Nexus-5x', 'LG5x'),
                              ('Motorola-Droid-Maxx', 'MotoMax'),
                              ('Motorola-Nexus-6', 'MotoNex6'),
                              ('Motorola-X', 'MotoX'),
                              ('Samsung-Galaxy-Note3', 'GalaxyN3'),
                              ('Samsung-Galaxy-S4', 'GalaxyS4'),
                              ('Sony-NEX-7', 'Nex7')]
len(camera_photos_names_prefix)

10

- Copy the first 50 images from each camera to each camera training dir

In [11]:
for camera in camera_photos_names_prefix:
    ext1 = '.jpg'
    ext2 = '.JPG'

    fnames = []
    for i in range(50):
        if camera[0] != 'Sony-NEX-7':

            fnames.append('(' + camera[1] + ')' + str(i + 1) + ext1)

        elif camera[0] == 'Sony-NEX-7':

            fnames.append('(' + camera[1] + ')' + str(i + 1) + ext2)

    for fname in fnames:
        src = os.path.join(os.path.join(
            original_dataset_dir, camera[0]), fname)
        dst = os.path.join(os.path.join(train_dir, camera[0]), fname)
        if not os.path.exists(dst):
            shutil.copy(src, dst)


- Copy the next 25 images from each camera to each camera validation dir

In [12]:
for camera in camera_photos_names_prefix:
    ext1 = '.jpg'
    ext2 = '.JPG'

    fnames = []
    for i in range(50, 75):
        if camera[0] != 'Sony-NEX-7':

            fnames.append('(' + camera[1] + ')' + str(i + 1) + ext1)

        elif camera[0] == 'Sony-NEX-7':

            fnames.append('(' + camera[1] + ')' + str(i + 1) + ext2)

    for fname in fnames:
        src = os.path.join(os.path.join(
            original_dataset_dir, camera[0]), fname)
        dst = os.path.join(os.path.join(validation_dir, camera[0]), fname)
        if not os.path.exists(dst):
            shutil.copy(src, dst)


- Copy the next 25 images from each camera to each camera test dir

In [20]:
for camera in camera_photos_names_prefix:
    ext1 = '.jpg'
    ext2 = '.JPG'

    fnames = []
    for i in range(75, 100):
        if camera[0] != 'Sony-NEX-7':

            fnames.append('(' + camera[1] + ')' + str(i + 1) + ext1)

        elif camera[0] == 'Sony-NEX-7':

            fnames.append('(' + camera[1] + ')' + str(i + 1) + ext2)

    for fname in fnames:
        src = os.path.join(os.path.join(
            original_dataset_dir, camera[0]), fname)
        dst = os.path.join(os.path.join(test_dir, camera[0]), fname)
        if not os.path.exists(dst):
            shutil.copy(src, dst)


As a sanity check, let's count how many pictures are in each training split (train/validation/test):

In [21]:
for camera in cameras:
    camera_dir = os.path.join(train_dir, camera)
    print('Total training {} images: {} '.format(camera, len(os.listdir(camera_dir))))

Total training HTC-1-M7 images: 50 
Total training iPhone-6 images: 50 
Total training iPhone-4s images: 50 
Total training Samsung-Galaxy-Note3 images: 50 
Total training Motorola-Nexus-6 images: 50 
Total training Motorola-Droid-Maxx images: 50 
Total training LG-Nexus-5x images: 50 
Total training Sony-NEX-7 images: 50 
Total training Samsung-Galaxy-S4 images: 50 
Total training Motorola-X images: 50 


In [22]:
for camera in cameras:
    camera_dir = os.path.join(validation_dir, camera)
    print('Total validation {} images: {} '.format(camera, len(os.listdir(camera_dir))))

Total validation HTC-1-M7 images: 25 
Total validation iPhone-6 images: 25 
Total validation iPhone-4s images: 25 
Total validation Samsung-Galaxy-Note3 images: 25 
Total validation Motorola-Nexus-6 images: 25 
Total validation Motorola-Droid-Maxx images: 25 
Total validation LG-Nexus-5x images: 25 
Total validation Sony-NEX-7 images: 25 
Total validation Samsung-Galaxy-S4 images: 25 
Total validation Motorola-X images: 25 


In [23]:
for camera in cameras:
    camera_dir = os.path.join(test_dir, camera)
    print('Total test {} images: {} '.format(camera, len(os.listdir(camera_dir))))

Total test HTC-1-M7 images: 25 
Total test iPhone-6 images: 25 
Total test iPhone-4s images: 25 
Total test Samsung-Galaxy-Note3 images: 25 
Total test Motorola-Nexus-6 images: 25 
Total test Motorola-Droid-Maxx images: 25 
Total test LG-Nexus-5x images: 25 
Total test Sony-NEX-7 images: 25 
Total test Samsung-Galaxy-S4 images: 25 
Total test Motorola-X images: 25 


# Model

## Building the network

In [24]:
from keras import layers
from keras import models

Using TensorFlow backend.


In [29]:
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

In [30]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 72, 72, 64)        18496     
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 36, 36, 64)        0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 34, 34, 128)       73856     
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 17, 17, 128)       0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 15, 15, 128)       147584    
__________

## Configure the model for training

In [31]:
from keras import optimizers

In [32]:
model.compile(optimizer=optimizers.RMSprop(lr=1e-4),
              loss = 'categorical_crossentropy',
               metrics=['acc'])


# Data Preprocessing

Currently, the data sits on a drive as JPEG files, so the steps for getting it into the network are roughly as follows:
1. Read the picture files
2. Decode the JPEG content to RGB grids of pixels
3. Convert these into floating point tensors
4. Rescale the pixel values to the ```[0-1]``` range

In [34]:
from keras.preprocessing.image import ImageDataGenerator

In [35]:
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

In [37]:
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=10,
    class_mode='categorical')

Found 500 images belonging to 10 classes.


In [38]:
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=10,
    class_mode='categorical')

Found 250 images belonging to 10 classes.


In [39]:
for data_batch, labels_batch in train_generator:
    print('data batch shape: ', data_batch.shape)
    print('labels batch shape: ', labels_batch.shape)
    break

data batch shape:  (10, 150, 150, 3)
labels batch shape:  (10, 10)


In [40]:
labels_batch[0]

array([ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.], dtype=float32)

# Fitting the model using a batch generator

In [41]:
%%time
history = model.fit_generator(
    train_generator,
    steps_per_epoch=50,
    epochs=15,
    validation_data=validation_generator,
    validation_steps=25)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15

KeyboardInterrupt: 

In [48]:
models_dir = os.path.join(os.curdir, 'saved_models')
models_dir

'./saved_models'

In [49]:
if not os.path.exists(models_dir):
    os.mkdir(models_dir)

In [47]:
os.listdir(os.curdir)

['01-Training-from-scratch.ipynb', '.ipynb_checkpoints']