In [1]:
from contextlib import suppress
import numpy as np
import os
import warnings
from zipfile import ZipFile

from PIL import Image
from skimage.io import imread, imsave
from skimage.transform import resize

from keras.preprocessing.image import ImageDataGenerator
from keras import applications
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense

from sklearn.metrics import accuracy_score, classification_report

Using TensorFlow backend.


# Data Preparation

### Download the [UC Merced Land Use dataset](http://vision.ucmerced.edu/datasets/landuse.html)

### Extract image files from the zipped archive, if necessary

In [2]:
# Make sure the local data directory is present; an existing one is left untouched
with suppress(FileExistsError):
    os.mkdir('data')

# Location the archive's image folders are unpacked to
source_dir = os.path.join('data', 'UCMerced_LandUse', 'Images')

# The zipped dataset must first be downloaded from http://vision.ucmerced.edu/datasets/landuse.html
# Extraction is skipped when the images directory already exists
images_already_extracted = os.path.isdir(source_dir)
if not images_already_extracted:
    with ZipFile('UCMerced_LandUse.zip') as archive:
        archive.extractall('data')

In [3]:
# Fix NumPy's global random state so the train/validate/test shuffle is repeatable
SEED = 2017
np.random.seed(SEED)

### Randomly assign each image to train, validate, or test folder, segregated by class name

In [4]:
"""
Create image directory hierarchy that looks like this:
./data/flow/
            train/
                  agriculture/
                  airplane/
                  ...
            validate/
                  agriculture/
                  airplane/
                  ...
            test/
                  agriculture/
                  airplane/
                  ...
"""

# Collect class names from directory names in './data/UCMerced_LandUse/Images/'.
# Sorted so the order is deterministic, and restricted to directories so that stray
# files (e.g. '.DS_Store') are not counted as classes.
class_names = sorted(
    name for name in os.listdir(source_dir)
    if os.path.isdir(os.path.join(source_dir, name))
)

# Create path to image "flow" base directory
flow_base = os.path.join('data', 'flow')

# Number of images per class assigned to each split
split_counts = [('train', 80), ('validate', 10), ('test', 10)]

# Create pathnames to train/validate/test subdirectories
target_dirs = {target: os.path.join(flow_base, target) for target, _ in split_counts}

# The split is only generated once; an existing ./data/flow directory is reused as-is.
if not os.path.isdir(flow_base):

    # Make new directories: ./data/flow/<train, validate, test>/<class_name>/
    os.mkdir(flow_base)
    for target_dir in target_dirs.values():
        os.mkdir(target_dir)
        for class_name in class_names:
            os.mkdir(os.path.join(target_dir, class_name))

    # Copy images from ./data/UCMerced_LandUse/Images to ./data/flow/<train, validate, test> directories

    # catch_warnings restores the previous warning filters on exit, so the
    # low-contrast UserWarning from skimage.io.imsave is silenced only here.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', UserWarning)

        for class_name in class_names:
            class_source_dir = os.path.join(source_dir, class_name)

            # Sort before shuffling: os.listdir order is filesystem-dependent, so the
            # seeded permutation is only reproducible when it starts from a fixed order.
            filenames = sorted(
                name for name in os.listdir(class_source_dir)
                if os.path.isfile(os.path.join(class_source_dir, name))
            )
            filenames = np.random.permutation(filenames)

            # Hand out consecutive, non-overlapping slices of the shuffled names
            start = 0
            for target, count in split_counts:
                target_dir = os.path.join(target_dirs[target], class_name)
                for filename in filenames[start:start + count]:
                    image = imread(os.path.join(class_source_dir, filename))
                    basename, _ = os.path.splitext(filename)
                    # Convert TIF to PNG to work with Keras ImageDataGenerator.flow_from_directory
                    target_filename = os.path.join(target_dir, basename + '.png')
                    imsave(target_filename, image)
                start += count

# Get training set bottleneck features from pretrained CNN

In [5]:
# NOTE(review): this module-level generator is not referenced by any later cell shown here;
# extract_bottleneck_features() constructs its own ImageDataGenerator. Candidate for removal.
image_data_gen = ImageDataGenerator()

In [7]:
def extract_bottleneck_features(model, dataset='train', batch_size=32, image_size=(256, 256)):
    """
    Extract bottleneck features for the input dataset (train/validate/test)
    by predicting on the convolutional portion only of a pretrained model.

    Inputs:
        model: Pre-trained deep learning model, excluding fully-connected top model
               e.g. applications.VGG16(include_top=False, weights='imagenet')
        dataset: string label for dataset image directory ['train', 'validate', 'test'];
                 must be a key of the module-level `target_dirs` mapping
        batch_size: number of images fed to the model per prediction batch
        image_size: (height, width) the images are resized to when loaded

    Return:
        Tuple (X, Y) of numpy arrays, one entry per image and in the same order:
        X holds the model's predictions (the bottleneck features), Y the labels
        yielded by the directory generator.

    Raises:
        ValueError: if the dataset directory contains no images.
    """
    # NOTE(review): keras.applications models ship their own preprocess_input
    # (Xception's scales pixels to [-1, 1]); rescale=1/255 yields [0, 1] — confirm
    # this is the intended preprocessing for the chosen model.
    image_data_gen = ImageDataGenerator(rescale=1.0/255)
    # shuffle=False keeps a fixed directory order so features and labels stay aligned
    image_generator = image_data_gen.flow_from_directory(target_dirs[dataset],
                                                         batch_size=batch_size,
                                                         target_size=image_size,
                                                         shuffle=False
                                                         )

    if image_generator.n == 0:
        raise ValueError(f'No images found for dataset "{dataset}" in {target_dirs[dataset]}')

    print(f'Generating "{dataset}" bottleneck predictions')

    image_count = 0
    X_batches = []
    Y_batches = []
    for batch_images, batch_labels in image_generator:
        X_batches.append(model.predict_on_batch(batch_images))
        Y_batches.append(batch_labels)
        image_count += batch_images.shape[0]
        # The generator does not stop on its own; break once every image was seen once
        if image_count >= image_generator.n:
            break

    X = np.concatenate(X_batches)
    Y = np.concatenate(Y_batches)

    print(f'   Features of shape {X.shape} extracted for model "{model.name}"')
    return X, Y

### Select a pre-trained model from the `keras.applications` module, e.g. Xception or VGG16

In [8]:
# Xception V1 is a smaller-footprint model with high accuracy.
# include_top=False leaves out the fully-connected classifier, so predictions are the
# convolutional ("bottleneck") features; weights='imagenet' loads ImageNet-pretrained weights.
pretrained_model = applications.Xception(include_top=False, weights='imagenet')

### Extract bottleneck features for each dataset: train, validate, and test

In [9]:
num_classes = len(class_names)

# X[dataset] holds the bottleneck features and Y[dataset] the matching labels
X, Y = {}, {}
for dataset in ('train', 'validate', 'test'):
    # Run the images of the "dataset" directory through the pretrained model
    features, labels = extract_bottleneck_features(pretrained_model, dataset)
    X[dataset] = features
    Y[dataset] = labels

Found 1680 images belonging to 21 classes.
Generating "train" bottleneck predictions
   Features of shape (1680, 8, 8, 2048) extracted for model xception
Found 210 images belonging to 21 classes.
Generating "validate" bottleneck predictions
   Features of shape (210, 8, 8, 2048) extracted for model xception
Found 210 images belonging to 21 classes.
Generating "test" bottleneck predictions
   Features of shape (210, 8, 8, 2048) extracted for model xception


# Train a fully-connected model using bottleneck features

In [10]:
def build_fully_connected(input_shape, num_classes):
    """
    Assemble the small classifier head that is trained or tested on the UC Merced dataset.

    Layers, in order: Flatten(input_shape) -> Dense(256, relu) -> Dropout(0.5)
    -> Dense(num_classes, softmax). The returned Sequential model is not compiled.
    """
    classifier = Sequential()
    classifier.add(Flatten(input_shape=input_shape))
    for layer in (Dense(256, activation='relu'),
                  Dropout(0.5),
                  Dense(num_classes, activation='softmax')):
        classifier.add(layer)
    return classifier

In [11]:
# Build the classifier head on top of the training bottleneck features
bottleneck_shape = X['train'].shape[1:]
model = build_fully_connected(input_shape=bottleneck_shape, num_classes=num_classes)

# Multi-class problem: categorical cross-entropy, optimized with Adam
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit on the training features and monitor the held-out validation split each epoch
model.fit(
    X['train'], Y['train'],
    validation_data=(X['validate'], Y['validate']),
    epochs=15,
    batch_size=64,
    verbose=2
)

# Keep the trained weights so they can be loaded for test dataset predictions
fit_model_weights = model.get_weights()

Train on 1680 samples, validate on 210 samples
Epoch 1/15
18s - loss: 7.6135 - acc: 0.4244 - val_loss: 5.1898 - val_acc: 0.6000
Epoch 2/15
16s - loss: 4.0794 - acc: 0.6470 - val_loss: 1.8674 - val_acc: 0.7810
Epoch 3/15
17s - loss: 1.6379 - acc: 0.7530 - val_loss: 0.9444 - val_acc: 0.8143
Epoch 4/15
17s - loss: 0.8143 - acc: 0.7905 - val_loss: 0.6842 - val_acc: 0.8333
Epoch 5/15
17s - loss: 0.5543 - acc: 0.8446 - val_loss: 0.7035 - val_acc: 0.8476
Epoch 6/15
16s - loss: 0.5313 - acc: 0.8750 - val_loss: 0.8170 - val_acc: 0.8333
Epoch 7/15
17s - loss: 0.4102 - acc: 0.8768 - val_loss: 0.6724 - val_acc: 0.8524
Epoch 8/15
17s - loss: 0.3877 - acc: 0.8982 - val_loss: 0.5629 - val_acc: 0.8762
Epoch 9/15
17s - loss: 0.3175 - acc: 0.9149 - val_loss: 0.6079 - val_acc: 0.8619
Epoch 10/15
17s - loss: 0.2748 - acc: 0.9226 - val_loss: 0.6349 - val_acc: 0.8667
Epoch 11/15
17s - loss: 0.2932 - acc: 0.9173 - val_loss: 0.5461 - val_acc: 0.8905
Epoch 12/15
17s - loss: 0.2580 - acc: 0.9292 - val_loss: 0.6

###  Validation accuracy ~ 88%. Validation loss has flattened; stop to avoid overfitting.

### Evaluate the model on the test images

In [12]:
# Start by building the same fully-connected model
model = build_fully_connected(input_shape=X['test'].shape[1:], num_classes=num_classes)

# Load weights from the model fit on the training data
model.set_weights(fit_model_weights)

# Predict class probabilities on the test images and keep the most probable class.
# Sequential.predict_classes is deprecated and absent from recent Keras releases;
# for a multi-class softmax output it computed exactly this argmax.
y_pred = np.argmax(model.predict(X['test'], verbose=0), axis=-1)

### Print classification results

In [13]:
# Convert the per-image label rows back to integer class indices.
# argmax always yields exactly one label per row, whereas np.nonzero(...)[1]
# only lines up with y_pred when every row has exactly one non-zero entry.
y_test = np.argmax(Y['test'], axis=1)
accuracy = accuracy_score(y_test, y_pred)
print(f'Model prediction accuracy: {accuracy:.3f}')
print(f'\nClassification report:\n {classification_report(y_test, y_pred)}')

Model predication accuracy: 0.890

Classification report:
              precision    recall  f1-score   support

          0       1.00      0.90      0.95        10
          1       1.00      1.00      1.00        10
          2       0.90      0.90      0.90        10
          3       1.00      1.00      1.00        10
          4       0.88      0.70      0.78        10
          5       1.00      1.00      1.00        10
          6       0.88      0.70      0.78        10
          7       1.00      0.90      0.95        10
          8       1.00      1.00      1.00        10
          9       0.91      1.00      0.95        10
         10       1.00      1.00      1.00        10
         11       0.53      0.90      0.67        10
         12       0.70      0.70      0.70        10
         13       0.75      0.90      0.82        10
         14       0.90      0.90      0.90        10
         15       1.00      0.90      0.95        10
         16       0.91      1.00      0

###  Test accuracy ~ 89% vs. ~ 88% validation accuracy. Impressive for such a small image dataset.