In [7]:
import numpy as np
import os.path
import pandas as pd
from sklearn.preprocessing.label import LabelBinarizer
from keras.applications import VGG16
from keras.layers import Dense, Flatten, Dropout
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from keras.optimizers import SGD
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Using feature extraction for image classification

## Extract features

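Extract features from the training and validation images with the pretrained VGG16 convolutional base, and cache them as `.npy` files so the extraction only has to run once.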

In [4]:
# Images are streamed from disk in batches, with pixel values multiplied by 1/255.
batch_size = 16
datagen = ImageDataGenerator(rescale=1. / 255)

# Options shared by both directory iterators:
#   - class_mode=None: the iterators yield image batches only (no labels);
#   - shuffle=False: keeps a fixed order, so the extracted feature rows can be
#     paired with `generator.classes` afterwards.
flow_options = dict(
    target_size=(150, 150),
    batch_size=batch_size,
    class_mode=None,
    shuffle=False)

train_generator = datagen.flow_from_directory('data/train', **flow_options)
validation_generator = datagen.flow_from_directory('data/validation', **flow_options)


def extract_feature(filename, data_generator, extractor):
    """Extract features for every image of a generator, caching them on disk.

    If ``filename`` already exists, the cached array is loaded and returned
    without running the extractor. The cache is not validated against the
    generator, so delete the file whenever the data set or the extractor
    changes.

    Args:
        filename: Path of the ``.npy`` cache file.
        data_generator: Batch iterator exposing ``filenames`` and
            ``batch_size`` (e.g. the result of ``flow_from_directory``).
        extractor: Model exposing ``predict_generator``.

    Returns:
        The array produced by ``extractor.predict_generator`` (or the cached
        copy loaded from ``filename``).
    """
    if os.path.exists(filename):
        return np.load(filename)

    # `steps` is a number of *batches*, not a number of images. Passing the
    # image count would make the (endlessly looping) generator be consumed
    # roughly `batch_size` times over, yielding duplicated feature rows that
    # no longer match `data_generator.classes`.
    n_samples = len(data_generator.filenames)
    batch_size = data_generator.batch_size
    steps = (n_samples + batch_size - 1) // batch_size  # ceil(n_samples / batch_size)

    feature = extractor.predict_generator(
        data_generator,
        steps=steps,
        verbose=1)

    np.save(filename, feature)
    return feature


# VGG16 with ImageNet weights and without its top (fully connected) layers,
# used only as a fixed feature extractor for 150x150 RGB inputs.
vgg16 = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(150, 150, 3))

# Features are computed once and then re-read from the given .npy cache files.
features_train = extract_feature("feature_train.npy", train_generator, vgg16)
features_valid = extract_feature("feature_valid.npy", validation_generator, vgg16)

# One-hot encoded labels over the 120 classes, in generator order.
train_labels, valid_labels = (
    to_categorical(generator.classes, num_classes=120)
    for generator in (train_generator, validation_generator))

Found 8185 images belonging to 120 classes.
Found 2037 images belonging to 120 classes.


## Create classifier

In [5]:
# Flatten each feature map (x, y, z) into a single vector so the features can
# be fed to a scikit-learn estimator, which expects 2-D input.
nsamples, x, y, z = features_train.shape
flat_dim = x * y * z

x_train = features_train.reshape((nsamples, flat_dim))
x_valid = features_valid.reshape((len(features_valid), flat_dim))

# Multinomial logistic regression trained on the integer class indices.
clf = LogisticRegression(solver='lbfgs', multi_class='multinomial')
clf.fit(x_train, train_generator.classes)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='multinomial',
          n_jobs=1, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)

In [9]:
# Score the classifier on the held-out validation features.
predict = clf.predict(x_valid)
valid_accuracy = accuracy_score(validation_generator.classes, predict)
print('Validation VGG Accuracy {}'.format(valid_accuracy))

Validation VGG Accuracy 0.22385861561119294
