In [1]:
import keras
from keras.applications.xception import Xception
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense

import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.cross_validation import cross_val_score
from sklearn.metrics import accuracy_score
import xgboost
from sklearn.ensemble import RandomForestClassifier

import os
import shutil

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
batch_size = 100
train_len = 2000
test_len = 800

train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

## VGG16

In [None]:
train_generator = train_datagen.flow_from_directory(
    '/home/garipovazamat/cat_dog_classification/train',
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False)

classes = []
vgg16_features = []
vgg16_base_model = VGG16()
vgg16_model = Model(inputs=vgg16_base_model.input, outputs=vgg16_base_model.get_layer('fc1').output)
batches= 0
for x_batch, y_batch in train_generator:
    batches += 1
    predicted = vgg16_model.predict_on_batch(x_batch)
    vgg16_features.append(predicted)
    classes.append(y_batch)
    if batches >= train_len / batch_size:
        break

In [None]:
vgg16_features = np.vstack(vgg16_features)
classes = np.hstack(classes)

## Xception

In [8]:
train_generator = train_datagen.flow_from_directory(
    '/home/garipovazamat/cat_dog_classification/train',
    target_size=(299, 299),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False)

xception_features = []
classes = []
xception_base_model = Xception()
xception_model = Model(inputs=xception_base_model.input, outputs=xception_base_model.get_layer('avg_pool').output)

batches = 0
for x_batch, y_batch in train_generator:
    batches += 1
    predicted = xception_model.predict_on_batch(x_batch)
    xception_features.append(predicted)
    
    classes.append(y_batch)
    if batches >= train_len / batch_size:
        break

Found 2000 images belonging to 2 classes.


In [9]:
xception_features = np.vstack(xception_features)
classes = np.hstack(classes)

# Поиск аномалий в обучаюшей выборке

In [None]:
from sklearn.svm import OneClassSVM

oc_svm = OneClassSVM(gamma=10, nu=0.01)
oc_svm.fit(vgg16_features)
labels = oc_svm.predict(vgg16_features)
(labels==1).mean()

# Обучение и оценка

In [None]:
print 'vgg16: '
sgd = SGDClassifier(max_iter=1000, penalty='l2', random_state=42)
scores = cross_val_score(sgd, vgg16_features, classes, scoring='accuracy', n_jobs=-1)
print scores.mean()

In [14]:
print 'xception: '
sgd = SGDClassifier(max_iter=1000, penalty='l2', random_state=42)
scores = cross_val_score(sgd, xception_features, classes, scoring='accuracy', n_jobs=-1)
print scores.mean()

xception: 
0.8660007312701924


In [None]:
import xgboost

print 'vgg16: '
xgb = xgboost.XGBClassifier(max_depth=5, n_estimators=200, random_state=42)
scores = cross_val_score(xgb, vgg16_features, classes, scoring='accuracy', n_jobs=-1)
print scores.mean()

In [17]:
print 'xception: '
xgb = xgboost.XGBClassifier(max_depth=5, n_estimators=200, random_state=42)
scores = cross_val_score(xgb, xception_features, classes, scoring='accuracy', n_jobs=-1)
print scores.mean()

xception: 


  if diff:
  if diff:
  if diff:


0.897500794207381


In [38]:
xgb.fit(xception_features, classes)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=5, min_child_weight=1, missing=None, n_estimators=200,
       n_jobs=1, nthread=None, objective='binary:logistic',
       random_state=42, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
       seed=None, silent=True, subsample=1)

## проверка на тестовой выборке

In [39]:
test_generator = train_datagen.flow_from_directory(
    '/home/garipovazamat/cat_dog_classification/test',
    target_size=(299, 299),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False)

batches = 0
xception_validation_features = []
validation_classes = []
for x_batch, y_batch in test_generator:
    batches += 1
    predicted = xception_model.predict_on_batch(x_batch)
    xception_validation_features.append(predicted)
    validation_classes.append(y_batch)
    if batches >= test_len / batch_size:
        break

xception_validation_features = np.vstack(xception_validation_features)
validation_classes = np.hstack(validation_classes)

Found 800 images belonging to 2 classes.


In [40]:
preds = xgb.predict(xception_validation_features)
print accuracy_score(validation_classes, preds)

0.91375


  if diff:
