In [1]:
# -*- coding: utf-8 -*-
from keras.preprocessing import image
from keras.utils import to_categorical
from keras.applications.vgg16 import preprocess_input

import os
import numpy as np
import pandas as pd
from tqdm import tqdm
%matplotlib inline

# Class labels. get_train_data() enumerates this list, so a label's position
# here is the integer category_id assigned to it, and each label is also used
# as the name of the sub-directory that is listed for that class.
CATEGORIES = ['alaska', 'bichons', 'french_bulldog', 'chihuahua', 'golden', 'husky', 'labrador', 'papillon', 'samoyed', 'shepherd',
              'teddy', 'basset_hound_dog', 'bull_terrier_dog', 'chinese_sharpei', 'chow',  'cocker_spaniel', 'corgi_dog', 'dachshund_dog',
              'dalmatian_dog', 'doberman', 'eskimo_dog', 'great_greyhound_dog', 'italian_greyhound', 'japanese_spitz_dog', 'lhasa', 'maltese',
              'miniature_pinscher', 'miniature_schnauzer', 'newfoundland', 'pekingese_dog', 'pomeranian', 'poodle', 'rough_collie_dog',
              'saint_bernard', 'shetland_sheepdog', 'shiba_inu_dog', 'shih_tzu_dog', 'tibetan_mastiff', 'wolf_dog']

# Upper bound on the number of files kept per category when sampling.
SAMPLE_PER_CATEGORY = 350
# Side length (pixels) images are resized to; used for both height and width.
INPUT_SIZE = 224
# Seed passed as random_state where reproducibility is needed
# (e.g. the StratifiedKFold split).
SEED = 1991

Using TensorFlow backend.


In [2]:
# read the image
def read_img(data_dir, filepath, size):
    """Load a single image file and return it as an array.

    Args:
        data_dir: Base directory that `filepath` is joined onto.
        filepath: Path of the image relative to `data_dir`.
        size: Forwarded to `image.load_img` as `target_size`.

    Returns:
        The result of `image.img_to_array` applied to the loaded image.
    """
    full_path = os.path.join(data_dir, filepath)
    return image.img_to_array(image.load_img(full_path, target_size=size))

In [3]:
# read the training data
def get_train_data(data_dir, train_path):
    """Load the training images and their integer labels.

    The directory `data_dir/train_path` is expected to contain one
    sub-directory per entry of CATEGORIES. At most SAMPLE_PER_CATEGORY files
    are kept per category, the rows are shuffled, and every image is read,
    resized to INPUT_SIZE x INPUT_SIZE and passed through the VGG16
    `preprocess_input`.

    Args:
        data_dir: Base directory of the dataset.
        train_path: Name of the training folder, relative to `data_dir`.

    Returns:
        A tuple `(x_train, Y, num_classes)` where
        - `x_train` is a float32 array of shape
          (n_images, INPUT_SIZE, INPUT_SIZE, 3),
        - `Y` is a pandas Series of integer category ids, indexed 0..n-1 in
          the same order as `x_train`,
        - `num_classes` is `len(CATEGORIES)`.

    Raises:
        OSError: if a category sub-directory cannot be listed (raised by
            `os.listdir`).
    """
    train_dir = os.path.join(data_dir, train_path)

    records = []
    for category_id, category in enumerate(CATEGORIES):
        # os.listdir returns entries in arbitrary order; sort them so that the
        # "first SAMPLE_PER_CATEGORY files" selection below is reproducible.
        for file in sorted(os.listdir(os.path.join(train_dir, category))):
            records.append([train_path+'/{}/{}'.format(category, file), category_id, category])
    train = pd.DataFrame(records, columns=['file', 'category_id', 'category'])

    # Cap every category at SAMPLE_PER_CATEGORY rows.
    train = pd.concat([train[train['category'] == c][:SAMPLE_PER_CATEGORY] for c in CATEGORIES])
    # Shuffle with the notebook-wide seed so repeated runs give the same order,
    # then renumber rows 0..n-1 so positional and label indexing coincide.
    train = train.sample(frac=1, random_state=SEED).reset_index(drop=True)

    X = train['file']
    Y = train['category_id']

    # Read every image into one pre-allocated float32 tensor.
    x_train = np.zeros((len(X), INPUT_SIZE, INPUT_SIZE, 3), dtype='float32')
    # Wrapping X (not the enumerate object) lets tqdm see len(X) and show a
    # real progress bar with a total.
    for i, file in enumerate(tqdm(X)):
        img = read_img(data_dir, file, (INPUT_SIZE, INPUT_SIZE))
        # preprocess_input is applied to a batch of one; take element 0 back
        # out explicitly instead of relying on implicit axis squeezing.
        x = preprocess_input(np.expand_dims(img.copy(), axis=0))
        x_train[i] = x[0]

    num_classes = len(CATEGORIES)

    return x_train, Y, num_classes

In [4]:
# Dataset location: images are read from <data_dir>/<train_path>/<category>/.
data_dir = './input'
train_path = 'train1'

# X: preprocessed image tensor, Y: integer category ids (pandas Series),
# num_classes: number of entries in CATEGORIES.
X, Y, num_classes = get_train_data(data_dir, train_path)

  'to RGBA images')
3923it [00:09, 398.86it/s]


In [8]:
from sklearn.model_selection import StratifiedKFold
from pretrained_models import vgg16_model

# Stratified 3-fold cross-validation harness (n_splits is the fold count).
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=SEED)

model = vgg16_model(num_classes)

cvscores = []
for train, test in kfold.split(X, Y):
    # kfold.split yields positional row indices, so select with .iloc rather
    # than Y[train], which is label-based and only works when the Series index
    # happens to be exactly 0..n-1.
    print(type(Y.iloc[train]))
    print(Y.iloc[train].index)
    # Training/evaluation is still disabled. Sketch of the intended loop:
    # NOTE(review): `model` is built once before the loop; if fit() is enabled
    # as sketched, weights would presumably carry over between folds - build a
    # fresh model per fold before turning this on.
#     y_train_fold = to_categorical(Y.iloc[train], num_classes=len(CATEGORIES))
#     model.fit(X[train], y_train_fold, epochs=10, batch_size=32, verbose=1, shuffle=True)
#     # evaluate the model
#     y_test_fold = to_categorical(Y.iloc[test], num_classes=len(CATEGORIES))
#     scores = model.evaluate(X[test], y_test_fold, verbose=0)
#     print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
#     cvscores.append(scores[1] * 100)
# print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

VGG16 # layers 20
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0  

In [6]:
# One-hot encode the integer category ids in Y, with one column per entry
# of CATEGORIES.
# NOTE(review): `y` is not referenced by any other cell visible here - confirm
# it is still needed.
y = to_categorical(Y, num_classes=len(CATEGORIES))