In [1]:
# -*- coding: utf-8 -*-
from keras.preprocessing import image
from keras.utils import to_categorical
from keras.applications.vgg16 import preprocess_input

import os
import numpy as np
import pandas as pd
from tqdm import tqdm
%matplotlib inline

CATEGORIES = ['alaska', 'bichons', 'french_bulldog', 'chihuahua', 'golden', 'husky', 'labrador', 'papillon', 'samoyed', 'shepherd',
              'teddy', 'basset_hound_dog', 'bull_terrier_dog', 'chinese_sharpei', 'chow',  'cocker_spaniel', 'corgi_dog', 'dachshund_dog',
              'dalmatian_dog', 'doberman', 'eskimo_dog', 'great_greyhound_dog', 'italian_greyhound', 'japanese_spitz_dog', 'lhasa', 'maltese',
              'miniature_pinscher', 'miniature_schnauzer', 'newfoundland', 'pekingese_dog', 'pomeranian', 'poodle', 'rough_collie_dog',
              'saint_bernard', 'shetland_sheepdog', 'shiba_inu_dog', 'shih_tzu_dog', 'tibetan_mastiff', 'wolf_dog']

SAMPLE_PER_CATEGORY = 350
INPUT_SIZE = 224

Using TensorFlow backend.


In [2]:
# read the image
def read_img(data_dir, filepath, size):
    img = image.load_img(os.path.join(data_dir, filepath), target_size=size)
    img = image.img_to_array(img)
    return img

In [None]:
from collections import Counter
# read the training data
def get_train_val_data(data_dir, train_path):
    SEED = 1991
    train_dir = os.path.join(data_dir, train_path)

    train = []
    for category_id, category in enumerate(CATEGORIES):
        for file in os.listdir(os.path.join(train_dir, category)):
            train.append([train_path+'/{}/{}'.format(category, file), category_id, category])
    train = pd.DataFrame(train, columns=['file', 'category_id', 'category'])
    # sampling
    train = pd.concat([train[train['category'] == c][:SAMPLE_PER_CATEGORY] for c in CATEGORIES])
    train = train.sample(frac=1)
    train.index = np.arange(len(train))

    print(train)

    # read in the image array
    x_train = np.zeros((len(train), INPUT_SIZE, INPUT_SIZE, 3), dtype='float32')
    for i, file in tqdm(enumerate(train['file'])):
        img = read_img(data_dir, file, (INPUT_SIZE, INPUT_SIZE))
        x = preprocess_input(np.expand_dims(img.copy(), axis=0))
        x_train[i] = x

    # split train and validation
    np.random.seed(seed=SEED)
    rnd = np.random.random(len(train))
    train_idx = rnd < 0.7
    valid_idx = rnd >= 0.7

    Xtr = x_train[train_idx]
    Xv = x_train[valid_idx]
    ytr = train.loc[train_idx, 'category_id'].values
    yv = train.loc[valid_idx, 'category_id'].values

    train_dist = Counter(ytr)
    val_dist = Counter(yv)

    for k in train_dist:
        print('Train # {} has {} samples'.format(k, train_dist[k]))
        print('Val # {} has {} samples'.format(k, val_dist[k]))
        print('------------split line---------------------------')

    # to categorical
    ytr = to_categorical(ytr, num_classes=len(CATEGORIES))
    yv = to_categorical(yv, num_classes=len(CATEGORIES))

    num_classes = len(CATEGORIES)

    return Xtr, ytr, Xv, yv, num_classes

In [4]:
data_dir = './input'
train_path = 'train_aug'

Xtr, ytr, Xv, yv, num_classes = get_train_val_data(data_dir, train_path)
print("Categories:{}".format(num_classes))
print('Xtr.shape:{}, Xv.shape:{}'.format(Xtr.shape, Xv.shape))

0it [00:00, ?it/s]

48it [00:00, 476.74it/s]

                                                    file  category_id  \
0      train_aug/shiba_inu_dog/shiba_inu_dog_0_7457.jpeg           35   
1      train_aug/chinese_sharpei/chinese_sharpei_0_19...           13   
2      train_aug/shetland_sheepdog/shetland_sheepdog_...           34   
3                      train_aug/teddy/teddy_0_3700.jpeg           10   
4      train_aug/great_greyhound_dog/great_greyhound_...           21   
5              train_aug/chihuahua/chihuahua_0_2790.jpeg            3   
6      train_aug/rough_collie_dog/rough_collie_dog_0_...           32   
7      train_aug/shetland_sheepdog/shetland_sheepdog_...           34   
8      train_aug/chinese_sharpei/chinese_sharpei_0_92...           13   
9            train_aug/pomeranian/pomeranian_0_6688.jpeg           30   
10                     train_aug/husky/husky_0_2702.jpeg            5   
11     train_aug/tibetan_mastiff/tibetan_mastiff_0_51...           37   
12                 train_aug/samoyed/samoyed_0_2973

99it [00:00, 490.14it/s]

137it [00:00, 452.80it/s]

174it [00:00, 430.82it/s]

227it [00:00, 443.31it/s]

274it [00:00, 447.06it/s]

314it [00:00, 437.21it/s]

362it [00:00, 440.18it/s]

410it [00:00, 444.45it/s]

454it [00:01, 443.89it/s]

498it [00:01, 442.47it/s]

543it [00:01, 442.07it/s]

596it [00:01, 448.35it/s]

647it [00:01, 452.46it/s]

695it [00:01, 450.64it/s]

752it [00:01, 457.52it/s]

804it [00:01, 460.64it/s]

854it [00:01, 459.25it/s]

902it [00:01, 454.05it/s]

947it [00:02, 452.79it/s]

992it [00:02, 451.64it/s]

1040it [00:02, 452.54it/s]

1088it [00:02, 453.57it/s]

1146it [00:02, 458.53it/s]

1196it [00:02, 459.21it/s]

1245it [00:02, 460.15it/s]

1294it [00:02, 459.49it/s]

1343it [00:02, 451.82it/s]

1386it [00:03, 448.54it/s]

1427it [00:03, 446.21it/s]

1480it [00:03, 448.71it/s]

1524it [00:03, 448.08it/s]

1568it [00:03, 446.59it/s]

1611it [00:03, 444.67it/s]

1662it [00:03, 446.27it/s]

1713it [00:03, 447.59it/s]

1759it [00:03, 445.93it/s]

1803it [00:04, 445.67it/s]

1855it [00:04, 447.14it/s]

1901it [00:04, 439.77it/s]

1950it [00:04, 440.78it/s]

2003it [00:04, 442.62it/s]

2054it [00:04, 443.92it/s]

2108it [00:04, 445.87it/s]

2157it [00:04, 445.61it/s]

2205it [00:04, 444.56it/s]

2261it [00:05, 446.75it/s]

2313it [00:05, 448.16it/s]

2363it [00:05, 446.47it/s]

2415it [00:05, 447.73it/s]

2463it [00:05, 446.31it/s]

2512it [00:05, 446.85it/s]

2558it [00:05, 445.60it/s]

2602it [00:05, 439.40it/s]

2652it [00:06, 440.39it/s]

2694it [00:06, 440.04it/s]

2743it [00:06, 440.68it/s]

2787it [00:06, 440.43it/s]

2831it [00:06, 440.21it/s]

2875it [00:06, 439.27it/s]

2921it [00:06, 439.45it/s]

2964it [00:06, 439.06it/s]

3007it [00:06, 438.93it/s]

3052it [00:06, 439.03it/s]

3098it [00:07, 439.32it/s]

3142it [00:07, 436.01it/s]

3190it [00:07, 436.47it/s]

3235it [00:07, 436.60it/s]

3288it [00:07, 437.60it/s]

3345it [00:07, 439.26it/s]

3404it [00:07, 441.11it/s]

3456it [00:07, 441.00it/s]

3505it [00:07, 440.49it/s]

3552it [00:08, 440.23it/s]

3598it [00:08, 439.63it/s]

3642it [00:08, 439.09it/s]

3699it [00:08, 440.63it/s]

3748it [00:08, 441.19it/s]

3805it [00:08, 442.67it/s]

3856it [00:08, 440.64it/s]

3903it [00:08, 440.71it/s]

3956it [00:08, 441.11it/s]

4006it [00:09, 441.50it/s]

4053it [00:09, 441.70it/s]

4115it [00:09, 443.51it/s]

4166it [00:09, 443.47it/s]

4217it [00:09, 444.13it/s]

4267it [00:09, 443.91it/s]

4315it [00:09, 443.10it/s]

4364it [00:09, 443.55it/s]

4410it [00:10, 440.12it/s]

4456it [00:10, 440.24it/s]

4500it [00:10, 440.22it/s]

4553it [00:10, 441.03it/s]

4603it [00:10, 441.51it/s]

4650it [00:10, 441.56it/s]

4696it [00:10, 441.35it/s]

4741it [00:10, 439.54it/s]

4787it [00:10, 439.70it/s]

4848it [00:10, 441.21it/s]

4896it [00:11, 441.11it/s]

4943it [00:11, 440.87it/s]

5001it [00:11, 442.08it/s]

5061it [00:11, 443.44it/s]

5114it [00:11, 442.47it/s]

5162it [00:11, 442.69it/s]

5210it [00:11, 441.82it/s]

5266it [00:11, 442.73it/s]

5324it [00:12, 443.13it/s]

5372it [00:12, 443.00it/s]

5419it [00:12, 443.16it/s]

5467it [00:12, 443.43it/s]

5515it [00:12, 443.71it/s]

5562it [00:12, 443.87it/s]

5619it [00:12, 444.72it/s]

5670it [00:12, 445.19it/s]

5722it [00:12, 445.73it/s]

5773it [00:12, 446.10it/s]

5823it [00:13, 444.94it/s]

5873it [00:13, 445.34it/s]

5920it [00:13, 444.96it/s]

5974it [00:13, 445.67it/s]

6024it [00:13, 445.67it/s]

6071it [00:13, 444.82it/s]

6121it [00:13, 445.22it/s]

6175it [00:13, 445.85it/s]

6223it [00:13, 446.07it/s]

6271it [00:14, 446.20it/s]

6325it [00:14, 446.84it/s]

6375it [00:14, 446.14it/s]

6421it [00:14, 445.89it/s]

6468it [00:14, 446.04it/s]

6521it [00:14, 446.58it/s]

6573it [00:14, 447.04it/s]

6622it [00:14, 446.92it/s]

6669it [00:14, 445.53it/s]

6725it [00:15, 446.28it/s]

6772it [00:15, 445.66it/s]

6816it [00:15, 445.40it/s]

6870it [00:15, 445.98it/s]

6921it [00:15, 446.33it/s]

6975it [00:15, 446.91it/s]

7025it [00:15, 447.15it/s]

7074it [00:15, 446.74it/s]

7121it [00:15, 446.51it/s]

7171it [00:16, 446.81it/s]

7222it [00:16, 447.19it/s]

7270it [00:16, 447.15it/s]

7326it [00:16, 447.83it/s]

7376it [00:16, 446.68it/s]

7425it [00:16, 446.93it/s]

7471it [00:16, 446.97it/s]

7526it [00:16, 447.57it/s]

7575it [00:16, 447.64it/s]

7624it [00:17, 447.84it/s]

7672it [00:17, 446.12it/s]

7717it [00:17, 446.11it/s]

7769it [00:17, 446.48it/s]

7815it [00:17, 446.49it/s]

7862it [00:17, 446.60it/s]

7912it [00:17, 446.87it/s]

7959it [00:17, 445.60it/s]

8008it [00:17, 445.85it/s]

8053it [00:18, 445.80it/s]

8097it [00:18, 445.38it/s]

8140it [00:18, 445.07it/s]

8191it [00:18, 445.37it/s]

8239it [00:18, 445.53it/s]

8284it [00:18, 445.52it/s]

8332it [00:18, 444.51it/s]

8389it [00:18, 445.07it/s]

8442it [00:18, 445.51it/s]

8490it [00:19, 445.08it/s]

8535it [00:19, 444.61it/s]

8584it [00:19, 444.64it/s]

8636it [00:19, 445.02it/s]

8689it [00:19, 444.78it/s]

8738it [00:19, 444.94it/s]

8784it [00:19, 444.35it/s]

8827it [00:19, 444.27it/s]

8870it [00:20, 442.84it/s]

8921it [00:20, 443.14it/s]

8973it [00:20, 443.50it/s]

9018it [00:20, 443.04it/s]

9066it [00:20, 443.22it/s]

9116it [00:20, 443.47it/s]

9162it [00:20, 443.02it/s]

9210it [00:20, 442.79it/s]

9268it [00:20, 443.39it/s]

9316it [00:21, 443.54it/s]

9373it [00:21, 444.12it/s]

9423it [00:21, 443.97it/s]

9471it [00:21, 443.96it/s]

9519it [00:21, 444.09it/s]

9569it [00:21, 444.31it/s]

9623it [00:21, 444.74it/s]

9681it [00:21, 445.33it/s]

9733it [00:21, 444.62it/s]

9780it [00:22, 444.50it/s]

9830it [00:22, 444.72it/s]

9884it [00:22, 445.13it/s]

9933it [00:22, 445.06it/s]

9980it [00:22, 445.14it/s]

10027it [00:22, 444.84it/s]

10076it [00:22, 445.03it/s]

10122it [00:22, 445.04it/s]

10168it [00:22, 445.10it/s]

10214it [00:22, 444.12it/s]

10262it [00:23, 444.24it/s]

10305it [00:23, 444.01it/s]

10347it [00:23, 443.62it/s]

10398it [00:23, 443.89it/s]

10442it [00:23, 443.74it/s]

10485it [00:23, 443.28it/s]

10533it [00:23, 443.41it/s]

10583it [00:23, 443.64it/s]

10628it [00:23, 443.66it/s]

10673it [00:24, 443.37it/s]

10719it [00:24, 443.13it/s]

10774it [00:24, 443.56it/s]

10820it [00:24, 442.93it/s]

10863it [00:24, 442.04it/s]

10903it [00:24, 441.87it/s]

10961it [00:24, 442.41it/s]

11012it [00:24, 442.62it/s]

11058it [00:24, 442.43it/s]

11118it [00:25, 443.01it/s]

11167it [00:25, 443.15it/s]

11216it [00:25, 442.30it/s]

11259it [00:25, 442.09it/s]

11301it [00:25, 441.71it/s]

11347it [00:25, 441.78it/s]

11389it [00:25, 441.59it/s]

11442it [00:25, 441.91it/s]

11487it [00:26, 441.51it/s]

11541it [00:26, 441.87it/s]

11588it [00:26, 441.95it/s]

11644it [00:26, 442.38it/s]

11693it [00:26, 442.03it/s]

11739it [00:26, 441.96it/s]

11789it [00:26, 442.12it/s]

11844it [00:26, 442.52it/s]

11894it [00:26, 442.69it/s]

11943it [00:26, 442.43it/s]

11989it [00:27, 442.42it/s]

12035it [00:27, 442.41it/s]

12080it [00:27, 442.34it/s]

12127it [00:27, 442.19it/s]

12178it [00:27, 442.43it/s]

12236it [00:27, 442.91it/s]

12285it [00:27, 443.07it/s]

12334it [00:27, 442.89it/s]

12381it [00:27, 442.92it/s]

12428it [00:28, 442.99it/s]

12475it [00:28, 442.54it/s]

12524it [00:28, 442.67it/s]

12578it [00:28, 443.00it/s]

12626it [00:28, 442.95it/s]

12672it [00:28, 442.48it/s]

12715it [00:28, 442.41it/s]

12759it [00:28, 442.40it/s]

12805it [00:28, 442.44it/s]

12849it [00:29, 442.39it/s]

12894it [00:29, 442.42it/s]

12938it [00:29, 442.14it/s]

12985it [00:29, 442.22it/s]

13029it [00:29, 441.82it/s]

13082it [00:29, 442.10it/s]

13135it [00:29, 442.38it/s]

13191it [00:29, 442.74it/s]

13245it [00:29, 443.06it/s]

13296it [00:30, 442.50it/s]

13346it [00:30, 442.68it/s]

13401it [00:30, 443.02it/s]

13451it [00:30, 442.99it/s]

13504it [00:30, 443.27it/s]

13553it [00:30, 443.34it/s]

13602it [00:30, 443.29it/s]

13649it [00:30, 443.34it/s]

13650it [00:30, 443.34it/s]




Train # 0 has 250 samples
Val # 0 has 100 samples
------------split line---------------------------
Train # 1 has 255 samples
Val # 1 has 95 samples
------------split line---------------------------
Train # 2 has 235 samples
Val # 2 has 115 samples
------------split line---------------------------
Train # 3 has 240 samples
Val # 3 has 110 samples
------------split line---------------------------
Train # 4 has 236 samples
Val # 4 has 114 samples
------------split line---------------------------
Train # 5 has 246 samples
Val # 5 has 104 samples
------------split line---------------------------
Train # 6 has 249 samples
Val # 6 has 101 samples
------------split line---------------------------
Train # 7 has 247 samples
Val # 7 has 103 samples
------------split line---------------------------
Train # 8 has 249 samples
Val # 8 has 101 samples
------------split line---------------------------
Train # 9 has 246 samples
Val # 9 has 104 samples
------------split line---------------------------
T