# Model

In [15]:
import numpy as np
import os
import csv

from glob import glob
from keras.applications.resnet50 import ResNet50
from keras.layers import Flatten, Dense, Dropout
from keras.models import Model
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras import optimizers

In [16]:
DATA_DIR = os.path.join('..', 'data')
DATASET_DIR = os.path.join(DATA_DIR, 'datasets', '1')

WIDTH = 300
HEIGHT = 300
CLASSES = 3

In [17]:
TRAIN_DIR = os.path.join(DATASET_DIR, 'train')
VAL_DIR = os.path.join(DATASET_DIR, 'test')

train_files = glob(os.path.join(TRAIN_DIR, '*.jpg'))
val_files = glob(os.path.join(VAL_DIR, '*.jpg'))

n_train_samples = len(train_files)
n_val_samples = len(val_files)

n_train_samples, n_val_samples

(39905, 9977)

In [18]:
model = ResNet50(weights='imagenet',
                 include_top=False,
                 input_shape=(WIDTH, HEIGHT, 3))

In [19]:
TRAINABLE_LAYERS = 30
FC_SIZE = 1024
DROPOUT = 0.5

In [20]:
x = model.output
x = Flatten()(x)
x = Dense(FC_SIZE, activation='relu')(x)
x = Dropout(DROPOUT)(x)
predictions = Dense(CLASSES, activation='sigmoid')(x)

In [21]:
LR = 0.0001
MOMENTUM = 0.9

In [22]:
model_final = Model(inputs=model.input, outputs=predictions)

model_final.compile(loss='categorical_crossentropy',
                    optimizer=optimizers.SGD(lr=LR, momentum=MOMENTUM),
                    metrics=['accuracy'])

## Data augmentation

In [23]:
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator

In [24]:
BATCH_SIZE = 80

In [25]:
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    horizontal_flip=True,
    fill_mode="nearest",
    zoom_range=0.3,
    width_shift_range=0.3,
    height_shift_range=0.3,
    rotation_range=30)

test_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input)

In [26]:
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    classes=[''],
    target_size=(HEIGHT, WIDTH),
    batch_size=BATCH_SIZE,
    class_mode="categorical")

validation_generator = test_datagen.flow_from_directory(
    VAL_DIR,
    classes=[''],    
    target_size=(HEIGHT, WIDTH),
    class_mode="categorical")

Found 39905 images belonging to 1 classes.
Found 9977 images belonging to 1 classes.


In [27]:
def parse_label_row(row):
    labels = list(row.values())[1:]
    labels = [int(label) for label in labels]
    return labels

def read_labels_dict(dataset_dir):
    with open(os.path.join(dataset_dir, 'labels.csv')) as csvfile:
        reader = csv.DictReader(csvfile)
        return {row['img']: parse_label_row(row) for row in reader}

In [28]:
labels_dict = read_labels_dict(DATASET_DIR)

In [29]:
def build_data_generator(gen, labels_dict):
    for x in gen:
        idx = (gen.batch_index - 1) * gen.batch_size
        filenames = gen.filenames[idx : idx + gen.batch_size]
        labels = [labels_dict[fname] for fname in filenames]
        yield x, np.array(labels)

In [30]:
train_datagen = build_data_generator(train_generator, labels_dict)
val_datagen = build_data_generator(validation_generator, labels_dict)

## Training

In [32]:
EPOCHS = 30

In [33]:
checkpoint = ModelCheckpoint("checkpoint.h5",
    monitor = 'val_acc',
    verbose = 1,
    save_best_only = True,
    save_weights_only = False,
    mode = 'auto',
    period = 1)

early = EarlyStopping(
    monitor = 'val_acc',
    min_delta = 0,
    patience = 10,
    verbose = 1,
    mode = 'auto')

reduce_lr = ReduceLROnPlateau(
    monitor = 'val_loss',
    factor = 0.2,
    patience = 5,
    min_lr = 0.001)

history = model_final.fit_generator(
    train_datagen,
    steps_per_epoch = n_train_samples // BATCH_SIZE,
    epochs = EPOCHS, 
    validation_data = val_datagen,
    validation_steps = n_val_samples // BATCH_SIZE,
    callbacks = [checkpoint, early, reduce_lr])

model_final.save('model.h5')

Epoch 1/30


AttributeError: 'tuple' object has no attribute 'shape'