In [1]:
import os
import shutil
import numpy as np

In [15]:
def split_dataset_into_test_and_train_sets(all_data_dir, training_data_dir, testing_data_dir, testing_data_pct):
    # Recreate testing and training directories
    if testing_data_dir.count('/') > 1:
        shutil.rmtree(testing_data_dir, ignore_errors=True)
        os.makedirs(testing_data_dir)
        print("Successfully cleaned directory " + testing_data_dir)
    else:
        print("Refusing to delete testing data directory " + testing_data_dir + " as we prevent you from doing stupid things!")

    if training_data_dir.count('/') > 1:
        shutil.rmtree(training_data_dir, ignore_errors=True)
        os.makedirs(training_data_dir)
        print("Successfully cleaned directory " + training_data_dir)
    else:
        print("Refusing to delete testing data directory " + training_data_dir + " as we prevent you from doing stupid things!")

    num_training_files = 0
    num_testing_files = 0

    for subdir, dirs, files in os.walk(all_data_dir):
        category_name = os.path.basename(subdir)

        # Don't create a subdirectory for the root directory
        print(category_name + " vs " + os.path.basename(all_data_dir))
        if category_name == os.path.basename(all_data_dir):
            continue

        training_data_category_dir = training_data_dir + '/' + category_name
        testing_data_category_dir = testing_data_dir + '/' + category_name

        if not os.path.exists(training_data_category_dir):
            os.mkdir(training_data_category_dir)

        if not os.path.exists(testing_data_category_dir):
            os.mkdir(testing_data_category_dir)

        for file in files:
            input_file = os.path.join(subdir, file)
            if np.random.rand(1) < testing_data_pct:
                shutil.copy(input_file, testing_data_dir + '/' + category_name + '/' + file)
                num_testing_files += 1
            else:
                shutil.copy(input_file, training_data_dir + '/' + category_name + '/' + file)
                num_training_files += 1

    print("Processed " + str(num_training_files) + " training files.")
    print("Processed " + str(num_testing_files) + " testing files.")

In [16]:
split_dataset_into_test_and_train_sets("/dataset/dataset-resized", 
                                       "/dataset/dataset-resized-train", 
                                       "/dataset/dataset-resized-test",
                                      0.2)

Successfully cleaned directory /dataset/dataset-resized-test
Successfully cleaned directory /dataset/dataset-resized-train
dataset-resized vs dataset-resized
cardboard vs dataset-resized
glass vs dataset-resized
metal vs dataset-resized
paper vs dataset-resized
plastic vs dataset-resized
trash vs dataset-resized
Processed 2013 training files.
Processed 514 testing files.


In [2]:
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras.models import Model

Using TensorFlow backend.


In [3]:
# path to the model weights files.
#weights_path = '../keras/examples/vgg16_weights.h5'
top_model_weights_path = 'fc_model.h5'
# dimensions of our images.
img_width, img_height = 150, 150

In [4]:
train_data_dir = '/dataset/dataset-resized-train'
validation_data_dir = '/dataset/dataset-resized-test'
#nb_train_samples = 2000
#nb_validation_samples = 800
epochs = 50
batch_size = 16

In [19]:
#
#
# record the bottleneck features using image data generators

batch_size = 16

datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(150, 150),
        batch_size=batch_size,
        class_mode=None,  # this means our generator will only yield batches of data, no labels
        shuffle=False)  # our data will be in order, so all first 1000 images will be cats, then 1000 dogs
# the predict_generator method returns the output of a model, given
# a generator that yields batches of numpy data
bottleneck_features_train = model.predict_generator(generator, 2000)
# save the output as a Numpy array
np.save(open('bottleneck_features_train.npy', 'w'), bottleneck_features_train)

generator = datagen.flow_from_directory(
        validation_data_dir,
        target_size=(150, 150),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)
bottleneck_features_validation = model.predict_generator(generator, 800)
np.save(open('bottleneck_features_validation.npy', 'w'), bottleneck_features_validation)

Found 2013 images belonging to 6 classes.


TypeError: write() argument must be str, not bytes

In [5]:
#build the VGG16 network
base_model = applications.VGG16(weights='imagenet', include_top=False, input_shape=(150,150,3))

Instructions for updating:
If using Keras pass *_constraint arguments to layers.



In [7]:
# build a classifier model to put on top of the convolutional model
top_model = Sequential()
top_model.add(Flatten(input_shape=base_model.output_shape[1:]))
top_model.add(Dense(256, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(1, activation='sigmoid'))

In [None]:
# note that it is necessary to start with a fully-trained
# classifier, including the top classifier,
# in order to successfully do fine-tuning

#top_model.load_weights(top_model_weights_path)

In [8]:
# add the model on top of the convolutional base
#model.add(top_model)
model = Model(inputs=base_model.input, outputs=top_model(base_model.output))

In [9]:
# set the first 25 layers (up to the last conv block)
# to non-trainable (weights will not be updated)
for layer in model.layers[:15]:
    layer.trainable = False

In [10]:
# compile the model with a SGD/momentum optimizer
# and a very slow learning rate.
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [11]:
# prepare data augmentation configuration
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

In [12]:
test_datagen = ImageDataGenerator(rescale=1. / 255)

In [13]:
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary')

Found 2013 images belonging to 6 classes.


In [14]:
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary')

Found 514 images belonging to 6 classes.


In [15]:
nb_train_samples = 2013
nb_validation_samples = 514

In [17]:
top_model.compile()

TypeError: compile() missing 1 required positional argument: 'optimizer'

In [16]:
# fine-tune the model
H = model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size,
    verbose=2)


Epoch 1/50
 - 31s - loss: nan - accuracy: 0.2023 - val_loss: nan - val_accuracy: 0.1855
Epoch 2/50
 - 17s - loss: nan - accuracy: 0.2018 - val_loss: nan - val_accuracy: 0.1847
Epoch 3/50
 - 16s - loss: nan - accuracy: 0.1998 - val_loss: nan - val_accuracy: 0.1867
Epoch 4/50
 - 16s - loss: nan - accuracy: 0.2028 - val_loss: nan - val_accuracy: 0.1807
Epoch 5/50
 - 16s - loss: nan - accuracy: 0.2003 - val_loss: nan - val_accuracy: 0.1908
Epoch 6/50
 - 16s - loss: nan - accuracy: 0.2018 - val_loss: nan - val_accuracy: 0.1727
Epoch 7/50
 - 17s - loss: nan - accuracy: 0.1993 - val_loss: nan - val_accuracy: 0.1948
Epoch 8/50
 - 17s - loss: nan - accuracy: 0.1993 - val_loss: nan - val_accuracy: 0.1847
Epoch 9/50
 - 17s - loss: nan - accuracy: 0.2053 - val_loss: nan - val_accuracy: 0.1827
Epoch 10/50
 - 17s - loss: nan - accuracy: 0.2023 - val_loss: nan - val_accuracy: 0.1847
Epoch 11/50
 - 16s - loss: nan - accuracy: 0.2015 - val_loss: nan - val_accuracy: 0.1827
Epoch 12/50
 - 16s - loss: na

KeyboardInterrupt: 