In [1]:
import os
from keras.preprocessing import image

In [None]:
# Root folder holding the dataset
base_dir = '/tf/data'

# One sub-folder per split: training, validation and test
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, split) for split in ('train', 'validation', 'test')
)

# Inside every split there is one sub-folder per class.
# Training pictures (coffee / noncoffee)
train_coffee_dir, train_noncoffee_dir = (
    os.path.join(train_dir, label) for label in ('coffee', 'noncoffee')
)

# Validation pictures (coffee / noncoffee)
validation_coffee_dir, validation_noncoffee_dir = (
    os.path.join(validation_dir, label) for label in ('coffee', 'noncoffee')
)

# Test pictures (coffee / noncoffee)
test_coffee_dir, test_noncoffee_dir = (
    os.path.join(test_dir, label) for label in ('coffee', 'noncoffee')
)

In [None]:
# Report how many files each split/class folder contains.
image_dirs = [
    ('train coffee', train_coffee_dir),
    ('train noncoffee', train_noncoffee_dir),
    ('validation coffee', validation_coffee_dir),
    ('validation noncoffee', validation_noncoffee_dir),
    ('test coffee', test_coffee_dir),
    ('test noncoffee', test_noncoffee_dir),
]
for description, directory in image_dirs:
    print(f'total {description} images:', len(os.listdir(directory)))

# os.listdir() returns entries in arbitrary order; sort so that the same
# sample picture is inspected on every run.
sample_name = sorted(os.listdir(train_coffee_dir))[0]
img = image.load_img(os.path.join(train_coffee_dir, sample_name))
print(img.size)  # PIL reports size as (width, height)

# Leave the image as the cell's last expression so the notebook renders it
# inline; img.show() would try to launch an external viewer instead.
img

This is a *balanced* binary-classification problem, so classification `accuracy` is an appropriate measure of success.

# Preprocessing
- Read the picture files
- Decode the JPEG content to RGB grid of pixels
- Convert these into floating-point tensors
- Rescale the pixel values (between 0 and 255) to the [0, 1] interval (neural networks prefer to deal with small input values)

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Images to tensors: both generators only rescale pixel values from
# [0, 255] to [0, 1]; no augmentation is applied.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Keras expects target_size as (height, width), whereas PIL's Image.size is
# (width, height). Use an explicit constant that matches the network's
# 64x64 input instead of the size of whichever sample image was loaded.
image_size = (64, 64)
batch_size = 20

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=image_size,  # Resizes all images to 64x64
    batch_size=batch_size,
    class_mode='binary')     # Binary-crossentropy loss => needs binary labels

validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=image_size,  # Resizes all images to 64x64
    batch_size=batch_size,
    class_mode='binary')


# First - baseline
Naively train a new small convnet on the training samples, *without regularization*, to set a baseline for what can be achieved.

In [None]:
from keras import layers
from keras import models
from tensorflow.keras import optimizers

In [None]:
# Baseline convnet: three Conv2D + MaxPooling2D stages followed by a dense
# classifier with a single sigmoid output unit (binary classification).
model = models.Sequential([
    # Stage 1 - also declares the 64x64 RGB input
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    layers.MaxPooling2D((2, 2)),
    # Stage 2
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    # Stage 3
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    # Classifier head
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

rmsprop = optimizers.RMSprop(learning_rate=1e-4)
model.compile(
    optimizer=rmsprop,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Train the baseline for 50 epochs, evaluating on the validation set after
# each epoch.
# len(generator) is the number of batches in one full pass over the data
# (samples / batch_size, rounded up), so the step counts stay correct if the
# dataset or the batch size changes instead of relying on hard-coded values.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=50,
    validation_data=validation_generator,
    validation_steps=len(validation_generator))

Saving the model:

In [None]:
model_path = '/tf/data/saved-models/brazilian_coffee_1.h5'

# Saving fails if the target folder is missing, so create it first
# (a no-op when it already exists).
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)

# Plot loss and accuracy

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Per-epoch metrics recorded by model.fit()
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Number epochs from 1 on the x-axis
epochs = range(1, len(acc) + 1)

# Accuracy curves: dots = training, solid line = validation
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epochs, acc, 'bo', label='Training acc')
ax_acc.plot(epochs, val_acc, 'b', label='Validation acc')
ax_acc.set(title='Training and validation accuracy',
           xlabel='Epoch', ylabel='Accuracy')
ax_acc.legend()

# Loss curves on a separate figure, same marker convention
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epochs, loss, 'bo', label='Training loss')
ax_loss.plot(epochs, val_loss, 'b', label='Validation loss')
ax_loss.set(title='Training and validation loss',
            xlabel='Epoch', ylabel='Loss')
ax_loss.legend()

plt.show()