In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import numpy as np
import glob
import cv2

## Model Training / Validation

In [2]:
# define our inputs

# data related inputs
train_data_dir = '../data/train'
validation_data_dir = '../data/val'
nb_train_samples = len(glob.glob('../data/train/chart/*'))+len(glob.glob('../data/train/meme/*'))
nb_validation_samples = len(glob.glob('../data/val/chart/*'))+len(glob.glob('../data/val/meme/*'))

# training related inputs
epochs = 5
batch_size = 32
img_size = 250

In [3]:
# definte data generators

# can define data augmentations here, we will scale images values between 0 and 1
train_datagen = ImageDataGenerator(rescale=1/255)
val_datagen = ImageDataGenerator(rescale=1/255)

# this is a generator that will read pictures found in
# subfolers of 'data/train', and indefinitely generate
# batches of augmented image data
train_generator = train_datagen.flow_from_directory(
        '../data/train',  # this is the target directory
        batch_size=batch_size,
        target_size=(img_size, img_size),
        class_mode='binary')

# this is a similar generator, for validation data
validation_generator = val_datagen.flow_from_directory(
        '../data/val',
        batch_size=batch_size,
        target_size=(img_size, img_size),
        class_mode='binary')

Found 1000 images belonging to 2 classes.
Found 100 images belonging to 2 classes.


In [4]:
# now we define our model

model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(img_size, img_size, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [5]:
# view model summary
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 248, 248, 32)      896       
_________________________________________________________________
activation (Activation)      (None, 248, 248, 32)      0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 124, 124, 32)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 122, 122, 32)      9248      
_________________________________________________________________
activation_1 (Activation)    (None, 122, 122, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 61, 61, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 59, 59, 64)        18496     
__________

In [6]:
# fit model
model.fit_generator(
        train_generator,
        steps_per_epoch=nb_train_samples // batch_size,
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=nb_validation_samples // batch_size)

Instructions for updating:
Use tf.cast instead.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x134cc2438>

In [7]:
# this is a similar generator, for test data
test_datagen = ImageDataGenerator(rescale=1/255)
test_generator = test_datagen.flow_from_directory(
        '../data/test',
        batch_size=batch_size,
        target_size=(img_size, img_size),
        class_mode='binary')

Found 100 images belonging to 2 classes.


In [8]:
# evaluate the model on the holdout test set
test_loss, test_acc = model.evaluate_generator(test_generator)
print(f"Test Set Loss: {test_loss}")
print(f"Test Set Accuracy: {test_acc}")

Test Set Loss: 0.2255668118596077
Test Set Accuracy: 0.8700000047683716


In [9]:
# inference on signle image

img = cv2.imread("../data/awesome_chart.png", 1)
img = cv2.resize(img, (250, 250))
img = img / 255

# need to add additional dimention because model expects a batch
img = np.expand_dims(img, axis=0)

model.predict(img)

array([[0.85162807]], dtype=float32)

## Save Model For Later

In [11]:
# save the model for service
model.save('../api/cv_chart_model.h5')