## Simple CNN

Localize the quilt data for the fruit dataset 

In [1]:
from comet_ml import Experiment, ExistingExperiment
import t4

In [None]:
t4.Package.install(
    "quilt/open_fruit", 
    registry="s3://quilt-example", 
    dest="./data")

In [2]:
experiment = Experiment(project_name="comet-quilt-example")

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/ceceshao1/comet-quilt-example/d0625f21ccb34e83b9a338092016cf83



In [3]:
import numpy as np  
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img  
from keras.models import Sequential  
from keras.layers import Dropout, Flatten, Dense  
from keras import applications  
from keras.utils.np_utils import to_categorical  
import matplotlib.pyplot as plt  
import math  

Using TensorFlow backend.


In [4]:
# dimensions of our images
img_width, img_height = 128, 128

# set parameters
batch_size = 16
num_classes = 16
epochs = 50
activation = 'relu'
lr = 0.01
min_delta=0
patience=4
dropout=0.2

train_samples = 27593
validation_samples = 6889

In [5]:
params={'batch_size':batch_size,
        'epochs':epochs,
        'min_delta':min_delta,
        'patience':patience,
        'learning_rate':lr,
        'dropout':dropout
}

experiment.log_parameters(params)

### Data Preprocessing + Augmentation

Since our quilt data package does not have pre-defined training and validation subdirectories, we can use the `validation_split` argument for the ImageDataGenerator(). Here we split the data into 80/20.

In [6]:
from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    rescale=1/255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2 #set validation split
)

test_datagen = ImageDataGenerator(
    rescale=1/255
)

train_generator = train_datagen.flow_from_directory(
    './data/quilt/open_fruit/images_cropped',
    target_size=(128, 128),
    shuffle=True,
    seed=20,
    batch_size = batch_size,
    class_mode='categorical',
    subset="training"
)

validation_generator = train_datagen.flow_from_directory(
    './data/quilt/open_fruit/images_cropped',
    target_size=(128, 128),
    seed=20,
    batch_size=batch_size,
    class_mode='categorical',
    subset = "validation"
)

Found 27593 images belonging to 16 classes.
Found 6889 images belonging to 16 classes.


In [8]:
# check the classes and their index
train_generator.class_indices

{'Apple': 0,
 'Banana': 1,
 'Cantaloupe': 2,
 'Common_fig': 3,
 'Grape': 4,
 'Grapefruit': 5,
 'Lemon': 6,
 'Mango': 7,
 'Orange': 8,
 'Peach': 9,
 'Pear': 10,
 'Pineapple': 11,
 'Pomegranate': 12,
 'Strawberry': 13,
 'Tomato': 14,
 'Watermelon': 15}

### Define Model Architecture - Simple CNN

In [7]:
import numpy as np 
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.losses import categorical_crossentropy
from keras.callbacks import EarlyStopping
from keras.optimizers import RMSprop
from keras.callbacks import Callback
from sklearn.metrics import roc_auc_score


model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), input_shape=(128, 128, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(units=64, activation='relu'))
model.add(Dropout(dropout))
model.add(Dense(units=16, activation='softmax')) #Output Layer - Activation Function Softmax(to clasify multiple classes)

model.compile(loss=categorical_crossentropy,
              optimizer=RMSprop(lr=lr),
              metrics=['accuracy'])

In [8]:
import pathlib

sample_size = len(list(pathlib.Path('./data/quilt/open_fruit/images_cropped').rglob('./*')))

In [9]:
sample_size

34498

In [10]:
model.fit_generator(
    train_generator,
    steps_per_epoch=sample_size // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=validation_samples // batch_size,
    callbacks=[EarlyStopping(monitor='val_loss', min_delta=min_delta, patience=patience)]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50


<keras.callbacks.History at 0x7fd9ee269748>

In [15]:
experiment.end()

COMET INFO: ----------------------------
COMET INFO: Comet.ml Experiment Summary:
COMET INFO:   Metrics:
COMET INFO:                           acc: 0.21885238771782306
COMET INFO:                         batch: 2150
COMET INFO:                     epoch_end: 4
COMET INFO:                          loss: 12.590611759420165
COMET INFO:                          size: 16
COMET INFO:                          step: 10780
COMET INFO:         sys.gpu.0.free_memory: 150929408
COMET INFO:     sys.gpu.0.gpu_utilization: 0
COMET INFO:        sys.gpu.0.total_memory: 11996954624
COMET INFO:         sys.gpu.0.used_memory: 11846025216
COMET INFO:                       val_acc: 0.22122093023255815
COMET INFO:                      val_loss: 12.552435361507326
COMET INFO:   Other:
COMET INFO:     trainable_params: 832560
COMET INFO: ----------------------------
COMET INFO: Uploading stats to Comet before program termination (may take several seconds)


In [17]:
model.save_weights('simple_cnn.h5')

In [18]:
experiment.log_asset(file_path='/simple_cnn.h5', file_name='simple_cnn.h5')