In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
import shutil

import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, ImageDataGenerator
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPooling2D, Dropout, Dense, Flatten

In [27]:
BATCH_SIZE = int(os.getenv('BATCHSIZE', 64))
EPOCHS = int(os.getenv('EPOCHS', 1))
NUM_CLASSES = 10
DATA_DIR='/opt/dkube/input'
if not os.path.exists('output'):
    os.makedirs('output')
if not os.path.exists('values'):
    os.makedirs('values')
MODEL_DIR='output'

In [15]:
path=DATA_DIR+'/train'
path_labels=DATA_DIR+'/trainLabels.csv'

In [16]:
train_dir = os.listdir(DATA_DIR + '/train')
train_dir_len = len(train_dir)
print("Length:\t", train_dir_len)

Length:	 50000


In [17]:
train_labels = pd.read_csv(path_labels)
train_images = pd.DataFrame(columns=['id', 'label', 'path'], dtype=str)

In [21]:
train_root = path

for i in range(0, train_dir_len):
    path1 = os.path.join(train_root, str(i+1) + ".png")
    if os.path.exists(path1):
        train_images = train_images.append([{
            'id': train_labels['id'].iloc[i],
            'label': train_labels['label'].iloc[i],
            'path': str(i+1) + '.png'
        }])
        
train_images.head()

Unnamed: 0,id,label,path
0,1,frog,1.png
0,2,truck,2.png
0,3,truck,3.png
0,4,deer,4.png
0,5,automobile,5.png


In [22]:
display_groupby = train_images.groupby(['label']).count()
display_groupby.head(10)

Unnamed: 0_level_0,id,path
label,Unnamed: 1_level_1,Unnamed: 2_level_1
airplane,5000,5000
automobile,5000,5000
bird,5000,5000
cat,5000,5000
deer,5000,5000
dog,5000,5000
frog,5000,5000
horse,5000,5000
ship,5000,5000
truck,5000,5000


In [23]:
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
for name in class_names:
    index = class_names.index(name)
    train_images.loc[train_images['label'] == name, 'label'] = str(index)

display_groupby = train_images.groupby(['label']).count()
display_groupby.head(10)

Unnamed: 0_level_0,id,path
label,Unnamed: 1_level_1,Unnamed: 2_level_1
0,5000,5000
1,5000,5000
2,5000,5000
3,5000,5000
4,5000,5000
5,5000,5000
6,5000,5000
7,5000,5000
8,5000,5000
9,5000,5000


In [24]:
train_images.head()

Unnamed: 0,id,label,path
0,1,6,1.png
0,2,9,2.png
0,3,9,3.png
0,4,4,4.png
0,5,1,5.png


In [25]:
data_generator = ImageDataGenerator(rescale=1/255.,
                                   validation_split=0.2,
                                   horizontal_flip=True)

train_generator = data_generator.flow_from_dataframe(dataframe=train_images, 
                                                    directory=train_root,
                                                    x_col='path',
                                                     y_col='label',
                                                     subset='training',
                                                     batch_size=BATCH_SIZE,
                                                     shuffle=True,
                                                     target_size=(32,32),
                                                     class_mode='categorical')

validation_generator = data_generator.flow_from_dataframe(dataframe=train_images,
                                                         directory=train_root,
                                                         x_col='path',
                                                         y_col='label',
                                                         subset='validation',
                                                         batch_size=BATCH_SIZE,
                                                         shuffle=True,
                                                         target_size=(32,32),
                                                         class_mode='categorical')

Found 40000 validated image filenames belonging to 10 classes.
Found 10000 validated image filenames belonging to 10 classes.


In [26]:
model = Sequential()
model.add(Conv2D(32, kernel_size=3, activation='relu', padding='same', input_shape=(32,32,3)))
model.add(BatchNormalization())
model.add(Conv2D(32, kernel_size=3, activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D())
model.add(Dropout(0.2))

model.add(Conv2D(64, kernel_size=3, activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(64, kernel_size=3, activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D())
model.add(Dropout(0.3))

model.add(Conv2D(128, kernel_size=3, activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(128, kernel_size=3, activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D())
model.add(Dropout(0.4))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.4))
model.add(Dense(10, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(0.001), metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 32, 32, 32)        896       
_________________________________________________________________
batch_normalization (BatchNo (None, 32, 32, 32)        128       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 32)        9248      
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 32)        128       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 16, 16, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 16, 16, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 16, 16, 64)        1

In [28]:
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=5, factor=0.6, min_denta=0.00001)
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, verbose=1, restore_best_weights=True)

history = model.fit(train_generator, epochs=EPOCHS, validation_data=validation_generator, callbacks=[reduce_lr, es])

Train for 625 steps, validate for 157 steps


In [29]:
version='1'
model.save(MODEL_DIR+'/'+version)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: output/1/assets
