# Imports

In [19]:
import keras
from keras import Sequential
from keras.datasets import cifar10
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from mxnet import context
import mxnet.profiler as prf

# Cifar10 Dataset

In [2]:
# Number of target categories; reused below as the width of the softmax layer.
num_classes = 10

# Load the CIFAR-10 train/test splits through Keras.
# Lower-case y_* are the labels as returned by load_data().
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Upper-case Y_* are the same labels converted to categorical (one-hot) form,
# which is what the 'categorical_crossentropy' loss used later expects.
Y_train, Y_test = (
    keras.utils.np_utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

# The Model

```python
model = Sequential()

model.add(Convolution2D(32, 3, 3, border_mode='same',
                        input_shape=X_train.shape[1:]))
model.add(Activation('relu'))
model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Convolution2D(64, 3, 3, border_mode='same'))
model.add(Activation('relu'))
model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))
```

In [3]:
# Small VGG-style CNN: two conv blocks followed by a dense classifier head.
# Layers use the Keras 2 call signature (kernel size as a tuple, `padding=`)
# rather than the deprecated Keras 1 form `Convolution2D(n, 3, 3, border_mode=...)`,
# which Keras 2 only accepts through its legacy-argument shim and warns about.
model = Sequential()

# Conv block 1: 32 filters. The first layer also declares the per-sample input
# shape, taken from the training data (everything after the batch axis).
model.add(Convolution2D(32, (3, 3), padding='same',
                        input_shape=X_train.shape[1:]))
model.add(Activation('relu'))
model.add(Convolution2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Conv block 2: same layout with 64 filters.
model.add(Convolution2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Convolution2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Classifier head: flatten -> 512-unit hidden layer -> softmax over the classes.
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

  train_symbol = func(*args, **kwargs)
  test_symbol = func(*args, **kwargs)


# Multi-GPU

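- `keras.utils.multi_gpu_model(model, gpus)` tells Keras to train the model on multiple GPUs through the MXNet backend; the `gpus` parameter specifies how many GPUs to use.
- By default, on a GPU machine, the MXNet backend uses only the first GPU device.
- `gpus=1` is an invalid argument: `multi_gpu_model` expects `gpus >= 2`, so the call should be skipped when fewer than two GPUs are available.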

In [16]:
# Use up to 4 GPUs, capped by the number of GPUs MXNet reports on this machine.
num_gpu = min(4, context.num_gpus())

# multi_gpu_model() rejects gpus=1, and with no GPU there is nothing to
# replicate across, so only wrap the model when at least two devices are
# available. Otherwise the plain single-device model is compiled as-is.
# NOTE: this rebinds `model`; re-running this cell on its own would wrap the
# already-wrapped model again, so re-run the model-definition cell first.
if num_gpu >= 2:
    model = keras.utils.multi_gpu_model(model=model, gpus=num_gpu)

opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy', 'mse'])

In [20]:
batch_size = 32*4 # 32 per GPU. We use 4 GPUs in the example. Set batch_size to 32*4.
epochs = 3 # Increase this to 200 for higher accuracy.

# Convert the images to float32 and divide by 255. The dtype guard makes this
# cell safe to re-run: without it, a second run would divide the
# already-scaled data by 255 again.
if X_train.dtype != 'float32':
    X_train = X_train.astype('float32') / 255
if X_test.dtype != 'float32':
    X_test = X_test.astype('float32') / 255

# Real-time data augmentation: small random shifts and horizontal flips.
# All normalisation / whitening options are switched off.
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=False)  # randomly flip images vertically

# Compute quantities required for feature-wise normalization
# (std, mean, and principal components if ZCA whitening is applied).
# Kept so the cell stays correct if one of the featurewise options above is
# switched on.
datagen.fit(X_train)

# Profile the whole training run with the MXNet profiler.
prf.set_config(profile_all=True, filename='images/profile_output.json')
prf.set_state('run')
try:
    # Fit the model on the batches generated by datagen.flow().
    # Keras 2 keywords: `steps_per_epoch` counts batches (not samples) and
    # `epochs` replaces `nb_epoch`. One epoch is one pass over the training set.
    history = model.fit_generator(
        datagen.flow(X_train, Y_train, batch_size=batch_size),
        steps_per_epoch=X_train.shape[0] // batch_size,
        epochs=epochs,
        validation_data=(X_test, Y_test))
finally:
    # Always stop the profiler, even if training fails or is interrupted.
    prf.set_state('stop')


Epoch 1/3
  3/390 [..............................] - ETA: 21s - loss: 2.3026 - acc: 0.0964 - mean_squared_error: 0.0900



Epoch 2/3
Epoch 3/3


# Training

```python
history = model.fit_generator(datagen.flow(X_train, Y_train,
                                     batch_size=batch_size),
                        samples_per_epoch=X_train.shape[0],
                        nb_epoch=epochs,
                        validation_data=(X_test, Y_test))
```

![](images/keras-4gpus.png)