diff --git a/benchmark/README.md b/benchmark/README.md
index 350eff128d4..fbd2be4b2c6 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -62,9 +62,9 @@ NOTE:
| Instance Type | GPUs | Batch Size | Keras-MXNet (img/sec) | Keras-TensorFlow (img/sec) |
|---|---|---|---|---|
-| P3.8X Large | 1 | 32 | 202 | 52 |
+| P3.8X Large | 1 | 32 | 135 | 52 |
| P3.8X Large | 4 | 128 | 536 | 162 |
-| P3.16X Large | 8 | 256 | 681 | 209 |
+| P3.16X Large | 8 | 256 | 722 | 211 |
#### ResNet50-Synthetic Data
@@ -81,8 +81,8 @@ NOTE:
| Instance Type | GPUs | Batch Size | Keras-MXNet (img/sec) | Keras-TensorFlow (img/sec) |
|---|---|---|---|---|
| C5.18X Large | 0 | 32 | 87 | 59 |
-| P3.8X Large | 1 | 32 | 831 | 498 |
-| P3.8X Large | 4 | 128 | 1783 | 1020 |
+| P3.8X Large | 1 | 32 | 831 | 509 |
+| P3.8X Large | 4 | 128 | 1783 | 699 |
| P3.16X Large | 8 | 256 | 1680 | 435 |
@@ -210,10 +210,10 @@ For MXNet backend benchmarks:
For TensorFlow backend benchmarks:
```
- $ sh run_tf_backend.sh cpu_config lstm_nietzsche False 20 # For CPU Benchmarks
- $ sh run_tf_backend.sh gpu_config lstm_nietzsche False 20 # For 1 GPU Benchmarks
- $ sh run_tf_backend.sh 4_gpu_config lstm_nietzsche False 20 # For 4 GPU Benchmarks
- $ sh run_tf_backend.sh 8_gpu_config lstm_nietzsche False 20 # For 8 GPU Benchmarks
+ $ sh run_tf_backend.sh cpu_config lstm_nietzsche False 10 # For CPU Benchmarks
+ $ sh run_tf_backend.sh gpu_config lstm_nietzsche False 10 # For 1 GPU Benchmarks
+ $ sh run_tf_backend.sh 4_gpu_config lstm_nietzsche False 10 # For 4 GPU Benchmarks
+ $ sh run_tf_backend.sh 8_gpu_config lstm_nietzsche False 10 # For 8 GPU Benchmarks
```
#### LSTM-WikiText2
@@ -230,10 +230,10 @@ For MXNet backend benchmarks:
For TensorFlow backend benchmarks:
```
- $ sh run_tf_backend.sh cpu_config lstm_wikitext2 False 20 # For CPU Benchmarks
- $ sh run_tf_backend.sh gpu_config lstm_wikitext2 False 20 # For 1 GPU Benchmarks
- $ sh run_tf_backend.sh 4_gpu_config lstm_wikitext2 False 20 # For 4 GPU Benchmarks
- $ sh run_tf_backend.sh 8_gpu_config lstm_wikitext2 False 20 # For 8 GPU Benchmarks
+ $ sh run_tf_backend.sh cpu_config lstm_wikitext2 False 10 # For CPU Benchmarks
+ $ sh run_tf_backend.sh gpu_config lstm_wikitext2 False 10 # For 1 GPU Benchmarks
+ $ sh run_tf_backend.sh 4_gpu_config lstm_wikitext2 False 10 # For 4 GPU Benchmarks
+ $ sh run_tf_backend.sh 8_gpu_config lstm_wikitext2 False 10 # For 8 GPU Benchmarks
```
@@ -241,14 +241,23 @@ For TensorFlow backend benchmarks:
You can use the utility shell script to run the RNN benchmark on the Synthetic dataset.
-| Instance | GPUs | MXNet Backend
Speed/Epoch | TensorFlow Backend
Speed/Epoch |
-| :----------- | :---- | :------------------------------ | :----------------------------------- |
-| C5.18xLarge | 0 | 1345s 398us/step | 875s 259us/step |
-| P3.8xLarge | 1 | 868s 257us/step | 817s 242us/step |
-| P3.8xLarge | 4 | 775s 229us/step | 1468s 434us/step |
-## Credits
+For MXNet backend benchmarks:
+```
+ $ sh run_mxnet_backend.sh cpu_config lstm_synthetic False 10 # For CPU Benchmarks
+ $ sh run_mxnet_backend.sh gpu_config lstm_synthetic False 10 # For 1 GPU Benchmarks
+ $ sh run_mxnet_backend.sh 4_gpu_config lstm_synthetic False 10 # For 4 GPU Benchmarks
+ $ sh run_mxnet_backend.sh 8_gpu_config lstm_synthetic False 10 # For 8 GPU Benchmarks
+```
+
+For TensorFlow backend benchmarks:
+```
+ $ sh run_tf_backend.sh cpu_config lstm_synthetic False 10 # For CPU Benchmarks
+ $ sh run_tf_backend.sh gpu_config lstm_synthetic False 10 # For 1 GPU Benchmarks
+ $ sh run_tf_backend.sh 4_gpu_config lstm_synthetic False 10 # For 4 GPU Benchmarks
+ $ sh run_tf_backend.sh 8_gpu_config lstm_synthetic False 10 # For 8 GPU Benchmarks
+```
## References
* [TensorFlow Keras Benchmarks](https://github.com/tensorflow/benchmarks/tree/keras-benchmarks/scripts/keras_benchmarks)
-* [lstm_text_generation.py](https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py)
+* [lstm_text_generation.py](https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py)
\ No newline at end of file
diff --git a/benchmark/benchmark_result/RNN_result.md b/benchmark/benchmark_result/RNN_result.md
index 85cfe20e1a8..c638b96bd57 100644
--- a/benchmark/benchmark_result/RNN_result.md
+++ b/benchmark/benchmark_result/RNN_result.md
@@ -9,16 +9,24 @@
Please see [RNN with Keras-MXNet document](../docs/mxnet_backend/using_rnn_with_mxnet_backend.md) for more details on
the poor CPU training performance and unsupported functionalities.
-
+
+ ### Configuration
+| | |
+| :--------------- | :----------------------------------------------------------- |
+| Keras | v2.1.6 |
+| TensorFlow | v1.8.0 |
+| MXNet | v1.2.0 |
+| CUDA | v9.0.176 |
+| cuDNN | v7.0.1 |
### LSTM-Nietzsche
| Instance Type | GPUs | Batch Size | Keras-MXNet (Time/Epoch), (GPU Mem) | Keras-TensorFlow (Time/Epoch), (GPU Mem) |
|---|---|---|---|---|
| C5.18X Large | 0 | 128 | 78 sec, N/A | 55 sec, N/A|
-| P3.8X Large | 1 | 128 | 52 sec, 792 MB | 51 sec, 15360 MB|
-| P3.8X Large | 4 | 128 | 47 sec, 770 MB | 87 sec, 15410 MB |
-| P3.16X Large | 8 | 128 | TBD | TBD |
+| P3.8X Large | 1 | 128 | 52 sec, 792 MB | 83 sec, 15360 MB|
+| P3.8X Large | 4 | 128 | 47 sec, 770 MB | 117 sec, 15410 MB |
+| P3.16X Large | 8 | 128 | 72 sec, 826 MB | 183 sec, 15408 MB |
### LSTM-WikiText2
@@ -27,7 +35,7 @@ Please see [RNN with Keras-MXNet document](../docs/mxnet_backend/using_rnn_with_
| C5.18X Large | 0 | 128 | 1345 sec, N/A | 875, N/A |
| P3.8X Large | 1 | 128 | 868 sec, 772 MB | 817, 15360 MB |
| P3.8X Large | 4 | 128 | 775 sec, 764 MB | 1468, 15410 MB |
-| P3.16X Large | 8 | 128 | TBD | TBD |
+| P3.16X Large | 8 | 128 | 1214 sec, 826 MB | 3176 sec, 15410 MB |
### Synthetic Data
@@ -36,7 +44,7 @@ Please see [RNN with Keras-MXNet document](../docs/mxnet_backend/using_rnn_with_
| C5.18X Large | 0 | 128 | 24 sec, N/A | 14 sec, N/A|
| P3.8X Large | 1 | 128 | 13 sec, 792 MB | 12 sec, 15360 MB|
| P3.8X Large | 4 | 128 | 12 sec, 770 MB | 21 sec, 15410 MB |
-| P3.16X Large | 8 | 128 | TBD | TBD |
+| P3.16X Large | 8 | 128 | 19 sec, 826 MB | 49 sec, 15360 MB |
# Detailed RNN Benchmark Results
diff --git a/benchmark/scripts/benchmark_resnet.py b/benchmark/scripts/benchmark_resnet.py
index 8b7f5b1ca01..a5be4dcb7a5 100644
--- a/benchmark/scripts/benchmark_resnet.py
+++ b/benchmark/scripts/benchmark_resnet.py
@@ -84,8 +84,7 @@
# prepare logging
# file name: backend_data_format_dataset_model_batch_size_gpus.log
-log_file = K.backend() + '_' + K.image_data_format() + '_' + args.dataset + '_resnet_v' + args.version + \
- '_' + args.layers + '_batch_size' + str(batch_size) + '_' + str(num_gpus) + 'gpus'
+log_file = K.backend() + '_' + K.image_data_format() + '_' + args.dataset + '_resnet_v' + args.version + '_' + args.layers + '_batch_size' + str(batch_size) + '_' + str(num_gpus) + 'gpus' # nopep8
logFormatter = logging.Formatter('%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s')
rootLogger = logging.getLogger()
@@ -298,9 +297,9 @@ def lr_schedule(epoch):
batch_time = 1000 * (end_time - start_time)
speed = batch_size * 1000.0 / batch_time if batch_time != 0 else 0
rootLogger.info('batch {}/{} loss: {} accuracy: {} '
- 'time: {}ms speed: {}'.format(int(current_index / batch_size),
- int(nice_n / batch_size), loss, accuracy,
- batch_time, speed))
+ 'time: {}ms speed: {}'.format(int(current_index / batch_size),
+ int(nice_n / batch_size), loss, accuracy,
+ batch_time, speed))
rootLogger.info('finish epoch {}/{} total epoch time: {}ms'.format(i, epochs, total_time))
@@ -321,4 +320,4 @@ def lr_schedule(epoch):
# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1)
rootLogger.info('Test loss: %.4f' % scores[0])
-rootLogger.info('Test accuracy: %.4f'% scores[1])
+rootLogger.info('Test accuracy: %.4f' % scores[1])
diff --git a/benchmark/scripts/models/lstm_synthetic.py b/benchmark/scripts/models/lstm_synthetic.py
index dfe9107e835..69c80ad8472 100644
--- a/benchmark/scripts/models/lstm_synthetic.py
+++ b/benchmark/scripts/models/lstm_synthetic.py
@@ -44,9 +44,7 @@ def __init__(self):
def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False, epochs=20):
# prepare logging
# file name: backend_data_format_dataset_model_batch_size_gpus.log
- log_file = keras.backend.backend() + '_' + keras.backend.image_data_format() + \
- '_lstm_synthetic_batch_size_' + \
- str(self.batch_size) + '_' + str(gpus) + 'gpus.log'
+ log_file = keras.backend.backend() + '_' + keras.backend.image_data_format() + '_lstm_synthetic_batch_size_' + str(self.batch_size) + '_' + str(gpus) + 'gpus.log' # nopep8
logging.basicConfig(level=logging.INFO, filename=log_file)
self.epochs = epochs
diff --git a/benchmark/scripts/models/lstm_text_generation.py b/benchmark/scripts/models/lstm_text_generation.py
index a557b021d3a..5e32fd6bad1 100644
--- a/benchmark/scripts/models/lstm_text_generation.py
+++ b/benchmark/scripts/models/lstm_text_generation.py
@@ -49,9 +49,7 @@ def __init__(self, dataset_name=None):
def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False, epochs=20):
# prepare logging
# file name: backend_data_format_dataset_model_batch_size_gpus.log
- log_file = keras.backend.backend() + '_' + keras.backend.image_data_format() + \
- '_lstm_test_generation_' + self.dataset_name + '_batch_size_' + \
- str(self.batch_size) + '_' + str(gpus) + 'gpus.log'
+ log_file = keras.backend.backend() + '_' + keras.backend.image_data_format() + '_lstm_test_generation_' + self.dataset_name + '_batch_size_' + str(self.batch_size) + '_' + str(gpus) + 'gpus.log' # nopep8
logging.basicConfig(level=logging.INFO, filename=log_file)
self.epochs = epochs
diff --git a/benchmark/scripts/models/resnet50_benchmark.py b/benchmark/scripts/models/resnet50_benchmark.py
index 56e068dc9fe..c796233af83 100644
--- a/benchmark/scripts/models/resnet50_benchmark.py
+++ b/benchmark/scripts/models/resnet50_benchmark.py
@@ -17,8 +17,6 @@
from keras import backend as K
-
-
def crossentropy_from_logits(y_true, y_pred):
return keras.backend.categorical_crossentropy(target=y_true,
output=y_pred,
@@ -39,12 +37,11 @@ def __init__(self):
def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False, epochs=20):
self.epochs = epochs
if gpus > 1:
- self.batch_size = self.batch_size*gpus
+ self.batch_size = self.batch_size * gpus
# prepare logging
# file name: backend_data_format_dataset_model_batch_size_gpus.log
- log_file = K.backend() + '_' + K.image_data_format() + '_synthetic_resnet50_batch_size_' + \
- str(self.batch_size) + '_' + str(gpus) + 'gpus.log'
+ log_file = K.backend() + '_' + K.image_data_format() + '_synthetic_resnet50_batch_size_' + str(self.batch_size) + '_' + str(gpus) + 'gpus.log' # nopep8
logging.basicConfig(level=logging.INFO, filename=log_file)
print("Running model ", self.test_name)
diff --git a/keras/backend/mxnet_backend.py b/keras/backend/mxnet_backend.py
index c77d587e69b..7af184991de 100644
--- a/keras/backend/mxnet_backend.py
+++ b/keras/backend/mxnet_backend.py
@@ -2593,13 +2593,13 @@ def rnn(step_function, inputs, initial_states,
'Ex: new_x_train = keras.preprocessing.sequence.pad_sequences(old_x_train, '
'maxlen=MAX_LEN_OF_INPUT_SAMPLE_TYPE_INT). '
'More Details - '
- 'https://github.com/awslabs/keras-apache-mxnet/tree/master/docs/mxnet_backend/using_rnn_with_mxnet_backend.md') #nopep8
+ 'https://github.com/awslabs/keras-apache-mxnet/tree/master/docs/mxnet_backend/using_rnn_with_mxnet_backend.md') # nopep8
if not unroll and dshape[1] is not None:
warnings.warn('MXNet Backend: `unroll=False` is not supported yet in RNN. Since the input_shape is known, '
'setting `unroll=True` and continuing the execution.'
'More Details - '
- 'https://github.com/awslabs/keras-apache-mxnet/tree/master/docs/mxnet_backend/using_rnn_with_mxnet_backend.md', stacklevel=2) #nopep8
+ 'https://github.com/awslabs/keras-apache-mxnet/tree/master/docs/mxnet_backend/using_rnn_with_mxnet_backend.md', stacklevel=2) # nopep8
# Split the inputs across time dimension and generate the list of inputs
# with shape `(samples, ...)` (no time dimension)