diff --git a/benchmark/README.md b/benchmark/README.md
index 350eff128d4..fbd2be4b2c6 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -62,9 +62,9 @@ NOTE:
 | Instance Type | GPUs | Batch Size | Keras-MXNet (img/sec) | Keras-TensorFlow (img/sec) |
 |---|---|---|---|---|
-| P3.8X Large | 1 | 32 | 202 | 52 |
+| P3.8X Large | 1 | 32 | 135 | 52 |
 | P3.8X Large | 4 | 128 | 536 | 162 |
-| P3.16X Large | 8 | 256 | 681 | 209 |
+| P3.16X Large | 8 | 256 | 722 | 211 |

 #### ResNet50-Synthetic Data

@@ -81,8 +81,8 @@ NOTE:
 | Instance Type | GPUs | Batch Size | Keras-MXNet (img/sec) | Keras-TensorFlow (img/sec) |
 |---|---|---|---|---|
 | C5.18X Large | 0 | 32 | 87 | 59 |
-| P3.8X Large | 1 | 32 | 831 | 498 |
-| P3.8X Large | 4 | 128 | 1783 | 1020 |
+| P3.8X Large | 1 | 32 | 831 | 509 |
+| P3.8X Large | 4 | 128 | 1783 | 699 |
 | P3.16X Large | 8 | 256 | 1680 | 435 |

@@ -210,10 +210,10 @@ For MXNet backend benchmarks:

 For TensorFlow backend benchmarks:
 ```
-    $ sh run_tf_backend.sh cpu_config lstm_nietzsche False 20 # For CPU Benchmarks
-    $ sh run_tf_backend.sh gpu_config lstm_nietzsche False 20 # For 1 GPU Benchmarks
-    $ sh run_tf_backend.sh 4_gpu_config lstm_nietzsche False 20 # For 4 GPU Benchmarks
-    $ sh run_tf_backend.sh 8_gpu_config lstm_nietzsche False 20 # For 8 GPU Benchmarks
+    $ sh run_tf_backend.sh cpu_config lstm_nietzsche False 10 # For CPU Benchmarks
+    $ sh run_tf_backend.sh gpu_config lstm_nietzsche False 10 # For 1 GPU Benchmarks
+    $ sh run_tf_backend.sh 4_gpu_config lstm_nietzsche False 10 # For 4 GPU Benchmarks
+    $ sh run_tf_backend.sh 8_gpu_config lstm_nietzsche False 10 # For 8 GPU Benchmarks
 ```

 #### LSTM-WikiText2
@@ -230,10 +230,10 @@ For MXNet backend benchmarks:

 For TensorFlow backend benchmarks:
 ```
-    $ sh run_tf_backend.sh cpu_config lstm_wikitext2 False 20 # For CPU Benchmarks
-    $ sh run_tf_backend.sh gpu_config lstm_wikitext2 False 20 # For 1 GPU Benchmarks
-    $ sh run_tf_backend.sh 4_gpu_config lstm_wikitext2 False 20 # For 4 GPU Benchmarks
-    $ sh run_tf_backend.sh 8_gpu_config lstm_wikitext2 False 20 # For 8 GPU Benchmarks
+    $ sh run_tf_backend.sh cpu_config lstm_wikitext2 False 10 # For CPU Benchmarks
+    $ sh run_tf_backend.sh gpu_config lstm_wikitext2 False 10 # For 1 GPU Benchmarks
+    $ sh run_tf_backend.sh 4_gpu_config lstm_wikitext2 False 10 # For 4 GPU Benchmarks
+    $ sh run_tf_backend.sh 8_gpu_config lstm_wikitext2 False 10 # For 8 GPU Benchmarks
 ```

@@ -241,14 +241,23 @@ For TensorFlow backend benchmarks:

 You can use the utility shell script to run the RNN benchmark on the Synthetic dataset.

-| Instance | GPUs | MXNet Backend Speed/Epoch | TensorFlow Backend Speed/Epoch |
-| :----------- | :---- | :------------------------------ | :----------------------------------- |
-| C5.18xLarge | 0 | 1345s 398us/step | 875s 259us/step |
-| P3.8xLarge | 1 | 868s 257us/step | 817s 242us/step |
-| P3.8xLarge | 4 | 775s 229us/step | 1468s 434us/step |
-## Credits
+For MXNet backend benchmarks:
+```
+    $ sh run_mxnet_backend.sh cpu_config lstm_synthetic False 10 # For CPU Benchmarks
+    $ sh run_mxnet_backend.sh gpu_config lstm_synthetic False 10 # For 1 GPU Benchmarks
+    $ sh run_mxnet_backend.sh 4_gpu_config lstm_synthetic False 10 # For 4 GPU Benchmarks
+    $ sh run_mxnet_backend.sh 8_gpu_config lstm_synthetic False 10 # For 8 GPU Benchmarks
+```
+
+For TensorFlow backend benchmarks:
+```
+    $ sh run_tf_backend.sh cpu_config lstm_synthetic False 10 # For CPU Benchmarks
+    $ sh run_tf_backend.sh gpu_config lstm_synthetic False 10 # For 1 GPU Benchmarks
+    $ sh run_tf_backend.sh 4_gpu_config lstm_synthetic False 10 # For 4 GPU Benchmarks
+    $ sh run_tf_backend.sh 8_gpu_config lstm_synthetic False 10 # For 8 GPU Benchmarks
+```

 ## References

 * [TensorFlow Keras Benchmarks](https://github.com/tensorflow/benchmarks/tree/keras-benchmarks/scripts/keras_benchmarks)
-* [lstm_text_generation.py](https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py)
+* [lstm_text_generation.py](https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py)
\ No newline at end of file
diff --git a/benchmark/benchmark_result/RNN_result.md b/benchmark/benchmark_result/RNN_result.md
index 85cfe20e1a8..c638b96bd57 100644
--- a/benchmark/benchmark_result/RNN_result.md
+++ b/benchmark/benchmark_result/RNN_result.md
@@ -9,16 +9,24 @@
 Please see [RNN with Keras-MXNet document](../docs/mxnet_backend/using_rnn_with_mxnet_backend.md) for more details on the poor CPU training performance and unsupported functionalities.
-
+
+
 ### Configuration
+| | |
+| :--------------- | :----------------------------------------------------------- |
+| Keras | v2.1.6 |
+| TensorFlow | v1.8.0 |
+| MXNet | v1.2.0 |
+| CUDA | v9.0.176 |
+| cuDNN | v7.0.1 |

 ### LSTM-Nietzsche

 | Instance Type | GPUs | Batch Size | Keras-MXNet (Time/Epoch), (GPU Mem) | Keras-TensorFlow (Time/Epoch), (GPU Mem) |
 |---|---|---|---|---|
 | C5.18X Large | 0 | 128 | 78 sec, N/A | 55 sec, N/A|
-| P3.8X Large | 1 | 128 | 52 sec, 792 MB | 51 sec, 15360 MB|
-| P3.8X Large | 4 | 128 | 47 sec, 770 MB | 87 sec, 15410 MB |
-| P3.16X Large | 8 | 128 | TBD | TBD |
+| P3.8X Large | 1 | 128 | 52 sec, 792 MB | 83 sec, 15360 MB|
+| P3.8X Large | 4 | 128 | 47 sec, 770 MB | 117 sec, 15410 MB |
+| P3.16X Large | 8 | 128 | 72 sec, 826 MB | 183 sec, 15408 MB |

 ### LSTM-WikiText2
@@ -27,7 +35,7 @@ Please see [RNN with Keras-MXNet document](../docs/mxnet_backend/using_rnn_with_
 | C5.18X Large | 0 | 128 | 1345 sec, N/A | 875, N/A |
 | P3.8X Large | 1 | 128 | 868 sec, 772 MB | 817, 15360 MB |
 | P3.8X Large | 4 | 128 | 775 sec, 764 MB | 1468, 15410 MB |
-| P3.16X Large | 8 | 128 | TBD | TBD |
+| P3.16X Large | 8 | 128 | 1214 sec, 826 MB | 3176 sec, 15410 MB |

 ### Synthetic Data
@@ -36,7 +44,7 @@ Please see [RNN with Keras-MXNet document](../docs/mxnet_backend/using_rnn_with_
 | C5.18X Large | 0 | 128 | 24 sec, N/A | 14 sec, N/A|
 | P3.8X Large | 1 | 128 | 13 sec, 792 MB | 12 sec, 15360 MB|
 | P3.8X Large | 4 | 128 | 12 sec, 770 MB | 21 sec, 15410 MB |
-| P3.16X Large | 8 | 128 | TBD | TBD |
+| P3.16X Large | 8 | 128 | 19 sec, 826 MB | 49 sec, 15360 MB |

 # Detailed RNN Benchmark Results
diff --git a/benchmark/scripts/benchmark_resnet.py b/benchmark/scripts/benchmark_resnet.py
index 8b7f5b1ca01..a5be4dcb7a5 100644
--- a/benchmark/scripts/benchmark_resnet.py
+++ b/benchmark/scripts/benchmark_resnet.py
@@ -84,8 +84,7 @@

 # prepare logging
 # file name: backend_data_format_dataset_model_batch_size_gpus.log
-log_file = K.backend() + '_' + K.image_data_format() + '_' + args.dataset + '_resnet_v' + args.version + \
-           '_' + args.layers + '_batch_size' + str(batch_size) + '_' + str(num_gpus) + 'gpus'
+log_file = K.backend() + '_' + K.image_data_format() + '_' + args.dataset + '_resnet_v' + args.version + '_' + args.layers + '_batch_size' + str(batch_size) + '_' + str(num_gpus) + 'gpus'  # nopep8

 logFormatter = logging.Formatter('%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s')
 rootLogger = logging.getLogger()
@@ -298,9 +297,9 @@ def lr_schedule(epoch):
         batch_time = 1000 * (end_time - start_time)
         speed = batch_size * 1000.0 / batch_time if batch_time != 0 else 0
         rootLogger.info('batch {}/{} loss: {} accuracy: {} '
-                        'time: {}ms speed: {}'.format(int(current_index / batch_size),
-                                                      int(nice_n / batch_size), loss, accuracy,
-                                                      batch_time, speed))
+                         'time: {}ms speed: {}'.format(int(current_index / batch_size),
+                                                       int(nice_n / batch_size), loss, accuracy,
+                                                       batch_time, speed))

     rootLogger.info('finish epoch {}/{} total epoch time: {}ms'.format(i, epochs, total_time))

@@ -321,4 +320,4 @@ def lr_schedule(epoch):

 # Score trained model.
 scores = model.evaluate(x_test, y_test, verbose=1)
 rootLogger.info('Test loss: %.4f' % scores[0])
-rootLogger.info('Test accuracy: %.4f'% scores[1])
+rootLogger.info('Test accuracy: %.4f' % scores[1])
diff --git a/benchmark/scripts/models/lstm_synthetic.py b/benchmark/scripts/models/lstm_synthetic.py
index dfe9107e835..69c80ad8472 100644
--- a/benchmark/scripts/models/lstm_synthetic.py
+++ b/benchmark/scripts/models/lstm_synthetic.py
@@ -44,9 +44,7 @@ def __init__(self):
     def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False, epochs=20):
         # prepare logging
         # file name: backend_data_format_dataset_model_batch_size_gpus.log
-        log_file = keras.backend.backend() + '_' + keras.backend.image_data_format() + \
-                   '_lstm_synthetic_batch_size_' + \
-                   str(self.batch_size) + '_' + str(gpus) + 'gpus.log'
+        log_file = keras.backend.backend() + '_' + keras.backend.image_data_format() + '_lstm_synthetic_batch_size_' + str(self.batch_size) + '_' + str(gpus) + 'gpus.log'  # nopep8

         logging.basicConfig(level=logging.INFO, filename=log_file)
         self.epochs = epochs
diff --git a/benchmark/scripts/models/lstm_text_generation.py b/benchmark/scripts/models/lstm_text_generation.py
index a557b021d3a..5e32fd6bad1 100644
--- a/benchmark/scripts/models/lstm_text_generation.py
+++ b/benchmark/scripts/models/lstm_text_generation.py
@@ -49,9 +49,7 @@ def __init__(self, dataset_name=None):
     def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False, epochs=20):
         # prepare logging
         # file name: backend_data_format_dataset_model_batch_size_gpus.log
-        log_file = keras.backend.backend() + '_' + keras.backend.image_data_format() + \
-                   '_lstm_test_generation_' + self.dataset_name + '_batch_size_' + \
-                   str(self.batch_size) + '_' + str(gpus) + 'gpus.log'
+        log_file = keras.backend.backend() + '_' + keras.backend.image_data_format() + '_lstm_test_generation_' + self.dataset_name + '_batch_size_' + str(self.batch_size) + '_' + str(gpus) + 'gpus.log'  # nopep8

         logging.basicConfig(level=logging.INFO, filename=log_file)
         self.epochs = epochs
diff --git a/benchmark/scripts/models/resnet50_benchmark.py b/benchmark/scripts/models/resnet50_benchmark.py
index 56e068dc9fe..c796233af83 100644
--- a/benchmark/scripts/models/resnet50_benchmark.py
+++ b/benchmark/scripts/models/resnet50_benchmark.py
@@ -17,8 +17,6 @@

 from keras import backend as K

-
-
 def crossentropy_from_logits(y_true, y_pred):
     return keras.backend.categorical_crossentropy(target=y_true,
                                                    output=y_pred,
@@ -39,12 +37,11 @@ def __init__(self):
     def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False, epochs=20):
         self.epochs = epochs
         if gpus > 1:
-            self.batch_size = self.batch_size*gpus
+            self.batch_size = self.batch_size * gpus

         # prepare logging
         # file name: backend_data_format_dataset_model_batch_size_gpus.log
-        log_file = K.backend() + '_' + K.image_data_format() + '_synthetic_resnet50_batch_size_' + \
-                   str(self.batch_size) + '_' + str(gpus) + 'gpus.log'
+        log_file = K.backend() + '_' + K.image_data_format() + '_synthetic_resnet50_batch_size_' + str(self.batch_size) + '_' + str(gpus) + 'gpus.log'  # nopep8

         logging.basicConfig(level=logging.INFO, filename=log_file)
         print("Running model ", self.test_name)
diff --git a/keras/backend/mxnet_backend.py b/keras/backend/mxnet_backend.py
index c77d587e69b..7af184991de 100644
--- a/keras/backend/mxnet_backend.py
+++ b/keras/backend/mxnet_backend.py
@@ -2593,13 +2593,13 @@ def rnn(step_function, inputs, initial_states,
                          'Ex: new_x_train = keras.preprocessing.sequence.pad_sequences(old_x_train, '
                          'maxlen=MAX_LEN_OF_INPUT_SAMPLE_TYPE_INT). '
                          'More Details - '
-                         'https://github.com/awslabs/keras-apache-mxnet/tree/master/docs/mxnet_backend/using_rnn_with_mxnet_backend.md') #nopep8
+                         'https://github.com/awslabs/keras-apache-mxnet/tree/master/docs/mxnet_backend/using_rnn_with_mxnet_backend.md') # nopep8

     if not unroll and dshape[1] is not None:
         warnings.warn('MXNet Backend: `unroll=False` is not supported yet in RNN. Since the input_shape is known, '
                       'setting `unroll=True` and continuing the execution.'
                       'More Details - '
-                      'https://github.com/awslabs/keras-apache-mxnet/tree/master/docs/mxnet_backend/using_rnn_with_mxnet_backend.md', stacklevel=2) #nopep8
+                      'https://github.com/awslabs/keras-apache-mxnet/tree/master/docs/mxnet_backend/using_rnn_with_mxnet_backend.md', stacklevel=2) # nopep8

     # Split the inputs across time dimension and generate the list of inputs
     # with shape `(samples, ...)` (no time dimension)