diff --git a/benchmark/README.md b/benchmark/README.md
index 350eff128d4..fbd2be4b2c6 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -62,9 +62,9 @@ NOTE:
 | Instance Type | GPUs | Batch Size | Keras-MXNet (img/sec) | Keras-TensorFlow (img/sec) |
 |---|---|---|---|---|
-| P3.8X Large | 1 | 32 | 202 | 52 |
+| P3.8X Large | 1 | 32 | 135 | 52 |
 | P3.8X Large | 4 | 128 | 536 | 162 |
-| P3.16X Large | 8 | 256 | 681 | 209 |
+| P3.16X Large | 8 | 256 | 722 | 211 |

 #### ResNet50-Synthetic Data

@@ -81,8 +81,8 @@ NOTE:
 | Instance Type | GPUs | Batch Size | Keras-MXNet (img/sec) | Keras-TensorFlow (img/sec) |
 |---|---|---|---|---|
 | C5.18X Large | 0 | 32 | 87 | 59 |
-| P3.8X Large | 1 | 32 | 831 | 498 |
-| P3.8X Large | 4 | 128 | 1783 | 1020 |
+| P3.8X Large | 1 | 32 | 831 | 509 |
+| P3.8X Large | 4 | 128 | 1783 | 699 |
 | P3.16X Large | 8 | 256 | 1680 | 435 |

@@ -210,10 +210,10 @@ For MXNet backend benchmarks:

 For TensorFlow backend benchmarks:
 ```
-    $ sh run_tf_backend.sh cpu_config lstm_nietzsche False 20 # For CPU Benchmarks
-    $ sh run_tf_backend.sh gpu_config lstm_nietzsche False 20 # For 1 GPU Benchmarks
-    $ sh run_tf_backend.sh 4_gpu_config lstm_nietzsche False 20 # For 4 GPU Benchmarks
-    $ sh run_tf_backend.sh 8_gpu_config lstm_nietzsche False 20 # For 8 GPU Benchmarks
+    $ sh run_tf_backend.sh cpu_config lstm_nietzsche False 10 # For CPU Benchmarks
+    $ sh run_tf_backend.sh gpu_config lstm_nietzsche False 10 # For 1 GPU Benchmarks
+    $ sh run_tf_backend.sh 4_gpu_config lstm_nietzsche False 10 # For 4 GPU Benchmarks
+    $ sh run_tf_backend.sh 8_gpu_config lstm_nietzsche False 10 # For 8 GPU Benchmarks
 ```

 #### LSTM-WikiText2
@@ -230,10 +230,10 @@ For MXNet backend benchmarks:

 For TensorFlow backend benchmarks:
 ```
-    $ sh run_tf_backend.sh cpu_config lstm_wikitext2 False 20 # For CPU Benchmarks
-    $ sh run_tf_backend.sh gpu_config lstm_wikitext2 False 20 # For 1 GPU Benchmarks
-    $ sh run_tf_backend.sh 4_gpu_config lstm_wikitext2 False 20 # For 4 GPU Benchmarks
-    $ sh run_tf_backend.sh 8_gpu_config lstm_wikitext2 False 20 # For 8 GPU Benchmarks
+    $ sh run_tf_backend.sh cpu_config lstm_wikitext2 False 10 # For CPU Benchmarks
+    $ sh run_tf_backend.sh gpu_config lstm_wikitext2 False 10 # For 1 GPU Benchmarks
+    $ sh run_tf_backend.sh 4_gpu_config lstm_wikitext2 False 10 # For 4 GPU Benchmarks
+    $ sh run_tf_backend.sh 8_gpu_config lstm_wikitext2 False 10 # For 8 GPU Benchmarks
 ```

@@ -241,14 +241,23 @@ For TensorFlow backend benchmarks:

 You can use the utility shell script to run the RNN benchmark on the Synthetic dataset.

-| Instance | GPUs | MXNet Backend Speed/Epoch | TensorFlow Backend Speed/Epoch |
-| :----------- | :---- | :------------------------------ | :----------------------------------- |
-| C5.18xLarge | 0 | 1345s 398us/step | 875s 259us/step |
-| P3.8xLarge | 1 | 868s 257us/step | 817s 242us/step |
-| P3.8xLarge | 4 | 775s 229us/step | 1468s 434us/step |
-## Credits
+For MXNet backend benchmarks:
+```
+    $ sh run_mxnet_backend.sh cpu_config lstm_synthetic False 10 # For CPU Benchmarks
+    $ sh run_mxnet_backend.sh gpu_config lstm_synthetic False 10 # For 1 GPU Benchmarks
+    $ sh run_mxnet_backend.sh 4_gpu_config lstm_synthetic False 10 # For 4 GPU Benchmarks
+    $ sh run_mxnet_backend.sh 8_gpu_config lstm_synthetic False 10 # For 8 GPU Benchmarks
+```
+
+For TensorFlow backend benchmarks:
+```
+    $ sh run_tf_backend.sh cpu_config lstm_synthetic False 10 # For CPU Benchmarks
+    $ sh run_tf_backend.sh gpu_config lstm_synthetic False 10 # For 1 GPU Benchmarks
+    $ sh run_tf_backend.sh 4_gpu_config lstm_synthetic False 10 # For 4 GPU Benchmarks
+    $ sh run_tf_backend.sh 8_gpu_config lstm_synthetic False 10 # For 8 GPU Benchmarks
+```

 ## References

 * [TensorFlow Keras Benchmarks](https://github.com/tensorflow/benchmarks/tree/keras-benchmarks/scripts/keras_benchmarks)
-* [lstm_text_generation.py](https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py)
+* [lstm_text_generation.py](https://github.com/keras-team/keras/blob/master/examples/lstm_text_generation.py)
\ No newline at end of file
diff --git a/benchmark/benchmark_result/RNN_result.md b/benchmark/benchmark_result/RNN_result.md
index 85cfe20e1a8..c638b96bd57 100644
--- a/benchmark/benchmark_result/RNN_result.md
+++ b/benchmark/benchmark_result/RNN_result.md
@@ -9,16 +9,24 @@
 Please see [RNN with Keras-MXNet document](../docs/mxnet_backend/using_rnn_with_mxnet_backend.md) for more details on the poor CPU training performance and unsupported functionalities.
-
+
+
 ### Configuration
+| | |
+| :--------------- | :----------------------------------------------------------- |
+| Keras | v2.1.6 |
+| TensorFlow | v1.8.0 |
+| MXNet | v1.2.0 |
+| CUDA | v9.0.176 |
+| cuDNN | v7.0.1 |

 ### LSTM-Nietzsche

 | Instance Type | GPUs | Batch Size | Keras-MXNet (Time/Epoch), (GPU Mem) | Keras-TensorFlow (Time/Epoch), (GPU Mem) |
 |---|---|---|---|---|
 | C5.18X Large | 0 | 128 | 78 sec, N/A | 55 sec, N/A|
-| P3.8X Large | 1 | 128 | 52 sec, 792 MB | 51 sec, 15360 MB|
-| P3.8X Large | 4 | 128 | 47 sec, 770 MB | 87 sec, 15410 MB |
-| P3.16X Large | 8 | 128 | TBD | TBD |
+| P3.8X Large | 1 | 128 | 52 sec, 792 MB | 83 sec, 15360 MB|
+| P3.8X Large | 4 | 128 | 47 sec, 770 MB | 117 sec, 15410 MB |
+| P3.16X Large | 8 | 128 | 72 sec, 826 MB | 183 sec, 15408 MB |

 ### LSTM-WikiText2
@@ -27,7 +35,7 @@ Please see [RNN with Keras-MXNet document](../docs/mxnet_backend/using_rnn_with_
 | C5.18X Large | 0 | 128 | 1345 sec, N/A | 875, N/A |
 | P3.8X Large | 1 | 128 | 868 sec, 772 MB | 817, 15360 MB |
 | P3.8X Large | 4 | 128 | 775 sec, 764 MB | 1468, 15410 MB |
-| P3.16X Large | 8 | 128 | TBD | TBD |
+| P3.16X Large | 8 | 128 | 1214 sec, 826 MB | 3176 sec, 15410 MB |

 ### Synthetic Data
@@ -36,7 +44,7 @@ Please see [RNN with Keras-MXNet document](../docs/mxnet_backend/using_rnn_with_
 | C5.18X Large | 0 | 128 | 24 sec, N/A | 14 sec, N/A|
 | P3.8X Large | 1 | 128 | 13 sec, 792 MB | 12 sec, 15360 MB|
 | P3.8X Large | 4 | 128 | 12 sec, 770 MB | 21 sec, 15410 MB |
-| P3.16X Large | 8 | 128 | TBD | TBD |
+| P3.16X Large | 8 | 128 | 19 sec, 826 MB | 49 sec, 15360 MB |

 # Detailed RNN Benchmark Results
diff --git a/benchmark/scripts/benchmark_resnet.py b/benchmark/scripts/benchmark_resnet.py
index 8b7f5b1ca01..a5be4dcb7a5 100644
--- a/benchmark/scripts/benchmark_resnet.py
+++ b/benchmark/scripts/benchmark_resnet.py
@@ -84,8 +84,7 @@

 # prepare logging
 # file name: backend_data_format_dataset_model_batch_size_gpus.log
-log_file = K.backend() + '_' + K.image_data_format() + '_' + args.dataset + '_resnet_v' + args.version + \
-           '_' + args.layers + '_batch_size' + str(batch_size) + '_' + str(num_gpus) + 'gpus'
+log_file = K.backend() + '_' + K.image_data_format() + '_' + args.dataset + '_resnet_v' + args.version + '_' + args.layers + '_batch_size' + str(batch_size) + '_' + str(num_gpus) + 'gpus'  # nopep8

 logFormatter = logging.Formatter('%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s')
 rootLogger = logging.getLogger()
@@ -298,9 +297,9 @@ def lr_schedule(epoch):
         batch_time = 1000 * (end_time - start_time)
         speed = batch_size * 1000.0 / batch_time if batch_time != 0 else 0
         rootLogger.info('batch {}/{} loss: {} accuracy: {} '
-                        'time: {}ms speed: {}'.format(int(current_index / batch_size),
-                                                      int(nice_n / batch_size), loss, accuracy,
-                                                      batch_time, speed))
+                         'time: {}ms speed: {}'.format(int(current_index / batch_size),
+                                                       int(nice_n / batch_size), loss, accuracy,
+                                                       batch_time, speed))

     rootLogger.info('finish epoch {}/{} total epoch time: {}ms'.format(i, epochs, total_time))

@@ -321,4 +320,4 @@ def lr_schedule(epoch):

 # Score trained model.
 scores = model.evaluate(x_test, y_test, verbose=1)
 rootLogger.info('Test loss: %.4f' % scores[0])
-rootLogger.info('Test accuracy: %.4f'% scores[1])
+rootLogger.info('Test accuracy: %.4f' % scores[1])
diff --git a/benchmark/scripts/models/lstm_synthetic.py b/benchmark/scripts/models/lstm_synthetic.py
index dfe9107e835..69c80ad8472 100644
--- a/benchmark/scripts/models/lstm_synthetic.py
+++ b/benchmark/scripts/models/lstm_synthetic.py
@@ -44,9 +44,7 @@ def __init__(self):
     def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False, epochs=20):
         # prepare logging
         # file name: backend_data_format_dataset_model_batch_size_gpus.log
-        log_file = keras.backend.backend() + '_' + keras.backend.image_data_format() + \
-                   '_lstm_synthetic_batch_size_' + \
-                   str(self.batch_size) + '_' + str(gpus) + 'gpus.log'
+        log_file = keras.backend.backend() + '_' + keras.backend.image_data_format() + '_lstm_synthetic_batch_size_' + str(self.batch_size) + '_' + str(gpus) + 'gpus.log'  # nopep8

         logging.basicConfig(level=logging.INFO, filename=log_file)
         self.epochs = epochs
diff --git a/benchmark/scripts/models/lstm_text_generation.py b/benchmark/scripts/models/lstm_text_generation.py
index a557b021d3a..5e32fd6bad1 100644
--- a/benchmark/scripts/models/lstm_text_generation.py
+++ b/benchmark/scripts/models/lstm_text_generation.py
@@ -49,9 +49,7 @@ def __init__(self, dataset_name=None):
     def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False, epochs=20):
         # prepare logging
         # file name: backend_data_format_dataset_model_batch_size_gpus.log
-        log_file = keras.backend.backend() + '_' + keras.backend.image_data_format() + \
-                   '_lstm_test_generation_' + self.dataset_name + '_batch_size_' + \
-                   str(self.batch_size) + '_' + str(gpus) + 'gpus.log'
+        log_file = keras.backend.backend() + '_' + keras.backend.image_data_format() + '_lstm_test_generation_' + self.dataset_name + '_batch_size_' + str(self.batch_size) + '_' + str(gpus) + 'gpus.log'  # nopep8

         logging.basicConfig(level=logging.INFO, filename=log_file)
         self.epochs = epochs
diff --git a/benchmark/scripts/models/resnet50_benchmark.py b/benchmark/scripts/models/resnet50_benchmark.py
index 56e068dc9fe..c796233af83 100644
--- a/benchmark/scripts/models/resnet50_benchmark.py
+++ b/benchmark/scripts/models/resnet50_benchmark.py
@@ -17,8 +17,6 @@

 from keras import backend as K

-
-
 def crossentropy_from_logits(y_true, y_pred):
     return keras.backend.categorical_crossentropy(target=y_true,
                                                    output=y_pred,
@@ -39,12 +37,11 @@ def __init__(self):
     def run_benchmark(self, gpus=0, inference=False, use_dataset_tensors=False, epochs=20):
         self.epochs = epochs
         if gpus > 1:
-            self.batch_size = self.batch_size*gpus
+            self.batch_size = self.batch_size * gpus

         # prepare logging
         # file name: backend_data_format_dataset_model_batch_size_gpus.log
-        log_file = K.backend() + '_' + K.image_data_format() + '_synthetic_resnet50_batch_size_' + \
-                   str(self.batch_size) + '_' + str(gpus) + 'gpus.log'
+        log_file = K.backend() + '_' + K.image_data_format() + '_synthetic_resnet50_batch_size_' + str(self.batch_size) + '_' + str(gpus) + 'gpus.log'  # nopep8

         logging.basicConfig(level=logging.INFO, filename=log_file)
         print("Running model ", self.test_name)
diff --git a/keras/backend/mxnet_backend.py b/keras/backend/mxnet_backend.py
index c77d587e69b..7af184991de 100644
--- a/keras/backend/mxnet_backend.py
+++ b/keras/backend/mxnet_backend.py
@@ -2593,13 +2593,13 @@ def rnn(step_function, inputs, initial_states,
                          'Ex: new_x_train = keras.preprocessing.sequence.pad_sequences(old_x_train, '
                          'maxlen=MAX_LEN_OF_INPUT_SAMPLE_TYPE_INT). '
                          'More Details - '
-                         'https://github.com/awslabs/keras-apache-mxnet/tree/master/docs/mxnet_backend/using_rnn_with_mxnet_backend.md') #nopep8
+                         'https://github.com/awslabs/keras-apache-mxnet/tree/master/docs/mxnet_backend/using_rnn_with_mxnet_backend.md') # nopep8

     if not unroll and dshape[1] is not None:
         warnings.warn('MXNet Backend: `unroll=False` is not supported yet in RNN. Since the input_shape is known, '
                       'setting `unroll=True` and continuing the execution.'
                       'More Details - '
-                      'https://github.com/awslabs/keras-apache-mxnet/tree/master/docs/mxnet_backend/using_rnn_with_mxnet_backend.md', stacklevel=2) #nopep8
+                      'https://github.com/awslabs/keras-apache-mxnet/tree/master/docs/mxnet_backend/using_rnn_with_mxnet_backend.md', stacklevel=2) # nopep8

     # Split the inputs across time dimension and generate the list of inputs
     # with shape `(samples, ...)` (no time dimension)