Commit

Merge pull request #234 from gpengzhi/flake8
Fix flake8 lint error in our repo
gpengzhi committed Oct 22, 2019
2 parents 53be563 + 17bc8be commit 22d7b8e
Showing 175 changed files with 833 additions and 534 deletions.
17 changes: 17 additions & 0 deletions .flake8
@@ -0,0 +1,17 @@
[flake8]
select =
    E101,E111,E112,E113,E114,E115,E116,E117,
    E201,E202,E203,E211,E221,E222,E223,E224,E225,E226,E227,E228,
    E231,E251,E261,E262,E265,E266,E271,E272,E273,E274,E275,
    E301,E302,E303,E304,E305,E306,
    E401,
    E502,
    E701,E702,E703,E711,E712,E713,E714,E721,E722,
    E901,
    W191,
    W291,W292,W293,
    W391,
    W605
count = true
show-source = true
statistics = true
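
The selected codes are standard pycodestyle checks; for example, E226 flags missing whitespace around arithmetic operators and E261 requires at least two spaces before an inline comment, which is exactly the kind of change repeated throughout the hunks below. A tiny hypothetical snippet, not taken from the repo, for illustration:

# Flagged by E226 (missing whitespace around arithmetic operator) and
# E261 (at least two spaces before an inline comment):
dim = 768
units = dim*4
steps = 50 # display interval
# Compliant versions, matching the fixes applied throughout this commit:
units = dim * 4
steps = 50  # display interval
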
6 changes: 2 additions & 4 deletions .travis.yml
@@ -14,10 +14,8 @@ install:
- pip install pytest

script:
# stop the build if there are Python syntax errors or undefined names
- flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics
# exit-zero treats all errors as warnings. Texar limits lines to a maximum of 80 chars.
- flake8 . --count --exit-zero --max-complexity=10 --max-line-length=80 --statistics
# Linting
- flake8 texar/ examples/
# Unit tests
- pytest
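
To reproduce the lint step outside CI, flake8 can be run on the same two source trees; the .flake8 file added above is picked up automatically from the repository root. A minimal sketch, assuming flake8 is installed in the current environment:

import subprocess

# Equivalent to the "- flake8 texar/ examples/" step above; raises CalledProcessError on lint errors.
subprocess.run(["flake8", "texar/", "examples/"], check=True)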

8 changes: 5 additions & 3 deletions examples/bert/config_data.py
@@ -5,10 +5,12 @@

train_batch_size = 32
max_train_epoch = 3
display_steps = 50 # Print training loss every display_steps; -1 to disable
display_steps = 50  # Print training loss every display_steps; -1 to disable
eval_steps = -1 # Eval on the dev set every eval_steps; -1 to disable
warmup_proportion = 0.1 # Proportion of training to perform linear learning
# rate warmup for. E.g., 0.1 = 10% of training.
# Proportion of training to perform linear learning
# rate warmup for. E.g., 0.1 = 10% of training.
warmup_proportion = 0.1

eval_batch_size = 8
test_batch_size = 8
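
As a quick worked example of how warmup_proportion is typically consumed in the BERT example (the dataset size below is made up, not a value from this repo):

train_batch_size = 32
max_train_epoch = 3
warmup_proportion = 0.1
num_train_examples = 3200                                                    # hypothetical
num_train_steps = num_train_examples // train_batch_size * max_train_epoch  # 300
num_warmup_steps = int(num_train_steps * warmup_proportion)                  # 30 warmup steps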

2 changes: 1 addition & 1 deletion examples/bert/utils/data_utils.py
@@ -131,7 +131,7 @@ def _create_examples(lines, set_type):
text_a = tx.utils.compat_as_text(line[1])
# Single sentence classification, text_b doesn't exist
text_b = None
label = '0' # arbitrary set as 0
label = '0'  # arbitrary set as 0
examples.append(InputExample(guid=guid, text_a=text_a,
text_b=text_b, label=label))
return examples
4 changes: 2 additions & 2 deletions examples/bert/utils/model_utils.py
@@ -32,7 +32,7 @@ def get_lr(global_step, num_train_steps, num_warmup_steps, static_lr):
warmup_learning_rate = static_lr * warmup_percent_done

is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32)
learning_rate = ((1.0 - is_warmup) * learning_rate\
+is_warmup * warmup_learning_rate)
learning_rate = ((1.0 - is_warmup) * learning_rate
+ is_warmup * warmup_learning_rate)

return learning_rate
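
The expression above is an arithmetic select between two schedules. A NumPy-free sketch of the same blend (illustrative only; the real get_lr builds TensorFlow ops, and the decayed rate comes from the surrounding code):

def blended_lr(step, warmup_steps, decayed_lr, static_lr):
    # Linear warmup: scale static_lr by the fraction of warmup completed.
    warmup_lr = static_lr * min(step, warmup_steps) / float(warmup_steps)
    is_warmup = 1.0 if step < warmup_steps else 0.0
    # Same arithmetic as the TF code: warmup rate while warming up, decayed rate afterwards.
    return (1.0 - is_warmup) * decayed_lr + is_warmup * warmup_lr
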
12 changes: 7 additions & 5 deletions examples/distributed_gpu/lm_ptb_distributed.py
@@ -69,11 +69,12 @@

config = importlib.import_module(FLAGS.config)


def _main(_):
# Data
tf.logging.set_verbosity(tf.logging.INFO)

## 1. initialize the horovod
# 1. initialize the horovod
hvd.init()

batch_size = config.batch_size
@@ -158,7 +159,6 @@ def _run_epoch(sess, data_iter, epoch, is_train=False, verbose=False):
if is_train
else tf.estimator.ModeKeys.EVAL)


for step, (x, y) in enumerate(data_iter):
if step == 0:
state = sess.run(initial_state,
@@ -179,10 +179,11 @@ def _run_epoch(sess, data_iter, epoch, is_train=False, verbose=False):

ppl = np.exp(loss / iters)
if verbose and is_train and hvd.rank() == 0 \
and (step+1) % (epoch_size // 10) == 0:
and (step + 1) % (epoch_size // 10) == 0:
tf.logging.info("%.3f perplexity: %.3f speed: %.0f wps" %
((step+1) * 1.0 / epoch_size, ppl,
iters * batch_size / (time.time() - start_time)))
((step + 1) * 1.0 / epoch_size, ppl,
iters * batch_size / (
time.time() - start_time)))
_elapsed_time = time.time() - start_time
tf.logging.info("epoch time elapsed: %f" % (_elapsed_time))
ppl = np.exp(loss / iters)
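
The perplexity printed here is just the exponential of the average per-token loss, e.g. with made-up numbers:

import numpy as np

loss, iters = 460.5, 100     # hypothetical accumulated cross-entropy and target-token count
ppl = np.exp(loss / iters)   # exp(4.605) is roughly 100.0
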
@@ -231,5 +232,6 @@ def _run_epoch(sess, data_iter, epoch, is_train=False, verbose=False):
test_ppl, _ = _run_epoch(sess, test_data_iter, 0)
tf.logging.info("Test Perplexity: %.3f" % (test_ppl))


if __name__ == '__main__':
tf.app.run(main=_main)
10 changes: 6 additions & 4 deletions examples/distributed_gpu/ptb_reader.py
@@ -26,14 +26,15 @@
import horovod.tensorflow as hvd
import texar.tf as tx


def ptb_iterator(data, batch_size, num_steps, is_train=False):
"""Iterates through the ptb data.
"""

data_length = len(data)

batch_length = data_length // batch_size
data = np.asarray(data[:batch_size*batch_length])
data = np.asarray(data[:batch_size * batch_length])
data = data.reshape([batch_size, batch_length])

epoch_size = (batch_length - 1) // num_steps
@@ -43,7 +44,7 @@ def ptb_iterator(data, batch_size, num_steps, is_train=False):
def _sharded_data(data):
_batch_size = len(data)
_shard_size = _batch_size // hvd.size()
data = [data[i*_shard_size: (i+1) * _shard_size]
data = [data[i * _shard_size: (i + 1) * _shard_size]
for i in range(_shard_size)]
data = data[hvd.rank()]
return data
@@ -54,10 +55,11 @@ def _sharded_data(data):
data = _sharded_data(data)

for i in range(epoch_size):
x = data[:, i * num_steps : (i+1) * num_steps]
y = data[:, i * num_steps + 1 : (i+1) * num_steps + 1]
x = data[:, i * num_steps: (i + 1) * num_steps]
y = data[:, i * num_steps + 1: (i + 1) * num_steps + 1]
yield (x, y)


def prepare_data(data_path):
"""Preprocess PTB data.
"""
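For intuition about the slicing in ptb_iterator above: each yielded y is the same window as x advanced by one token, so it supplies the next-word targets. A toy example with hypothetical values, not repo code:

import numpy as np

data = np.arange(10).reshape(1, 10)   # one batch row: [[0 1 2 ... 9]]
num_steps, i = 3, 0
x = data[:, i * num_steps: (i + 1) * num_steps]           # [[0 1 2]]
y = data[:, i * num_steps + 1: (i + 1) * num_steps + 1]   # [[1 2 3]]
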
2 changes: 1 addition & 1 deletion examples/gpt-2/configs/config_model_117M.py
@@ -36,7 +36,7 @@
"type": "Dense",
"kwargs": {
"name": "conv1",
"units": dim*4,
"units": dim * 4,
"activation": "gelu",
"use_bias": True,
}
2 changes: 1 addition & 1 deletion examples/gpt-2/configs/config_model_345M.py
@@ -36,7 +36,7 @@
"type": "Dense",
"kwargs": {
"name": "conv1",
"units": dim*4,
"units": dim * 4,
"activation": "gelu",
"use_bias": True,
}
10 changes: 5 additions & 5 deletions examples/gpt-2/configs/config_train.py
@@ -8,15 +8,15 @@

train_batch_size = 32
max_train_epoch = 100
display_steps = 10 # Print training loss every display_steps; -1 to disable
display_steps = 10  # Print training loss every display_steps; -1 to disable
eval_steps = -1 # Eval on the dev set every eval_steps; -1 to disable
checkpoint_steps = -1 # Checkpoint model parameters every checkpoint_steps;
# -1 to disable
# Checkpoint model parameters every checkpoint_steps; -1 to disable
checkpoint_steps = -1

eval_batch_size = 8
test_batch_size = 8

## Optimization configs
# Optimization configs

opt = {
'optimizer': {
@@ -27,7 +27,7 @@
}
}

## Data configs
# Data configs

feature_original_types = {
# Reading features from TFRecord data file.
4 changes: 3 additions & 1 deletion examples/gpt-2/gpt2_generate_main.py
@@ -70,6 +70,7 @@
"For '--config_type=texar', set the texar config file "
"like: '--config_model configs.config_model_117M'.")


def main(_):
"""
Builds the model and runs
@@ -171,7 +172,7 @@ def _embedding_fn(x, y):
context: [context_tokens for _ in range(batch_size)],
context_length:
[len(context_tokens) for _ in range(batch_size)],
tx.context.global_mode():tf.estimator.ModeKeys.PREDICT
tx.context.global_mode(): tf.estimator.ModeKeys.PREDICT
}
generated = 0
for _ in range(nsamples // batch_size):
@@ -221,5 +222,6 @@ def _embedding_fn(x, y):
" SAMPLE " + str(generated) + " " + "=" * 40)
print(text)


if __name__ == "__main__":
tf.app.run()
14 changes: 5 additions & 9 deletions examples/gpt-2/gpt2_train_main.py
@@ -85,7 +85,7 @@ def main(_):

tf.logging.set_verbosity(tf.logging.INFO)

## Loads GPT-2 model configuration
# Loads GPT-2 model configuration

if FLAGS.config_type == "json":
gpt2_config = model_utils.transform_gpt2_to_texar_config(
@@ -104,7 +104,7 @@ def main(_):
"max_decoding_length should not be greater than position_size. "
"{}>{}".format(max_decoding_length, gpt2_config.position_size))

## Loads data
# Loads data

# Configures training data shard in distributed mode
if FLAGS.distributed:
@@ -126,7 +126,7 @@ def main(_):
batch = iterator.get_next()
batch_size = tf.shape(batch['text_ids'])[0]

## Builds the GPT-2 model
# Builds the GPT-2 model

word_embedder = tx.modules.WordEmbedder(
vocab_size=gpt2_config.vocab_size,
@@ -154,7 +154,7 @@ def main(_):
loss = tx.losses.sequence_sparse_softmax_cross_entropy(
labels=batch['text_ids'][:, 1:],
logits=outputs.logits[:, :-1, :],
sequence_length=batch['length']-1,
sequence_length=batch['length'] - 1,
average_across_timesteps=True,
sum_over_timesteps=False)
ppl = tf.exp(loss)
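
The off-by-one pairing of labels, logits, and sequence_length above reflects that the logits at position t predict token t + 1, so an input of length L contributes L - 1 prediction targets. A toy illustration, not repo code:

tokens = ["<BOS>", "the", "cat", "sat"]      # length L = 4
inputs, targets = tokens[:-1], tokens[1:]    # 3 (input, target) pairs
assert len(inputs) == len(targets) == len(tokens) - 1
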
@@ -194,8 +194,7 @@ def _embedding_fn(x, y):
helper=helper)
sample_id = outputs_infer.sample_id


## Train/eval/test routine
# Train/eval/test routine
saver = tf.train.Saver()
saver_best = tf.train.Saver(max_to_keep=1)
dev_best = {'loss': 1e8, 'ppl': 1e8}
@@ -282,7 +281,6 @@ def _dev_epoch(sess):
ckpt_fn = saver_best.save(sess, ckpt_fn)
tf.logging.info('Checkpoint best to {}'.format(ckpt_fn))


def _test_epoch(sess):
"""Generates samples on the test set.
"""
@@ -346,7 +344,6 @@ def _test_epoch(sess):
tx.utils.write_paired_text(
_all_input_text, _all_samples_text, output_file)


# Broadcasts global variables from rank-0 process
if FLAGS.distributed:
bcast = hvd.broadcast_global_variables(0)
@@ -388,4 +385,3 @@ def _test_epoch(sess):

if __name__ == "__main__":
tf.app.run()

1 change: 0 additions & 1 deletion examples/gpt-2/utils/data_utils.py
@@ -123,4 +123,3 @@ def prepare_TFRecord_data(data_dir, max_seq_length, encoder, output_dir):
test_file = os.path.join(output_dir, "test.tf_record")
file_based_convert_examples_to_features(
test_examples, max_seq_length, encoder, test_file, EOS_token=None)

6 changes: 3 additions & 3 deletions examples/gpt-2/utils/model_utils.py
@@ -7,6 +7,7 @@
import numpy as np
from texar.tf import HParams


def transform_gpt2_to_texar_config(input_json_path):
"""
Remap the config file
@@ -47,7 +48,7 @@ def transform_gpt2_to_texar_config(input_json_path):
"type": "Dense",
"kwargs": {
"name": "conv1",
"units": hidden_dim*4,
"units": hidden_dim * 4,
"activation": "gelu",
"use_bias": True,
}
@@ -67,7 +68,6 @@ def transform_gpt2_to_texar_config(input_json_path):
return HParams(configs, default_hparams=None)



def _map_tensor_names(original_tensor_name):
"""
Tensor name mapping
@@ -109,7 +109,6 @@ def _map_tensor_names(original_tensor_name):
return original_tensor_name



# pylint: disable=too-many-locals
def _get_assignment_map_from_checkpoint(sess, all_variables, init_checkpoint):
"""
@@ -188,6 +187,7 @@ def _get_tensor_by_name(tensor_name):

return assignment_map


def init_gpt2_checkpoint(sess, init_checkpoint):
"""
Initializes GPT-2 model parameters from a checkpoint
