Commit

Merge pull request #234 from gpengzhi/flake8
Fix flake8 lint error in our repo
gpengzhi committed Oct 22, 2019
2 parents 53be563 + 17bc8be commit 22d7b8e
Showing 175 changed files with 833 additions and 534 deletions.
17 changes: 17 additions & 0 deletions .flake8
@@ -0,0 +1,17 @@
[flake8]
select =
    E101,E111,E112,E113,E114,E115,E116,E117,
    E201,E202,E203,E211,E221,E222,E223,E224,E225,E226,E227,E228,
    E231,E251,E261,E262,E265,E266,E271,E272,E273,E274,E275,
    E301,E302,E303,E304,E305,E306,
    E401,
    E502,
    E701,E702,E703,E711,E712,E713,E714,E721,E722,
    E901,
    W191,
    W291,W292,W293,
    W391,
    W605
count = true
show-source = true
statistics = true
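
The selected codes are standard pycodestyle checks; for example, E226 flags missing whitespace around arithmetic operators and E261 requires at least two spaces before an inline comment, which is exactly the kind of change repeated throughout the hunks below. A tiny hypothetical snippet, not taken from the repo, for illustration:

# Flagged by E226 (missing whitespace around arithmetic operator) and
# E261 (at least two spaces before an inline comment):
dim = 768
units = dim*4
steps = 50 # display interval
# Compliant versions, matching the fixes applied throughout this commit:
units = dim * 4
steps = 50  # display interval
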
6 changes: 2 additions & 4 deletions .travis.yml
@@ -14,10 +14,8 @@ install:
- pip install pytest

script:
# stop the build if there are Python syntax errors or undefined names
- flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics
# exit-zero treats all errors as warnings. Texar limits lines to a maximum of 80 chars.
- flake8 . --count --exit-zero --max-complexity=10 --max-line-length=80 --statistics
# Linting
- flake8 texar/ examples/
# Unit tests
- pytest
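
To reproduce the lint step outside CI, flake8 can be run on the same two source trees; the .flake8 file added above is picked up automatically from the repository root. A minimal sketch, assuming flake8 is installed in the current environment:

import subprocess

# Equivalent to the "- flake8 texar/ examples/" step above; raises CalledProcessError on lint errors.
subprocess.run(["flake8", "texar/", "examples/"], check=True)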

8 changes: 5 additions & 3 deletions examples/bert/config_data.py
@@ -5,10 +5,12 @@

train_batch_size = 32
max_train_epoch = 3
display_steps = 50 # Print training loss every display_steps; -1 to disable
display_steps = 50  # Print training loss every display_steps; -1 to disable
eval_steps = -1 # Eval on the dev set every eval_steps; -1 to disable
warmup_proportion = 0.1 # Proportion of training to perform linear learning
# rate warmup for. E.g., 0.1 = 10% of training.
# Proportion of training to perform linear learning
# rate warmup for. E.g., 0.1 = 10% of training.
warmup_proportion = 0.1

eval_batch_size = 8
test_batch_size = 8
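
As a quick worked example of how warmup_proportion is typically consumed in the BERT example (the dataset size below is made up, not a value from this repo):

train_batch_size = 32
max_train_epoch = 3
warmup_proportion = 0.1
num_train_examples = 3200                                                    # hypothetical
num_train_steps = num_train_examples // train_batch_size * max_train_epoch  # 300
num_warmup_steps = int(num_train_steps * warmup_proportion)                  # 30 warmup steps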

2 changes: 1 addition & 1 deletion examples/bert/utils/data_utils.py
@@ -131,7 +131,7 @@ def _create_examples(lines, set_type):
text_a = tx.utils.compat_as_text(line[1])
# Single sentence classification, text_b doesn't exist
text_b = None
label = '0' # arbitrary set as 0
label = '0'  # arbitrary set as 0
examples.append(InputExample(guid=guid, text_a=text_a,
text_b=text_b, label=label))
return examples
4 changes: 2 additions & 2 deletions examples/bert/utils/model_utils.py
@@ -32,7 +32,7 @@ def get_lr(global_step, num_train_steps, num_warmup_steps, static_lr):
warmup_learning_rate = static_lr * warmup_percent_done

is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32)
learning_rate = ((1.0 - is_warmup) * learning_rate\
+is_warmup * warmup_learning_rate)
learning_rate = ((1.0 - is_warmup) * learning_rate
+ is_warmup * warmup_learning_rate)

return learning_rate
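
The expression above is an arithmetic select between two schedules. A NumPy-free sketch of the same blend (illustrative only; the real get_lr builds TensorFlow ops, and the decayed rate comes from the surrounding code):

def blended_lr(step, warmup_steps, decayed_lr, static_lr):
    # Linear warmup: scale static_lr by the fraction of warmup completed.
    warmup_lr = static_lr * min(step, warmup_steps) / float(warmup_steps)
    is_warmup = 1.0 if step < warmup_steps else 0.0
    # Same arithmetic as the TF code: warmup rate while warming up, decayed rate afterwards.
    return (1.0 - is_warmup) * decayed_lr + is_warmup * warmup_lr
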
12 changes: 7 additions & 5 deletions examples/distributed_gpu/lm_ptb_distributed.py
@@ -69,11 +69,12 @@

config = importlib.import_module(FLAGS.config)


def _main(_):
# Data
tf.logging.set_verbosity(tf.logging.INFO)

## 1. initialize the horovod
# 1. initialize the horovod
hvd.init()

batch_size = config.batch_size
@@ -158,7 +159,6 @@ def _run_epoch(sess, data_iter, epoch, is_train=False, verbose=False):
if is_train
else tf.estimator.ModeKeys.EVAL)


for step, (x, y) in enumerate(data_iter):
if step == 0:
state = sess.run(initial_state,
@@ -179,10 +179,11 @@ def _run_epoch(sess, data_iter, epoch, is_train=False, verbose=False):

ppl = np.exp(loss / iters)
if verbose and is_train and hvd.rank() == 0 \
and (step+1) % (epoch_size // 10) == 0:
and (step + 1) % (epoch_size // 10) == 0:
tf.logging.info("%.3f perplexity: %.3f speed: %.0f wps" %
((step+1) * 1.0 / epoch_size, ppl,
iters * batch_size / (time.time() - start_time)))
((step + 1) * 1.0 / epoch_size, ppl,
iters * batch_size / (
time.time() - start_time)))
_elapsed_time = time.time() - start_time
tf.logging.info("epoch time elapsed: %f" % (_elapsed_time))
ppl = np.exp(loss / iters)
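
The perplexity printed here is just the exponential of the average per-token loss, e.g. with made-up numbers:

import numpy as np

loss, iters = 460.5, 100     # hypothetical accumulated cross-entropy and target-token count
ppl = np.exp(loss / iters)   # exp(4.605) is roughly 100.0
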
@@ -231,5 +232,6 @@ def _run_epoch(sess, data_iter, epoch, is_train=False, verbose=False):
test_ppl, _ = _run_epoch(sess, test_data_iter, 0)
tf.logging.info("Test Perplexity: %.3f" % (test_ppl))


if __name__ == '__main__':
tf.app.run(main=_main)
10 changes: 6 additions & 4 deletions examples/distributed_gpu/ptb_reader.py
@@ -26,14 +26,15 @@
import horovod.tensorflow as hvd
import texar.tf as tx


def ptb_iterator(data, batch_size, num_steps, is_train=False):
"""Iterates through the ptb data.
"""

data_length = len(data)

batch_length = data_length // batch_size
data = np.asarray(data[:batch_size*batch_length])
data = np.asarray(data[:batch_size * batch_length])
data = data.reshape([batch_size, batch_length])

epoch_size = (batch_length - 1) // num_steps
@@ -43,7 +44,7 @@ def ptb_iterator(data, batch_size, num_steps, is_train=False):
def _sharded_data(data):
_batch_size = len(data)
_shard_size = _batch_size // hvd.size()
data = [data[i*_shard_size: (i+1) * _shard_size]
data = [data[i * _shard_size: (i + 1) * _shard_size]
for i in range(_shard_size)]
data = data[hvd.rank()]
return data
@@ -54,10 +55,11 @@ def _sharded_data(data):
data = _sharded_data(data)

for i in range(epoch_size):
x = data[:, i * num_steps : (i+1) * num_steps]
y = data[:, i * num_steps + 1 : (i+1) * num_steps + 1]
x = data[:, i * num_steps: (i + 1) * num_steps]
y = data[:, i * num_steps + 1: (i + 1) * num_steps + 1]
yield (x, y)


def prepare_data(data_path):
"""Preprocess PTB data.
"""
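For intuition about the slicing in ptb_iterator above: each yielded y is the same window as x advanced by one token, so it supplies the next-word targets. A toy example with hypothetical values, not repo code:

import numpy as np

data = np.arange(10).reshape(1, 10)   # one batch row: [[0 1 2 ... 9]]
num_steps, i = 3, 0
x = data[:, i * num_steps: (i + 1) * num_steps]           # [[0 1 2]]
y = data[:, i * num_steps + 1: (i + 1) * num_steps + 1]   # [[1 2 3]]
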
2 changes: 1 addition & 1 deletion examples/gpt-2/configs/config_model_117M.py
@@ -36,7 +36,7 @@
"type": "Dense",
"kwargs": {
"name": "conv1",
"units": dim*4,
"units": dim * 4,
"activation": "gelu",
"use_bias": True,
}
2 changes: 1 addition & 1 deletion examples/gpt-2/configs/config_model_345M.py
@@ -36,7 +36,7 @@
"type": "Dense",
"kwargs": {
"name": "conv1",
"units": dim*4,
"units": dim * 4,
"activation": "gelu",
"use_bias": True,
}
10 changes: 5 additions & 5 deletions examples/gpt-2/configs/config_train.py
@@ -8,15 +8,15 @@

train_batch_size = 32
max_train_epoch = 100
display_steps = 10 # Print training loss every display_steps; -1 to disable
display_steps = 10  # Print training loss every display_steps; -1 to disable
eval_steps = -1 # Eval on the dev set every eval_steps; -1 to disable
checkpoint_steps = -1 # Checkpoint model parameters every checkpoint_steps;
# -1 to disable
# Checkpoint model parameters every checkpoint_steps; -1 to disable
checkpoint_steps = -1

eval_batch_size = 8
test_batch_size = 8

## Optimization configs
# Optimization configs

opt = {
'optimizer': {
@@ -27,7 +27,7 @@
}
}

## Data configs
# Data configs

feature_original_types = {
# Reading features from TFRecord data file.
4 changes: 3 additions & 1 deletion examples/gpt-2/gpt2_generate_main.py
@@ -70,6 +70,7 @@
"For '--config_type=texar', set the texar config file "
"like: '--config_model configs.config_model_117M'.")


def main(_):
"""
Builds the model and runs
@@ -171,7 +172,7 @@ def _embedding_fn(x, y):
context: [context_tokens for _ in range(batch_size)],
context_length:
[len(context_tokens) for _ in range(batch_size)],
tx.context.global_mode():tf.estimator.ModeKeys.PREDICT
tx.context.global_mode(): tf.estimator.ModeKeys.PREDICT
}
generated = 0
for _ in range(nsamples // batch_size):
@@ -221,5 +222,6 @@ def _embedding_fn(x, y):
" SAMPLE " + str(generated) + " " + "=" * 40)
print(text)


if __name__ == "__main__":
tf.app.run()
14 changes: 5 additions & 9 deletions examples/gpt-2/gpt2_train_main.py
@@ -85,7 +85,7 @@ def main(_):

tf.logging.set_verbosity(tf.logging.INFO)

## Loads GPT-2 model configuration
# Loads GPT-2 model configuration

if FLAGS.config_type == "json":
gpt2_config = model_utils.transform_gpt2_to_texar_config(
@@ -104,7 +104,7 @@ def main(_):
"max_decoding_length should not be greater than position_size. "
"{}>{}".format(max_decoding_length, gpt2_config.position_size))

## Loads data
# Loads data

# Configures training data shard in distributed mode
if FLAGS.distributed:
@@ -126,7 +126,7 @@ def main(_):
batch = iterator.get_next()
batch_size = tf.shape(batch['text_ids'])[0]

## Builds the GPT-2 model
# Builds the GPT-2 model

word_embedder = tx.modules.WordEmbedder(
vocab_size=gpt2_config.vocab_size,
@@ -154,7 +154,7 @@ def main(_):
loss = tx.losses.sequence_sparse_softmax_cross_entropy(
labels=batch['text_ids'][:, 1:],
logits=outputs.logits[:, :-1, :],
sequence_length=batch['length']-1,
sequence_length=batch['length'] - 1,
average_across_timesteps=True,
sum_over_timesteps=False)
ppl = tf.exp(loss)
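
The off-by-one pairing of labels, logits, and sequence_length above reflects that the logits at position t predict token t + 1, so an input of length L contributes L - 1 prediction targets. A toy illustration, not repo code:

tokens = ["<BOS>", "the", "cat", "sat"]      # length L = 4
inputs, targets = tokens[:-1], tokens[1:]    # 3 (input, target) pairs
assert len(inputs) == len(targets) == len(tokens) - 1
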
@@ -194,8 +194,7 @@ def _embedding_fn(x, y):
helper=helper)
sample_id = outputs_infer.sample_id


## Train/eval/test routine
# Train/eval/test routine
saver = tf.train.Saver()
saver_best = tf.train.Saver(max_to_keep=1)
dev_best = {'loss': 1e8, 'ppl': 1e8}
@@ -282,7 +281,6 @@ def _dev_epoch(sess):
ckpt_fn = saver_best.save(sess, ckpt_fn)
tf.logging.info('Checkpoint best to {}'.format(ckpt_fn))


def _test_epoch(sess):
"""Generates samples on the test set.
"""
@@ -346,7 +344,6 @@ def _test_epoch(sess):
tx.utils.write_paired_text(
_all_input_text, _all_samples_text, output_file)


# Broadcasts global variables from rank-0 process
if FLAGS.distributed:
bcast = hvd.broadcast_global_variables(0)
@@ -388,4 +385,3 @@ def _test_epoch(sess):

if __name__ == "__main__":
tf.app.run()

1 change: 0 additions & 1 deletion examples/gpt-2/utils/data_utils.py
@@ -123,4 +123,3 @@ def prepare_TFRecord_data(data_dir, max_seq_length, encoder, output_dir):
test_file = os.path.join(output_dir, "test.tf_record")
file_based_convert_examples_to_features(
test_examples, max_seq_length, encoder, test_file, EOS_token=None)

6 changes: 3 additions & 3 deletions examples/gpt-2/utils/model_utils.py
@@ -7,6 +7,7 @@
import numpy as np
from texar.tf import HParams


def transform_gpt2_to_texar_config(input_json_path):
"""
Remap the config file
@@ -47,7 +48,7 @@ def transform_gpt2_to_texar_config(input_json_path):
"type": "Dense",
"kwargs": {
"name": "conv1",
"units": hidden_dim*4,
"units": hidden_dim * 4,
"activation": "gelu",
"use_bias": True,
}
@@ -67,7 +68,6 @@ def transform_gpt2_to_texar_config(input_json_path):
return HParams(configs, default_hparams=None)



def _map_tensor_names(original_tensor_name):
"""
Tensor name mapping
@@ -109,7 +109,6 @@ def _map_tensor_names(original_tensor_name):
return original_tensor_name



# pylint: disable=too-many-locals
def _get_assignment_map_from_checkpoint(sess, all_variables, init_checkpoint):
"""
@@ -188,6 +187,7 @@ def _get_tensor_by_name(tensor_name):

return assignment_map


def init_gpt2_checkpoint(sess, init_checkpoint):
"""
Initializes GPT-2 model parameters from a checkpoint
