
Lightning Updates for v0.8.5 #5798

Merged: 9 commits into huggingface:master on Jul 18, 2020
Conversation

@nateraw (Contributor) commented on Jul 16, 2020

Fixing #5361 ... battling with unit tests.

@nateraw (Contributor, Author) commented on Jul 16, 2020

@sshleifer, do you have any guidance on these two errors?
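
For reference, both failures below come from the same parametrized test_finetune in examples/seq2seq/test_seq2seq_examples.py. A quick way to re-run just those cases locally is sketched here; the "-n 4" option assumes pytest-xdist, which the [gw3] worker tags in the output suggest is already in use:

    import pytest

    # Re-run only the test_finetune parametrizations; "-n 4" needs pytest-xdist.
    pytest.main(["examples/seq2seq/test_seq2seq_examples.py", "-k", "test_finetune", "-n", "4"])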

T5

________________ test_finetune[patrickvonplaten/t5-tiny-random] ________________
[gw3] linux -- Python 3.6.11 /usr/local/bin/python

model = 'patrickvonplaten/t5-tiny-random'

    @pytest.mark.parametrize(
        ["model"], [pytest.param(T5_TINY), pytest.param(BART_TINY), pytest.param(MBART_TINY), pytest.param(MARIAN_TINY)]
    )
    def test_finetune(model):
        args_d: dict = CHEAP_ARGS.copy()
        task = "translation" if model in [MBART_TINY, MARIAN_TINY] else "summarization"
        tmp_dir = make_test_data_dir()
        output_dir = tempfile.mkdtemp(prefix="output_")
        args_d.update(
            data_dir=tmp_dir,
            model_name_or_path=model,
            tokenizer_name=None,
            train_batch_size=2,
            eval_batch_size=2,
            output_dir=output_dir,
            do_predict=True,
            task=task,
            src_lang="en_XX",
            tgt_lang="ro_RO",
            freeze_encoder=True,
            freeze_embeds=True,
        )
        assert "n_train" in args_d
        args = argparse.Namespace(**args_d)
>       module = main(args)

examples/seq2seq/test_seq2seq_examples.py:233: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
examples/seq2seq/finetune.py:298: in main
    model: SummarizationModule = SummarizationModule(args)
examples/seq2seq/finetune.py:95: in __init__
    freeze_params(self.model.model.encoder)  # TODO: this will break for t5
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = T5ForConditionalGeneration(
  (shared): Embedding(32128, 64)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128...
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (lm_head): Linear(in_features=64, out_features=32128, bias=False)
)
name = 'model'

    def __getattr__(self, name):
        if '_parameters' in self.__dict__:
            _parameters = self.__dict__['_parameters']
            if name in _parameters:
                return _parameters[name]
        if '_buffers' in self.__dict__:
            _buffers = self.__dict__['_buffers']
            if name in _buffers:
                return _buffers[name]
        if '_modules' in self.__dict__:
            modules = self.__dict__['_modules']
            if name in modules:
                return modules[name]
        raise AttributeError("'{}' object has no attribute '{}'".format(
>           type(self).__name__, name))
E       AttributeError: 'T5ForConditionalGeneration' object has no attribute 'model'

/usr/local/lib/python3.6/site-packages/torch/nn/modules/module.py:594: AttributeError
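
This first failure is the TODO already flagged at finetune.py:95: BART-style models keep their encoder at self.model.model.encoder, while T5ForConditionalGeneration exposes the encoder directly and has no intermediate .model attribute, hence the AttributeError. One way to make the freeze model-agnostic is sketched below, using the generic get_encoder() accessor that both architectures provide; this is only an illustration, not necessarily the fix this PR lands:

    import torch.nn as nn

    def freeze_params(module: nn.Module) -> None:
        # Turn off gradient updates for every parameter in the module.
        for par in module.parameters():
            par.requires_grad = False

    def freeze_encoder(model) -> None:
        # get_encoder() is defined on both BART-style models and
        # T5ForConditionalGeneration, so it sidesteps the BART-specific
        # model.model.encoder attribute that raises AttributeError for T5.
        freeze_params(model.get_encoder())

The call site in finetune.py would then become freeze_encoder(self.model) instead of freeze_params(self.model.model.encoder); freeze_embeds would likely need a similar guard, since T5 keeps its shared embedding at model.shared rather than model.model.shared.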

MBart

_____________________ test_finetune[sshleifer/tiny-mbart] ______________________
[gw3] linux -- Python 3.6.11 /usr/local/bin/python

model = 'sshleifer/tiny-mbart'

    @pytest.mark.parametrize(
        ["model"], [pytest.param(T5_TINY), pytest.param(BART_TINY), pytest.param(MBART_TINY), pytest.param(MARIAN_TINY)]
    )
    def test_finetune(model):
        args_d: dict = CHEAP_ARGS.copy()
        task = "translation" if model in [MBART_TINY, MARIAN_TINY] else "summarization"
        tmp_dir = make_test_data_dir()
        output_dir = tempfile.mkdtemp(prefix="output_")
        args_d.update(
            data_dir=tmp_dir,
            model_name_or_path=model,
            tokenizer_name=None,
            train_batch_size=2,
            eval_batch_size=2,
            output_dir=output_dir,
            do_predict=True,
            task=task,
            src_lang="en_XX",
            tgt_lang="ro_RO",
            freeze_encoder=True,
            freeze_embeds=True,
        )
        assert "n_train" in args_d
        args = argparse.Namespace(**args_d)
>       module = main(args)

examples/seq2seq/test_seq2seq_examples.py:233: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
examples/seq2seq/finetune.py:324: in main
    logger=logger,
examples/lightning_base.py:312: in generic_train
    trainer.fit(model)
/usr/local/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py:1038: in fit
    model.setup('fit')
examples/lightning_base.py:125: in setup
    dataloader = self.get_dataloader("train", train_batch_size)
examples/seq2seq/finetune.py:193: in get_dataloader
    dataset = self.get_dataset(type_path)
examples/seq2seq/finetune.py:188: in get_dataset
    **self.dataset_kwargs,
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = <seq2seq.utils.SummarizationDataset object at 0x7ff21a4592e8>
tokenizer = <transformers.tokenization_bart.MBartTokenizer object at 0x7ff21f7c0b00>
data_dir = PosixPath('/tmp/tmpmc70afs6'), type_path = 'train'
max_source_length = 12, max_target_length = 12, n_obs = None
overwrite_cache = False, prefix = '', src_lang = None, tgt_lang = None

    def __init__(
        self,
        tokenizer,
        data_dir,
        type_path="train",
        max_source_length=1024,
        max_target_length=56,
        n_obs=None,
        overwrite_cache=False,
        prefix="",
        src_lang=None,
        tgt_lang=None,
    ):
        super().__init__()
        # FIXME: the rstrip logic strips all the chars, it seems.
        tok_name = tokenizer.__class__.__name__.lower().rstrip("tokenizer")
        if hasattr(tokenizer, "set_lang") and src_lang is not None:
            tokenizer.set_lang(src_lang)  # HACK: only applies to mbart
        self.source = encode_file(
            tokenizer,
            os.path.join(data_dir, type_path + ".source"),
            max_source_length,
            overwrite_cache=overwrite_cache,
            prefix=prefix,
            tok_name=tok_name,
        )
        tgt_path = os.path.join(data_dir, type_path + ".target")
        if hasattr(tokenizer, "set_lang"):
>           assert tgt_lang is not None, "--tgt_lang must be passed to build a translation"
E           AssertionError: --tgt_lang must be passed to build a translation

examples/seq2seq/utils.py:112: AssertionError
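
The MBart failure looks like plumbing rather than a tokenizer bug: the test passes src_lang="en_XX" and tgt_lang="ro_RO" through the args, yet the dataset is constructed with src_lang=None and tgt_lang=None, so the assertion in utils.py fires. A minimal sketch of forwarding the language codes into the dataset kwargs follows; the hparams and dataset_kwargs names are assumptions, and this is not necessarily how the PR resolves it:

    # Sketch: called from SummarizationModule.__init__ before the dataloaders
    # are built, so the CLI language codes actually reach SummarizationDataset
    # instead of its None defaults.
    def add_translation_kwargs(dataset_kwargs: dict, hparams) -> dict:
        if getattr(hparams, "task", None) == "translation":
            dataset_kwargs["src_lang"] = hparams.src_lang  # e.g. "en_XX"
            dataset_kwargs["tgt_lang"] = hparams.tgt_lang  # e.g. "ro_RO"
        return dataset_kwargs

Separately, the FIXME in utils.py is real: str.rstrip("tokenizer") strips any trailing characters drawn from the set {t, o, k, e, n, i, z, r}, so "mbarttokenizer".rstrip("tokenizer") yields "mba" rather than "mbart"; on Python 3.6 a suffix-safe alternative is name[: -len("tokenizer")] guarded by name.endswith("tokenizer").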

@sshleifer self-assigned this on Jul 17, 2020
@sshleifer added this to In progress in Examples/seq2seq via automation on Jul 17, 2020
@codecov (bot) commented on Jul 18, 2020

Codecov Report

Merging #5798 into master will decrease coverage by 0.18%.
The diff coverage is n/a.

Impacted file tree graph

@@            Coverage Diff             @@
##           master    #5798      +/-   ##
==========================================
- Coverage   78.66%   78.48%   -0.19%     
==========================================
  Files         146      146              
  Lines       26200    26200              
==========================================
- Hits        20611    20563      -48     
- Misses       5589     5637      +48     
Impacted Files Coverage Δ
src/transformers/modeling_tf_electra.py 26.02% <0.00%> (-69.52%) ⬇️
src/transformers/generation_tf_utils.py 86.21% <0.00%> (-0.51%) ⬇️
src/transformers/modeling_tf_distilbert.py 98.79% <0.00%> (+33.89%) ⬆️

Continue to review full report at Codecov.

Legend: Δ = absolute <relative> (impact), ø = not affected, ? = missing data
Powered by Codecov. Last update 615be03...ee864a0.

@nateraw marked this pull request as ready for review on Jul 18, 2020 00:02
@nateraw requested a review from sshleifer on Jul 18, 2020 00:08
@nateraw requested a review from sshleifer on Jul 18, 2020 01:32
@sshleifer (Contributor) commented on Jul 18, 2020

Merging this now.
cc @moscow25: this bumps us to pytorch_lightning==0.8.5; let us know if you run into any issues.
cc @clmnt , @patil-suraj, @williamFalcon

Thanks for the big PR, @nateraw and @williamFalcon!

@sshleifer merged commit 529850a into huggingface:master on Jul 18, 2020
Examples/seq2seq automation moved this from In progress to Done on Jul 18, 2020
@moscow25 (Contributor) commented

Thanks @sshleifer -- 0.8.5 has been good for us this week. Much appreciated.
