Lightning Updates for v0.8.5 #5798
Conversation
@sshleifer Do you have any guidance on these two errors?

T5:

```
________________ test_finetune[patrickvonplaten/t5-tiny-random] ________________
[gw3] linux -- Python 3.6.11 /usr/local/bin/python
model = 'patrickvonplaten/t5-tiny-random'

    @pytest.mark.parametrize(
        ["model"], [pytest.param(T5_TINY), pytest.param(BART_TINY), pytest.param(MBART_TINY), pytest.param(MARIAN_TINY)]
    )
    def test_finetune(model):
        args_d: dict = CHEAP_ARGS.copy()
        task = "translation" if model in [MBART_TINY, MARIAN_TINY] else "summarization"
        tmp_dir = make_test_data_dir()
        output_dir = tempfile.mkdtemp(prefix="output_")
        args_d.update(
            data_dir=tmp_dir,
            model_name_or_path=model,
            tokenizer_name=None,
            train_batch_size=2,
            eval_batch_size=2,
            output_dir=output_dir,
            do_predict=True,
            task=task,
            src_lang="en_XX",
            tgt_lang="ro_RO",
            freeze_encoder=True,
            freeze_embeds=True,
        )
        assert "n_train" in args_d
        args = argparse.Namespace(**args_d)
>       module = main(args)

examples/seq2seq/test_seq2seq_examples.py:233:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
examples/seq2seq/finetune.py:298: in main
    model: SummarizationModule = SummarizationModule(args)
examples/seq2seq/finetune.py:95: in __init__
    freeze_params(self.model.model.encoder) # TODO: this will break for t5
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = T5ForConditionalGeneration(
  (shared): Embedding(32128, 64)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128...
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (lm_head): Linear(in_features=64, out_features=32128, bias=False)
)
name = 'model'

    def __getattr__(self, name):
        if '_parameters' in self.__dict__:
            _parameters = self.__dict__['_parameters']
            if name in _parameters:
                return _parameters[name]
        if '_buffers' in self.__dict__:
            _buffers = self.__dict__['_buffers']
            if name in _buffers:
                return _buffers[name]
        if '_modules' in self.__dict__:
            modules = self.__dict__['_modules']
            if name in modules:
                return modules[name]
        raise AttributeError("'{}' object has no attribute '{}'".format(
>           type(self).__name__, name))
E       AttributeError: 'T5ForConditionalGeneration' object has no attribute 'model'

/usr/local/lib/python3.6/site-packages/torch/nn/modules/module.py:594: AttributeError
```
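For context on the first failure: `freeze_params(self.model.model.encoder)` hard-codes BART's nesting, where the encoder lives at `model.model.encoder`. `T5ForConditionalGeneration` has no inner `.model` (its encoder sits directly at `model.encoder`), so the attribute lookup falls through to `nn.Module.__getattr__` and raises. A minimal, architecture-agnostic sketch follows; this is not necessarily the fix that landed in this PR, and `freeze_params` is reimplemented here only to keep the example self-contained:

```python
import torch.nn as nn


def freeze_params(module: nn.Module) -> None:
    """Turn off gradient updates for every parameter in `module`."""
    for p in module.parameters():
        p.requires_grad = False


def freeze_encoder(model: nn.Module) -> None:
    # transformers seq2seq models expose get_encoder() (it backs generate()),
    # which resolves to model.encoder for T5 and model.model.encoder for BART,
    # so it avoids hard-coding either attribute path.
    if hasattr(model, "get_encoder"):
        freeze_params(model.get_encoder())
    else:
        # Fall back to the BART-style nesting the original code assumed.
        freeze_params(model.model.encoder)
```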
MBart:

```
_____________________ test_finetune[sshleifer/tiny-mbart] ______________________
[gw3] linux -- Python 3.6.11 /usr/local/bin/python
model = 'sshleifer/tiny-mbart'

    @pytest.mark.parametrize(
        ["model"], [pytest.param(T5_TINY), pytest.param(BART_TINY), pytest.param(MBART_TINY), pytest.param(MARIAN_TINY)]
    )
    def test_finetune(model):
        args_d: dict = CHEAP_ARGS.copy()
        task = "translation" if model in [MBART_TINY, MARIAN_TINY] else "summarization"
        tmp_dir = make_test_data_dir()
        output_dir = tempfile.mkdtemp(prefix="output_")
        args_d.update(
            data_dir=tmp_dir,
            model_name_or_path=model,
            tokenizer_name=None,
            train_batch_size=2,
            eval_batch_size=2,
            output_dir=output_dir,
            do_predict=True,
            task=task,
            src_lang="en_XX",
            tgt_lang="ro_RO",
            freeze_encoder=True,
            freeze_embeds=True,
        )
        assert "n_train" in args_d
        args = argparse.Namespace(**args_d)
>       module = main(args)

examples/seq2seq/test_seq2seq_examples.py:233:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
examples/seq2seq/finetune.py:324: in main
    logger=logger,
examples/lightning_base.py:312: in generic_train
    trainer.fit(model)
/usr/local/lib/python3.6/site-packages/pytorch_lightning/trainer/trainer.py:1038: in fit
    model.setup('fit')
examples/lightning_base.py:125: in setup
    dataloader = self.get_dataloader("train", train_batch_size)
examples/seq2seq/finetune.py:193: in get_dataloader
    dataset = self.get_dataset(type_path)
examples/seq2seq/finetune.py:188: in get_dataset
    **self.dataset_kwargs,
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <seq2seq.utils.SummarizationDataset object at 0x7ff21a4592e8>
tokenizer = <transformers.tokenization_bart.MBartTokenizer object at 0x7ff21f7c0b00>
data_dir = PosixPath('/tmp/tmpmc70afs6'), type_path = 'train'
max_source_length = 12, max_target_length = 12, n_obs = None
overwrite_cache = False, prefix = '', src_lang = None, tgt_lang = None

    def __init__(
        self,
        tokenizer,
        data_dir,
        type_path="train",
        max_source_length=1024,
        max_target_length=56,
        n_obs=None,
        overwrite_cache=False,
        prefix="",
        src_lang=None,
        tgt_lang=None,
    ):
        super().__init__()
        # FIXME: the rstrip logic strips all the chars, it seems.
        tok_name = tokenizer.__class__.__name__.lower().rstrip("tokenizer")
        if hasattr(tokenizer, "set_lang") and src_lang is not None:
            tokenizer.set_lang(src_lang) # HACK: only applies to mbart
        self.source = encode_file(
            tokenizer,
            os.path.join(data_dir, type_path + ".source"),
            max_source_length,
            overwrite_cache=overwrite_cache,
            prefix=prefix,
            tok_name=tok_name,
        )
        tgt_path = os.path.join(data_dir, type_path + ".target")
        if hasattr(tokenizer, "set_lang"):
>           assert tgt_lang is not None, "--tgt_lang must be passed to build a translation"
E           AssertionError: --tgt_lang must be passed to build a translation

examples/seq2seq/utils.py:112: AssertionError
```
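Two distinct problems are visible in this second trace. The assertion fires because the `src_lang`/`tgt_lang` values set in the test args never reach `SummarizationDataset` (both show up as `None` in the locals above), so the dataset kwargs are apparently not being forwarded. Separately, the `FIXME` next to `rstrip` flags a genuine Python gotcha: `str.rstrip` takes a set of characters, not a suffix, so it keeps stripping past "tokenizer" into the model name itself. A small demonstration with a suffix-safe helper; `strip_suffix` is a hypothetical name here, and Python 3.9+ offers `str.removesuffix` for the same job:

```python
name = "mbarttokenizer"  # e.g. MBartTokenizer.__name__.lower()

# rstrip removes any trailing run of chars from the set {t,o,k,e,n,i,z,r},
# so it eats past the "tokenizer" suffix into "mbart" itself:
print(name.rstrip("tokenizer"))  # -> 'mba', not the intended 'mbart'


def strip_suffix(s: str, suffix: str) -> str:
    """Remove `suffix` from the end of `s` exactly once, if present."""
    return s[: -len(suffix)] if suffix and s.endswith(suffix) else s


print(strip_suffix(name, "tokenizer"))  # -> 'mbart'
```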
Codecov Report
```
@@            Coverage Diff             @@
##           master    #5798      +/-   ##
==========================================
- Coverage   78.66%   78.48%   -0.19%
==========================================
  Files         146      146
  Lines       26200    26200
==========================================
- Hits        20611    20563      -48
- Misses       5589     5637      +48
```
Continue to review full report at Codecov.
Merging this now. Thanks for the big PR @nateraw and @williamFalcon!
Thanks @sshleifer --
Fixing #5361 ...battling with unittests.