From 924c08af4e9a63c153c1f5cdc64de6cf4b04b7c7 Mon Sep 17 00:00:00 2001
From: TevenLeScao
Date: Mon, 9 Nov 2020 14:04:54 +0100
Subject: [PATCH 1/4] Move XLNet memory length FutureWarning

---
 src/transformers/configuration_xlnet.py | 11 -----------
 src/transformers/modeling_xlnet.py      | 14 +++++++++++++-
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/transformers/configuration_xlnet.py b/src/transformers/configuration_xlnet.py
index 05eda6010cc3ff..f9df42680457eb 100644
--- a/src/transformers/configuration_xlnet.py
+++ b/src/transformers/configuration_xlnet.py
@@ -198,17 +198,6 @@ def __init__(
         self.pad_token_id = pad_token_id
         self.eos_token_id = eos_token_id
 
-        if mem_len is None or mem_len == 0:
-            warnings.warn(
-                "This config doesn't use attention memories, a core feature of XLNet."
-                " Consider setting `mem_len` to a non-zero value, for example "
-                "`xlnet = XLNetLMHeadModel.from_pretrained('xlnet-base-cased', mem_len=1024)`,"
-                " for accurate training performance as well as an order of magnitude faster inference."
-                " Starting from version 3.5.0, the default parameter will be 1024, following"
-                " the implementation in https://arxiv.org/abs/1906.08237",
-                FutureWarning,
-            )
-
     @property
     def max_position_embeddings(self):
         return -1
diff --git a/src/transformers/modeling_xlnet.py b/src/transformers/modeling_xlnet.py
index fd3113fa263ffe..a077d4432fa2af 100755
--- a/src/transformers/modeling_xlnet.py
+++ b/src/transformers/modeling_xlnet.py
@@ -16,7 +16,7 @@
 """
  PyTorch XLNet model.
 """
-
+import warnings
 
 from dataclasses import dataclass
 from typing import List, Optional, Tuple
@@ -1087,6 +1087,18 @@ def forward(
         output_attentions=None,
         output_hidden_states=None,
         return_dict=None,
     ):
+
+        if self.config.mem_len is None or self.config.mem_len == 0:
+            warnings.warn(
+                "This XLNet config doesn't use attention memories, a core feature of XLNet."
+                " Consider setting `mem_len` to a non-zero value, for example "
+                "`xlnet = XLNetLMHeadModel.from_pretrained('xlnet-base-cased', mem_len=1024)`,"
+                " for accurate training performance as well as an order of magnitude faster inference."
+                " Starting from version 3.5.0, the default parameter will be 1024, following"
+                " the implementation in https://arxiv.org/abs/1906.08237",
+                FutureWarning,
+            )
+
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states

From cec8a68259ac9a2e91e337bf9eff75bb9ce8eb4a Mon Sep 17 00:00:00 2001
From: TevenLeScao
Date: Mon, 9 Nov 2020 14:12:29 +0100
Subject: [PATCH 2/4] isort

---
 src/transformers/modeling_xlnet.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/transformers/modeling_xlnet.py b/src/transformers/modeling_xlnet.py
index a077d4432fa2af..26bfe32dce2f9b 100755
--- a/src/transformers/modeling_xlnet.py
+++ b/src/transformers/modeling_xlnet.py
@@ -17,7 +17,6 @@
  PyTorch XLNet model.
 """
 import warnings
-
 from dataclasses import dataclass
 from typing import List, Optional, Tuple
 

From 03e51f87a31ec4d286aa604cb70eb7bb23c8daf1 Mon Sep 17 00:00:00 2001
From: TevenLeScao
Date: Mon, 9 Nov 2020 14:16:18 +0100
Subject: [PATCH 3/4] style

---
 src/transformers/configuration_xlnet.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/transformers/configuration_xlnet.py b/src/transformers/configuration_xlnet.py
index f9df42680457eb..2b85f4e7571588 100644
--- a/src/transformers/configuration_xlnet.py
+++ b/src/transformers/configuration_xlnet.py
@@ -15,8 +15,6 @@
 # limitations under the License.
 """ XLNet configuration """
 
-import warnings
-
 from .configuration_utils import PretrainedConfig
 from .utils import logging

From deed7255ec6367dd3f9a93f386c0a61322a004fb Mon Sep 17 00:00:00 2001
From: TevenLeScao
Date: Mon, 9 Nov 2020 22:27:04 +0100
Subject: [PATCH 4/4] Changed default XLNet memory length

---
 src/transformers/configuration_xlnet.py |  2 +-
 src/transformers/modeling_xlnet.py      | 12 ------------
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/src/transformers/configuration_xlnet.py b/src/transformers/configuration_xlnet.py
index 2b85f4e7571588..4a39d130c36b92 100644
--- a/src/transformers/configuration_xlnet.py
+++ b/src/transformers/configuration_xlnet.py
@@ -142,7 +142,7 @@ def __init__(
         initializer_range=0.02,
         layer_norm_eps=1e-12,
         dropout=0.1,
-        mem_len=None,
+        mem_len=512,
         reuse_len=None,
         bi_data=False,
         clamp_len=-1,
diff --git a/src/transformers/modeling_xlnet.py b/src/transformers/modeling_xlnet.py
index 26bfe32dce2f9b..6405cd13532fb0 100755
--- a/src/transformers/modeling_xlnet.py
+++ b/src/transformers/modeling_xlnet.py
@@ -16,7 +16,6 @@
 """
  PyTorch XLNet model.
 """
-import warnings
 from dataclasses import dataclass
 from typing import List, Optional, Tuple
 
@@ -1087,17 +1086,6 @@ def forward(
         return_dict=None,
     ):
 
-        if self.config.mem_len is None or self.config.mem_len == 0:
-            warnings.warn(
-                "This XLNet config doesn't use attention memories, a core feature of XLNet."
-                " Consider setting `mem_len` to a non-zero value, for example "
-                "`xlnet = XLNetLMHeadModel.from_pretrained('xlnet-base-cased', mem_len=1024)`,"
-                " for accurate training performance as well as an order of magnitude faster inference."
-                " Starting from version 3.5.0, the default parameter will be 1024, following"
-                " the implementation in https://arxiv.org/abs/1906.08237",
-                FutureWarning,
-            )
-
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states