Merge pull request #243 from gpengzhi/gpt2-xl

Support GPT2-XL
asyml · Nov 8, 2019 · 676c66a
2 parents c8f452d + b34f922
commit 676c66a
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 0 deletions.
diff --git a/texar/tf/data/tokenizers/gpt2_tokenizer.py b/texar/tf/data/tokenizers/gpt2_tokenizer.py
@@ -58,6 +58,7 @@ class GPT2Tokenizer(TokenizerBase, PretrainedGPT2Mixin):
         'gpt2-small': 1024,
         'gpt2-medium': 1024,
         'gpt2-large': 1024,
+        'gpt2-xl': 1024,
     }
     _DEPRECATED_MAX_INPUT_SIZE = {
         '117M': 1024,

diff --git a/texar/tf/modules/pretrained/gpt2.py b/texar/tf/modules/pretrained/gpt2.py
@@ -53,6 +53,7 @@ class PretrainedGPT2Mixin(PretrainedMixin, ABC):
       * ``gpt2-small``: Small version of GPT-2, 124M parameters.
       * ``gpt2-medium``: Medium version of GPT-2, 355M parameters.
       * ``gpt2-large``: Large version of GPT-2, 774M parameters.
+      * ``gpt2-xl``: XL version of GPT-2, 1558M parameters.
 
     We provide the following GPT2 classes:
 
@@ -74,6 +75,8 @@ class PretrainedGPT2Mixin(PretrainedMixin, ABC):
                         for file in _CHECKPOINT_FILES],
         'gpt2-large': [_GPT2_PATH + f"774M/{file}"
                        for file in _CHECKPOINT_FILES],
+        'gpt2-xl': [_GPT2_PATH + f"1558M/{file}"
+                    for file in _CHECKPOINT_FILES],
     }
 
     # Raise warning for the deprecated pre-trained model names