Skip to content

Commit 1c542df

Browse files
julien-c, LysandreJik, jongwook, and WuTheFWasThat
committed
Add RoBERTa-based GPT-2 Output Detector from OpenAI
converted from https://github.com/openai/gpt-2-output-dataset/tree/master/detector Co-Authored-By: Lysandre Debut <lysandre.debut@reseau.eseo.fr> Co-Authored-By: Jong Wook Kim <jongwook@nyu.edu> Co-Authored-By: Jeff Wu <wuthefwasthat@gmail.com>
1 parent 2f3a421 commit 1c542df

File tree

4 files changed

+18
-0
lines changed

4 files changed

+18
-0
lines changed

Diff for: docs/source/pretrained_models.rst

+8
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,14 @@ Here is the full list of the currently provided pretrained models together with
127127
| | ``roberta-large-mnli`` | | 24-layer, 1024-hidden, 16-heads, 355M parameters |
128128
| | | | ``roberta-large`` fine-tuned on `MNLI <http://www.nyu.edu/projects/bowman/multinli/>`__. |
129129
| | | (see `details <https://github.com/pytorch/fairseq/tree/master/examples/roberta>`__) |
130+
| +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
131+
| | ``roberta-base-openai-detector`` | | 12-layer, 768-hidden, 12-heads, 125M parameters |
132+
| | | | ``roberta-base`` fine-tuned by OpenAI on the outputs of the 1.5B-parameter GPT-2 model. |
133+
| | | (see `details <https://github.com/openai/gpt-2-output-dataset/tree/master/detector>`__) |
134+
| +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
135+
| | ``roberta-large-openai-detector`` | | 24-layer, 1024-hidden, 16-heads, 355M parameters |
136+
| | | | ``roberta-large`` fine-tuned by OpenAI on the outputs of the 1.5B-parameter GPT-2 model. |
137+
| | | (see `details <https://github.com/openai/gpt-2-output-dataset/tree/master/detector>`__) |
130138
+-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
131139
| DistilBERT | ``distilbert-base-uncased`` | | 6-layer, 768-hidden, 12-heads, 66M parameters |
132140
| | | | The DistilBERT model distilled from the BERT model `bert-base-uncased` checkpoint |

Diff for: transformers/configuration_roberta.py

+2
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
'roberta-large': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-config.json",
3030
'roberta-large-mnli': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-config.json",
3131
'distilroberta-base': "https://s3.amazonaws.com/models.huggingface.co/bert/distilroberta-base-config.json",
32+
'roberta-base-openai-detector': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-openai-detector-config.json",
33+
'roberta-large-openai-detector': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-openai-detector-config.json",
3234
}
3335

3436

Diff for: transformers/modeling_roberta.py

+2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
'roberta-large': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-pytorch_model.bin",
3636
'roberta-large-mnli': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-pytorch_model.bin",
3737
'distilroberta-base': "https://s3.amazonaws.com/models.huggingface.co/bert/distilroberta-base-pytorch_model.bin",
38+
'roberta-base-openai-detector': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-openai-detector-pytorch_model.bin",
39+
'roberta-large-openai-detector': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-openai-detector-pytorch_model.bin",
3840
}
3941

4042
class RobertaEmbeddings(BertEmbeddings):

Diff for: transformers/tokenization_roberta.py

+6
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,17 @@ def lru_cache():
4747
'roberta-large': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-vocab.json",
4848
'roberta-large-mnli': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-vocab.json",
4949
'distilroberta-base': "https://s3.amazonaws.com/models.huggingface.co/bert/distilroberta-base-vocab.json",
50+
'roberta-base-openai-detector': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-vocab.json",
51+
'roberta-large-openai-detector': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-vocab.json",
5052
},
5153
'merges_file':
5254
{
5355
'roberta-base': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt",
5456
'roberta-large': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt",
5557
'roberta-large-mnli': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-mnli-merges.txt",
5658
'distilroberta-base': "https://s3.amazonaws.com/models.huggingface.co/bert/distilroberta-base-merges.txt",
59+
'roberta-base-openai-detector': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt",
60+
'roberta-large-openai-detector': "https://s3.amazonaws.com/models.huggingface.co/bert/roberta-large-merges.txt",
5761
},
5862
}
5963

@@ -62,6 +66,8 @@ def lru_cache():
6266
'roberta-large': 512,
6367
'roberta-large-mnli': 512,
6468
'distilroberta-base': 512,
69+
'roberta-base-openai-detector': 512,
70+
'roberta-large-openai-detector': 512,
6571
}
6672

6773

0 commit comments

Comments (0)