From c988db5af2a5f1ccfcb5ad19bd735b6a77516637 Mon Sep 17 00:00:00 2001
From: Lysandre
Date: Tue, 16 Mar 2021 11:33:35 -0400
Subject: [PATCH] Release v4.4.0

---
 README.md                                         |  4 ++--
 docs/source/conf.py                               |  3 ++-
 docs/source/index.rst                             | 11 +++++------
 examples/language-modeling/run_clm.py             |  2 +-
 examples/language-modeling/run_mlm.py             |  2 +-
 examples/language-modeling/run_plm.py             |  2 +-
 examples/multiple-choice/run_swag.py              |  2 +-
 examples/question-answering/run_qa.py             |  2 +-
 examples/question-answering/run_qa_beam_search.py |  2 +-
 examples/seq2seq/run_summarization.py             |  2 +-
 examples/seq2seq/run_translation.py               |  2 +-
 examples/text-classification/run_glue.py          |  2 +-
 examples/text-classification/run_xnli.py          |  2 +-
 examples/token-classification/run_ner.py          |  2 +-
 setup.py                                          |  2 +-
 src/transformers/__init__.py                      |  2 +-
 16 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index 49ec67b4f8ab2..de2917c9a2385 100644
--- a/README.md
+++ b/README.md
@@ -201,7 +201,7 @@ Current number of checkpoints: ![](https://img.shields.io/endpoint?url=https://h
 1. **[ConvBERT](https://huggingface.co/transformers/model_doc/convbert.html)** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution](https://arxiv.org/abs/2008.02496) by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
 1. **[CTRL](https://huggingface.co/transformers/model_doc/ctrl.html)** (from Salesforce) released with the paper [CTRL: A Conditional Transformer Language Model for Controllable Generation](https://arxiv.org/abs/1909.05858) by Nitish Shirish Keskar*, Bryan McCann*, Lav R. Varshney, Caiming Xiong and Richard Socher.
 1. **[DeBERTa](https://huggingface.co/transformers/model_doc/deberta.html)** (from Microsoft) released with the paper [DeBERTa: Decoding-enhanced BERT with Disentangled Attention](https://arxiv.org/abs/2006.03654) by Pengcheng He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen.
-1. **[DeBERTa-v2](https://huggingface.co/transformers/master/model_doc/deberta_v2.html)** (from Microsoft) released with the paper [DeBERTa: Decoding-enhanced BERT with Disentangled Attention](https://arxiv.org/abs/2006.03654) by Pengcheng He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen.
+1. **[DeBERTa-v2](https://huggingface.co/transformers/model_doc/deberta_v2.html)** (from Microsoft) released with the paper [DeBERTa: Decoding-enhanced BERT with Disentangled Attention](https://arxiv.org/abs/2006.03654) by Pengcheng He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen.
 1. **[DialoGPT](https://huggingface.co/transformers/model_doc/dialogpt.html)** (from Microsoft Research) released with the paper [DialoGPT: Large-Scale Generative Pre-training for Conversational Response Generation](https://arxiv.org/abs/1911.00536) by Yizhe Zhang, Siqi Sun, Michel Galley, Yen-Chun Chen, Chris Brockett, Xiang Gao, Jianfeng Gao, Jingjing Liu, Bill Dolan.
 1. **[DistilBERT](https://huggingface.co/transformers/model_doc/distilbert.html)** (from HuggingFace), released together with the paper [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter](https://arxiv.org/abs/1910.01108) by Victor Sanh, Lysandre Debut and Thomas Wolf. The same method has been applied to compress GPT2 into [DistilGPT2](https://github.com/huggingface/transformers/tree/master/examples/distillation), RoBERTa into [DistilRoBERTa](https://github.com/huggingface/transformers/tree/master/examples/distillation), Multilingual BERT into [DistilmBERT](https://github.com/huggingface/transformers/tree/master/examples/distillation) and a German version of DistilBERT.
 1. **[DPR](https://huggingface.co/transformers/model_doc/dpr.html)** (from Facebook) released with the paper [Dense Passage Retrieval
@@ -212,7 +212,7 @@ Min, Patrick Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih.
 1. **[Funnel Transformer](https://huggingface.co/transformers/model_doc/funnel.html)** (from CMU/Google Brain) released with the paper [Funnel-Transformer: Filtering out Sequential Redundancy for Efficient Language Processing](https://arxiv.org/abs/2006.03236) by Zihang Dai, Guokun Lai, Yiming Yang, Quoc V. Le.
 1. **[GPT](https://huggingface.co/transformers/model_doc/gpt.html)** (from OpenAI) released with the paper [Improving Language Understanding by Generative Pre-Training](https://blog.openai.com/language-unsupervised/) by Alec Radford, Karthik Narasimhan, Tim Salimans and Ilya Sutskever.
 1. **[GPT-2](https://huggingface.co/transformers/model_doc/gpt2.html)** (from OpenAI) released with the paper [Language Models are Unsupervised Multitask Learners](https://blog.openai.com/better-language-models/) by Alec Radford*, Jeffrey Wu*, Rewon Child, David Luan, Dario Amodei** and Ilya Sutskever**.
-1. **[I-BERT](https://huggingface.co/transformers/master/model_doc/ibert.html)** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321) by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer
+1. **[I-BERT](https://huggingface.co/transformers/model_doc/ibert.html)** (from Berkeley) released with the paper [I-BERT: Integer-only BERT Quantization](https://arxiv.org/abs/2101.01321) by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer
 1. **[LayoutLM](https://huggingface.co/transformers/model_doc/layoutlm.html)** (from Microsoft Research Asia) released with the paper [LayoutLM: Pre-training of Text and Layout for Document Image Understanding](https://arxiv.org/abs/1912.13318) by Yiheng Xu, Minghao Li, Lei Cui, Shaohan Huang, Furu Wei, Ming Zhou.
 1. **[LED](https://huggingface.co/transformers/model_doc/led.html)** (from AllenAI) released with the paper [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150) by Iz Beltagy, Matthew E. Peters, Arman Cohan.
 1. **[Longformer](https://huggingface.co/transformers/model_doc/longformer.html)** (from AllenAI) released with the paper [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150) by Iz Beltagy, Matthew E. Peters, Arman Cohan.
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 2c066e584f9e7..6027c88b66708 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -26,7 +26,8 @@
 # The short X.Y version
 version = u''
 # The full version, including alpha/beta/rc tags
-release = u'4.2.0'
+release = u'4.4.0'
+
 # Prefix link to point to master, comment this during version release and uncomment below line
 extlinks = {'prefix_link': ('https://github.com/huggingface/transformers/blob/master/%s', '')}
 # Prefix link to always point to corresponding version, uncomment this during version release
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 7a3369ac5de48..e069b997e8140 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -117,9 +117,9 @@ and conversion utilities for the following models:
 12. :doc:`DeBERTa <model_doc/deberta>` (from Microsoft) released with the paper `DeBERTa: Decoding-enhanced BERT
     with Disentangled Attention <https://arxiv.org/abs/2006.03654>`__ by Pengcheng He, Xiaodong Liu, Jianfeng Gao,
     Weizhu Chen.
-13. `DeBERTa-v2 <https://huggingface.co/transformers/master/model_doc/deberta_v2.html>`__ (from Microsoft) released
-    with the paper `DeBERTa: Decoding-enhanced BERT with Disentangled Attention <https://arxiv.org/abs/2006.03654>`__
-    by Pengcheng He, Xiaodong Liu, Jianfeng Gao, Weizhu Chen.
+13. :doc:`DeBERTa-v2 <model_doc/deberta_v2>` (from Microsoft) released with the paper `DeBERTa: Decoding-enhanced BERT
+    with Disentangled Attention <https://arxiv.org/abs/2006.03654>`__ by Pengcheng He, Xiaodong Liu, Jianfeng Gao,
+    Weizhu Chen.
 14. :doc:`DialoGPT <model_doc/dialogpt>` (from Microsoft Research) released with the paper `DialoGPT: Large-Scale
     Generative Pre-training for Conversational Response Generation <https://arxiv.org/abs/1911.00536>`__ by Yizhe
     Zhang, Siqi Sun, Michel Galley, Yen-Chun Chen, Chris Brockett, Xiang Gao, Jianfeng Gao, Jingjing Liu, Bill Dolan.
@@ -148,9 +148,8 @@
 21. :doc:`GPT-2 <model_doc/gpt2>` (from OpenAI) released with the paper `Language Models are Unsupervised Multitask
     Learners <https://blog.openai.com/better-language-models/>`__ by Alec Radford*, Jeffrey Wu*, Rewon Child, David
     Luan, Dario Amodei** and Ilya Sutskever**.
-22. `I-BERT <https://huggingface.co/transformers/master/model_doc/ibert.html>`__ (from Berkeley) released with the
-    paper `I-BERT: Integer-only BERT Quantization <https://arxiv.org/abs/2101.01321>`__ by Sehoon Kim, Amir Gholami,
-    Zhewei Yao, Michael W. Mahoney, Kurt Keutzer
+22. :doc:`I-BERT <model_doc/ibert>` (from Berkeley) released with the paper `I-BERT: Integer-only BERT Quantization
+    <https://arxiv.org/abs/2101.01321>`__ by Sehoon Kim, Amir Gholami, Zhewei Yao, Michael W. Mahoney, Kurt Keutzer
 23. :doc:`LayoutLM <model_doc/layoutlm>` (from Microsoft Research Asia) released with the paper `LayoutLM: Pre-training
     of Text and Layout for Document Image Understanding <https://arxiv.org/abs/1912.13318>`__ by Yiheng Xu, Minghao Li,
     Lei Cui, Shaohan Huang, Furu Wei, Ming Zhou.
diff --git a/examples/language-modeling/run_clm.py b/examples/language-modeling/run_clm.py
index 833a8ccc87c40..cb8a90fc21cd6 100755
--- a/examples/language-modeling/run_clm.py
+++ b/examples/language-modeling/run_clm.py
@@ -48,7 +48,7 @@
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.4.0.dev0")
+check_min_version("4.4.0")
 
 logger = logging.getLogger(__name__)
 
diff --git a/examples/language-modeling/run_mlm.py b/examples/language-modeling/run_mlm.py
index a58ac2ed0e468..641987e3c6297 100755
--- a/examples/language-modeling/run_mlm.py
+++ b/examples/language-modeling/run_mlm.py
@@ -48,7 +48,7 @@
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.4.0.dev0")
+check_min_version("4.4.0")
 
 logger = logging.getLogger(__name__)
 MODEL_CONFIG_CLASSES = list(MODEL_FOR_MASKED_LM_MAPPING.keys())
diff --git a/examples/language-modeling/run_plm.py b/examples/language-modeling/run_plm.py
index 7305b393f578a..9f58557150fa9 100755
--- a/examples/language-modeling/run_plm.py
+++ b/examples/language-modeling/run_plm.py
@@ -44,7 +44,7 @@
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.4.0.dev0")
+check_min_version("4.4.0")
 
 logger = logging.getLogger(__name__)
 
diff --git a/examples/multiple-choice/run_swag.py b/examples/multiple-choice/run_swag.py
index 8a0a78401d278..f589195922380 100755
--- a/examples/multiple-choice/run_swag.py
+++ b/examples/multiple-choice/run_swag.py
@@ -46,7 +46,7 @@
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.4.0.dev0")
+check_min_version("4.4.0")
 
 logger = logging.getLogger(__name__)
 
diff --git a/examples/question-answering/run_qa.py b/examples/question-answering/run_qa.py
index aab8022a8fb0e..e107999a313d7 100755
--- a/examples/question-answering/run_qa.py
+++ b/examples/question-answering/run_qa.py
@@ -46,7 +46,7 @@
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.4.0.dev0")
+check_min_version("4.4.0")
 
 logger = logging.getLogger(__name__)
 
diff --git a/examples/question-answering/run_qa_beam_search.py b/examples/question-answering/run_qa_beam_search.py
index 0fbea56bfe3a1..67fbf30d35004 100755
--- a/examples/question-answering/run_qa_beam_search.py
+++ b/examples/question-answering/run_qa_beam_search.py
@@ -45,7 +45,7 @@
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.4.0.dev0")
+check_min_version("4.4.0")
 
 logger = logging.getLogger(__name__)
 
diff --git a/examples/seq2seq/run_summarization.py b/examples/seq2seq/run_summarization.py
index 211045ed92fb1..f30b83f276e05 100755
--- a/examples/seq2seq/run_summarization.py
+++ b/examples/seq2seq/run_summarization.py
@@ -47,7 +47,7 @@
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.4.0.dev0")
+check_min_version("4.4.0")
 
 logger = logging.getLogger(__name__)
 
diff --git a/examples/seq2seq/run_translation.py b/examples/seq2seq/run_translation.py
index 62e68c19cba5c..0901584d1d80f 100755
--- a/examples/seq2seq/run_translation.py
+++ b/examples/seq2seq/run_translation.py
@@ -46,7 +46,7 @@
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.4.0.dev0")
+check_min_version("4.4.0")
 
 logger = logging.getLogger(__name__)
 
diff --git a/examples/text-classification/run_glue.py b/examples/text-classification/run_glue.py
index b473953ca2b39..fad0a78592ef9 100755
--- a/examples/text-classification/run_glue.py
+++ b/examples/text-classification/run_glue.py
@@ -45,7 +45,7 @@
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.4.0.dev0")
+check_min_version("4.4.0")
 
 task_to_keys = {
     "cola": ("sentence", None),
diff --git a/examples/text-classification/run_xnli.py b/examples/text-classification/run_xnli.py
index 15d2c404a63b7..e785c93b8c79b 100755
--- a/examples/text-classification/run_xnli.py
+++ b/examples/text-classification/run_xnli.py
@@ -45,7 +45,7 @@
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.4.0.dev0")
+check_min_version("4.4.0")
 
 logger = logging.getLogger(__name__)
 
diff --git a/examples/token-classification/run_ner.py b/examples/token-classification/run_ner.py
index f9cb70881f0ed..f9e2f633ba7ca 100755
--- a/examples/token-classification/run_ner.py
+++ b/examples/token-classification/run_ner.py
@@ -45,7 +45,7 @@
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.4.0.dev0")
+check_min_version("4.4.0")
 
 logger = logging.getLogger(__name__)
 
diff --git a/setup.py b/setup.py
index 261e90f1125c7..c9a80c067ddce 100644
--- a/setup.py
+++ b/setup.py
@@ -278,7 +278,7 @@ def run(self):
 
 setup(
     name="transformers",
-    version="4.4.0.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    version="4.4.0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
     author="Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Sam Shleifer, Patrick von Platen, Sylvain Gugger, Google AI Language Team Authors, Open AI team Authors, Facebook AI Authors, Carnegie Mellon University Authors",
     author_email="thomas@huggingface.co",
     description="State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch",
diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
index c4e8943620d7a..7c43e70e9f471 100755
--- a/src/transformers/__init__.py
+++ b/src/transformers/__init__.py
@@ -22,7 +22,7 @@
 # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
 # in the namespace without actually importing anything (and especially none of the backends).
 
-__version__ = "4.4.0.dev0"
+__version__ = "4.4.0"
 
 # Work around to update TensorFlow's absl.logging threshold which alters the
 # default Python logging output behavior when present.