diff --git a/.circleci/config.yml b/.circleci/config.yml
index 524e5a940d705..662e18c6953da 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -77,7 +77,6 @@ jobs:
                       - v0.4-torch_and_tf-{{ checksum "setup.py" }}
                       - v0.4-{{ checksum "setup.py" }}
             - run: pip install --upgrade pip
-            - run: pip install git+https://github.com/huggingface/datasets
             - run: pip install .[sklearn,tf-cpu,torch,testing]
             - save_cache:
                 key: v0.4-{{ checksum "setup.py" }}
@@ -102,7 +101,6 @@ jobs:
                       - v0.4-torch-{{ checksum "setup.py" }}
                       - v0.4-{{ checksum "setup.py" }}
             - run: pip install --upgrade pip
-            - run: pip install git+https://github.com/huggingface/datasets
             - run: pip install .[sklearn,torch,testing]
             - save_cache:
                   key: v0.4-torch-{{ checksum "setup.py" }}
@@ -129,7 +127,6 @@ jobs:
                       - v0.4-tf-{{ checksum "setup.py" }}
                       - v0.4-{{ checksum "setup.py" }}
             - run: pip install --upgrade pip
-            - run: pip install git+https://github.com/huggingface/datasets
             - run: pip install .[sklearn,tf-cpu,testing]
             - save_cache:
                   key: v0.4-tf-{{ checksum "setup.py" }}
@@ -154,7 +151,6 @@ jobs:
                     - v0.4-flax-{{ checksum "setup.py" }}
                     - v0.4-{{ checksum "setup.py" }}
             - run: pip install --upgrade pip
-            - run: pip install git+https://github.com/huggingface/datasets
             - run: sudo pip install .[flax,sklearn,torch,testing]
             - save_cache:
                   key: v0.4-flax-{{ checksum "setup.py" }}
@@ -179,7 +175,6 @@ jobs:
                       - v0.4-torch-{{ checksum "setup.py" }}
                       - v0.4-{{ checksum "setup.py" }}
             - run: pip install --upgrade pip
-            - run: pip install git+https://github.com/huggingface/datasets
             - run: pip install .[sklearn,torch,testing]
             - save_cache:
                   key: v0.4-torch-{{ checksum "setup.py" }}
@@ -204,7 +199,6 @@ jobs:
                       - v0.4-tf-{{ checksum "setup.py" }}
                       - v0.4-{{ checksum "setup.py" }}
             - run: pip install --upgrade pip
-            - run: pip install git+https://github.com/huggingface/datasets
             - run: pip install .[sklearn,tf-cpu,testing]
             - save_cache:
                   key: v0.4-tf-{{ checksum "setup.py" }}
diff --git a/tests/test_trainer.py b/tests/test_trainer.py
index 6505539cdac04..497c6c8b4daa7 100644
--- a/tests/test_trainer.py
+++ b/tests/test_trainer.py
@@ -18,13 +18,13 @@
 import tempfile
 import unittest
 
-import datasets
 import numpy as np
 
 from transformers import AutoTokenizer, EvaluationStrategy, PretrainedConfig, TrainingArguments, is_torch_available
 from transformers.file_utils import WEIGHTS_NAME
 from transformers.testing_utils import (
     get_tests_dir,
+    require_datasets,
     require_optuna,
     require_sentencepiece,
     require_tokenizers,
@@ -340,7 +340,10 @@ def test_predict(self):
         self.assertTrue(np.array_equal(labels[0], trainer.eval_dataset.ys[0]))
         self.assertTrue(np.array_equal(labels[1], trainer.eval_dataset.ys[1]))
 
+    @require_datasets
     def test_trainer_with_datasets(self):
+        import datasets
+
         np.random.seed(42)
         x = np.random.normal(size=(64,)).astype(np.float32)
         y = 2.0 * x + 3.0 + np.random.normal(scale=0.1, size=(64,))
@@ -658,15 +661,17 @@ def model_init(trial):
         def hp_name(trial):
             return MyTrialShortNamer.shortname(trial.params)
 
-        trainer = get_regression_trainer(
-            learning_rate=0.1,
-            logging_steps=1,
-            evaluation_strategy=EvaluationStrategy.EPOCH,
-            num_train_epochs=4,
-            disable_tqdm=True,
-            load_best_model_at_end=True,
-            logging_dir="runs",
-            run_name="test",
-            model_init=model_init,
-        )
-        trainer.hyperparameter_search(direction="minimize", hp_space=hp_space, hp_name=hp_name, n_trials=4)
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            trainer = get_regression_trainer(
+                output_dir=tmp_dir,
+                learning_rate=0.1,
+                logging_steps=1,
+                evaluation_strategy=EvaluationStrategy.EPOCH,
+                num_train_epochs=4,
+                disable_tqdm=True,
+                load_best_model_at_end=True,
+                logging_dir="runs",
+                run_name="test",
+                model_init=model_init,
+            )
+            trainer.hyperparameter_search(direction="minimize", hp_space=hp_space, hp_name=hp_name, n_trials=4)