diff --git a/docs/tutorials/tuning.md b/docs/tutorials/tuning.md
index 0865a2e9b..d0189d839 100644
--- a/docs/tutorials/tuning.md
+++ b/docs/tutorials/tuning.md
@@ -76,6 +76,8 @@ To enable hyperparameter tuning, add the following `tuning` section to your `con
 tuning:
   # Output directory for tuning results.
   output_dir: 'results'
+  # Directory where tuning checkpoints are saved.
+  checkpoint_dir: 'checkpoint'
   # Number of gpu hours allowed for tuning.
   gpu_hours: 1.0
   # Number of fixed trials to tune hyperparameters (override gpu_hours).
@@ -92,6 +94,7 @@ tuning:
 Let's detail the new parameters:
 
 - `output_dir`: Directory where tuning results, visualizations, and best parameters will be saved.
+- `checkpoint_dir`: Directory where the tuning checkpoint is saved after each trial. It allows resuming a previous tuning run in case of a crash.
 - `gpu_hours`: Estimated total GPU time available for tuning, in hours. Given this time, the script will automatically compute for how many training trials we can tune hyperparameters. By default, `gpu_hours` is set to 1.
 - `n_trials`: Number of training trials for tuning. If provided, it will override `gpu_hours` and tune the model for exactly `n_trial` trials.
 - `two_phase_tuning`: If True, performs a two-phase tuning. In the first phase, all hyperparameters are tuned, and in the second phase, the top half (based on importance) are fine-tuned while freezing others. By default, `two_phase_tuning` is False.
@@ -252,6 +255,7 @@ package:
 # -> python -m edsnlp.tune --config configs/config.yml
 tuning:
   output_dir: 'results'
+  checkpoint_dir: 'checkpoint'
   gpu_hours: 40.0
   two_phase_tuning: True
   metric: "ner.micro.f"
diff --git a/edsnlp/tune.py b/edsnlp/tune.py
index 48d305a24..20a21e33d 100644
--- a/edsnlp/tune.py
+++ b/edsnlp/tune.py
@@ -7,6 +7,7 @@
 import sys
 from typing import Dict, List, Optional, Tuple, Union

+import joblib
 import optuna
 import optuna.visualization as vis
 from configobj import ConfigObj
@@ -17,14 +18,19 @@
 from optuna.pruners import MedianPruner
 from pydantic import BaseModel, confloat, conint
 from ruamel.yaml import YAML
+from transformers.utils.logging import ERROR, set_verbosity

 from edsnlp.training.trainer import GenericScorer, registry, train

 app = Cli(pretty_exceptions_show_locals=False)

+# Disable the transformers library's warning logs.
+set_verbosity(ERROR)
+
 logger = logging.getLogger(__name__)

 DEFAULT_GPU_HOUR = 1.0
+CHECKPOINT = "study.pkl"


 class HyperparameterConfig(BaseModel):
@@ -284,7 +290,9 @@ def on_validation_callback(all_metrics):
     return score


-def optimize(config_path, tuned_parameters, n_trials, metric, study=None):
+def optimize(
+    config_path, tuned_parameters, n_trials, metric, checkpoint_dir, study=None
+):
     def objective(trial):
         return objective_with_param(config_path, tuned_parameters, trial, metric)

@@ -293,10 +301,29 @@ def objective(trial):
             direction="maximize",
             pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=2),
         )
-    study.optimize(objective, n_trials=n_trials)
+    study.optimize(
+        objective, n_trials=n_trials, callbacks=[save_checkpoint(checkpoint_dir)]
+    )
     return study


+def save_checkpoint(checkpoint_dir):
+    def callback(study: optuna.study.Study, trial: optuna.trial.FrozenTrial):
+        checkpoint_file = os.path.join(checkpoint_dir, CHECKPOINT)
+        logger.info(f"Saving checkpoint to {checkpoint_file}")
+        joblib.dump(study, checkpoint_file)
+
+    return callback
+
+
+def load_checkpoint(checkpoint_dir) -> Optional[optuna.study.Study]:
+    checkpoint_file = os.path.join(checkpoint_dir, CHECKPOINT)
+    if os.path.exists(checkpoint_file):
+        logger.info(f"Loading study checkpoint from {checkpoint_file}")
+        return joblib.load(checkpoint_file)
+    return None
+
+
 def process_results(
     study,
     output_dir,
@@ -376,17 +403,38 @@ def write_final_config(output_dir, config_path, tuned_parameters, best_params):
     config.write()


+def parse_study_summary(output_dir):
+    file_path = os.path.join(output_dir, "results_summary.txt")
+    with open(file_path, "r") as f:
+        lines = f.readlines()
+
+    sections = {"Params:": {}, "Importances:": {}}
+    current = None
+
+    for line in lines:
+        line = line.strip()
+        if line in sections:
+            current = sections[line]
+        elif current is not None and line:
+            key, value = map(str.strip, line.split(":", 1))
+            current[key] = float(value)
+
+    return sections["Params:"], sections["Importances:"]
+
+
 def tune_two_phase(
     config: Dict,
     config_path: str,
     hyperparameters: Dict[str, Dict],
     output_dir: str,
+    checkpoint_dir: str,
     n_trials: int,
     viz: bool,
     metric: Tuple[str],
     study: Optional[optuna.study.Study] = None,
     is_fixed_n_trials: bool = False,
     gpu_hours: float = 1.0,
+    skip_phase_1: bool = False,
 ) -> None:
     """
     Perform two-phase hyperparameter tuning using Optuna.
@@ -406,6 +454,8 @@
     output_dir : str
         Directory where tuning results, visualizations, and best parameters
         will be saved.
+    checkpoint_dir : str
+        Directory where the tuning checkpoint is saved after each trial.
     n_trials : int
         The total number of trials to execute across both tuning phases.
         This number will be split between the two phases, with approximately half
@@ -423,17 +473,41 @@
         trials pruned in phase 1, we raise n_trials to compensate. Default is False.
     gpu_hours : float, optional
         Total GPU time available for tuning, in hours. Default is 1 hour.
+    skip_phase_1 : bool, optional
+        Whether to skip phase 1 (used when resuming from a checkpoint).
+        Default is False.
""" - n_trials_2 = n_trials // 2 - n_trials_1 = n_trials - n_trials_2 output_dir_phase_1 = os.path.join(output_dir, "phase_1") output_dir_phase_2 = os.path.join(output_dir, "phase_2") - logger.info(f"Phase 1: Tuning all hyperparameters ({n_trials_1} trials).") - study = optimize(config, hyperparameters, n_trials_1, metric, study=study) - best_params_phase_1, importances = process_results( - study, output_dir_phase_1, viz, config, config_path, hyperparameters - ) + if str(config_path).endswith("yaml") or str(config_path).endswith("yml"): + config_path_phase_2 = os.path.join(output_dir_phase_1, "config.yml") + else: + config_path_phase_2 = os.path.join(output_dir_phase_1, "config.cfg") + + if not skip_phase_1: + n_trials_2 = n_trials // 2 + n_trials_1 = n_trials - n_trials_2 + + logger.info(f"Phase 1: Tuning all hyperparameters ({n_trials_1} trials).") + study = optimize( + config, + hyperparameters, + n_trials_1, + metric, + checkpoint_dir, + study, + ) + best_params_phase_1, importances = process_results( + study, output_dir_phase_1, viz, config, config_path, hyperparameters + ) + if not is_fixed_n_trials: + n_trials_2 = compute_remaining_n_trials_possible(study, gpu_hours) + + else: + n_trials_2 = n_trials + logger.info("Skipping already tuned phase 1") + best_params_phase_1, importances = parse_study_summary(output_dir_phase_1) hyperparameters_to_keep = list(importances.keys())[ : math.ceil(len(importances) / 2) @@ -445,6 +519,7 @@ def tune_two_phase( if key in hyperparameters_to_keep or (value.get("alias") and value["alias"] in hyperparameters_to_keep) } + hyperparameters_frozen = { key: value for key, value in hyperparameters.items() @@ -456,20 +531,20 @@ def tune_two_phase( config, hyperparameters_frozen, values=best_params_phase_1 ) - if not is_fixed_n_trials: - n_trials_2 = compute_remaining_n_trials_possible(study, gpu_hours) - logger.info( f"Phase 2: Tuning {hyperparameters_to_keep} hyperparameters " f"({n_trials_2} trials). Other hyperparameters frozen to best values." ) + study = optimize( - updated_config, hyperparameters_phase_2, n_trials_2, metric, study=study + updated_config, + hyperparameters_phase_2, + n_trials_2, + metric, + checkpoint_dir, + study, ) - if str(config_path).endswith("yaml") or str(config_path).endswith("yml"): - config_path_phase_2 = os.path.join(output_dir_phase_1, "config.yml") - else: - config_path_phase_2 = os.path.join(output_dir_phase_1, "config.cfg") + process_results( study, output_dir_phase_2, @@ -521,6 +596,7 @@ def tune( config_meta: Dict, hyperparameters: Dict[str, HyperparameterConfig], output_dir: str, + checkpoint_dir: str, gpu_hours: confloat(gt=0) = DEFAULT_GPU_HOUR, n_trials: conint(gt=0) = None, two_phase_tuning: bool = False, @@ -548,6 +624,8 @@ def tune( output_dir : str Directory where tuning results, visualizations, and best parameters will be saved. + checkpoint_dir : str, + Path to save the checkpoint file. gpu_hours : float, optional Total GPU time available for tuning, in hours. Default is 1 hour. 
     n_trials : int, optional
@@ -568,13 +646,35 @@
     hyperparameters = {key: value.to_dict() for key, value in hyperparameters.items()}
     set_seed(seed)
     metric = split_path(metric)
-    study = None
+    study = load_checkpoint(checkpoint_dir)
+    elapsed_trials = 0
+    skip_phase_1 = False
     is_fixed_n_trials = n_trials is not None

+    if study:
+        elapsed_trials = len(study.trials)
+        logger.info(f"Elapsed trials: {elapsed_trials}")
+
     if not is_fixed_n_trials:
-        logger.info(f"Computing number of trials for {gpu_hours} hours of GPU.")
-        study = optimize(config, hyperparameters, n_trials=1, metric=metric)
-        n_trials = compute_n_trials(gpu_hours, compute_time_per_trial(study)) - 1
+        if not study:
+            logger.info(f"Computing number of trials for {gpu_hours} hours of GPU.")
+            study = optimize(
+                config,
+                hyperparameters,
+                n_trials=1,
+                metric=metric,
+                checkpoint_dir=checkpoint_dir,
+            )
+            n_trials = compute_n_trials(gpu_hours, compute_time_per_trial(study)) - 1
+        else:
+            n_trials = compute_n_trials(
+                gpu_hours, compute_time_per_trial(study, ema=True)
+            )
+
+    if elapsed_trials >= (n_trials / 2):
+        skip_phase_1 = True
+
+    n_trials = max(0, n_trials - elapsed_trials)

     logger.info(f"Number of trials: {n_trials}")

@@ -585,16 +685,25 @@
             config_path,
             hyperparameters,
             output_dir,
+            checkpoint_dir,
             n_trials,
             viz,
             metric=metric,
             study=study,
             is_fixed_n_trials=is_fixed_n_trials,
             gpu_hours=gpu_hours,
+            skip_phase_1=skip_phase_1,
         )
     else:
         logger.info("Starting single-phase tuning.")
-        study = optimize(config, hyperparameters, n_trials, metric, study=study)
+        study = optimize(
+            config,
+            hyperparameters,
+            n_trials,
+            metric,
+            checkpoint_dir,
+            study,
+        )
         if not is_fixed_n_trials:
             n_trials = compute_remaining_n_trials_possible(study, gpu_hours)
             if n_trials > 0:
@@ -602,9 +711,23 @@
                     f"As some trials were pruned, perform tuning for {n_trials} "
                     "more trials to fully use GPU time budget."
                 )
-                study = optimize(config, hyperparameters, n_trials, metric, study=study)
+                study = optimize(
+                    config,
+                    hyperparameters,
+                    n_trials,
+                    metric,
+                    checkpoint_dir,
+                    study,
+                )
     process_results(study, output_dir, viz, config, config_path, hyperparameters)
+    logger.info(
+        f"Tuning completed. Results available in {output_dir}. Deleting checkpoint."
+    )
+    checkpoint_file = os.path.join(checkpoint_dir, CHECKPOINT)
+    if os.path.exists(checkpoint_file):
+        os.remove(checkpoint_file)
+


 if __name__ == "__main__":
     app()
diff --git a/tests/tuning/test_checkpoints/single_phase_gpu_hour/study_.pkl b/tests/tuning/test_checkpoints/single_phase_gpu_hour/study_.pkl
new file mode 100644
index 000000000..41662e072
Binary files /dev/null and b/tests/tuning/test_checkpoints/single_phase_gpu_hour/study_.pkl differ
diff --git a/tests/tuning/test_checkpoints/single_phase_n_trials/study_.pkl b/tests/tuning/test_checkpoints/single_phase_n_trials/study_.pkl
new file mode 100644
index 000000000..69f4d0eab
Binary files /dev/null and b/tests/tuning/test_checkpoints/single_phase_n_trials/study_.pkl differ
diff --git a/tests/tuning/test_checkpoints/two_phase_gpu_hour/config.yml b/tests/tuning/test_checkpoints/two_phase_gpu_hour/config.yml
new file mode 100644
index 000000000..b481e64e8
--- /dev/null
+++ b/tests/tuning/test_checkpoints/two_phase_gpu_hour/config.yml
@@ -0,0 +1,127 @@
+# My usefull comment
+# 🤖 PIPELINE DEFINITION
+nlp:
+  "@core": pipeline
+  lang: eds
+
+  components:
+    normalizer:
+      '@factory': eds.normalizer
+
+    sentencizer:
+      '@factory': eds.sentences
+
+    ner:
+      '@factory': eds.ner_crf
+      mode: "joint"
+      target_span_getter: "gold_spans"
+      # Set spans as both to ents and in separate `ent.label` groups
+      span_setter: [ "ents", "*" ]
+      infer_span_setter: true
+
+      embedding:
+        '@factory': eds.text_cnn
+        kernel_sizes: [ 3 ]
+
+        embedding:
+          '@factory': eds.transformer
+          model: hf-internal-testing/tiny-bert
+          window: 128
+          stride: 96
+          new_tokens: [ [ "(?:\\n\\s*)*\\n", "⏎" ] ]
+
+    qualifier:
+      '@factory': eds.span_classifier
+      attributes: { "_.negation": [ "sosy" ], "_.unit": [ "measure" ] }
+      span_getter: ["ents", "gold_spans"]
+
+      embedding:
+        '@factory': eds.span_pooler
+
+        embedding:  # ${ nlp.components.ner.embedding }
+          '@factory': eds.text_cnn
+          kernel_sizes: [ 3 ]
+
+          embedding:
+            '@factory': eds.transformer
+            model: hf-internal-testing/tiny-bert
+            window: 128
+            stride: 96
+
+# 📈 SCORERS
+scorer:
+  speed: true
+  qual:
+    '@metrics': eds.span_attributes
+    span_getter: ${nlp.components.qualifier.span_getter}
+    qualifiers: ${nlp.components.qualifier.attributes}
+  ner:
+    '@metrics': eds.ner_exact
+    span_getter: ${nlp.components.ner.target_span_getter}
+
+# 🎛️ OPTIMIZER
+optimizer:
+  "@core": optimizer
+  optim: AdamW
+  module: ${ nlp }
+  groups:
+    "^transformer": false
+    ".*":
+      lr:
+        "@schedules": linear
+        start_value: 1e-3
+        max_value: 2e-3
+        warmup_rate: 0.1
+        total_steps: ${ train.max_steps }
+
+# 📚 DATA
+train_data:
+  - data:
+      '@readers': standoff
+      path: tests/training/dataset/
+      converter:
+        - '@factory': eds.standoff_dict2doc
+          span_setter : 'gold_spans'
+          span_attributes : ['sosy', 'unit', 'negation']
+          bool_attributes : ['negation']  # default standoff to doc converter
+        - '@factory': eds.sentences
+          nlp: ${nlp}
+        - '@factory': eds.split
+          nlp: null
+          max_length: 2000
+          regex: '\n\n+'
+    shuffle: dataset
+    batch_size: 8 docs
+    pipe_names: [ "ner" ]
+  - data:
+      '@readers': standoff
+      path: tests/training/dataset/
+      converter:
+        - '@factory': eds.standoff_dict2doc
+          span_setter : 'gold_spans'
+          span_attributes : ['sosy', 'unit', 'negation']
+          bool_attributes : ['negation']  # default standoff to doc converter
+    shuffle: dataset
+    batch_size: 16 spans
+    pipe_names: [ "qualifier" ]
+
+val_data:
+  '@readers': standoff
+  path: tests/training/dataset/
+  converter:
+    - '@factory': eds.standoff_dict2doc
+      span_setter : 'gold_spans'
+      span_attributes : ['sosy', 'unit', 'negation']
+      bool_attributes : ['negation']  # default standoff to doc converter
+
+# 🚀 TRAIN SCRIPT OPTIONS
+train:
+  nlp: ${ nlp }
+  train_data: ${ train_data }
+  val_data: ${ val_data }
+  max_steps: 5
+  validation_interval: 2
+  max_grad_norm: 1.0
+  scorer: ${ scorer }
+  num_workers: 0
+  optimizer: ${ optimizer }
diff --git a/tests/tuning/test_checkpoints/two_phase_gpu_hour/results_summary.txt b/tests/tuning/test_checkpoints/two_phase_gpu_hour/results_summary.txt
new file mode 100644
index 000000000..f706c7cac
--- /dev/null
+++ b/tests/tuning/test_checkpoints/two_phase_gpu_hour/results_summary.txt
@@ -0,0 +1,13 @@
+Study Summary
+==================
+Best trial: 2
+
+Value: 0.7674011016524788
+
+Params:
+    start_value: 0.00017235427021406453
+    warmup_rate: 0.1
+
+Importances:
+    start_value: 0.7
+    warmup_rate: 0.3
diff --git a/tests/tuning/test_checkpoints/two_phase_gpu_hour/study_.pkl b/tests/tuning/test_checkpoints/two_phase_gpu_hour/study_.pkl
new file mode 100644
index 000000000..f6a911771
Binary files /dev/null and b/tests/tuning/test_checkpoints/two_phase_gpu_hour/study_.pkl differ
diff --git a/tests/tuning/test_checkpoints/two_phase_n_trials/config.yml b/tests/tuning/test_checkpoints/two_phase_n_trials/config.yml
new file mode 100644
index 000000000..b481e64e8
--- /dev/null
+++ b/tests/tuning/test_checkpoints/two_phase_n_trials/config.yml
@@ -0,0 +1,127 @@
+# My usefull comment
+# 🤖 PIPELINE DEFINITION
+nlp:
+  "@core": pipeline
+  lang: eds
+
+  components:
+    normalizer:
+      '@factory': eds.normalizer
+
+    sentencizer:
+      '@factory': eds.sentences
+
+    ner:
+      '@factory': eds.ner_crf
+      mode: "joint"
+      target_span_getter: "gold_spans"
+      # Set spans as both to ents and in separate `ent.label` groups
+      span_setter: [ "ents", "*" ]
+      infer_span_setter: true
+
+      embedding:
+        '@factory': eds.text_cnn
+        kernel_sizes: [ 3 ]
+
+        embedding:
+          '@factory': eds.transformer
+          model: hf-internal-testing/tiny-bert
+          window: 128
+          stride: 96
+          new_tokens: [ [ "(?:\\n\\s*)*\\n", "⏎" ] ]
+
+    qualifier:
+      '@factory': eds.span_classifier
+      attributes: { "_.negation": [ "sosy" ], "_.unit": [ "measure" ] }
+      span_getter: ["ents", "gold_spans"]
+
+      embedding:
+        '@factory': eds.span_pooler
+
+        embedding:  # ${ nlp.components.ner.embedding }
+          '@factory': eds.text_cnn
+          kernel_sizes: [ 3 ]
+
+          embedding:
+            '@factory': eds.transformer
+            model: hf-internal-testing/tiny-bert
+            window: 128
+            stride: 96
+
+# 📈 SCORERS
+scorer:
+  speed: true
+  qual:
+    '@metrics': eds.span_attributes
+    span_getter: ${nlp.components.qualifier.span_getter}
+    qualifiers: ${nlp.components.qualifier.attributes}
+  ner:
+    '@metrics': eds.ner_exact
+    span_getter: ${nlp.components.ner.target_span_getter}
+
+# 🎛️ OPTIMIZER
+optimizer:
+  "@core": optimizer
+  optim: AdamW
+  module: ${ nlp }
+  groups:
+    "^transformer": false
+    ".*":
+      lr:
+        "@schedules": linear
+        start_value: 1e-3
+        max_value: 2e-3
+        warmup_rate: 0.1
+        total_steps: ${ train.max_steps }
+
+# 📚 DATA
+train_data:
+  - data:
+      '@readers': standoff
+      path: tests/training/dataset/
+      converter:
+        - '@factory': eds.standoff_dict2doc
+          span_setter : 'gold_spans'
+          span_attributes : ['sosy', 'unit', 'negation']
+          bool_attributes : ['negation']  # default standoff to doc converter
+        - '@factory': eds.sentences
+          nlp: ${nlp}
+        - '@factory': eds.split
+          nlp: null
+          max_length: 2000
+          regex: '\n\n+'
+    shuffle: dataset
+    batch_size: 8 docs
+    pipe_names: [ "ner" ]
+  - data:
+      '@readers': standoff
+      path: tests/training/dataset/
+      converter:
+        - '@factory': eds.standoff_dict2doc
+          span_setter : 'gold_spans'
+          span_attributes : ['sosy', 'unit', 'negation']
+          bool_attributes : ['negation']  # default standoff to doc converter
+    shuffle: dataset
+    batch_size: 16 spans
+    pipe_names: [ "qualifier" ]
+
+val_data:
+  '@readers': standoff
+  path: tests/training/dataset/
+  converter:
+    - '@factory': eds.standoff_dict2doc
+      span_setter : 'gold_spans'
+      span_attributes : ['sosy', 'unit', 'negation']
+      bool_attributes : ['negation']  # default standoff to doc converter
+
+# 🚀 TRAIN SCRIPT OPTIONS
+train:
+  nlp: ${ nlp }
+  train_data: ${ train_data }
+  val_data: ${ val_data }
+  max_steps: 5
+  validation_interval: 2
+  max_grad_norm: 1.0
+  scorer: ${ scorer }
+  num_workers: 0
+  optimizer: ${ optimizer }
diff --git a/tests/tuning/test_checkpoints/two_phase_n_trials/results_summary.txt b/tests/tuning/test_checkpoints/two_phase_n_trials/results_summary.txt
new file mode 100644
index 000000000..f706c7cac
--- /dev/null
+++ b/tests/tuning/test_checkpoints/two_phase_n_trials/results_summary.txt
@@ -0,0 +1,13 @@
+Study Summary
+==================
+Best trial: 2
+
+Value: 0.7674011016524788
+
+Params:
+    start_value: 0.00017235427021406453
+    warmup_rate: 0.1
+
+Importances:
+    start_value: 0.7
+    warmup_rate: 0.3
diff --git a/tests/tuning/test_checkpoints/two_phase_n_trials/study_.pkl b/tests/tuning/test_checkpoints/two_phase_n_trials/study_.pkl
new file mode 100644
index 000000000..c54ab0e96
Binary files /dev/null and b/tests/tuning/test_checkpoints/two_phase_n_trials/study_.pkl differ
diff --git a/tests/tuning/test_end_to_end.py b/tests/tuning/test_end_to_end.py
index 1febb4e82..b9c32c068 100644
--- a/tests/tuning/test_end_to_end.py
+++ b/tests/tuning/test_end_to_end.py
@@ -54,7 +54,8 @@ def assert_results(output_dir):

 @pytest.mark.parametrize("n_trials", [7, None])
 @pytest.mark.parametrize("two_phase_tuning", [True, False])
-def test_tune(tmpdir, n_trials, two_phase_tuning):
+@pytest.mark.parametrize("start_from_checkpoint", [True, False])
+def test_tune(tmpdir, n_trials, two_phase_tuning, start_from_checkpoint):
     config_meta = {"config_path": ["tests/tuning/config.yml"]}
     hyperparameters = {
         "optimizer.groups.'.*'.lr.start_value": {
@@ -73,26 +74,62 @@ def test_tune(tmpdir, n_trials, two_phase_tuning, start_from_checkpoint):
         },
     }
     output_dir = "./results"
-    gpu_hours = 0.015
-    seed = 42
-    metric = "ner.micro.f"
-    tune(
-        config_meta=config_meta,
-        hyperparameters=hyperparameters,
-        output_dir=output_dir,
-        gpu_hours=gpu_hours,
-        n_trials=n_trials,
-        two_phase_tuning=two_phase_tuning,
-        seed=seed,
-        metric=metric,
-    )
-    if two_phase_tuning:
-        phase_1_dir = os.path.join(output_dir, "phase_1")
-        phase_2_dir = os.path.join(output_dir, "phase_2")
-        assert_results(phase_1_dir)
-        assert_results(phase_2_dir)
-    else:
-        assert_results(output_dir)
-
-    shutil.rmtree(output_dir)
-    shutil.rmtree("./artifacts")
+    try:
+        if start_from_checkpoint:
+            if two_phase_tuning:
+                if n_trials is None:
+                    checkpoint_dir = (
+                        "./tests/tuning/test_checkpoints/two_phase_gpu_hour"
+                    )
+                else:
+                    checkpoint_dir = (
+                        "./tests/tuning/test_checkpoints/two_phase_n_trials"
+                    )
+                summary_src = os.path.join(checkpoint_dir, "results_summary.txt")
+                summary_dst = os.path.join(output_dir, "phase_1/results_summary.txt")
+                config_src = os.path.join(checkpoint_dir, "config.yml")
+                config_dst = os.path.join(output_dir, "phase_1/config.yml")
+                os.makedirs(os.path.join(output_dir, "phase_1"))
+                shutil.copy(summary_src, summary_dst)
+                shutil.copy(config_src, config_dst)
+            else:
+                if n_trials is None:
+                    checkpoint_dir = (
"./tests/tuning/test_checkpoints/single_phase_gpu_hour" + ) + else: + checkpoint_dir = ( + "./tests/tuning/test_checkpoints/single_phase_n_trials" + ) + study_src = os.path.join(checkpoint_dir, "study_.pkl") + study_dst = os.path.join(checkpoint_dir, "study.pkl") + shutil.copy(study_src, study_dst) + + else: + checkpoint_dir = "./tests/tuning/test_checkpoints" + + gpu_hours = 0.015 + seed = 42 + metric = "ner.micro.f" + tune( + config_meta=config_meta, + hyperparameters=hyperparameters, + output_dir=output_dir, + checkpoint_dir=checkpoint_dir, + gpu_hours=gpu_hours, + n_trials=n_trials, + two_phase_tuning=two_phase_tuning, + seed=seed, + metric=metric, + ) + if two_phase_tuning: + phase_1_dir = os.path.join(output_dir, "phase_1") + phase_2_dir = os.path.join(output_dir, "phase_2") + if not start_from_checkpoint: + assert_results(phase_1_dir) + assert_results(phase_2_dir) + else: + assert_results(output_dir) + finally: + shutil.rmtree(output_dir) + shutil.rmtree("./artifacts") diff --git a/tests/tuning/test_tuning.py b/tests/tuning/test_tuning.py index b5c1d603e..9967f6934 100644 --- a/tests/tuning/test_tuning.py +++ b/tests/tuning/test_tuning.py @@ -212,22 +212,33 @@ def test_compute_remaining_n_trials_possible(study): def test_optimize(mock_objective_with_param, mock_optimize_study, has_study, study): mock_objective_with_param.return_value = 0.9 metric = ("ner", "micro", "f") + checkpoint_dir = "./checkpoint" if has_study: - def pass_fn(obj, n_trials): + def pass_fn(obj, n_trials, callbacks): pass study.optimize = pass_fn study = optimize( - "config_path", tuned_parameters={}, n_trials=1, metric=metric, study=study + "config_path", + tuned_parameters={}, + n_trials=1, + metric=metric, + checkpoint_dir=checkpoint_dir, + study=study, ) assert isinstance(study, Mock) assert len(study.trials) == 3 else: study = optimize( - "config_path", tuned_parameters={}, n_trials=1, metric=metric, study=None + "config_path", + tuned_parameters={}, + n_trials=1, + metric=metric, + checkpoint_dir=checkpoint_dir, + study=None, ) assert isinstance(study, optuna.study.Study) assert len(study.trials) == 0 @@ -260,7 +271,8 @@ def test_tune( "param1": {"type": "float", "low": 0.0, "high": 1.0}, "param2": {"type": "float", "low": 0.0, "high": 1.0}, } - output_dir = "fake_output_dir" + output_dir = "output_dir" + checkpoint_dir = "checkpoint_dir" gpu_hours = 0.25 seed = 42 @@ -268,6 +280,7 @@ def test_tune( config_meta=config_meta, hyperparameters=hyperparameters, output_dir=output_dir, + checkpoint_dir=checkpoint_dir, gpu_hours=gpu_hours, n_trials=n_trials, two_phase_tuning=two_phase_tuning,