diff --git a/holocron/models/classification/convnext.py b/holocron/models/classification/convnext.py
index f9e6746d0..4a2cf36be 100644
--- a/holocron/models/classification/convnext.py
+++ b/holocron/models/classification/convnext.py
@@ -46,7 +46,7 @@ class LayerScale(nn.Module):
 
     def __init__(self, chans: int, scale: float = 1e-6) -> None:
         super().__init__()
-        self.register_parameter("weight", nn.Parameter(scale * torch.ones(chans)))  # type: ignore[arg-type]
+        self.register_parameter("weight", nn.Parameter(scale * torch.ones(chans)))
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         return x * self.weight.reshape(1, -1, *((1,) * (x.ndim - 2)))
diff --git a/holocron/models/detection/yolo.py b/holocron/models/detection/yolo.py
index 0cceb15c4..d1c82f6b3 100644
--- a/holocron/models/detection/yolo.py
+++ b/holocron/models/detection/yolo.py
@@ -70,7 +70,7 @@ def _compute_losses(
         gt_labels = [t["labels"] for t in target]
 
         # GT xmin, ymin, xmax, ymax
-        if not all(torch.all(boxes >= 0) and torch.all(boxes <= 1) for boxes in gt_boxes):  # type: ignore[call-overload]
+        if not all(torch.all(boxes >= 0) and torch.all(boxes <= 1) for boxes in gt_boxes):
             raise ValueError("Ground truth boxes are expected to have values between 0 and 1.")
 
         b, h, w, _, _ = pred_scores.shape
diff --git a/holocron/models/detection/yolov2.py b/holocron/models/detection/yolov2.py
index 3c74280ab..594033d00 100644
--- a/holocron/models/detection/yolov2.py
+++ b/holocron/models/detection/yolov2.py
@@ -63,7 +63,7 @@ def __init__(
         if anchors is None:
             # cf. https://github.com/pjreddie/darknet/blob/master/cfg/yolov2-voc.cfg#L242
             anchors = (
-                torch.tensor([  # type: ignore[assignment]
+                torch.tensor([
                     [1.3221, 1.73145],
                     [3.19275, 4.00944],
                     [5.05587, 8.09892],
@@ -131,7 +131,7 @@ def __init__(
         )
 
         # Each box has P_objectness, 4 coords, and score for each class
-        self.head = nn.Conv2d(layout[-1][0], anchors.shape[0] * (5 + num_classes), 1)  # type: ignore[union-attr]
+        self.head = nn.Conv2d(layout[-1][0], anchors.shape[0] * (5 + num_classes), 1)
 
         # Register losses
         self.register_buffer("anchors", anchors)
@@ -248,7 +248,7 @@ def forward(
             b_coords,
             b_o,
             b_scores,
-            out.shape[-2:],
+            out.shape[-2:],  # type: ignore[arg-type]
             self.rpn_nms_thresh,
             self.box_score_thresh,
         )
diff --git a/holocron/models/detection/yolov4.py b/holocron/models/detection/yolov4.py
index 57ff3229c..955cae3f5 100644
--- a/holocron/models/detection/yolov4.py
+++ b/holocron/models/detection/yolov4.py
@@ -3,7 +3,7 @@
 # This program is licensed under the Apache License 2.0.
 # See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0> for full license details.
 
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import torch
 import torch.nn as nn
@@ -277,7 +277,7 @@ def _format_outputs(self, output: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
         c_x = torch.arange(w, dtype=torch.float32, device=output.device).reshape(1, 1, -1, 1)
         c_y = torch.arange(h, dtype=torch.float32, device=output.device).reshape(1, -1, 1, 1)
 
-        b_xy = cast(Tensor, self.scale_xy * torch.sigmoid(output[..., :2]) - 0.5 * (self.scale_xy - 1))
+        b_xy = self.scale_xy * torch.sigmoid(output[..., :2]) - 0.5 * (self.scale_xy - 1)
         b_xy[..., 0].add_(c_x)
         b_xy[..., 1].add_(c_y)
         b_xy[..., 0].div_(w)
@@ -455,7 +455,7 @@ def __init__(
         # cf. https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov4.cfg#L1143
         if anchors is None:
             anchors = (
-                torch.tensor(  # type: ignore[assignment]
+                torch.tensor(
                     [
                         [[12, 16], [19, 36], [40, 28]],
                         [[36, 75], [76, 55], [72, 146]],
@@ -468,8 +468,8 @@ def __init__(
         elif not isinstance(anchors, torch.Tensor):
             anchors = torch.tensor(anchors, dtype=torch.float32)
 
-        if anchors.shape[0] != 3:  # type: ignore[union-attr]
-            raise AssertionError(f"The number of anchors is expected to be 3. received: {anchors.shape[0]}")  # type: ignore[union-attr]
+        if anchors.shape[0] != 3:
+            raise AssertionError(f"The number of anchors is expected to be 3. received: {anchors.shape[0]}")
 
         super().__init__()
 
@@ -480,7 +480,7 @@ def __init__(
             *conv_sequence(256, (5 + num_classes) * 3, None, None, None, conv_layer, kernel_size=1, bias=True),
         )
 
-        self.yolo1 = YoloLayer(anchors[0], num_classes=num_classes, scale_xy=1.2)  # type: ignore[index]
+        self.yolo1 = YoloLayer(anchors[0], num_classes=num_classes, scale_xy=1.2)
 
         self.pre_head2 = nn.Sequential(
             *conv_sequence(
@@ -536,7 +536,7 @@ def __init__(
             *conv_sequence(512, (5 + num_classes) * 3, None, None, None, conv_layer, kernel_size=1, bias=True),
         )
 
-        self.yolo2 = YoloLayer(anchors[1], num_classes=num_classes, scale_xy=1.1)  # type: ignore[index]
+        self.yolo2 = YoloLayer(anchors[1], num_classes=num_classes, scale_xy=1.1)
 
         self.pre_head3 = nn.Sequential(
             *conv_sequence(
@@ -598,7 +598,7 @@ def __init__(
             *conv_sequence(1024, (5 + num_classes) * 3, None, None, None, conv_layer, kernel_size=1, bias=True),
         )
 
-        self.yolo3 = YoloLayer(anchors[2], num_classes=num_classes, scale_xy=1.05)  # type: ignore[index]
+        self.yolo3 = YoloLayer(anchors[2], num_classes=num_classes, scale_xy=1.05)
         init_module(self, "leaky_relu")
         # Zero init
         self.head1[-1].weight.data.zero_()
diff --git a/holocron/nn/functional.py b/holocron/nn/functional.py
index 14572d070..3f0245ab0 100644
--- a/holocron/nn/functional.py
+++ b/holocron/nn/functional.py
@@ -37,8 +37,8 @@ def hard_mish(x: Tensor, inplace: bool = False) -> Tensor:
         output tensor
     """
     if inplace:
-        return x.mul_(0.5 * (x + 2).clamp(min=0, max=2))  # type: ignore[attr-defined]
-    return 0.5 * x * (x + 2).clamp(min=0, max=2)  # type: ignore[attr-defined]
+        return x.mul_(0.5 * (x + 2).clamp(min=0, max=2))
+    return 0.5 * x * (x + 2).clamp(min=0, max=2)
 
 
 def nl_relu(x: Tensor, beta: float = 1.0, inplace: bool = False) -> Tensor:
@@ -53,7 +53,7 @@ def nl_relu(x: Tensor, beta: float = 1.0, inplace: bool = False) -> Tensor:
     """
     if inplace:
         return torch.log(F.relu_(x).mul_(beta).add_(1), out=x)
-    return torch.log(1 + beta * F.relu(x))  # type: ignore[arg-type]
+    return torch.log(1 + beta * F.relu(x))
 
 
 def focal_loss(
@@ -96,7 +96,7 @@ def focal_loss(
         # Tensor type
         if weight.type() != x.data.type():
             weight = weight.type_as(x.data)
-        logpt = weight.gather(0, target.data.view(-1)) * logpt  # type: ignore[union-attr]
+        logpt = weight.gather(0, target.data.view(-1)) * logpt
 
     # Loss
     loss = cast(Tensor, -1 * (1 - pt) ** gamma * logpt)
@@ -177,7 +177,7 @@ def multilabel_cross_entropy(
         # Tensor type
         if weight.type() != x.data.type():
             weight = weight.type_as(x.data)
-        logpt = logpt * weight.view(1, -1, *([1] * (x.ndim - 2)))  # type: ignore[union-attr]
+        logpt = logpt * weight.view(1, -1, *([1] * (x.ndim - 2)))
 
     # CE Loss
     loss = -target * logpt
@@ -243,7 +243,7 @@ def complement_cross_entropy(
         # Tensor type
         if weight.type() != x.data.type():
             weight = weight.type_as(x.data)
-        loss = loss * weight.view(1, -1, *([1] * (x.ndim - 2)))  # type: ignore[union-attr]
+        loss = loss * weight.view(1, -1, *([1] * (x.ndim - 2)))
 
     # Loss reduction
     if reduction == "sum":
@@ -484,7 +484,7 @@ def dropblock2d(x: Tensor, drop_prob: float, block_size: int, inplace: bool = Fa
     gamma = drop_prob / block_size**2
 
     # Sample a mask for the centers of blocks that will be dropped
-    mask = (torch.rand((x.shape[0], *x.shape[2:]), device=x.device) <= gamma).to(dtype=x.dtype)  # type: ignore[attr-defined]
+    mask = (torch.rand((x.shape[0], *x.shape[2:]), device=x.device) <= gamma).to(dtype=x.dtype)
 
     # Expand zero positions to block size
     mask = 1 - F.max_pool2d(mask, kernel_size=(block_size, block_size), stride=(1, 1), padding=block_size // 2)
@@ -525,7 +525,7 @@ def dice_loss(
         torch.Tensor: loss reduced with `reduction` method
     """
     inter = gamma * (x * target).flatten(2).sum((0, 2))
-    cardinality = (x + gamma * target).flatten(2).sum((0, 2))  # type: ignore[attr-defined]
+    cardinality = (x + gamma * target).flatten(2).sum((0, 2))
 
     dice_coeff = (inter + eps) / (cardinality + eps)
 
@@ -536,7 +536,7 @@ def dice_loss(
         # Tensor type
         if weight.type() != x.data.type():
             weight = weight.type_as(x.data)
-        loss = 1 - (1 + 1 / gamma) * (weight * dice_coeff).sum() / weight.sum()  # type: ignore[union-attr]
+        loss = 1 - (1 + 1 / gamma) * (weight * dice_coeff).sum() / weight.sum()
 
     return loss
 
@@ -589,9 +589,9 @@ def poly_loss(
         if weight.type() != x.data.type():
             weight = weight.type_as(x.data)
         if target.ndim == x.ndim - 1:
-            loss = weight.gather(0, target.data.view(-1)) * loss  # type: ignore[union-attr]
+            loss = weight.gather(0, target.data.view(-1)) * loss
         else:
-            loss = weight.reshape(1, -1) * loss  # type: ignore[union-attr]
+            loss = weight.reshape(1, -1) * loss
 
     # Ignore index (set loss contribution to 0)
     if target.ndim == x.ndim - 1:
diff --git a/holocron/trainer/classification.py b/holocron/trainer/classification.py
index 5fd76fceb..677339d16 100644
--- a/holocron/trainer/classification.py
+++ b/holocron/trainer/classification.py
@@ -183,7 +183,7 @@ def _get_loss(
 
         # AMP
         if self.amp:
-            with torch.cuda.amp.autocast():  # type: ignore[attr-defined]
+            with torch.cuda.amp.autocast():
                 # Forward
                 out = self.model(x)
                 # Loss computation
diff --git a/holocron/trainer/core.py b/holocron/trainer/core.py
index a2c20cabc..0a4f7c1ae 100644
--- a/holocron/trainer/core.py
+++ b/holocron/trainer/core.py
@@ -191,7 +191,7 @@ def _backprop_step(self, loss: Tensor) -> None:
                 # Safeguard for Gradient explosion
                 if isinstance(self.grad_clip, float):
                     self.scaler.unscale_(self.optimizer)
-                    nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip)  # type: ignore[attr-defined]
+                    nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip)
                 self.scaler.step(self.optimizer)
                 self.scaler.update()
                 self.optimizer.zero_grad()
@@ -202,7 +202,7 @@ def _backprop_step(self, loss: Tensor) -> None:
             if self._grad_count == self.gradient_acc:
                 # Safeguard for Gradient explosion
                 if isinstance(self.grad_clip, float):
-                    nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip)  # type: ignore[attr-defined]
+                    nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip)
                 self.optimizer.step()
                 self.optimizer.zero_grad()
                 self._grad_count = 0
@@ -210,7 +210,7 @@ def _backprop_step(self, loss: Tensor) -> None:
     def _get_loss(self, x: Tensor, target: Tensor, return_logits: bool = False) -> Union[Tensor, Tuple[Tensor, Tensor]]:
         # AMP
         if self.amp:
-            with torch.cuda.amp.autocast():  # type: ignore[attr-defined]
+            with torch.cuda.amp.autocast():
                 # Forward
                 out = self.model(x)
                 # Loss computation
@@ -294,7 +294,7 @@ def fit_n_epochs(
         self._reset_scheduler(lr, num_epochs, sched_type, **kwargs)
 
         if self.amp:
-            self.scaler = torch.cuda.amp.GradScaler()  # type: ignore[attr-defined]
+            self.scaler = torch.cuda.amp.GradScaler()
 
         mb = master_bar(range(num_epochs))
         for _ in mb:
@@ -347,7 +347,7 @@ def find_lr(
         self.loss_recorder = []
 
         if self.amp:
-            self.scaler = torch.cuda.amp.GradScaler()  # type: ignore[attr-defined]
+            self.scaler = torch.cuda.amp.GradScaler()
 
         for batch_idx, (x, target) in enumerate(self.train_loader):
             x, target = self.to_cuda(x, target)
@@ -433,7 +433,7 @@ def check_setup(
         _losses = []
 
         if self.amp:
-            self.scaler = torch.cuda.amp.GradScaler()  # type: ignore[attr-defined]
+            self.scaler = torch.cuda.amp.GradScaler()
 
         for _ in range(num_it):
             # Forward
diff --git a/holocron/trainer/detection.py b/holocron/trainer/detection.py
index 0f9d91b34..f170d3397 100644
--- a/holocron/trainer/detection.py
+++ b/holocron/trainer/detection.py
@@ -22,14 +22,14 @@ def assign_iou(gt_boxes: Tensor, pred_boxes: Tensor, iou_threshold: float = 0.5)
     assign_unique = torch.unique(iou.indices[gt_kept])
     # Filter
     if iou.indices[gt_kept].shape[0] == assign_unique.shape[0]:
-        return torch.arange(gt_boxes.shape[0])[gt_kept], iou.indices[gt_kept]
+        return torch.arange(gt_boxes.shape[0])[gt_kept], iou.indices[gt_kept]  # type: ignore[return-value]
 
     gt_indices, pred_indices = [], []
     for pred_idx in assign_unique:
         selection = iou.values[gt_kept][iou.indices[gt_kept] == pred_idx].argmax()
         gt_indices.append(torch.arange(gt_boxes.shape[0])[gt_kept][selection].item())
         pred_indices.append(iou.indices[gt_kept][selection].item())
-    return gt_indices, pred_indices
+    return gt_indices, pred_indices  # type: ignore[return-value]
 
 
 class DetectionTrainer(Trainer):
@@ -63,7 +63,7 @@ def _to_cuda(  # type: ignore[override]
     def _get_loss(self, x: List[Tensor], target: List[Dict[str, Tensor]]) -> Tensor:  # type: ignore[override]
         # AMP
         if self.amp:
-            with torch.cuda.amp.autocast():  # type: ignore[attr-defined]
+            with torch.cuda.amp.autocast():
                 # Forward & loss computation
                 loss_dict = self.model(x, target)
                 return sum(loss_dict.values())
@@ -97,7 +97,7 @@ def evaluate(self, iou_threshold: float = 0.5) -> Dict[str, Optional[float]]:
             x, target = self.to_cuda(x, target)
 
             if self.amp:
-                with torch.cuda.amp.autocast():  # type: ignore[attr-defined]
+                with torch.cuda.amp.autocast():
                     detections = self.model(x)
             else:
                 detections = self.model(x)
diff --git a/pyproject.toml b/pyproject.toml
index 095b44ec8..1023c6d5a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -57,7 +57,7 @@ training = [
 ]
 quality = [
     "ruff==0.1.14",
-    "mypy==1.8.0",
+    "mypy==1.9.0",
     "types-tqdm",
     "pre-commit>=3.0.0,<4.0.0",
 ]
@@ -84,7 +84,7 @@ dev = [
     "onnx>=1.13.0,<2.0.0",
     # style
     "ruff==0.1.14",
-    "mypy==1.8.0",
+    "mypy==1.9.0",
     "types-tqdm",
     "pre-commit>=3.0.0,<4.0.0",
     # docs