Skip to content
/ linux Public

Commit 2051c70

Browse files
Jianbo Liu and gregkh
authored and committed
net/mlx5e: Fix race condition during IPSec ESN update
[ Upstream commit beb6e2e ] In IPSec full offload mode, the device reports an ESN (Extended Sequence Number) wrap event to the driver. The driver validates this event by querying the IPSec ASO and checking that the esn_event_arm field is 0x0, which indicates an event has occurred. After handling the event, the driver must re-arm the context by setting esn_event_arm back to 0x1. A race condition exists in this handling path. After validating the event, the driver calls mlx5_accel_esp_modify_xfrm() to update the kernel's xfrm state. This function temporarily releases and re-acquires the xfrm state lock. So, the driver needs to acknowledge the event first by setting esn_event_arm to 0x1. This prevents the driver from reprocessing the same ESN update if the hardware sends events for other reasons. Since the next ESN update only occurs after nearly 2^31 packets are received, there's no risk of missing an update, as it will happen long after this handling has finished. Processing the event twice causes the ESN high-order bits (esn_msb) to be incremented incorrectly. The driver then programs the hardware with this invalid ESN state, which leads to anti-replay failures and a complete halt of IPSec traffic. Fix this by re-arming the ESN event immediately after it is validated, before calling mlx5_accel_esp_modify_xfrm(). This ensures that any spurious, duplicate events are correctly ignored, closing the race window. Fixes: fef0667 ("net/mlx5e: Fix ESN update kernel panic") Signed-off-by: Jianbo Liu <jianbol@nvidia.com> Reviewed-by: Leon Romanovsky <leonro@nvidia.com> Signed-off-by: Tariq Toukan <tariqt@nvidia.com> Link: https://patch.msgid.link/20260316094603.6999-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent c3db55d commit 2051c70

File tree

1 file changed

+14
-19
lines changed

1 file changed

+14
-19
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -311,10 +311,11 @@ static void mlx5e_ipsec_aso_update(struct mlx5e_ipsec_sa_entry *sa_entry,
311311
mlx5e_ipsec_aso_query(sa_entry, data);
312312
}
313313

314-
static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry,
315-
u32 mode_param)
314+
static void
315+
mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry,
316+
u32 mode_param,
317+
struct mlx5_accel_esp_xfrm_attrs *attrs)
316318
{
317-
struct mlx5_accel_esp_xfrm_attrs attrs = {};
318319
struct mlx5_wqe_aso_ctrl_seg data = {};
319320

320321
if (mode_param < MLX5E_IPSEC_ESN_SCOPE_MID) {
@@ -324,18 +325,7 @@ static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry,
324325
sa_entry->esn_state.overlap = 1;
325326
}
326327

327-
mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs);
328-
329-
/* It is safe to execute the modify below unlocked since the only flows
330-
* that could affect this HW object, are create, destroy and this work.
331-
*
332-
* Creation flow can't co-exist with this modify work, the destruction
333-
* flow would cancel this work, and this work is a single entity that
334-
* can't conflict with it self.
335-
*/
336-
spin_unlock_bh(&sa_entry->x->lock);
337-
mlx5_accel_esp_modify_xfrm(sa_entry, &attrs);
338-
spin_lock_bh(&sa_entry->x->lock);
328+
mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, attrs);
339329

340330
data.data_offset_condition_operand =
341331
MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET;
@@ -452,7 +442,9 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work)
452442
struct mlx5e_ipsec_work *work =
453443
container_of(_work, struct mlx5e_ipsec_work, work);
454444
struct mlx5e_ipsec_sa_entry *sa_entry = work->data;
445+
struct mlx5_accel_esp_xfrm_attrs tmp = {};
455446
struct mlx5_accel_esp_xfrm_attrs *attrs;
447+
bool need_modify = false;
456448
int ret;
457449

458450
attrs = &sa_entry->attrs;
@@ -462,19 +454,22 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work)
462454
if (ret)
463455
goto unlock;
464456

457+
if (attrs->lft.soft_packet_limit != XFRM_INF)
458+
mlx5e_ipsec_handle_limits(sa_entry);
459+
465460
if (attrs->replay_esn.trigger &&
466461
!MLX5_GET(ipsec_aso, sa_entry->ctx, esn_event_arm)) {
467462
u32 mode_param = MLX5_GET(ipsec_aso, sa_entry->ctx,
468463
mode_parameter);
469464

470-
mlx5e_ipsec_update_esn_state(sa_entry, mode_param);
465+
mlx5e_ipsec_update_esn_state(sa_entry, mode_param, &tmp);
466+
need_modify = true;
471467
}
472468

473-
if (attrs->lft.soft_packet_limit != XFRM_INF)
474-
mlx5e_ipsec_handle_limits(sa_entry);
475-
476469
unlock:
477470
spin_unlock_bh(&sa_entry->x->lock);
471+
if (need_modify)
472+
mlx5_accel_esp_modify_xfrm(sa_entry, &tmp);
478473
kfree(work);
479474
}
480475

0 commit comments

Comments
 (0)