
Commit cac1eb2

mark-bloch authored and Saeed Mahameed committed
net/mlx5: Lag, properly lock eswitch if needed
Currently when doing hardware lag we check the eswitch mode, but as this isn't done under a lock the check isn't valid. As the code needs to sync between two different devices, extra care is needed:

- When going to change eswitch mode, if hardware lag is active, destroy it.
- While changing eswitch modes, block any hardware bond creation.
- Delay handling bonding events until there are no mode changes in progress.
- When attaching a new mdev to lag, block until there is no mode change in progress. In order for the mode change to finish, the interface lock will have to be taken; release the lock and sleep for 100ms to allow forward progress. As this is a very rare condition (it can happen if the user unbinds and binds a PCI function while also changing the eswitch mode of the other PCI function), it has no real-world impact.

As taking multiple eswitch mode locks is now required, lockdep will complain about a possible deadlock. Register a key per eswitch to make lockdep happy.

Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
1 parent 898b078 commit cac1eb2
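
[Editor's sketch] The scheme described in the commit message boils down to a counter guarded by the device-list lock: eswitch mode changes raise mode_changes_in_progress, and the bonding paths back off while it is non-zero. A minimal userspace sketch of that idea (hypothetical names, pthreads standing in for the kernel locks; not mlx5 code):

/* Mode changes bump a counter under a global lock; bond creation
 * skips its work while the counter is set and lets the caller retry. */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t dev_list_lock = PTHREAD_MUTEX_INITIALIZER;
static int mode_changes_in_progress;

static void lag_disable_change(void)        /* entering an eswitch mode change */
{
        pthread_mutex_lock(&dev_list_lock);
        mode_changes_in_progress++;
        pthread_mutex_unlock(&dev_list_lock);
}

static void lag_enable_change(void)         /* mode change finished */
{
        pthread_mutex_lock(&dev_list_lock);
        mode_changes_in_progress--;
        pthread_mutex_unlock(&dev_list_lock);
}

static bool lag_try_bond(void)              /* bonding work path */
{
        bool busy;

        pthread_mutex_lock(&dev_list_lock);
        busy = mode_changes_in_progress > 0;
        if (!busy) {
                /* ... create or destroy the hardware bond here ... */
        }
        pthread_mutex_unlock(&dev_list_lock);
        return !busy;                       /* caller requeues its work if busy */
}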

File tree: 7 files changed, +107 -18 lines


drivers/net/ethernet/mellanox/mlx5/core/eswitch.c

Lines changed: 20 additions & 4 deletions
@@ -1458,8 +1458,6 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs)
 
         esw->mode = mode;
 
-        mlx5_lag_update(esw->dev);
-
         if (mode == MLX5_ESWITCH_LEGACY) {
                 err = esw_legacy_enable(esw);
         } else {
@@ -1506,6 +1504,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
         if (!mlx5_esw_allowed(esw))
                 return 0;
 
+        mlx5_lag_disable_change(esw->dev);
         down_write(&esw->mode_lock);
         if (esw->mode == MLX5_ESWITCH_NONE) {
                 ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
@@ -1519,6 +1518,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
                 esw->esw_funcs.num_vfs = num_vfs;
         }
         up_write(&esw->mode_lock);
+        mlx5_lag_enable_change(esw->dev);
         return ret;
 }
 
@@ -1550,8 +1550,6 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf)
         old_mode = esw->mode;
         esw->mode = MLX5_ESWITCH_NONE;
 
-        mlx5_lag_update(esw->dev);
-
         if (old_mode == MLX5_ESWITCH_OFFLOADS)
                 mlx5_rescan_drivers(esw->dev);
 
@@ -1567,10 +1565,12 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf)
         if (!mlx5_esw_allowed(esw))
                 return;
 
+        mlx5_lag_disable_change(esw->dev);
         down_write(&esw->mode_lock);
         mlx5_eswitch_disable_locked(esw, clear_vf);
         esw->esw_funcs.num_vfs = 0;
         up_write(&esw->mode_lock);
+        mlx5_lag_enable_change(esw->dev);
 }
 
 static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out)
@@ -1759,7 +1759,9 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
         ida_init(&esw->offloads.vport_metadata_ida);
         xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
         mutex_init(&esw->state_lock);
+        lockdep_register_key(&esw->mode_lock_key);
         init_rwsem(&esw->mode_lock);
+        lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key);
 
         esw->enabled_vports = 0;
         esw->mode = MLX5_ESWITCH_NONE;
@@ -1793,6 +1795,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 
         esw->dev->priv.eswitch = NULL;
         destroy_workqueue(esw->work_queue);
+        lockdep_unregister_key(&esw->mode_lock_key);
         mutex_destroy(&esw->state_lock);
         WARN_ON(!xa_empty(&esw->offloads.vhca_map));
         xa_destroy(&esw->offloads.vhca_map);
@@ -2366,9 +2369,22 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw)
  */
 void mlx5_esw_unlock(struct mlx5_eswitch *esw)
 {
+        if (!mlx5_esw_allowed(esw))
+                return;
         up_write(&esw->mode_lock);
 }
 
+/**
+ * mlx5_esw_lock() - Take write lock on esw mode lock
+ * @esw: eswitch device.
+ */
+void mlx5_esw_lock(struct mlx5_eswitch *esw)
+{
+        if (!mlx5_esw_allowed(esw))
+                return;
+        down_write(&esw->mode_lock);
+}
+
 /**
  * mlx5_eswitch_get_total_vports - Get total vports of the eswitch
  *
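
[Editor's sketch] The lockdep_register_key()/lockdep_set_class() calls above give each eswitch's mode_lock its own lockdep class, so the lag code can hold both eswitches' locks at once without lockdep flagging it as recursive locking of a single class. A minimal sketch of the per-instance key pattern for a generic kernel object (hypothetical names, not the mlx5 structures):

#include <linux/lockdep.h>
#include <linux/rwsem.h>
#include <linux/slab.h>

struct my_obj {
        struct rw_semaphore lock;
        struct lock_class_key lock_key;      /* one lockdep class per instance */
};

static struct my_obj *my_obj_create(void)
{
        struct my_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

        if (!obj)
                return NULL;
        lockdep_register_key(&obj->lock_key);           /* dynamic key lives in the object */
        init_rwsem(&obj->lock);
        lockdep_set_class(&obj->lock, &obj->lock_key);  /* bind this lock to that key */
        return obj;
}

static void my_obj_destroy(struct my_obj *obj)
{
        /* unregister only once the lock can no longer be taken */
        lockdep_unregister_key(&obj->lock_key);
        kfree(obj);
}

With a single static class, down_write() on two different instances would look like an AA deadlock to lockdep even though the instances are distinct.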

drivers/net/ethernet/mellanox/mlx5/core/eswitch.h

Lines changed: 5 additions & 0 deletions
@@ -323,6 +323,7 @@ struct mlx5_eswitch {
                 u32             large_group_num;
         } params;
         struct blocking_notifier_head n_head;
+        struct lock_class_key mode_lock_key;
 };
 
 void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -707,6 +708,7 @@ void mlx5_esw_get(struct mlx5_core_dev *dev);
 void mlx5_esw_put(struct mlx5_core_dev *dev);
 int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
 void mlx5_esw_unlock(struct mlx5_eswitch *esw);
+void mlx5_esw_lock(struct mlx5_eswitch *esw);
 
 void esw_vport_change_handle_locked(struct mlx5_vport *vport);
 
@@ -727,6 +729,9 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
         return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; }
+static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; }
+
 static inline struct mlx5_flow_handle *
 esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
 {

drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c

Lines changed: 4 additions & 1 deletion
@@ -3051,10 +3051,11 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
         if (esw_mode_from_devlink(mode, &mlx5_mode))
                 return -EINVAL;
 
+        mlx5_lag_disable_change(esw->dev);
         err = mlx5_esw_try_lock(esw);
         if (err < 0) {
                 NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy");
-                return err;
+                goto enable_lag;
         }
         cur_mlx5_mode = err;
         err = 0;
@@ -3071,6 +3072,8 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
 
 unlock:
         mlx5_esw_unlock(esw);
+enable_lag:
+        mlx5_lag_enable_change(esw->dev);
         return err;
 }
 
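
[Editor's sketch] The devlink handler above now takes the lag-change blocker before trying the eswitch mode lock, and the new enable_lag label makes sure it is released on every exit path, including the trylock failure. A condensed sketch of that unwind order (the wrapper name and the elided body are hypothetical; the helpers are the ones added by this commit):

static int eswitch_mode_set_sketch(struct mlx5_eswitch *esw)
{
        int err;

        mlx5_lag_disable_change(esw->dev);  /* 1. block hardware bond creation */
        err = mlx5_esw_try_lock(esw);       /* 2. take the eswitch mode lock */
        if (err < 0)
                goto enable_lag;            /* failure must still undo step 1 */

        /* ... change the eswitch mode under the lock ... */
        err = 0;

        mlx5_esw_unlock(esw);               /* 3. release in reverse order */
enable_lag:
        mlx5_lag_enable_change(esw->dev);
        return err;
}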

drivers/net/ethernet/mellanox/mlx5/core/lag.c

Lines changed: 72 additions & 11 deletions
@@ -418,21 +418,48 @@ static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
         queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
 }
 
+static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
+                                    struct mlx5_core_dev *dev1)
+{
+        if (dev0)
+                mlx5_esw_lock(dev0->priv.eswitch);
+        if (dev1)
+                mlx5_esw_lock(dev1->priv.eswitch);
+}
+
+static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
+                                      struct mlx5_core_dev *dev1)
+{
+        if (dev1)
+                mlx5_esw_unlock(dev1->priv.eswitch);
+        if (dev0)
+                mlx5_esw_unlock(dev0->priv.eswitch);
+}
+
 static void mlx5_do_bond_work(struct work_struct *work)
 {
         struct delayed_work *delayed_work = to_delayed_work(work);
         struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
                                              bond_work);
+        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
         int status;
 
         status = mlx5_dev_list_trylock();
         if (!status) {
-                /* 1 sec delay. */
                 mlx5_queue_bond_work(ldev, HZ);
                 return;
         }
 
+        if (ldev->mode_changes_in_progress) {
+                mlx5_dev_list_unlock();
+                mlx5_queue_bond_work(ldev, HZ);
+                return;
+        }
+
+        mlx5_lag_lock_eswitches(dev0, dev1);
         mlx5_do_bond(ldev);
+        mlx5_lag_unlock_eswitches(dev0, dev1);
         mlx5_dev_list_unlock();
 }
 
@@ -630,15 +657,15 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
 }
 
 /* Must be called with intf_mutex held */
-static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
+static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
 {
         struct mlx5_lag *ldev = NULL;
         struct mlx5_core_dev *tmp_dev;
 
         if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
             !MLX5_CAP_GEN(dev, lag_master) ||
             MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
-                return;
+                return 0;
 
         tmp_dev = mlx5_get_next_phys_dev(dev);
         if (tmp_dev)
@@ -648,15 +675,17 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
                 ldev = mlx5_lag_dev_alloc(dev);
                 if (!ldev) {
                         mlx5_core_err(dev, "Failed to alloc lag dev\n");
-                        return;
+                        return 0;
                 }
         } else {
+                if (ldev->mode_changes_in_progress)
+                        return -EAGAIN;
                 mlx5_ldev_get(ldev);
         }
 
         mlx5_ldev_add_mdev(ldev, dev);
 
-        return;
+        return 0;
 }
 
 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
@@ -667,16 +696,30 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
         if (!ldev)
                 return;
 
+recheck:
         mlx5_dev_list_lock();
+        if (ldev->mode_changes_in_progress) {
+                mlx5_dev_list_unlock();
+                msleep(100);
+                goto recheck;
+        }
         mlx5_ldev_remove_mdev(ldev, dev);
         mlx5_dev_list_unlock();
         mlx5_ldev_put(ldev);
 }
 
 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
 {
+        int err;
+
+recheck:
         mlx5_dev_list_lock();
-        __mlx5_lag_dev_add_mdev(dev);
+        err = __mlx5_lag_dev_add_mdev(dev);
+        if (err) {
+                mlx5_dev_list_unlock();
+                msleep(100);
+                goto recheck;
+        }
         mlx5_dev_list_unlock();
 }
 
@@ -716,6 +759,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
 
         if (i >= MLX5_MAX_PORTS)
                 ldev->flags |= MLX5_LAG_FLAG_READY;
+        mlx5_queue_bond_work(ldev, 0);
 }
 
 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
@@ -789,19 +833,36 @@ bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
 }
 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
 
-void mlx5_lag_update(struct mlx5_core_dev *dev)
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
 {
+        struct mlx5_core_dev *dev0;
+        struct mlx5_core_dev *dev1;
         struct mlx5_lag *ldev;
 
         mlx5_dev_list_lock();
+
         ldev = mlx5_lag_dev(dev);
-        if (!ldev)
-                goto unlock;
+        dev0 = ldev->pf[MLX5_LAG_P1].dev;
+        dev1 = ldev->pf[MLX5_LAG_P2].dev;
 
-        mlx5_do_bond(ldev);
+        ldev->mode_changes_in_progress++;
+        if (__mlx5_lag_is_active(ldev)) {
+                mlx5_lag_lock_eswitches(dev0, dev1);
+                mlx5_disable_lag(ldev);
+                mlx5_lag_unlock_eswitches(dev0, dev1);
+        }
+        mlx5_dev_list_unlock();
+}
 
-unlock:
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
+{
+        struct mlx5_lag *ldev;
+
+        mlx5_dev_list_lock();
+        ldev = mlx5_lag_dev(dev);
+        ldev->mode_changes_in_progress--;
         mlx5_dev_list_unlock();
+        mlx5_queue_bond_work(ldev, 0);
 }
 
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
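
[Editor's sketch] mlx5_lag_add_mdev() and mlx5_lag_remove_mdev() above cannot wait for a mode change while holding the device-list lock, because the mode change needs that lock to finish; instead they drop it, sleep 100ms, and retry. A stripped-down sketch of that back-off loop (hypothetical helper; only mutex_lock()/mutex_unlock()/msleep() are assumed from the kernel API):

#include <linux/delay.h>
#include <linux/mutex.h>

/* Wait out any in-flight eswitch mode change, then do the work.
 * Sleeping with the lock dropped lets the mode-change path take it
 * and finish; the race is rare, so polling every 100ms is acceptable. */
static void do_work_outside_mode_change(struct mutex *dev_list_lock,
                                        int *mode_changes_in_progress)
{
recheck:
        mutex_lock(dev_list_lock);
        if (*mode_changes_in_progress) {
                mutex_unlock(dev_list_lock);
                msleep(100);
                goto recheck;
        }
        /* ... add or remove the mdev from the lag here ... */
        mutex_unlock(dev_list_lock);
}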

drivers/net/ethernet/mellanox/mlx5/core/lag.h

Lines changed: 1 addition & 0 deletions
@@ -39,6 +39,7 @@ struct lag_tracker {
  */
 struct mlx5_lag {
         u8                        flags;
+        int                       mode_changes_in_progress;
         bool                      shared_fdb;
         u8                        v2p_map[MLX5_MAX_PORTS];
         struct kref               ref;

drivers/net/ethernet/mellanox/mlx5/core/main.c

Lines changed: 3 additions & 2 deletions
@@ -1179,18 +1179,19 @@ static int mlx5_load(struct mlx5_core_dev *dev)
                 goto err_ec;
         }
 
+        mlx5_lag_add_mdev(dev);
         err = mlx5_sriov_attach(dev);
         if (err) {
                 mlx5_core_err(dev, "sriov init failed %d\n", err);
                 goto err_sriov;
         }
 
         mlx5_sf_dev_table_create(dev);
-        mlx5_lag_add_mdev(dev);
 
         return 0;
 
 err_sriov:
+        mlx5_lag_remove_mdev(dev);
         mlx5_ec_cleanup(dev);
 err_ec:
         mlx5_sf_hw_table_destroy(dev);
@@ -1222,9 +1223,9 @@ static int mlx5_load(struct mlx5_core_dev *dev)
 
 static void mlx5_unload(struct mlx5_core_dev *dev)
 {
-        mlx5_lag_remove_mdev(dev);
         mlx5_sf_dev_table_destroy(dev);
         mlx5_sriov_detach(dev);
+        mlx5_lag_remove_mdev(dev);
         mlx5_ec_cleanup(dev);
         mlx5_sf_hw_table_destroy(dev);
         mlx5_vhca_event_stop(dev);

drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h

Lines changed: 2 additions & 0 deletions
@@ -168,6 +168,8 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev);
 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev);
 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev);
+void mlx5_lag_disable_change(struct mlx5_core_dev *dev);
+void mlx5_lag_enable_change(struct mlx5_core_dev *dev);
 
 int mlx5_events_init(struct mlx5_core_dev *dev);
 void mlx5_events_cleanup(struct mlx5_core_dev *dev);
