Skip to content

Commit 598fe77

Browse files
mark-bloch authored and Saeed Mahameed committed
net/mlx5: Lag, Create shared FDB when in switchdev mode
If both eswitches are in switchdev mode and the uplink representors are enslaved to the same bond device, create a shared FDB configuration. When moving to shared FDB mode, not only does the hardware need to be configured, but the RDMA driver also needs to reconfigure itself. When such a change is made, unload the RDMA devices, configure the hardware, and load the RDMA representors. When destroying the lag (which can happen if a PCI function is unbound, the driver is unloaded, or a netdev is simply removed from the bond), make sure to restore the system to the previous state only if possible. For example, if a PCI function is unbound there is no need to load the representors, as the device is going away. Signed-off-by: Mark Bloch <mbloch@nvidia.com> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
1 parent db20299 commit 598fe77

File tree

3 files changed

+105
-18
lines changed

3 files changed

+105
-18
lines changed

drivers/net/ethernet/mellanox/mlx5/core/lag.c

Lines changed: 102 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@
3232

3333
#include <linux/netdevice.h>
3434
#include <linux/mlx5/driver.h>
35+
#include <linux/mlx5/eswitch.h>
3536
#include <linux/mlx5/vport.h>
37+
#include "lib/devcom.h"
3638
#include "mlx5_core.h"
3739
#include "eswitch.h"
3840
#include "lag.h"
@@ -45,7 +47,7 @@
4547
static DEFINE_SPINLOCK(lag_lock);
4648

4749
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
48-
u8 remap_port2)
50+
u8 remap_port2, bool shared_fdb)
4951
{
5052
u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
5153
void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
@@ -54,6 +56,7 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
5456

5557
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
5658
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
59+
MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
5760

5861
return mlx5_cmd_exec_in(dev, create_lag, in);
5962
}
@@ -224,35 +227,59 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
224227
}
225228

226229
static int mlx5_create_lag(struct mlx5_lag *ldev,
227-
struct lag_tracker *tracker)
230+
struct lag_tracker *tracker,
231+
bool shared_fdb)
228232
{
229233
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
234+
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
235+
u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
230236
int err;
231237

232238
mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
233239
&ldev->v2p_map[MLX5_LAG_P2]);
234240

235-
mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
236-
ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);
241+
mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
242+
ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
243+
shared_fdb);
237244

238245
err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
239-
ldev->v2p_map[MLX5_LAG_P2]);
240-
if (err)
246+
ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
247+
if (err) {
241248
mlx5_core_err(dev0,
242249
"Failed to create LAG (%d)\n",
243250
err);
251+
return err;
252+
}
253+
254+
if (shared_fdb) {
255+
err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
256+
dev1->priv.eswitch);
257+
if (err)
258+
mlx5_core_err(dev0, "Can't enable single FDB mode\n");
259+
else
260+
mlx5_core_info(dev0, "Operation mode is single FDB\n");
261+
}
262+
263+
if (err) {
264+
MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
265+
if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
266+
mlx5_core_err(dev0,
267+
"Failed to deactivate RoCE LAG; driver restart required\n");
268+
}
269+
244270
return err;
245271
}
246272

247273
int mlx5_activate_lag(struct mlx5_lag *ldev,
248274
struct lag_tracker *tracker,
249-
u8 flags)
275+
u8 flags,
276+
bool shared_fdb)
250277
{
251278
bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
252279
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
253280
int err;
254281

255-
err = mlx5_create_lag(ldev, tracker);
282+
err = mlx5_create_lag(ldev, tracker, shared_fdb);
256283
if (err) {
257284
if (roce_lag) {
258285
mlx5_core_err(dev0,
@@ -266,6 +293,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
266293
}
267294

268295
ldev->flags |= flags;
296+
ldev->shared_fdb = shared_fdb;
269297
return 0;
270298
}
271299

@@ -278,6 +306,12 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
278306

279307
ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
280308

309+
if (ldev->shared_fdb) {
310+
mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
311+
ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
312+
ldev->shared_fdb = false;
313+
}
314+
281315
MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
282316
err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
283317
if (err) {
@@ -333,6 +367,10 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
333367
if (!ldev->pf[i].dev)
334368
continue;
335369

370+
if (ldev->pf[i].dev->priv.flags &
371+
MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
372+
continue;
373+
336374
ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
337375
mlx5_rescan_drivers_locked(ldev->pf[i].dev);
338376
}
@@ -342,12 +380,15 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
342380
{
343381
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
344382
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
383+
bool shared_fdb = ldev->shared_fdb;
345384
bool roce_lag;
346385
int err;
347386

348387
roce_lag = __mlx5_lag_is_roce(ldev);
349388

350-
if (roce_lag) {
389+
if (shared_fdb) {
390+
mlx5_lag_remove_devices(ldev);
391+
} else if (roce_lag) {
351392
if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
352393
dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
353394
mlx5_rescan_drivers_locked(dev0);
@@ -359,8 +400,34 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev)
359400
if (err)
360401
return;
361402

362-
if (roce_lag)
403+
if (shared_fdb || roce_lag)
363404
mlx5_lag_add_devices(ldev);
405+
406+
if (shared_fdb) {
407+
if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
408+
mlx5_eswitch_reload_reps(dev0->priv.eswitch);
409+
if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
410+
mlx5_eswitch_reload_reps(dev1->priv.eswitch);
411+
}
412+
}
413+
414+
static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
415+
{
416+
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
417+
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
418+
419+
if (is_mdev_switchdev_mode(dev0) &&
420+
is_mdev_switchdev_mode(dev1) &&
421+
mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
422+
mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
423+
mlx5_devcom_is_paired(dev0->priv.devcom,
424+
MLX5_DEVCOM_ESW_OFFLOADS) &&
425+
MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
426+
MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
427+
MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
428+
return true;
429+
430+
return false;
364431
}
365432

366433
static void mlx5_do_bond(struct mlx5_lag *ldev)
@@ -380,6 +447,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
380447
}
381448

382449
if (do_bond && !__mlx5_lag_is_active(ldev)) {
450+
bool shared_fdb = mlx5_shared_fdb_supported(ldev);
451+
383452
roce_lag = !mlx5_sriov_is_enabled(dev0) &&
384453
!mlx5_sriov_is_enabled(dev1);
385454

@@ -389,23 +458,40 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
389458
dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
390459
#endif
391460

392-
if (roce_lag)
461+
if (shared_fdb || roce_lag)
393462
mlx5_lag_remove_devices(ldev);
394463

395464
err = mlx5_activate_lag(ldev, &tracker,
396465
roce_lag ? MLX5_LAG_FLAG_ROCE :
397-
MLX5_LAG_FLAG_SRIOV);
466+
MLX5_LAG_FLAG_SRIOV,
467+
shared_fdb);
398468
if (err) {
399-
if (roce_lag)
469+
if (shared_fdb || roce_lag)
400470
mlx5_lag_add_devices(ldev);
401471

402472
return;
403-
}
404-
405-
if (roce_lag) {
473+
} else if (roce_lag) {
406474
dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
407475
mlx5_rescan_drivers_locked(dev0);
408476
mlx5_nic_vport_enable_roce(dev1);
477+
} else if (shared_fdb) {
478+
dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
479+
mlx5_rescan_drivers_locked(dev0);
480+
481+
err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
482+
if (!err)
483+
err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);
484+
485+
if (err) {
486+
dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
487+
mlx5_rescan_drivers_locked(dev0);
488+
mlx5_deactivate_lag(ldev);
489+
mlx5_lag_add_devices(ldev);
490+
mlx5_eswitch_reload_reps(dev0->priv.eswitch);
491+
mlx5_eswitch_reload_reps(dev1->priv.eswitch);
492+
mlx5_core_err(dev0, "Failed to enable lag\n");
493+
return;
494+
}
409495
}
410496
} else if (do_bond && __mlx5_lag_is_active(ldev)) {
411497
mlx5_modify_lag(ldev, &tracker);

drivers/net/ethernet/mellanox/mlx5/core/lag.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ void mlx5_modify_lag(struct mlx5_lag *ldev,
7373
struct lag_tracker *tracker);
7474
int mlx5_activate_lag(struct mlx5_lag *ldev,
7575
struct lag_tracker *tracker,
76-
u8 flags);
76+
u8 flags,
77+
bool shared_fdb);
7778
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
7879
struct net_device *ndev);
7980

drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
161161
struct lag_tracker tracker;
162162

163163
tracker = ldev->tracker;
164-
mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
164+
mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
165165
}
166166

167167
mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);

0 commit comments

Comments (0)