Skip to content

Commit

Permalink
common/mlx5: use new port query API if available
Browse files Browse the repository at this point in the history
[ upstream commit d0cf77e ]

In order to get E-Switch vport identifiers the mlx5 PMD relies
on two approaches:
  [a] use port query API if it is provided by rdma-core library
  [b] otherwise, deduce vport ids from the related VF index
The latter is not reliable and may not work with newer kernel
drivers and in some configurations (LAG), causing E-Switch
malfunction. Hence, engaging the port query API is highly
desirable.

Depending on the rdma-core version, the port query API availability is:
  - very old OFED versions have no query API (approach [b])
  - rdma-core OFED < 5.5 provides mlx5dv_query_devx_port,
    HAVE_MLX5DV_DR_DEVX_PORT flag is defined (approach [a])
  - rdma-core OFED >= 5.5 has mlx5dv_query_port, flag
    HAVE_MLX5DV_DR_DEVX_PORT_V35 is defined (approach [a])
  - future OFED versions might remove mlx5dv_query_devx_port
    and HAVE_MLX5DV_DR_DEVX_PORT will not be defined
  - Upstream rdma-core < v35 has no port query API (approach [b])
  - Upstream rdma-core >= v35 has mlx5dv_query_port, flag
    HAVE_MLX5DV_DR_DEVX_PORT_V35 is defined (approach [a])

In order to support the new mlx5dv_query_port routine, the
conditional compilation flag HAVE_MLX5DV_DR_DEVX_PORT_V35
is introduced by this patch. The flag HAVE_MLX5DV_DR_DEVX_PORT
is kept for compatibility with previous rdma-core versions.

Although this patch is not a bugfix (it follows the API change
introduced in the underlying library), it resolves a compatibility
issue, and porting it to DPDK LTS is highly desirable.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
  • Loading branch information
viacheslavo authored and bluca committed Jul 12, 2021
1 parent 66d5363 commit 245c98f
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 44 deletions.
2 changes: 2 additions & 0 deletions drivers/common/mlx5/linux/meson.build
Expand Up @@ -94,6 +94,8 @@ has_sym_args = [
'IBV_WQ_FLAG_RX_END_PADDING' ],
[ 'HAVE_MLX5DV_DR_DEVX_PORT', 'infiniband/mlx5dv.h',
'mlx5dv_query_devx_port' ],
[ 'HAVE_MLX5DV_DR_DEVX_PORT_V35', 'infiniband/mlx5dv.h',
'mlx5dv_query_port' ],
[ 'HAVE_IBV_DEVX_OBJ', 'infiniband/mlx5dv.h',
'mlx5dv_devx_obj_create' ],
[ 'HAVE_IBV_FLOW_DEVX_COUNTERS', 'infiniband/mlx5dv.h',
Expand Down
55 changes: 46 additions & 9 deletions drivers/common/mlx5/linux/mlx5_glue.c
Expand Up @@ -1087,17 +1087,54 @@ mlx5_glue_devx_wq_query(struct ibv_wq *wq, const void *in, size_t inlen,
static int
mlx5_glue_devx_port_query(struct ibv_context *ctx,
uint32_t port_num,
struct mlx5dv_devx_port *mlx5_devx_port)
{
struct mlx5_port_info *info)
{
int err = 0;

info->query_flags = 0;
#ifdef HAVE_MLX5DV_DR_DEVX_PORT_V35
/* The DevX port query API is implemented (rdma-core v35 and above). */
struct mlx5_ib_uapi_query_port devx_port;

memset(&devx_port, 0, sizeof(devx_port));
err = mlx5dv_query_port(ctx, port_num, &devx_port);
if (err)
return err;
if (devx_port.flags & MLX5DV_QUERY_PORT_VPORT_REG_C0) {
info->vport_meta_tag = devx_port.reg_c0.value;
info->vport_meta_mask = devx_port.reg_c0.mask;
info->query_flags |= MLX5_PORT_QUERY_REG_C0;
}
if (devx_port.flags & MLX5DV_QUERY_PORT_VPORT) {
info->vport_id = devx_port.vport;
info->query_flags |= MLX5_PORT_QUERY_VPORT;
}
#else
#ifdef HAVE_MLX5DV_DR_DEVX_PORT
return mlx5dv_query_devx_port(ctx, port_num, mlx5_devx_port);
/* The legacy DevX port query API is implemented (prior v35). */
struct mlx5dv_devx_port devx_port = {
.comp_mask = MLX5DV_DEVX_PORT_VPORT |
MLX5DV_DEVX_PORT_MATCH_REG_C_0
};

err = mlx5dv_query_devx_port(ctx, port_num, &devx_port);
if (err)
return err;
if (devx_port.comp_mask & MLX5DV_DEVX_PORT_MATCH_REG_C_0) {
info->vport_meta_tag = devx_port.reg_c_0.value;
info->vport_meta_mask = devx_port.reg_c_0.mask;
info->query_flags |= MLX5_PORT_QUERY_REG_C0;
}
if (devx_port.comp_mask & MLX5DV_DEVX_PORT_VPORT) {
info->vport_id = devx_port.vport_num;
info->query_flags |= MLX5_PORT_QUERY_VPORT;
}
#else
(void)ctx;
(void)port_num;
(void)mlx5_devx_port;
errno = ENOTSUP;
return errno;
#endif
RTE_SET_USED(ctx);
RTE_SET_USED(port_num);
#endif /* HAVE_MLX5DV_DR_DEVX_PORT */
#endif /* HAVE_MLX5DV_DR_DEVX_PORT_V35 */
return err;
}

static int
Expand Down
16 changes: 15 additions & 1 deletion drivers/common/mlx5/linux/mlx5_glue.h
Expand Up @@ -84,6 +84,20 @@ struct mlx5dv_dr_action;
struct mlx5dv_devx_port;
#endif

#ifndef HAVE_MLX5DV_DR_DEVX_PORT_V35
struct mlx5dv_port;
#endif

/*
 * Bits of mlx5_port_info::query_flags. Each bit tells which of the other
 * fields were actually filled in by the port query.
 */
#define MLX5_PORT_QUERY_VPORT (1u << 0) /* vport_id is valid. */
#define MLX5_PORT_QUERY_REG_C0 (1u << 1) /* vport_meta_tag/mask are valid. */

/*
 * Library-independent result of the glue devx_port_query() callback.
 * Whichever rdma-core port query routine is available gets translated into
 * this structure, so callers do not depend on rdma-core specific types.
 * A field must be read only if its MLX5_PORT_QUERY_* bit is set in
 * query_flags.
 */
struct mlx5_port_info {
uint16_t query_flags; /* Bitmask of MLX5_PORT_QUERY_* valid-field flags. */
uint16_t vport_id; /* Associated VF vport index (if any). */
uint32_t vport_meta_tag; /* Used for vport index match over VF LAG. */
uint32_t vport_meta_mask; /* Used for vport index field match mask. */
};

#ifndef HAVE_MLX5_DR_CREATE_ACTION_FLOW_METER
struct mlx5dv_dr_flow_meter_attr;
#endif
Expand Down Expand Up @@ -311,7 +325,7 @@ struct mlx5_glue {
void *out, size_t outlen);
int (*devx_port_query)(struct ibv_context *ctx,
uint32_t port_num,
struct mlx5dv_devx_port *mlx5_devx_port);
struct mlx5_port_info *info);
int (*dr_dump_domain)(FILE *file, void *domain);
int (*devx_query_eqn)(struct ibv_context *context, uint32_t cpus,
uint32_t *eqn);
Expand Down
60 changes: 26 additions & 34 deletions drivers/net/mlx5/linux/mlx5_os.c
Expand Up @@ -695,9 +695,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
int own_domain_id = 0;
uint16_t port_id;
unsigned int i;
#ifdef HAVE_MLX5DV_DR_DEVX_PORT
struct mlx5dv_devx_port devx_port = { .comp_mask = 0 };
#endif
struct mlx5_port_info vport_info = { .query_flags = 0 };

/* Determine if this port representor is supposed to be spawned. */
if (switch_info->representor && dpdk_dev->devargs) {
Expand Down Expand Up @@ -940,29 +938,27 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
priv->vport_meta_tag = 0;
priv->vport_meta_mask = 0;
priv->pf_bond = spawn->pf_bond;
#ifdef HAVE_MLX5DV_DR_DEVX_PORT
/*
* The DevX port query API is implemented. E-Switch may use
* either vport or reg_c[0] metadata register to match on
* vport index. The engaged part of metadata register is
* defined by mask.
* If we have E-Switch we should determine the vport attributes.
* E-Switch may use either source vport field or reg_c[0] metadata
* register to match on vport index. The engaged part of metadata
* register is defined by mask.
*/
if (switch_info->representor || switch_info->master) {
devx_port.comp_mask = MLX5DV_DEVX_PORT_VPORT |
MLX5DV_DEVX_PORT_MATCH_REG_C_0;
err = mlx5_glue->devx_port_query(sh->ctx, spawn->phys_port,
&devx_port);
err = mlx5_glue->devx_port_query(sh->ctx,
spawn->phys_port,
&vport_info);
if (err) {
DRV_LOG(WARNING,
"can't query devx port %d on device %s",
spawn->phys_port,
mlx5_os_get_dev_device_name(spawn->phys_dev));
devx_port.comp_mask = 0;
vport_info.query_flags = 0;
}
}
if (devx_port.comp_mask & MLX5DV_DEVX_PORT_MATCH_REG_C_0) {
priv->vport_meta_tag = devx_port.reg_c_0.value;
priv->vport_meta_mask = devx_port.reg_c_0.mask;
if (vport_info.query_flags & MLX5_PORT_QUERY_REG_C0) {
priv->vport_meta_tag = vport_info.vport_meta_tag;
priv->vport_meta_mask = vport_info.vport_meta_mask;
if (!priv->vport_meta_mask) {
DRV_LOG(ERR, "vport zero mask for port %d"
" on bonding device %s",
Expand All @@ -982,8 +978,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
goto error;
}
}
if (devx_port.comp_mask & MLX5DV_DEVX_PORT_VPORT) {
priv->vport_id = devx_port.vport_num;
if (vport_info.query_flags & MLX5_PORT_QUERY_VPORT) {
priv->vport_id = vport_info.vport_id;
} else if (spawn->pf_bond >= 0 &&
(switch_info->representor || switch_info->master)) {
DRV_LOG(ERR, "can't deduce vport index for port %d"
Expand All @@ -993,25 +989,21 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
err = ENOTSUP;
goto error;
} else {
/* Suppose vport index in compatible way. */
/*
* Suppose vport index in compatible way. Kernel/rdma_core
* support single E-Switch per PF configurations only and
* vport_id field contains the vport index for associated VF,
* which is deduced from representor port name.
* For example, let's have the IB device port 10, it has
* attached network device eth0, which has port name attribute
* pf0vf2, we can deduce the VF number as 2, and set vport index
* as 3 (2+1). This assigning schema should be changed if the
* multiple E-Switch instances per PF configurations or/and PCI
* subfunctions are added.
*/
priv->vport_id = switch_info->representor ?
switch_info->port_name + 1 : -1;
}
#else
/*
* Kernel/rdma_core support single E-Switch per PF configurations
* only and vport_id field contains the vport index for
* associated VF, which is deduced from representor port name.
* For example, let's have the IB device port 10, it has
* attached network device eth0, which has port name attribute
* pf0vf2, we can deduce the VF number as 2, and set vport index
* as 3 (2+1). This assigning schema should be changed if the
* multiple E-Switch instances per PF configurations or/and PCI
* subfunctions are added.
*/
priv->vport_id = switch_info->representor ?
switch_info->port_name + 1 : -1;
#endif
/* representor_id field keeps the unmodified VF index. */
priv->representor_id = switch_info->representor ?
switch_info->port_name : -1;
Expand Down

0 comments on commit 245c98f

Please sign in to comment.