Skip to content

Commit

Permalink
net/mlx5: Initialize flow steering during driver probe
Browse files Browse the repository at this point in the history
Currently, software objects of flow steering are created and destroyed
during reload flow. In case a device is unloaded, the following error
is printed during grace period:

 mlx5_core 0000:00:0b.0: mlx5_fw_fatal_reporter_err_work:690:(pid 95):
    Driver is in error state. Unloading

As a solution to fix use-after-free bugs, where we try to access
these objects, when reading the value of flow_steering_mode devlink
param[1], let's split flow steering creation and destruction into two
routines:
    * init and cleanup: memory, cache, and pools allocation/free.
    * create and destroy: namespaces initialization and cleanup.

While at it, re-order the cleanup function to mirror the init function.

[1]
Kasan trace:

[  385.119849 ] BUG: KASAN: use-after-free in mlx5_devlink_fs_mode_get+0x3b/0xa0
[  385.119849 ] Read of size 4 at addr ffff888104b79308 by task bash/291
[  385.119849 ]
[  385.119849 ] CPU: 1 PID: 291 Comm: bash Not tainted 5.17.0-rc1+ ljalves#2
[  385.119849 ] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014
[  385.119849 ] Call Trace:
[  385.119849 ]  <TASK>
[  385.119849 ]  dump_stack_lvl+0x6e/0x91
[  385.119849 ]  print_address_description.constprop.0+0x1f/0x160
[  385.119849 ]  ? mlx5_devlink_fs_mode_get+0x3b/0xa0
[  385.119849 ]  ? mlx5_devlink_fs_mode_get+0x3b/0xa0
[  385.119849 ]  kasan_report.cold+0x83/0xdf
[  385.119849 ]  ? devlink_param_notify+0x20/0x190
[  385.119849 ]  ? mlx5_devlink_fs_mode_get+0x3b/0xa0
[  385.119849 ]  mlx5_devlink_fs_mode_get+0x3b/0xa0
[  385.119849 ]  devlink_nl_param_fill+0x18a/0xa50
[  385.119849 ]  ? _raw_spin_lock_irqsave+0x8d/0xe0
[  385.119849 ]  ? devlink_flash_update_timeout_notify+0xf0/0xf0
[  385.119849 ]  ? __wake_up_common+0x4b/0x1e0
[  385.119849 ]  ? preempt_count_sub+0x14/0xc0
[  385.119849 ]  ? _raw_spin_unlock_irqrestore+0x28/0x40
[  385.119849 ]  ? __wake_up_common_lock+0xe3/0x140
[  385.119849 ]  ? __wake_up_common+0x1e0/0x1e0
[  385.119849 ]  ? __sanitizer_cov_trace_const_cmp8+0x27/0x80
[  385.119849 ]  ? __rcu_read_unlock+0x48/0x70
[  385.119849 ]  ? kasan_unpoison+0x23/0x50
[  385.119849 ]  ? __kasan_slab_alloc+0x2c/0x80
[  385.119849 ]  ? memset+0x20/0x40
[  385.119849 ]  ? __sanitizer_cov_trace_const_cmp4+0x25/0x80
[  385.119849 ]  devlink_param_notify+0xce/0x190
[  385.119849 ]  devlink_unregister+0x92/0x2b0
[  385.119849 ]  remove_one+0x41/0x140
[  385.119849 ]  pci_device_remove+0x68/0x140
[  385.119849 ]  ? pcibios_free_irq+0x10/0x10
[  385.119849 ]  __device_release_driver+0x294/0x3f0
[  385.119849 ]  device_driver_detach+0x82/0x130
[  385.119849 ]  unbind_store+0x193/0x1b0
[  385.119849 ]  ? subsys_interface_unregister+0x270/0x270
[  385.119849 ]  drv_attr_store+0x4e/0x70
[  385.119849 ]  ? drv_attr_show+0x60/0x60
[  385.119849 ]  sysfs_kf_write+0xa7/0xc0
[  385.119849 ]  kernfs_fop_write_iter+0x23a/0x2f0
[  385.119849 ]  ? sysfs_kf_bin_read+0x160/0x160
[  385.119849 ]  new_sync_write+0x311/0x430
[  385.119849 ]  ? new_sync_read+0x480/0x480
[  385.119849 ]  ? _raw_spin_lock+0x87/0xe0
[  385.119849 ]  ? __sanitizer_cov_trace_cmp4+0x25/0x80
[  385.119849 ]  ? security_file_permission+0x94/0xa0
[  385.119849 ]  vfs_write+0x4c7/0x590
[  385.119849 ]  ksys_write+0xf6/0x1e0
[  385.119849 ]  ? __x64_sys_read+0x50/0x50
[  385.119849 ]  ? fpregs_assert_state_consistent+0x99/0xa0
[  385.119849 ]  do_syscall_64+0x3d/0x90
[  385.119849 ]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[  385.119849 ] RIP: 0033:0x7fc36ef38504
[  385.119849 ] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f
80 00 00 00 00 48 8d 05 f9 61 0d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f
05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53
[  385.119849 ] RSP: 002b:00007ffde0ff3d08 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[  385.119849 ] RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007fc36ef38504
[  385.119849 ] RDX: 000000000000000c RSI: 00007fc370521040 RDI: 0000000000000001
[  385.119849 ] RBP: 00007fc370521040 R08: 00007fc36f00b8c0 R09: 00007fc36ee4b740
[  385.119849 ] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fc36f00a760
[  385.119849 ] R13: 000000000000000c R14: 00007fc36f005760 R15: 000000000000000c
[  385.119849 ]  </TASK>
[  385.119849 ]
[  385.119849 ] Allocated by task 65:
[  385.119849 ]  kasan_save_stack+0x1e/0x40
[  385.119849 ]  __kasan_kmalloc+0x81/0xa0
[  385.119849 ]  mlx5_init_fs+0x11b/0x1160
[  385.119849 ]  mlx5_load+0x13c/0x220
[  385.119849 ]  mlx5_load_one+0xda/0x160
[  385.119849 ]  mlx5_recover_device+0xb8/0x100
[  385.119849 ]  mlx5_health_try_recover+0x2f9/0x3a1
[  385.119849 ]  devlink_health_reporter_recover+0x75/0x100
[  385.119849 ]  devlink_health_report+0x26c/0x4b0
[  385.275909 ]  mlx5_fw_fatal_reporter_err_work+0x11e/0x1b0
[  385.275909 ]  process_one_work+0x520/0x970
[  385.275909 ]  worker_thread+0x378/0x950
[  385.275909 ]  kthread+0x1bb/0x200
[  385.275909 ]  ret_from_fork+0x1f/0x30
[  385.275909 ]
[  385.275909 ] Freed by task 65:
[  385.275909 ]  kasan_save_stack+0x1e/0x40
[  385.275909 ]  kasan_set_track+0x21/0x30
[  385.275909 ]  kasan_set_free_info+0x20/0x30
[  385.275909 ]  __kasan_slab_free+0xfc/0x140
[  385.275909 ]  kfree+0xa5/0x3b0
[  385.275909 ]  mlx5_unload+0x2e/0xb0
[  385.275909 ]  mlx5_unload_one+0x86/0xb0
[  385.275909 ]  mlx5_fw_fatal_reporter_err_work.cold+0xca/0xcf
[  385.275909 ]  process_one_work+0x520/0x970
[  385.275909 ]  worker_thread+0x378/0x950
[  385.275909 ]  kthread+0x1bb/0x200
[  385.275909 ]  ret_from_fork+0x1f/0x30
[  385.275909 ]
[  385.275909 ] The buggy address belongs to the object at ffff888104b79300
[  385.275909 ]  which belongs to the cache kmalloc-128 of size 128
[  385.275909 ] The buggy address is located 8 bytes inside of
[  385.275909 ]  128-byte region [ffff888104b79300, ffff888104b79380)
[  385.275909 ] The buggy address belongs to the page:
[  385.275909 ] page:00000000de44dd39 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x104b78
[  385.275909 ] head:00000000de44dd39 order:1 compound_mapcount:0
[  385.275909 ] flags: 0x8000000000010200(slab|head|zone=2)
[  385.275909 ] raw: 8000000000010200 0000000000000000 dead000000000122 ffff8881000428c0
[  385.275909 ] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000
[  385.275909 ] page dumped because: kasan: bad access detected
[  385.275909 ]
[  385.275909 ] Memory state around the buggy address:
[  385.275909 ]  ffff888104b79200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc
[  385.275909 ]  ffff888104b79280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  385.275909 ] >ffff888104b79300: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  385.275909 ]                       ^
[  385.275909 ]  ffff888104b79380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[  385.275909 ]  ffff888104b79400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  385.275909 ]]

Fixes: e890acd ("net/mlx5: Add devlink flow_steering_mode parameter")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
  • Loading branch information
shayshyi authored and Saeed Mahameed committed May 18, 2022
1 parent 2c5fc6c commit b338869
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 61 deletions.
131 changes: 75 additions & 56 deletions drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
Expand Up @@ -2663,28 +2663,6 @@ static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns)
clean_tree(&root_ns->ns.node);
}

void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering = dev->priv.steering;

cleanup_root_ns(steering->root_ns);
cleanup_root_ns(steering->fdb_root_ns);
steering->fdb_root_ns = NULL;
kfree(steering->fdb_sub_ns);
steering->fdb_sub_ns = NULL;
cleanup_root_ns(steering->port_sel_root_ns);
cleanup_root_ns(steering->sniffer_rx_root_ns);
cleanup_root_ns(steering->sniffer_tx_root_ns);
cleanup_root_ns(steering->rdma_rx_root_ns);
cleanup_root_ns(steering->rdma_tx_root_ns);
cleanup_root_ns(steering->egress_root_ns);
mlx5_cleanup_fc_stats(dev);
kmem_cache_destroy(steering->ftes_cache);
kmem_cache_destroy(steering->fgs_cache);
mlx5_ft_pool_destroy(dev);
kfree(steering);
}

static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
{
struct fs_prio *prio;
Expand Down Expand Up @@ -3086,42 +3064,27 @@ static int init_egress_root_ns(struct mlx5_flow_steering *steering)
return err;
}

int mlx5_init_fs(struct mlx5_core_dev *dev)
void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering;
int err = 0;

err = mlx5_init_fc_stats(dev);
if (err)
return err;

err = mlx5_ft_pool_init(dev);
if (err)
return err;

steering = kzalloc(sizeof(*steering), GFP_KERNEL);
if (!steering) {
err = -ENOMEM;
goto err;
}

steering->dev = dev;
dev->priv.steering = steering;
struct mlx5_flow_steering *steering = dev->priv.steering;

if (mlx5_fs_dr_is_supported(dev))
steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
else
steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;
cleanup_root_ns(steering->root_ns);
cleanup_root_ns(steering->fdb_root_ns);
steering->fdb_root_ns = NULL;
kfree(steering->fdb_sub_ns);
steering->fdb_sub_ns = NULL;
cleanup_root_ns(steering->port_sel_root_ns);
cleanup_root_ns(steering->sniffer_rx_root_ns);
cleanup_root_ns(steering->sniffer_tx_root_ns);
cleanup_root_ns(steering->rdma_rx_root_ns);
cleanup_root_ns(steering->rdma_tx_root_ns);
cleanup_root_ns(steering->egress_root_ns);
}

steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
sizeof(struct mlx5_flow_group), 0,
0, NULL);
steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
0, NULL);
if (!steering->ftes_cache || !steering->fgs_cache) {
err = -ENOMEM;
goto err;
}
int mlx5_fs_core_init(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
int err = 0;

if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
(MLX5_CAP_GEN(dev, nic_flow_table))) ||
Expand Down Expand Up @@ -3180,8 +3143,64 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
}

return 0;

err:
mlx5_fs_core_cleanup(dev);
return err;
}

void mlx5_fs_core_free(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering = dev->priv.steering;

kmem_cache_destroy(steering->ftes_cache);
kmem_cache_destroy(steering->fgs_cache);
kfree(steering);
mlx5_ft_pool_destroy(dev);
mlx5_cleanup_fc_stats(dev);
}

int mlx5_fs_core_alloc(struct mlx5_core_dev *dev)
{
struct mlx5_flow_steering *steering;
int err = 0;

err = mlx5_init_fc_stats(dev);
if (err)
return err;

err = mlx5_ft_pool_init(dev);
if (err)
goto err;

steering = kzalloc(sizeof(*steering), GFP_KERNEL);
if (!steering) {
err = -ENOMEM;
goto err;
}

steering->dev = dev;
dev->priv.steering = steering;

if (mlx5_fs_dr_is_supported(dev))
steering->mode = MLX5_FLOW_STEERING_MODE_SMFS;
else
steering->mode = MLX5_FLOW_STEERING_MODE_DMFS;

steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
sizeof(struct mlx5_flow_group), 0,
0, NULL);
steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
0, NULL);
if (!steering->ftes_cache || !steering->fgs_cache) {
err = -ENOMEM;
goto err;
}

return 0;

err:
mlx5_cleanup_fs(dev);
mlx5_fs_core_free(dev);
return err;
}

Expand Down
6 changes: 4 additions & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
Expand Up @@ -298,8 +298,10 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
enum mlx5_flow_steering_mode mode);

int mlx5_init_fs(struct mlx5_core_dev *dev);
void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
int mlx5_fs_core_alloc(struct mlx5_core_dev *dev);
void mlx5_fs_core_free(struct mlx5_core_dev *dev);
int mlx5_fs_core_init(struct mlx5_core_dev *dev);
void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev);

int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports);
void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev);
Expand Down
15 changes: 12 additions & 3 deletions drivers/net/ethernet/mellanox/mlx5/core/main.c
Expand Up @@ -938,6 +938,12 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
goto err_sf_table_cleanup;
}

err = mlx5_fs_core_alloc(dev);
if (err) {
mlx5_core_err(dev, "Failed to alloc flow steering\n");
goto err_fs;
}

dev->dm = mlx5_dm_create(dev);
if (IS_ERR(dev->dm))
mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
Expand All @@ -948,6 +954,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)

return 0;

err_fs:
mlx5_sf_table_cleanup(dev);
err_sf_table_cleanup:
mlx5_sf_hw_table_cleanup(dev);
err_sf_hw_table_cleanup:
Expand Down Expand Up @@ -985,6 +993,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_hv_vhca_destroy(dev->hv_vhca);
mlx5_fw_tracer_destroy(dev->tracer);
mlx5_dm_cleanup(dev);
mlx5_fs_core_free(dev);
mlx5_sf_table_cleanup(dev);
mlx5_sf_hw_table_cleanup(dev);
mlx5_vhca_event_cleanup(dev);
Expand Down Expand Up @@ -1191,7 +1200,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
goto err_tls_start;
}

err = mlx5_init_fs(dev);
err = mlx5_fs_core_init(dev);
if (err) {
mlx5_core_err(dev, "Failed to init flow steering\n");
goto err_fs;
Expand Down Expand Up @@ -1236,7 +1245,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
err_vhca:
mlx5_vhca_event_stop(dev);
err_set_hca:
mlx5_cleanup_fs(dev);
mlx5_fs_core_cleanup(dev);
err_fs:
mlx5_accel_tls_cleanup(dev);
err_tls_start:
Expand Down Expand Up @@ -1265,7 +1274,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_ec_cleanup(dev);
mlx5_sf_hw_table_destroy(dev);
mlx5_vhca_event_stop(dev);
mlx5_cleanup_fs(dev);
mlx5_fs_core_cleanup(dev);
mlx5_accel_ipsec_cleanup(dev);
mlx5_accel_tls_cleanup(dev);
mlx5_fpga_device_stop(dev);
Expand Down

0 comments on commit b338869

Please sign in to comment.