Skip to content

Commit 391e238

Browse files
committed
Merge: PCI: hv: fix crash/hang Issues due to fast VF add/remove events
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/2713 Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2182619 Fix crash/hang Issues due to fast VF add/remove events. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Approved-by: Maxim Levitsky <mlevitsk@redhat.com> Approved-by: Cathy Avery <cavery@redhat.com> Signed-off-by: Jan Stancek <jstancek@redhat.com>
2 parents e341c7e + f118487 commit 391e238

File tree

1 file changed

+82
-57
lines changed

1 file changed

+82
-57
lines changed

drivers/pci/controller/pci-hyperv.c

Lines changed: 82 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,10 @@ struct hv_pcibus_device {
489489
struct fwnode_handle *fwnode;
490490
/* Protocol version negotiated with the host */
491491
enum pci_protocol_version_t protocol_version;
492+
493+
struct mutex state_lock;
492494
enum hv_pcibus_state state;
495+
493496
struct hv_device *hdev;
494497
resource_size_t low_mmio_space;
495498
resource_size_t high_mmio_space;
@@ -553,19 +556,10 @@ struct hv_dr_state {
553556
struct hv_pcidev_description func[];
554557
};
555558

556-
enum hv_pcichild_state {
557-
hv_pcichild_init = 0,
558-
hv_pcichild_requirements,
559-
hv_pcichild_resourced,
560-
hv_pcichild_ejecting,
561-
hv_pcichild_maximum
562-
};
563-
564559
struct hv_pci_dev {
565560
/* List protected by pci_rescan_remove_lock */
566561
struct list_head list_entry;
567562
refcount_t refs;
568-
enum hv_pcichild_state state;
569563
struct pci_slot *pci_slot;
570564
struct hv_pcidev_description desc;
571565
bool reported_missing;
@@ -643,6 +637,11 @@ static void hv_arch_irq_unmask(struct irq_data *data)
643637
pbus = pdev->bus;
644638
hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
645639
int_desc = data->chip_data;
640+
if (!int_desc) {
641+
dev_warn(&hbus->hdev->device, "%s() can not unmask irq %u\n",
642+
__func__, data->irq);
643+
return;
644+
}
646645

647646
spin_lock_irqsave(&hbus->retarget_msi_interrupt_lock, flags);
648647

@@ -1911,12 +1910,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
19111910
hv_pci_onchannelcallback(hbus);
19121911
spin_unlock_irqrestore(&channel->sched_lock, flags);
19131912

1914-
if (hpdev->state == hv_pcichild_ejecting) {
1915-
dev_err_once(&hbus->hdev->device,
1916-
"the device is being ejected\n");
1917-
goto enable_tasklet;
1918-
}
1919-
19201913
udelay(100);
19211914
}
19221915

@@ -2522,6 +2515,8 @@ static void pci_devices_present_work(struct work_struct *work)
25222515
if (!dr)
25232516
return;
25242517

2518+
mutex_lock(&hbus->state_lock);
2519+
25252520
/* First, mark all existing children as reported missing. */
25262521
spin_lock_irqsave(&hbus->device_list_lock, flags);
25272522
list_for_each_entry(hpdev, &hbus->children, list_entry) {
@@ -2603,6 +2598,8 @@ static void pci_devices_present_work(struct work_struct *work)
26032598
break;
26042599
}
26052600

2601+
mutex_unlock(&hbus->state_lock);
2602+
26062603
kfree(dr);
26072604
}
26082605

@@ -2751,7 +2748,7 @@ static void hv_eject_device_work(struct work_struct *work)
27512748
hpdev = container_of(work, struct hv_pci_dev, wrk);
27522749
hbus = hpdev->hbus;
27532750

2754-
WARN_ON(hpdev->state != hv_pcichild_ejecting);
2751+
mutex_lock(&hbus->state_lock);
27552752

27562753
/*
27572754
* Ejection can come before or after the PCI bus has been set up, so
@@ -2789,6 +2786,8 @@ static void hv_eject_device_work(struct work_struct *work)
27892786
put_pcichild(hpdev);
27902787
put_pcichild(hpdev);
27912788
/* hpdev has been freed. Do not use it any more. */
2789+
2790+
mutex_unlock(&hbus->state_lock);
27922791
}
27932792

27942793
/**
@@ -2809,7 +2808,6 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
28092808
return;
28102809
}
28112810

2812-
hpdev->state = hv_pcichild_ejecting;
28132811
get_pcichild(hpdev);
28142812
INIT_WORK(&hpdev->wrk, hv_eject_device_work);
28152813
queue_work(hbus->wq, &hpdev->wrk);
@@ -3238,8 +3236,10 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
32383236
struct pci_bus_d0_entry *d0_entry;
32393237
struct hv_pci_compl comp_pkt;
32403238
struct pci_packet *pkt;
3239+
bool retry = true;
32413240
int ret;
32423241

3242+
enter_d0_retry:
32433243
/*
32443244
* Tell the host that the bus is ready to use, and moved into the
32453245
* powered-on state. This includes telling the host which region
@@ -3266,6 +3266,38 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
32663266
if (ret)
32673267
goto exit;
32683268

3269+
/*
3270+
* In certain case (Kdump) the pci device of interest was
3271+
* not cleanly shut down and resource is still held on host
3272+
* side, the host could return invalid device status.
3273+
* We need to explicitly request host to release the resource
3274+
* and try to enter D0 again.
3275+
*/
3276+
if (comp_pkt.completion_status < 0 && retry) {
3277+
retry = false;
3278+
3279+
dev_err(&hdev->device, "Retrying D0 Entry\n");
3280+
3281+
/*
3282+
* Hv_pci_bus_exit() calls hv_send_resource_released()
3283+
* to free up resources of its child devices.
3284+
* In the kdump kernel we need to set the
3285+
* wslot_res_allocated to 255 so it scans all child
3286+
* devices to release resources allocated in the
3287+
* normal kernel before panic happened.
3288+
*/
3289+
hbus->wslot_res_allocated = 255;
3290+
3291+
ret = hv_pci_bus_exit(hdev, true);
3292+
3293+
if (ret == 0) {
3294+
kfree(pkt);
3295+
goto enter_d0_retry;
3296+
}
3297+
dev_err(&hdev->device,
3298+
"Retrying D0 failed with ret %d\n", ret);
3299+
}
3300+
32693301
if (comp_pkt.completion_status < 0) {
32703302
dev_err(&hdev->device,
32713303
"PCI Pass-through VSP failed D0 Entry with status %x\n",
@@ -3308,6 +3340,24 @@ static int hv_pci_query_relations(struct hv_device *hdev)
33083340
if (!ret)
33093341
ret = wait_for_response(hdev, &comp);
33103342

3343+
/*
3344+
* In the case of fast device addition/removal, it's possible that
3345+
* vmbus_sendpacket() or wait_for_response() returns -ENODEV but we
3346+
* already got a PCI_BUS_RELATIONS* message from the host and the
3347+
* channel callback already scheduled a work to hbus->wq, which can be
3348+
* running pci_devices_present_work() -> survey_child_resources() ->
3349+
* complete(&hbus->survey_event), even after hv_pci_query_relations()
3350+
* exits and the stack variable 'comp' is no longer valid; as a result,
3351+
* a hang or a page fault may happen when the complete() calls
3352+
* raw_spin_lock_irqsave(). Flush hbus->wq before we exit from
3353+
* hv_pci_query_relations() to avoid the issues. Note: if 'ret' is
3354+
* -ENODEV, there can't be any more work item scheduled to hbus->wq
3355+
* after the flush_workqueue(): see vmbus_onoffer_rescind() ->
3356+
* vmbus_reset_channel_cb(), vmbus_rescind_cleanup() ->
3357+
* channel->rescind = true.
3358+
*/
3359+
flush_workqueue(hbus->wq);
3360+
33113361
return ret;
33123362
}
33133363

@@ -3493,7 +3543,6 @@ static int hv_pci_probe(struct hv_device *hdev,
34933543
struct hv_pcibus_device *hbus;
34943544
u16 dom_req, dom;
34953545
char *name;
3496-
bool enter_d0_retry = true;
34973546
int ret;
34983547

34993548
/*
@@ -3529,6 +3578,7 @@ static int hv_pci_probe(struct hv_device *hdev,
35293578
return -ENOMEM;
35303579

35313580
hbus->bridge = bridge;
3581+
mutex_init(&hbus->state_lock);
35323582
hbus->state = hv_pcibus_init;
35333583
hbus->wslot_res_allocated = -1;
35343584

@@ -3633,49 +3683,15 @@ static int hv_pci_probe(struct hv_device *hdev,
36333683
if (ret)
36343684
goto free_fwnode;
36353685

3636-
retry:
36373686
ret = hv_pci_query_relations(hdev);
36383687
if (ret)
36393688
goto free_irq_domain;
36403689

3641-
ret = hv_pci_enter_d0(hdev);
3642-
/*
3643-
* In certain case (Kdump) the pci device of interest was
3644-
* not cleanly shut down and resource is still held on host
3645-
* side, the host could return invalid device status.
3646-
* We need to explicitly request host to release the resource
3647-
* and try to enter D0 again.
3648-
* Since the hv_pci_bus_exit() call releases structures
3649-
* of all its child devices, we need to start the retry from
3650-
* hv_pci_query_relations() call, requesting host to send
3651-
* the synchronous child device relations message before this
3652-
* information is needed in hv_send_resources_allocated()
3653-
* call later.
3654-
*/
3655-
if (ret == -EPROTO && enter_d0_retry) {
3656-
enter_d0_retry = false;
3657-
3658-
dev_err(&hdev->device, "Retrying D0 Entry\n");
3659-
3660-
/*
3661-
* Hv_pci_bus_exit() calls hv_send_resources_released()
3662-
* to free up resources of its child devices.
3663-
* In the kdump kernel we need to set the
3664-
* wslot_res_allocated to 255 so it scans all child
3665-
* devices to release resources allocated in the
3666-
* normal kernel before panic happened.
3667-
*/
3668-
hbus->wslot_res_allocated = 255;
3669-
ret = hv_pci_bus_exit(hdev, true);
3670-
3671-
if (ret == 0)
3672-
goto retry;
3690+
mutex_lock(&hbus->state_lock);
36733691

3674-
dev_err(&hdev->device,
3675-
"Retrying D0 failed with ret %d\n", ret);
3676-
}
3692+
ret = hv_pci_enter_d0(hdev);
36773693
if (ret)
3678-
goto free_irq_domain;
3694+
goto release_state_lock;
36793695

36803696
ret = hv_pci_allocate_bridge_windows(hbus);
36813697
if (ret)
@@ -3693,12 +3709,15 @@ static int hv_pci_probe(struct hv_device *hdev,
36933709
if (ret)
36943710
goto free_windows;
36953711

3712+
mutex_unlock(&hbus->state_lock);
36963713
return 0;
36973714

36983715
free_windows:
36993716
hv_pci_free_bridge_windows(hbus);
37003717
exit_d0:
37013718
(void) hv_pci_bus_exit(hdev, true);
3719+
release_state_lock:
3720+
mutex_unlock(&hbus->state_lock);
37023721
free_irq_domain:
37033722
irq_domain_remove(hbus->irq_domain);
37043723
free_fwnode:
@@ -3948,20 +3967,26 @@ static int hv_pci_resume(struct hv_device *hdev)
39483967
if (ret)
39493968
goto out;
39503969

3970+
mutex_lock(&hbus->state_lock);
3971+
39513972
ret = hv_pci_enter_d0(hdev);
39523973
if (ret)
3953-
goto out;
3974+
goto release_state_lock;
39543975

39553976
ret = hv_send_resources_allocated(hdev);
39563977
if (ret)
3957-
goto out;
3978+
goto release_state_lock;
39583979

39593980
prepopulate_bars(hbus);
39603981

39613982
hv_pci_restore_msi_state(hbus);
39623983

39633984
hbus->state = hv_pcibus_installed;
3985+
mutex_unlock(&hbus->state_lock);
39643986
return 0;
3987+
3988+
release_state_lock:
3989+
mutex_unlock(&hbus->state_lock);
39653990
out:
39663991
vmbus_close(hdev->channel);
39673992
return ret;

0 commit comments

Comments
 (0)