Skip to content
/ linux Public

Commit b3d6ca4

Browse files
PhilipYangA authored and Sasha Levin committed
drm/amdkfd: Handle GPU reset and drain retry fault race
[ Upstream commit 5b57c3c ]

Only check and drain IH1 ring if CAM is not enabled.

If GPU is under reset, don't access IH to drain retry fault.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent 0bd968c commit b3d6ca4

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

drivers/gpu/drm/amd/amdkfd/kfd_svm.c

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
#include "amdgpu_hmm.h"
3434
#include "amdgpu.h"
3535
#include "amdgpu_xgmi.h"
36+
#include "amdgpu_reset.h"
3637
#include "kfd_priv.h"
3738
#include "kfd_svm.h"
3839
#include "kfd_migrate.h"
@@ -2343,6 +2344,9 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
23432344

23442345
pr_debug("drain retry fault gpu %d svms %p\n", i, svms);
23452346

2347+
if (!down_read_trylock(&pdd->dev->adev->reset_domain->sem))
2348+
continue;
2349+
23462350
amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
23472351
pdd->dev->adev->irq.retry_cam_enabled ?
23482352
&pdd->dev->adev->irq.ih :
@@ -2352,6 +2356,7 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
23522356
amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
23532357
&pdd->dev->adev->irq.ih_soft);
23542358

2359+
up_read(&pdd->dev->adev->reset_domain->sem);
23552360

23562361
pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);
23572362
}
@@ -2535,7 +2540,7 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,
25352540
adev = pdd->dev->adev;
25362541

25372542
/* Check and drain ih1 ring if cam not available */
2538-
if (adev->irq.ih1.ring_size) {
2543+
if (!adev->irq.retry_cam_enabled && adev->irq.ih1.ring_size) {
25392544
ih = &adev->irq.ih1;
25402545
checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
25412546
if (ih->rptr != checkpoint_wptr) {

0 commit comments

Comments
 (0)