Skip to content

Commit 097af47

Browse files
jiadozhu authored and alexdeucher committed
drm/amdgpu/gfx10: wait for reset done before remap
There is a race condition where CP firmware modifies the MQD in the reset sequence after the driver updates it for remapping. We have to wait until CP_HQD_ACTIVE becomes false, then remap the queue.

v2: fix KIQ locking (Alex)
v3: fix KIQ locking harder (Jessie)

Acked-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Signed-off-by: Jiadong Zhu <Jiadong.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 2f3806f commit 097af47

File tree

1 file changed

+30
-11
lines changed

1 file changed

+30
-11
lines changed

drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c

Lines changed: 30 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -9475,30 +9475,50 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
94759475
struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
94769476
struct amdgpu_ring *kiq_ring = &kiq->ring;
94779477
unsigned long flags;
9478-
int r;
9478+
int i, r;
94799479

94809480
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
94819481
return -EINVAL;
94829482

94839483
spin_lock_irqsave(&kiq->ring_lock, flags);
94849484

94859485
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
9486-
r = -ENOMEM;
9487-
goto out;
9486+
spin_unlock_irqrestore(&kiq->ring_lock, flags);
9487+
return -ENOMEM;
94889488
}
94899489

94909490
kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
94919491
0, 0);
94929492
amdgpu_ring_commit(kiq_ring);
9493+
spin_unlock_irqrestore(&kiq->ring_lock, flags);
94939494

94949495
r = amdgpu_ring_test_ring(kiq_ring);
94959496
if (r)
9496-
goto out;
9497+
return r;
9498+
9499+
/* make sure dequeue is complete*/
9500+
gfx_v10_0_set_safe_mode(adev, 0);
9501+
mutex_lock(&adev->srbm_mutex);
9502+
nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
9503+
for (i = 0; i < adev->usec_timeout; i++) {
9504+
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
9505+
break;
9506+
udelay(1);
9507+
}
9508+
if (i >= adev->usec_timeout)
9509+
r = -ETIMEDOUT;
9510+
nv_grbm_select(adev, 0, 0, 0, 0);
9511+
mutex_unlock(&adev->srbm_mutex);
9512+
gfx_v10_0_unset_safe_mode(adev, 0);
9513+
if (r) {
9514+
dev_err(adev->dev, "fail to wait on hqd deactivate\n");
9515+
return r;
9516+
}
94979517

94989518
r = amdgpu_bo_reserve(ring->mqd_obj, false);
94999519
if (unlikely(r != 0)) {
95009520
dev_err(adev->dev, "fail to resv mqd_obj\n");
9501-
goto out;
9521+
return r;
95029522
}
95039523
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
95049524
if (!r) {
@@ -9509,20 +9529,19 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring,
95099529
amdgpu_bo_unreserve(ring->mqd_obj);
95109530
if (r) {
95119531
dev_err(adev->dev, "fail to unresv mqd_obj\n");
9512-
goto out;
9532+
return r;
95139533
}
95149534

9535+
spin_lock_irqsave(&kiq->ring_lock, flags);
95159536
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) {
9516-
r = -ENOMEM;
9517-
goto out;
9537+
spin_unlock_irqrestore(&kiq->ring_lock, flags);
9538+
return -ENOMEM;
95189539
}
95199540
kiq->pmf->kiq_map_queues(kiq_ring, ring);
95209541
amdgpu_ring_commit(kiq_ring);
9542+
spin_unlock_irqrestore(&kiq->ring_lock, flags);
95219543

95229544
r = amdgpu_ring_test_ring(kiq_ring);
9523-
9524-
out:
9525-
spin_unlock_irqrestore(&kiq->ring_lock, flags);
95269545
if (r)
95279546
return r;
95289547

0 commit comments

Comments
 (0)