Skip to content

Commit fdbd694

Browse files
jiadozhu authored and alexdeucher committed
drm/amdgpu/gfx9: wait for reset done before remap
There is a race condition in which the CP firmware modifies the MQD during the reset sequence after the driver has already updated it for remapping. We have to wait until CP_HQD_ACTIVE becomes false before remapping the queue. v2: fix KIQ locking (Alex) v3: fix KIQ locking harder Acked-by: Vitaly Prosyak <vitaly.prosyak@amd.com> Signed-off-by: Jiadong Zhu <Jiadong.Zhu@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent b5e1a38 commit fdbd694

File tree

1 file changed

+31
-5
lines changed

1 file changed

+31
-5
lines changed

drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

Lines changed: 31 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -7125,7 +7125,7 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
71257125
struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
71267126
struct amdgpu_ring *kiq_ring = &kiq->ring;
71277127
unsigned long flags;
7128-
int r;
7128+
int i, r;
71297129

71307130
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
71317131
return -EINVAL;
@@ -7147,9 +7147,28 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
71477147
if (r)
71487148
return r;
71497149

7150+
/* make sure dequeue is complete*/
7151+
gfx_v9_0_set_safe_mode(adev, 0);
7152+
mutex_lock(&adev->srbm_mutex);
7153+
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
7154+
for (i = 0; i < adev->usec_timeout; i++) {
7155+
if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
7156+
break;
7157+
udelay(1);
7158+
}
7159+
if (i >= adev->usec_timeout)
7160+
r = -ETIMEDOUT;
7161+
soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7162+
mutex_unlock(&adev->srbm_mutex);
7163+
gfx_v9_0_unset_safe_mode(adev, 0);
7164+
if (r) {
7165+
dev_err(adev->dev, "fail to wait on hqd deactive\n");
7166+
return r;
7167+
}
7168+
71507169
r = amdgpu_bo_reserve(ring->mqd_obj, false);
71517170
if (unlikely(r != 0)){
7152-
DRM_ERROR("fail to resv mqd_obj\n");
7171+
dev_err(adev->dev, "fail to resv mqd_obj\n");
71537172
return r;
71547173
}
71557174
r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
@@ -7159,14 +7178,21 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
71597178
ring->mqd_ptr = NULL;
71607179
}
71617180
amdgpu_bo_unreserve(ring->mqd_obj);
7162-
if (r){
7163-
DRM_ERROR("fail to unresv mqd_obj\n");
7181+
if (r) {
7182+
dev_err(adev->dev, "fail to unresv mqd_obj\n");
71647183
return r;
71657184
}
7185+
spin_lock_irqsave(&kiq->ring_lock, flags);
71667186
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
7187+
if (r) {
7188+
spin_unlock_irqrestore(&kiq->ring_lock, flags);
7189+
return -ENOMEM;
7190+
}
71677191
kiq->pmf->kiq_map_queues(kiq_ring, ring);
7192+
amdgpu_ring_commit(kiq_ring);
7193+
spin_unlock_irqrestore(&kiq->ring_lock, flags);
71687194
r = amdgpu_ring_test_ring(kiq_ring);
7169-
if (r){
7195+
if (r) {
71707196
DRM_ERROR("fail to remap queue\n");
71717197
return r;
71727198
}

0 commit comments

Comments
 (0)