Skip to content

Commit 105caae

Browse files
houlz0507 authored and Sasha Levin committed
accel/amdxdna: Fix command hang on suspended hardware context
[ Upstream commit 07efce5 ]

When a hardware context is suspended, the job scheduler is stopped. If a command is submitted while the context is suspended, the job is queued in the scheduler but aie2_sched_job_run() is never invoked to restart the hardware context. As a result, the command hangs.

Fix this by modifying the hardware context suspend routine to keep the job scheduler running so that queued jobs can trigger context restart properly.

Fixes: aac2430 ("accel/amdxdna: Add command execution")
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
Signed-off-by: Lizhi Hou <lizhi.hou@amd.com>
Link: https://patch.msgid.link/20260211205341.722982-1-lizhi.hou@amd.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent 9aecc37 commit 105caae

File tree

1 file changed

+11
-7
lines changed

1 file changed

+11
-7
lines changed

drivers/accel/amdxdna/aie2_ctx.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwct
5353
{
5454
drm_sched_stop(&hwctx->priv->sched, bad_job);
5555
aie2_destroy_context(xdna->dev_handle, hwctx);
56+
drm_sched_start(&hwctx->priv->sched, 0);
5657
}
5758

5859
static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
@@ -80,7 +81,6 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw
8081
}
8182

8283
out:
83-
drm_sched_start(&hwctx->priv->sched, 0);
8484
XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
8585
return ret;
8686
}
@@ -297,19 +297,23 @@ aie2_sched_job_run(struct drm_sched_job *sched_job)
297297
struct dma_fence *fence;
298298
int ret;
299299

300-
if (!hwctx->priv->mbox_chann)
300+
ret = amdxdna_pm_resume_get(hwctx->client->xdna);
301+
if (ret)
302+
return NULL;
303+
304+
if (!hwctx->priv->mbox_chann) {
305+
amdxdna_pm_suspend_put(hwctx->client->xdna);
301306
return NULL;
307+
}
302308

303-
if (!mmget_not_zero(job->mm))
309+
if (!mmget_not_zero(job->mm)) {
310+
amdxdna_pm_suspend_put(hwctx->client->xdna);
304311
return ERR_PTR(-ESRCH);
312+
}
305313

306314
kref_get(&job->refcnt);
307315
fence = dma_fence_get(job->fence);
308316

309-
ret = amdxdna_pm_resume_get(hwctx->client->xdna);
310-
if (ret)
311-
goto out;
312-
313317
if (job->drv_cmd) {
314318
switch (job->drv_cmd->opcode) {
315319
case SYNC_DEBUG_BO:

0 commit comments

Comments
 (0)