Skip to content

Commit 797b0e9

Browse files
rtauro1895 authored and rodrigovivi committed
drm/xe: re-order lmem init check and wait for initialization to complete
The lmem init check should be done only after the pcode initialization status is complete. Move the lmem init check after the pcode status check. Also wait for a short while after the pcode status check to allow the task to complete; failing to do so can lead to aborting the module load, leaving the system unusable. Wait until the lmem initialization is complete, within a timeout (60s) or until the user aborts.

v2: use bool as return type; re-order the code comment (Rodrigo); add comment for deferring probe (Himal)
v3: rebase

Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240410085005.1126343-3-riana.tauro@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
1 parent 933fd5f commit 797b0e9

File tree

3 files changed

+59
-28
lines changed

3 files changed

+59
-28
lines changed

drivers/gpu/drm/xe/xe_device.c

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -422,12 +422,68 @@ static int xe_set_dma_info(struct xe_device *xe)
422422
return err;
423423
}
424424

425+
/*
 * Report whether the LMEM_INIT field of GU_CNTL is set.
 *
 * Reads GU_CNTL through @gt, masks it with LMEM_INIT and returns true when
 * the masked value is non-zero, false otherwise. Callers in this file use
 * it as the "local memory has been initialized" indication.
 */
static bool verify_lmem_ready(struct xe_gt *gt)
426+
{
427+
u32 val = xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT; /* keep only the LMEM_INIT field */
428+
429+
return !!val; /* collapse the masked value to a strict true/false */
430+
}
431+
432+
/*
 * Wait for local memory (lmem) initialization to be reported as complete.
 *
 * Returns 0 immediately when the device is not DGFX, when it is an SR-IOV
 * VF, or when verify_lmem_ready() already reports true on the root MMIO GT.
 * Otherwise polls verify_lmem_ready() every 20 ms.
 *
 * Return: 0 once lmem is ready, -EINTR if a signal is pending for the
 * current task while waiting, -EPROBE_DEFER if lmem is still not ready
 * after the 60 second timeout.
 */
static int wait_for_lmem_ready(struct xe_device *xe)
433+
{
434+
struct xe_gt *gt = xe_root_mmio_gt(xe);
435+
unsigned long timeout, start;
436+
437+
if (!IS_DGFX(xe))
438+
return 0;
439+
440+
if (IS_SRIOV_VF(xe))
441+
return 0;
442+
443+
if (verify_lmem_ready(gt))
444+
return 0;
445+
446+
drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
447+
448+
start = jiffies;
449+
timeout = start + msecs_to_jiffies(60 * 1000); /* 60 sec! */
450+
451+
do {
452+
if (signal_pending(current)) /* let the user abort the wait */
453+
return -EINTR;
454+
455+
/*
456+
* The boot firmware initializes local memory and
457+
* assesses its health. If memory training fails,
458+
* the punit will have been instructed to keep the GT powered
459+
* down; we won't be able to communicate with it.
460+
*
461+
* If the status check is done before punit updates the register,
462+
* it can lead to the system being unusable.
463+
* Use a timeout and defer the probe to prevent this.
464+
*/
465+
if (time_after(jiffies, timeout)) {
466+
drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
467+
return -EPROBE_DEFER;
468+
}
469+
470+
msleep(20); /* poll interval between readiness checks */
471+
472+
} while (!verify_lmem_ready(gt));
473+
474+
drm_dbg(&xe->drm, "lmem ready after %ums\n",
475+
jiffies_to_msecs(jiffies - start));
476+
477+
return 0;
478+
}
479+
425480
/**
426481
* xe_device_probe_early: Device early probe
427482
* @xe: xe device instance
428483
*
429484
* Initialize MMIO resources that don't require any
430-
* knowledge about tile count. Also initialize pcode
485+
* knowledge about tile count. Also initialize pcode and
486+
* check vram initialization on root tile.
431487
*
432488
* Return: 0 on success, error code on failure
433489
*/
@@ -441,11 +497,11 @@ int xe_device_probe_early(struct xe_device *xe)
441497

442498
xe_sriov_probe_early(xe);
443499

444-
err = xe_mmio_verify_vram(xe);
500+
err = xe_pcode_probe_early(xe);
445501
if (err)
446502
return err;
447503

448-
err = xe_pcode_probe_early(xe);
504+
err = wait_for_lmem_ready(xe);
449505
if (err)
450506
return err;
451507

drivers/gpu/drm/xe/xe_mmio.c

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -420,30 +420,6 @@ int xe_mmio_init(struct xe_device *xe)
420420
return drmm_add_action_or_reset(&xe->drm, mmio_fini, xe);
421421
}
422422

423-
/*
 * Single-shot check that VRAM has been initialized (removed by this commit).
 *
 * Returns 0 when the device is not DGFX or is an SR-IOV VF. Otherwise reads
 * GU_CNTL on the root MMIO GT once: if the LMEM_INIT field is clear, logs an
 * error and returns -ENODEV; if it is set, returns 0. There is no retry or
 * wait in this function.
 */
int xe_mmio_verify_vram(struct xe_device *xe)
424-
{
425-
struct xe_gt *gt = xe_root_mmio_gt(xe);
426-
427-
if (!IS_DGFX(xe))
428-
return 0;
429-
430-
if (IS_SRIOV_VF(xe))
431-
return 0;
432-
433-
/*
434-
* The boot firmware initializes local memory and assesses its health.
435-
* If memory training fails, the punit will have been instructed to
436-
* keep the GT powered down; we won't be able to communicate with it
437-
* and we should not continue with driver initialization.
438-
*/
439-
if (!(xe_mmio_read32(gt, GU_CNTL) & LMEM_INIT)) {
440-
drm_err(&xe->drm, "VRAM not initialized by firmware\n");
441-
return -ENODEV;
442-
}
443-
444-
return 0;
445-
}
446-
447423
u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
448424
{
449425
struct xe_tile *tile = gt_to_tile(gt);

drivers/gpu/drm/xe/xe_mmio.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ struct xe_device;
2121
#define LMEM_BAR 2
2222

2323
int xe_mmio_init(struct xe_device *xe);
24-
int xe_mmio_verify_vram(struct xe_device *xe);
2524
void xe_mmio_probe_tiles(struct xe_device *xe);
2625

2726
u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg);

0 commit comments

Comments
 (0)