Skip to content

Commit d40f275

Browse files
committed
drm/xe: Move survivability entirely to xe_pci
There's an odd split between xe_pci.c and xe_device.c with respect to xe_survivability: it's initialized by xe_device, but then finalized by xe_pci. Move it entirely to the outer layer, xe_pci, so that xe_pci controls the whole flow. This also allows us to stop ignoring some of the errors. E.g.: if there's an -ENOMEM, the driver shouldn't continue as if survivability mode had been enabled. One change worth mentioning is that if "wait for lmem" fails, the driver will now also check the pcode status to decide whether or not it should enter survivability mode, which it was not doing before. The bit from pcode used for that decision should remain the same after lmem fails initialization, so it should be fine. Cc: Riana Tauro <riana.tauro@intel.com> Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com> Reviewed-by: Riana Tauro <riana.tauro@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20250222001051.3012936-9-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
1 parent d41d048 commit d40f275

File tree

5 files changed

+49
-55
lines changed

5 files changed

+49
-55
lines changed

drivers/gpu/drm/xe/xe_device.c

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@
5353
#include "xe_pxp.h"
5454
#include "xe_query.h"
5555
#include "xe_sriov.h"
56-
#include "xe_survivability_mode.h"
5756
#include "xe_tile.h"
5857
#include "xe_ttm_stolen_mgr.h"
5958
#include "xe_ttm_sys_mgr.h"
@@ -695,12 +694,8 @@ int xe_device_probe_early(struct xe_device *xe)
695694
update_device_info(xe);
696695

697696
err = xe_pcode_probe_early(xe);
698-
if (err) {
699-
if (xe_survivability_mode_required(xe))
700-
xe_survivability_mode_init(xe);
701-
697+
if (err)
702698
return err;
703-
}
704699

705700
err = wait_for_lmem_ready(xe);
706701
if (err)

drivers/gpu/drm/xe/xe_heci_gsc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ void xe_heci_gsc_init(struct xe_device *xe)
201201
return;
202202
}
203203

204-
if (!def->use_polling && !xe_survivability_mode_enabled(xe)) {
204+
if (!def->use_polling && !xe_survivability_mode_is_enabled(xe)) {
205205
ret = heci_gsc_irq_setup(xe);
206206
if (ret)
207207
goto fail;

drivers/gpu/drm/xe/xe_pci.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -770,8 +770,8 @@ static void xe_pci_remove(struct pci_dev *pdev)
770770
if (IS_SRIOV_PF(xe))
771771
xe_pci_sriov_configure(pdev, 0);
772772

773-
if (xe_survivability_mode_enabled(xe))
774-
return xe_survivability_mode_remove(xe);
773+
if (xe_survivability_mode_is_enabled(xe))
774+
return;
775775

776776
xe_device_remove(xe);
777777
xe_pm_runtime_fini(xe);
@@ -846,13 +846,14 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
846846
err = xe_device_probe_early(xe);
847847

848848
/*
849-
* In Boot Survivability mode, no drm card is exposed
850-
* and driver is loaded with bare minimum to allow
851-
* for firmware to be flashed through mei. Return
852-
* success if survivability mode is enabled.
849+
* In Boot Survivability mode, no drm card is exposed and driver is
850+
* loaded with bare minimum to allow for firmware to be flashed through
851+
* mei. If early probe fails, check if survivability mode is flagged by
852+
* HW to be enabled. In that case enable it and return success.
853853
*/
854854
if (err) {
855-
if (xe_survivability_mode_enabled(xe))
855+
if (xe_survivability_mode_required(xe) &&
856+
xe_survivability_mode_enable(xe))
856857
return 0;
857858

858859
return err;
@@ -946,7 +947,7 @@ static int xe_pci_suspend(struct device *dev)
946947
struct xe_device *xe = pdev_to_xe_device(pdev);
947948
int err;
948949

949-
if (xe_survivability_mode_enabled(xe))
950+
if (xe_survivability_mode_is_enabled(xe))
950951
return -EBUSY;
951952

952953
err = xe_pm_suspend(xe);

drivers/gpu/drm/xe/xe_survivability_mode.c

Lines changed: 36 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -127,40 +127,54 @@ static ssize_t survivability_mode_show(struct device *dev,
127127

128128
static DEVICE_ATTR_ADMIN_RO(survivability_mode);
129129

130-
static void enable_survivability_mode(struct pci_dev *pdev)
130+
static void xe_survivability_mode_fini(void *arg)
131+
{
132+
struct xe_device *xe = arg;
133+
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
134+
struct device *dev = &pdev->dev;
135+
136+
sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
137+
xe_heci_gsc_fini(xe);
138+
}
139+
140+
static int enable_survivability_mode(struct pci_dev *pdev)
131141
{
132142
struct device *dev = &pdev->dev;
133143
struct xe_device *xe = pdev_to_xe_device(pdev);
134144
struct xe_survivability *survivability = &xe->survivability;
135145
int ret = 0;
136146

137-
/* set survivability mode */
138-
survivability->mode = true;
139-
dev_info(dev, "In Survivability Mode\n");
140-
141147
/* create survivability mode sysfs */
142148
ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr);
143149
if (ret) {
144150
dev_warn(dev, "Failed to create survivability sysfs files\n");
145-
return;
151+
return ret;
146152
}
147153

154+
ret = devm_add_action_or_reset(xe->drm.dev,
155+
xe_survivability_mode_fini, xe);
156+
if (ret)
157+
return ret;
158+
148159
xe_heci_gsc_init(xe);
149160

150161
xe_vsec_init(xe);
162+
163+
survivability->mode = true;
164+
dev_err(dev, "In Survivability Mode\n");
165+
166+
return 0;
151167
}
152168

153169
/**
154-
* xe_survivability_mode_enabled - check if survivability mode is enabled
170+
* xe_survivability_mode_is_enabled - check if survivability mode is enabled
155171
* @xe: xe device instance
156172
*
157173
* Returns true if in survivability mode, false otherwise
158174
*/
159-
bool xe_survivability_mode_enabled(struct xe_device *xe)
175+
bool xe_survivability_mode_is_enabled(struct xe_device *xe)
160176
{
161-
struct xe_survivability *survivability = &xe->survivability;
162-
163-
return survivability->mode;
177+
return xe->survivability.mode;
164178
}
165179

166180
/**
@@ -183,44 +197,30 @@ bool xe_survivability_mode_required(struct xe_device *xe)
183197
data = xe_mmio_read32(mmio, PCODE_SCRATCH(0));
184198
survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data);
185199

186-
return (survivability->boot_status == NON_CRITICAL_FAILURE ||
187-
survivability->boot_status == CRITICAL_FAILURE);
200+
return survivability->boot_status == NON_CRITICAL_FAILURE ||
201+
survivability->boot_status == CRITICAL_FAILURE;
188202
}
189203

190204
/**
191-
* xe_survivability_mode_remove - remove survivability mode
205+
* xe_survivability_mode_enable - Initialize and enable the survivability mode
192206
* @xe: xe device instance
193207
*
194-
* clean up sysfs entries of survivability mode
195-
*/
196-
void xe_survivability_mode_remove(struct xe_device *xe)
197-
{
198-
struct xe_survivability *survivability = &xe->survivability;
199-
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
200-
struct device *dev = &pdev->dev;
201-
202-
sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
203-
xe_heci_gsc_fini(xe);
204-
kfree(survivability->info);
205-
}
206-
207-
/**
208-
* xe_survivability_mode_init - Initialize the survivability mode
209-
* @xe: xe device instance
208+
* Initialize survivability information and enable survivability mode
210209
*
211-
* Initializes survivability information and enables survivability mode
210+
* Return: 0 for success, negative error code otherwise.
212211
*/
213-
void xe_survivability_mode_init(struct xe_device *xe)
212+
int xe_survivability_mode_enable(struct xe_device *xe)
214213
{
215214
struct xe_survivability *survivability = &xe->survivability;
216215
struct xe_survivability_info *info;
217216
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
218217

219218
survivability->size = MAX_SCRATCH_MMIO;
220219

221-
info = kcalloc(survivability->size, sizeof(*info), GFP_KERNEL);
220+
info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),
221+
GFP_KERNEL);
222222
if (!info)
223-
return;
223+
return -ENOMEM;
224224

225225
survivability->info = info;
226226

@@ -229,9 +229,8 @@ void xe_survivability_mode_init(struct xe_device *xe)
229229
/* Only log debug information and exit if it is a critical failure */
230230
if (survivability->boot_status == CRITICAL_FAILURE) {
231231
log_survivability_info(pdev);
232-
kfree(survivability->info);
233-
return;
232+
return -ENXIO;
234233
}
235234

236-
enable_survivability_mode(pdev);
235+
return enable_survivability_mode(pdev);
237236
}

drivers/gpu/drm/xe/xe_survivability_mode.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@
1010

1111
struct xe_device;
1212

13-
void xe_survivability_mode_init(struct xe_device *xe);
14-
void xe_survivability_mode_remove(struct xe_device *xe);
15-
bool xe_survivability_mode_enabled(struct xe_device *xe);
13+
int xe_survivability_mode_enable(struct xe_device *xe);
14+
bool xe_survivability_mode_is_enabled(struct xe_device *xe);
1615
bool xe_survivability_mode_required(struct xe_device *xe);
1716

1817
#endif /* _XE_SURVIVABILITY_MODE_H_ */

0 commit comments

Comments
 (0)