Skip to content

Commit bc417e5

Browse files
rtauro1895 authored and lucasdemarchi committed
drm/xe: Enable configfs support for survivability mode
Enable survivability mode if supported and the configfs attribute is set. Enabling survivability mode manually is useful in cases where pcode does not detect failure, for validation, and for IFR (in-field repair).

To set the configfs survivability mode attribute for a device:

echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode

The card enters survivability mode if supported.

v2: add a log if survivability mode is enabled for unsupported platforms (Rodrigo)

Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://lore.kernel.org/r/20250407051414.1651616-4-riana.tauro@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
1 parent 77052ab commit bc417e5

File tree

6 files changed

+108
-19
lines changed

6 files changed

+108
-19
lines changed

drivers/gpu/drm/xe/xe_configfs.c

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,68 @@ static struct configfs_subsystem xe_configfs = {
164164
},
165165
};
166166

167+
static struct xe_config_device *configfs_find_group(struct pci_dev *pdev)
168+
{
169+
struct config_item *item;
170+
char name[64];
171+
172+
snprintf(name, sizeof(name), "%04x:%02x:%02x.%x", pci_domain_nr(pdev->bus),
173+
pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
174+
175+
mutex_lock(&xe_configfs.su_mutex);
176+
item = config_group_find_item(&xe_configfs.su_group, name);
177+
mutex_unlock(&xe_configfs.su_mutex);
178+
179+
if (!item)
180+
return NULL;
181+
182+
return to_xe_config_device(item);
183+
}
184+
185+
/**
186+
* xe_configfs_get_survivability_mode - get configfs survivability mode attribute
187+
* @pdev: pci device
188+
*
189+
* find the configfs group that belongs to the pci device and return
190+
* the survivability mode attribute
191+
*
192+
* Return: survivability mode if config group is found, false otherwise
193+
*/
194+
bool xe_configfs_get_survivability_mode(struct pci_dev *pdev)
195+
{
196+
struct xe_config_device *dev = configfs_find_group(pdev);
197+
bool mode;
198+
199+
if (!dev)
200+
return false;
201+
202+
mode = dev->survivability_mode;
203+
config_item_put(&dev->group.cg_item);
204+
205+
return mode;
206+
}
207+
208+
/**
209+
* xe_configfs_clear_survivability_mode - clear configfs survivability mode attribute
210+
* @pdev: pci device
211+
*
212+
* find the configfs group that belongs to the pci device and clear survivability
213+
* mode attribute
214+
*/
215+
void xe_configfs_clear_survivability_mode(struct pci_dev *pdev)
216+
{
217+
struct xe_config_device *dev = configfs_find_group(pdev);
218+
219+
if (!dev)
220+
return;
221+
222+
mutex_lock(&dev->lock);
223+
dev->survivability_mode = 0;
224+
mutex_unlock(&dev->lock);
225+
226+
config_item_put(&dev->group.cg_item);
227+
}
228+
167229
int __init xe_configfs_init(void)
168230
{
169231
struct config_group *root = &xe_configfs.su_group;

drivers/gpu/drm/xe/xe_configfs.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,20 @@
55
#ifndef _XE_CONFIGFS_H_
66
#define _XE_CONFIGFS_H_
77

8+
#include <linux/types.h>
9+
10+
struct pci_dev;
11+
812
#if IS_ENABLED(CONFIG_CONFIGFS_FS)
913
int xe_configfs_init(void);
1014
void xe_configfs_exit(void);
15+
bool xe_configfs_get_survivability_mode(struct pci_dev *pdev);
16+
void xe_configfs_clear_survivability_mode(struct pci_dev *pdev);
1117
#else
1218
static inline int xe_configfs_init(void) { return 0; };
1319
static inline void xe_configfs_exit(void) {};
20+
static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; };
21+
static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) {};
1422
#endif
1523

1624
#endif

drivers/gpu/drm/xe/xe_device.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -712,7 +712,7 @@ int xe_device_probe_early(struct xe_device *xe)
712712
sriov_update_device_info(xe);
713713

714714
err = xe_pcode_probe_early(xe);
715-
if (err) {
715+
if (err || xe_survivability_mode_is_requested(xe)) {
716716
int save_err = err;
717717

718718
/*

drivers/gpu/drm/xe/xe_pci.c

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -812,18 +812,17 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
812812
return err;
813813

814814
err = xe_device_probe_early(xe);
815-
if (err) {
816-
/*
817-
* In Boot Survivability mode, no drm card is exposed and driver
818-
* is loaded with bare minimum to allow for firmware to be
819-
* flashed through mei. If early probe failed, but it managed to
820-
* enable survivability mode, return success.
821-
*/
822-
if (xe_survivability_mode_is_enabled(xe))
823-
return 0;
815+
/*
816+
* In Boot Survivability mode, no drm card is exposed and driver
817+
* is loaded with bare minimum to allow for firmware to be
818+
* flashed through mei. Return success, if survivability mode
819+
* is enabled due to pcode failure or configfs being set
820+
*/
821+
if (xe_survivability_mode_is_enabled(xe))
822+
return 0;
824823

824+
if (err)
825825
return err;
826-
}
827826

828827
err = xe_info_init(xe, desc);
829828
if (err)

drivers/gpu/drm/xe/xe_survivability_mode.c

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <linux/pci.h>
1111
#include <linux/sysfs.h>
1212

13+
#include "xe_configfs.h"
1314
#include "xe_device.h"
1415
#include "xe_gt.h"
1516
#include "xe_heci_gsc.h"
@@ -145,6 +146,7 @@ static void xe_survivability_mode_fini(void *arg)
145146
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
146147
struct device *dev = &pdev->dev;
147148

149+
xe_configfs_clear_survivability_mode(pdev);
148150
sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
149151
}
150152

@@ -198,23 +200,40 @@ bool xe_survivability_mode_is_enabled(struct xe_device *xe)
198200
return xe->survivability.mode;
199201
}
200202

201-
/*
202-
* survivability_mode_requested - check if it's possible to enable
203-
* survivability mode and that was requested by firmware
203+
/**
204+
* xe_survivability_mode_is_requested - check if it's possible to enable survivability
205+
* mode that was requested by firmware or userspace
206+
* @xe: xe device instance
204207
*
205-
* This function reads the boot status from Pcode.
208+
* This function reads configfs and boot status from Pcode.
206209
*
207210
* Return: true if platform support is available and boot status indicates
208-
* failure, false otherwise.
211+
* failure or if survivability mode is requested, false otherwise.
209212
*/
210-
static bool survivability_mode_requested(struct xe_device *xe)
213+
bool xe_survivability_mode_is_requested(struct xe_device *xe)
211214
{
212215
struct xe_survivability *survivability = &xe->survivability;
213216
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
217+
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
214218
u32 data;
219+
bool survivability_mode;
220+
221+
if (!IS_DGFX(xe) || IS_SRIOV_VF(xe))
222+
return false;
223+
224+
survivability_mode = xe_configfs_get_survivability_mode(pdev);
215225

216-
if (!IS_DGFX(xe) || xe->info.platform < XE_BATTLEMAGE || IS_SRIOV_VF(xe))
226+
if (xe->info.platform < XE_BATTLEMAGE) {
227+
if (survivability_mode) {
228+
dev_err(&pdev->dev, "Survivability Mode is not supported on this card\n");
229+
xe_configfs_clear_survivability_mode(pdev);
230+
}
217231
return false;
232+
}
233+
234+
/* Enable survivability mode if set via configfs */
235+
if (survivability_mode)
236+
return true;
218237

219238
data = xe_mmio_read32(mmio, PCODE_SCRATCH(0));
220239
survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data);
@@ -238,7 +257,7 @@ int xe_survivability_mode_enable(struct xe_device *xe)
238257
struct xe_survivability_info *info;
239258
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
240259

241-
if (!survivability_mode_requested(xe))
260+
if (!xe_survivability_mode_is_requested(xe))
242261
return 0;
243262

244263
survivability->size = MAX_SCRATCH_MMIO;

drivers/gpu/drm/xe/xe_survivability_mode.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,6 @@ struct xe_device;
1212

1313
int xe_survivability_mode_enable(struct xe_device *xe);
1414
bool xe_survivability_mode_is_enabled(struct xe_device *xe);
15+
bool xe_survivability_mode_is_requested(struct xe_device *xe);
1516

1617
#endif /* _XE_SURVIVABILITY_MODE_H_ */

0 commit comments

Comments
 (0)