Skip to content

Commit f5419a4

Browse files
dmuszyns authored and herbertx committed
crypto: qat - add auto reset on error
Expose the `auto_reset` sysfs attribute to configure the driver to reset the device when a fatal error is detected. When auto reset is enabled, the driver resets the device when it detects either a heartbeat failure or a fatal error through an interrupt. This patch is based on earlier work done by Shashank Gupta. Signed-off-by: Damian Muszynski <damian.muszynski@intel.com> Reviewed-by: Ahsan Atta <ahsan.atta@intel.com> Reviewed-by: Markas Rapoportas <markas.rapoportas@intel.com> Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com> Signed-off-by: Mun Chun Yep <mun.chun.yep@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
1 parent 2aaa199 commit f5419a4

File tree

5 files changed

+69
-1
lines changed

5 files changed

+69
-1
lines changed

Documentation/ABI/testing/sysfs-driver-qat

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,3 +141,23 @@ Description:
141141
64
142142

143143
This attribute is only available for qat_4xxx devices.
144+
145+
What: /sys/bus/pci/devices/<BDF>/qat/auto_reset
146+
Date: March 2024
147+
KernelVersion: 6.8
148+
Contact: qat-linux@intel.com
149+
Description: (RW) Reports the current state of the auto reset feature
150+
for a QAT device
151+
152+
Write to the attribute to enable or disable device auto reset.
153+
154+
Device auto reset is disabled by default.
155+
156+
The values are::
157+
158+
* 1/Yy/on: auto reset enabled. If the device encounters an
159+
unrecoverable error, it will be reset automatically.
160+
* 0/Nn/off: auto reset disabled. If the device encounters an
161+
unrecoverable error, it will not be reset.
162+
163+
This attribute is only available for qat_4xxx devices.

drivers/crypto/intel/qat/qat_common/adf_accel_devices.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,7 @@ struct adf_accel_dev {
402402
struct adf_error_counters ras_errors;
403403
struct mutex state_lock; /* protect state of the device */
404404
bool is_vf;
405+
bool autoreset_on_error;
405406
u32 accel_id;
406407
};
407408
#endif

drivers/crypto/intel/qat/qat_common/adf_aer.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,14 @@ const struct pci_error_handlers adf_err_handler = {
204204
};
205205
EXPORT_SYMBOL_GPL(adf_err_handler);
206206

207+
int adf_dev_autoreset(struct adf_accel_dev *accel_dev)
208+
{
209+
if (accel_dev->autoreset_on_error)
210+
return adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_ASYNC);
211+
212+
return 0;
213+
}
214+
207215
static void adf_notify_fatal_error_worker(struct work_struct *work)
208216
{
209217
struct adf_fatal_error_data *wq_data =
@@ -215,10 +223,11 @@ static void adf_notify_fatal_error_worker(struct work_struct *work)
215223

216224
if (!accel_dev->is_vf) {
217225
/* Disable arbitration to stop processing of new requests */
218-
if (hw_device->exit_arb)
226+
if (accel_dev->autoreset_on_error && hw_device->exit_arb)
219227
hw_device->exit_arb(accel_dev);
220228
if (accel_dev->pf.vf_info)
221229
adf_pf2vf_notify_fatal_error(accel_dev);
230+
adf_dev_autoreset(accel_dev);
222231
}
223232

224233
kfree(wq_data);

drivers/crypto/intel/qat/qat_common/adf_common_drv.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev);
8787
extern const struct pci_error_handlers adf_err_handler;
8888
void adf_reset_sbr(struct adf_accel_dev *accel_dev);
8989
void adf_reset_flr(struct adf_accel_dev *accel_dev);
90+
int adf_dev_autoreset(struct adf_accel_dev *accel_dev);
9091
void adf_dev_restore(struct adf_accel_dev *accel_dev);
9192
int adf_init_aer(void);
9293
void adf_exit_aer(void);

drivers/crypto/intel/qat/qat_common/adf_sysfs.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,42 @@ static ssize_t pm_idle_enabled_store(struct device *dev, struct device_attribute
204204
}
205205
static DEVICE_ATTR_RW(pm_idle_enabled);
206206

207+
static ssize_t auto_reset_show(struct device *dev, struct device_attribute *attr,
208+
char *buf)
209+
{
210+
char *auto_reset;
211+
struct adf_accel_dev *accel_dev;
212+
213+
accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
214+
if (!accel_dev)
215+
return -EINVAL;
216+
217+
auto_reset = accel_dev->autoreset_on_error ? "on" : "off";
218+
219+
return sysfs_emit(buf, "%s\n", auto_reset);
220+
}
221+
222+
static ssize_t auto_reset_store(struct device *dev, struct device_attribute *attr,
223+
const char *buf, size_t count)
224+
{
225+
struct adf_accel_dev *accel_dev;
226+
bool enabled = false;
227+
int ret;
228+
229+
ret = kstrtobool(buf, &enabled);
230+
if (ret)
231+
return ret;
232+
233+
accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
234+
if (!accel_dev)
235+
return -EINVAL;
236+
237+
accel_dev->autoreset_on_error = enabled;
238+
239+
return count;
240+
}
241+
static DEVICE_ATTR_RW(auto_reset);
242+
207243
static DEVICE_ATTR_RW(state);
208244
static DEVICE_ATTR_RW(cfg_services);
209245

@@ -291,6 +327,7 @@ static struct attribute *qat_attrs[] = {
291327
&dev_attr_pm_idle_enabled.attr,
292328
&dev_attr_rp2srv.attr,
293329
&dev_attr_num_rps.attr,
330+
&dev_attr_auto_reset.attr,
294331
NULL,
295332
};
296333

0 commit comments

Comments
 (0)