Skip to content

Commit

Permalink
remoteproc: add rproc_report_crash function to notify rproc crashes
Browse files Browse the repository at this point in the history
Allow low-level remoteproc drivers to report rproc crashes by exporting
a new rproc_report_crash() function (invoking this from non-rproc drivers
is probably wrong, and should be carefully scrutinized if ever needed).

rproc_report_crash() can be called from any context; it offloads the
tasks of handling the crash to a separate thread.

Handling the crash from a separate thread is helpful because:
- Ability to invoke rproc_report_crash() from atomic context: many crashes
  trigger an interrupt, so this function can be called directly from ISR
  context.
- Avoiding deadlocks which could happen if rproc_report_crash() is called
  from a function which indirectly holds the rproc lock.

Handling the crash might involve:
- Remoteproc register dump
- Remoteproc stack dump
- Remoteproc core dump
- Saving Remoteproc traces so they can be read after the crash
- Resetting the remoteproc in order to make it functional again (hard recovery)

Right now, we only print the crash type which was detected, and only the
mmufault type is supported. Remoteproc low-level drivers can add more types
when needed.

Signed-off-by: Fernando Guzman Lugo <fernando.lugo@ti.com>
[ohad: some commentary, white space and commit log changes]
Signed-off-by: Ohad Ben-Cohen <ohad@wizery.com>
  • Loading branch information
Fernando Guzman Lugo authored and ohadbc committed Sep 18, 2012
1 parent a1a7e0a commit 8afd519
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 4 deletions.
7 changes: 7 additions & 0 deletions Documentation/remoteproc.txt
Expand Up @@ -129,6 +129,13 @@ int dummy_rproc_example(struct rproc *my_rproc)

Returns 0 on success and -EINVAL if @rproc isn't valid.

void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
- Report a crash in a remoteproc
This function must be called every time a crash is detected by the
platform specific rproc implementation. This should not be called from a
non-remoteproc driver. This function can be called from atomic/interrupt
context.

5. Implementation callbacks

These callbacks should be provided by platform-specific remoteproc
Expand Down
79 changes: 75 additions & 4 deletions drivers/remoteproc/remoteproc_core.c
Expand Up @@ -50,25 +50,38 @@ typedef int (*rproc_handle_resource_t)(struct rproc *rproc, void *, int avail);
/* Unique indices for remoteproc devices */
static DEFINE_IDA(rproc_dev_index);

/* Printable names of the crash types, indexed by enum rproc_crash_type */
static const char * const rproc_crash_names[] = {
[RPROC_MMUFAULT] = "mmufault",
};

/**
 * rproc_crash_to_string() - translate an rproc_crash_type to a printable name
 * @type: crash type to translate
 *
 * Returns the matching entry of rproc_crash_names, or "unknown" when @type
 * lies outside the table or its slot has no name (a hole left by the
 * designated initializers is NULL).
 */
static const char *rproc_crash_to_string(enum rproc_crash_type type)
{
	if (type < ARRAY_SIZE(rproc_crash_names) && rproc_crash_names[type])
		return rproc_crash_names[type];

	return "unknown";
}

/*
* This is the IOMMU fault handler we register with the IOMMU API
* (when relevant; not all remote processors access memory through
* an IOMMU).
*
* IOMMU core will invoke this handler whenever the remote processor
* tries to access an unmapped device address.
*
* The fault is logged and then reported as an RPROC_MMUFAULT crash through
* rproc_report_crash(); @token is used as the struct rproc of the faulting
* remote processor.
*/
static int rproc_iommu_fault(struct iommu_domain *domain, struct device *dev,
unsigned long iova, int flags, void *token)
{
struct rproc *rproc = token;

dev_err(dev, "iommu fault: da 0x%lx flags 0x%x\n", iova, flags);

rproc_report_crash(rproc, RPROC_MMUFAULT);

/*
* Let the iommu core know we're not really handling this fault;
* we just used it as a recovery trigger.
*/
return -ENOSYS;
}
Expand Down Expand Up @@ -871,6 +884,36 @@ static void rproc_fw_config_virtio(const struct firmware *fw, void *context)
complete_all(&rproc->firmware_loading_complete);
}

/**
 * rproc_crash_handler_work() - deferred handling of a reported crash
 * @work: the crash_handler work item embedded in the affected struct rproc
 *
 * Runs from the work scheduled by rproc_report_crash(). Under the rproc lock
 * it moves the remote processor to RPROC_CRASHED and bumps its crash counter;
 * a report for a processor that is already crashed or offline is dropped.
 *
 * Everything else related to a crash (cpu registers and stack dump,
 * information to help debug the fatal error, etc.) is meant to be handled
 * here as well, but is not implemented yet.
 */
static void rproc_crash_handler_work(struct work_struct *work)
{
	struct rproc *rproc = container_of(work, struct rproc, crash_handler);
	struct device *dev = &rproc->dev;

	dev_dbg(dev, "enter %s\n", __func__);

	mutex_lock(&rproc->lock);

	switch (rproc->state) {
	case RPROC_CRASHED:
	case RPROC_OFFLINE:
		/* handle only the first crash detected */
		mutex_unlock(&rproc->lock);
		return;
	default:
		break;
	}

	rproc->crash_cnt++;
	rproc->state = RPROC_CRASHED;
	dev_err(dev, "handling crash #%u in %s\n", rproc->crash_cnt,
		rproc->name);

	mutex_unlock(&rproc->lock);

	/* TODO: handle crash */
}

/**
* rproc_boot() - boot a remote processor
* @rproc: handle of a remote processor
Expand Down Expand Up @@ -1165,6 +1208,8 @@ struct rproc *rproc_alloc(struct device *dev, const char *name,
INIT_LIST_HEAD(&rproc->traces);
INIT_LIST_HEAD(&rproc->rvdevs);

INIT_WORK(&rproc->crash_handler, rproc_crash_handler_work);

rproc->state = RPROC_OFFLINE;

return rproc;
Expand Down Expand Up @@ -1221,6 +1266,32 @@ int rproc_del(struct rproc *rproc)
}
EXPORT_SYMBOL(rproc_del);

/**
 * rproc_report_crash() - report a crash of a remote processor
 * @rproc: remote processor that crashed
 * @type: kind of crash that was detected
 *
 * Low-level drivers implementing a specific remoteproc must call this every
 * time they detect a crash. It should not be called from a non-remoteproc
 * driver.
 *
 * The crash is only logged here; the actual handling is deferred to the
 * rproc's crash_handler work, so this function can be called from
 * atomic/interrupt context.
 */
void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
{
	struct device *dev;
	const char *crash_name;

	if (!rproc) {
		pr_err("NULL rproc pointer\n");
		return;
	}

	dev = &rproc->dev;
	crash_name = rproc_crash_to_string(type);

	dev_err(dev, "crash detected in %s: type %s\n", rproc->name,
		crash_name);

	/* hand the actual crash handling over to the work item */
	schedule_work(&rproc->crash_handler);
}
EXPORT_SYMBOL(rproc_report_crash);

static int __init remoteproc_init(void)
{
rproc_init_debugfs();
Expand Down
18 changes: 18 additions & 0 deletions include/linux/remoteproc.h
Expand Up @@ -360,6 +360,19 @@ enum rproc_state {
RPROC_LAST = 4,
};

/**
* enum rproc_crash_type - remote processor crash types
* @RPROC_MMUFAULT: iommu fault
*
* Each element of the enum is used as an array index, so every value must
* be a valid (small, non-negative) index.
*
* Feel free to add more types when needed.
*/
enum rproc_crash_type {
RPROC_MMUFAULT,
};

/**
* struct rproc - represents a physical remote processor device
* @node: klist node of this rproc object
Expand All @@ -383,6 +396,8 @@ enum rproc_state {
* @rvdevs: list of remote virtio devices
* @notifyids: idr for dynamically assigning rproc-wide unique notify ids
* @index: index of this rproc device
* @crash_handler: work item for handling a crash
* @crash_cnt: crash counter
*/
struct rproc {
struct klist_node node;
Expand All @@ -406,6 +421,8 @@ struct rproc {
struct list_head rvdevs;
struct idr notifyids;
int index;
struct work_struct crash_handler;
unsigned crash_cnt;
};

/* we currently support only two vrings per rvdev */
Expand Down Expand Up @@ -460,6 +477,7 @@ int rproc_del(struct rproc *rproc);

int rproc_boot(struct rproc *rproc);
void rproc_shutdown(struct rproc *rproc);
void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type);

static inline struct rproc_vdev *vdev_to_rvdev(struct virtio_device *vdev)
{
Expand Down

0 comments on commit 8afd519

Please sign in to comment.