Skip to content

Commit 2f1f570

Browse files
crojewsk-intel authored and broonie committed
ASoC: Intel: avs: Coredump and recovery flow
On rare occasions, under stress conditions or due to a hardware malfunction, the DSP firmware may fail. Software is notified about such a situation with an EXCEPTION_CAUGHT notification. An IPC timeout is also counted as a critical device failure. More often than not, the driver can recover from such situations by performing a full reset: killing and restarting the ADSP. Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com> Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com> Link: https://lore.kernel.org/r/20220516101116.190192-7-cezary.rojewski@intel.com Signed-off-by: Mark Brown <broonie@kernel.org>
1 parent d070002 commit 2f1f570

File tree

4 files changed

+105
-2
lines changed

4 files changed

+105
-2
lines changed

sound/soc/intel/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ config SND_SOC_INTEL_AVS
219219
select SND_HDA_EXT_CORE
220220
select SND_HDA_DSP_LOADER
221221
select SND_INTEL_DSP_CONFIG
222+
select WANT_DEV_COREDUMP
222223
help
223224
Enable support for Intel(R) cAVS 1.5 platforms with DSP
224225
capabilities. This includes Skylake, Kabylake, Amberlake and

sound/soc/intel/avs/avs.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ struct avs_dsp_ops {
4242
int (* const load_basefw)(struct avs_dev *, struct firmware *);
4343
int (* const load_lib)(struct avs_dev *, struct firmware *, u32);
4444
int (* const transfer_mods)(struct avs_dev *, bool, struct avs_module_entry *, u32);
45+
int (* const coredump)(struct avs_dev *, union avs_notify_msg *);
4546
};
4647

4748
#define avs_dsp_op(adev, op, ...) \
@@ -164,12 +165,15 @@ struct avs_ipc {
164165
struct avs_ipc_msg rx;
165166
u32 default_timeout_ms;
166167
bool ready;
168+
atomic_t recovering;
167169

168170
bool rx_completed;
169171
spinlock_t rx_lock;
170172
struct mutex msg_mutex;
171173
struct completion done_completion;
172174
struct completion busy_completion;
175+
176+
struct work_struct recovery_work;
173177
};
174178

175179
#define AVS_EIPC EREMOTEIO

sound/soc/intel/avs/ipc.c

Lines changed: 95 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,89 @@
1414

1515
#define AVS_IPC_TIMEOUT_MS 300
1616

17+
static void avs_dsp_recovery(struct avs_dev *adev)
18+
{
19+
struct avs_soc_component *acomp;
20+
unsigned int core_mask;
21+
int ret;
22+
23+
mutex_lock(&adev->comp_list_mutex);
24+
/* disconnect all running streams */
25+
list_for_each_entry(acomp, &adev->comp_list, node) {
26+
struct snd_soc_pcm_runtime *rtd;
27+
struct snd_soc_card *card;
28+
29+
card = acomp->base.card;
30+
if (!card)
31+
continue;
32+
33+
for_each_card_rtds(card, rtd) {
34+
struct snd_pcm *pcm;
35+
int dir;
36+
37+
pcm = rtd->pcm;
38+
if (!pcm || rtd->dai_link->no_pcm)
39+
continue;
40+
41+
for_each_pcm_streams(dir) {
42+
struct snd_pcm_substream *substream;
43+
44+
substream = pcm->streams[dir].substream;
45+
if (!substream || !substream->runtime)
46+
continue;
47+
48+
snd_pcm_stop(substream, SNDRV_PCM_STATE_DISCONNECTED);
49+
}
50+
}
51+
}
52+
mutex_unlock(&adev->comp_list_mutex);
53+
54+
/* forcibly shutdown all cores */
55+
core_mask = GENMASK(adev->hw_cfg.dsp_cores - 1, 0);
56+
avs_dsp_core_disable(adev, core_mask);
57+
58+
/* attempt dsp reboot */
59+
ret = avs_dsp_boot_firmware(adev, true);
60+
if (ret < 0)
61+
dev_err(adev->dev, "dsp reboot failed: %d\n", ret);
62+
63+
pm_runtime_mark_last_busy(adev->dev);
64+
pm_runtime_enable(adev->dev);
65+
pm_request_autosuspend(adev->dev);
66+
67+
atomic_set(&adev->ipc->recovering, 0);
68+
}
69+
70+
static void avs_dsp_recovery_work(struct work_struct *work)
71+
{
72+
struct avs_ipc *ipc = container_of(work, struct avs_ipc, recovery_work);
73+
74+
avs_dsp_recovery(to_avs_dev(ipc->dev));
75+
}
76+
77+
static void avs_dsp_exception_caught(struct avs_dev *adev, union avs_notify_msg *msg)
78+
{
79+
struct avs_ipc *ipc = adev->ipc;
80+
81+
/* Account for the double-exception case. */
82+
ipc->ready = false;
83+
84+
if (!atomic_add_unless(&ipc->recovering, 1, 1)) {
85+
dev_err(adev->dev, "dsp recovery is already in progress\n");
86+
return;
87+
}
88+
89+
dev_crit(adev->dev, "communication severed, rebooting dsp..\n");
90+
91+
/* Re-enabled on recovery completion. */
92+
pm_runtime_disable(adev->dev);
93+
94+
/* Process received notification. */
95+
avs_dsp_op(adev, coredump, msg);
96+
97+
schedule_work(&ipc->recovery_work);
98+
}
99+
17100
static void avs_dsp_receive_rx(struct avs_dev *adev, u64 header)
18101
{
19102
struct avs_ipc *ipc = adev->ipc;
@@ -57,6 +140,9 @@ static void avs_dsp_process_notification(struct avs_dev *adev, u64 header)
57140
data_size = sizeof(struct avs_notify_res_data);
58141
break;
59142

143+
case AVS_NOTIFY_EXCEPTION_CAUGHT:
144+
break;
145+
60146
case AVS_NOTIFY_MODULE_EVENT:
61147
/* To know the total payload size, header needs to be read first. */
62148
memcpy_fromio(&mod_data, avs_uplink_addr(adev), sizeof(mod_data));
@@ -84,6 +170,10 @@ static void avs_dsp_process_notification(struct avs_dev *adev, u64 header)
84170
complete(&adev->fw_ready);
85171
break;
86172

173+
case AVS_NOTIFY_EXCEPTION_CAUGHT:
174+
avs_dsp_exception_caught(adev, &msg);
175+
break;
176+
87177
default:
88178
break;
89179
}
@@ -278,9 +368,10 @@ static int avs_dsp_do_send_msg(struct avs_dev *adev, struct avs_ipc_msg *request
278368
ret = avs_ipc_wait_busy_completion(ipc, timeout);
279369
if (ret) {
280370
if (ret == -ETIMEDOUT) {
281-
dev_crit(adev->dev, "communication severed: %d, rebooting dsp..\n", ret);
371+
union avs_notify_msg msg = AVS_NOTIFICATION(EXCEPTION_CAUGHT);
282372

283-
avs_ipc_block(ipc);
373+
/* Same treatment as on exception, just stack_dump=0. */
374+
avs_dsp_exception_caught(adev, &msg);
284375
}
285376
goto exit;
286377
}
@@ -368,6 +459,7 @@ int avs_ipc_init(struct avs_ipc *ipc, struct device *dev)
368459
ipc->dev = dev;
369460
ipc->ready = false;
370461
ipc->default_timeout_ms = AVS_IPC_TIMEOUT_MS;
462+
INIT_WORK(&ipc->recovery_work, avs_dsp_recovery_work);
371463
init_completion(&ipc->done_completion);
372464
init_completion(&ipc->busy_completion);
373465
spin_lock_init(&ipc->rx_lock);
@@ -379,4 +471,5 @@ int avs_ipc_init(struct avs_ipc *ipc, struct device *dev)
379471
/*
 * avs_ipc_block - mark the IPC as not ready and stop pending recovery
 * @ipc: IPC context to block
 *
 * The ready flag is cleared first; recovery_work is then cancelled, waiting
 * for the handler to finish if it is already executing.
 */
void avs_ipc_block(struct avs_ipc *ipc)
380472
{
381473
ipc->ready = false;
474+
cancel_work_sync(&ipc->recovery_work);
382475
}

sound/soc/intel/avs/messages.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ enum avs_notify_msg_type {
187187
AVS_NOTIFY_PHRASE_DETECTED = 4,
188188
AVS_NOTIFY_RESOURCE_EVENT = 5,
189189
AVS_NOTIFY_FW_READY = 8,
190+
AVS_NOTIFY_EXCEPTION_CAUGHT = 10,
190191
AVS_NOTIFY_MODULE_EVENT = 12,
191192
};
192193

@@ -205,6 +206,10 @@ union avs_notify_msg {
205206
};
206207
union {
207208
u32 val;
209+
struct {
210+
u32 core_id:2;
211+
u32 stack_dump_size:16;
212+
} coredump;
208213
} ext;
209214
};
210215
} __packed;

0 commit comments

Comments
 (0)