@@ -112,6 +112,14 @@ static void ivpu_pm_recovery_work(struct work_struct *work)
112112 char * evt [2 ] = {"IVPU_PM_EVENT=IVPU_RECOVER" , NULL };
113113 int ret ;
114114
115+ ivpu_err (vdev , "Recovering the VPU (reset #%d)\n" , atomic_read (& vdev -> pm -> reset_counter ));
116+
117+ ret = pm_runtime_resume_and_get (vdev -> drm .dev );
118+ if (ret )
119+ ivpu_err (vdev , "Failed to resume VPU: %d\n" , ret );
120+
121+ ivpu_fw_log_dump (vdev );
122+
115123retry :
116124 ret = pci_try_reset_function (to_pci_dev (vdev -> drm .dev ));
117125 if (ret == - EAGAIN && !drm_dev_is_unplugged (& vdev -> drm )) {
@@ -123,11 +131,13 @@ static void ivpu_pm_recovery_work(struct work_struct *work)
123131 ivpu_err (vdev , "Failed to reset VPU: %d\n" , ret );
124132
125133 kobject_uevent_env (& vdev -> drm .dev -> kobj , KOBJ_CHANGE , evt );
134+ pm_runtime_mark_last_busy (vdev -> drm .dev );
135+ pm_runtime_put_autosuspend (vdev -> drm .dev );
126136}
127137
128- void ivpu_pm_schedule_recovery (struct ivpu_device * vdev )
138+ void ivpu_pm_trigger_recovery (struct ivpu_device * vdev , const char * reason )
129139{
130- struct ivpu_pm_info * pm = vdev -> pm ;
140+ ivpu_err ( vdev , "Recovery triggered by %s\n" , reason ) ;
131141
132142 if (ivpu_disable_recovery ) {
133143 ivpu_err (vdev , "Recovery not available when disable_recovery param is set\n" );
@@ -139,23 +149,20 @@ void ivpu_pm_schedule_recovery(struct ivpu_device *vdev)
139149 return ;
140150 }
141151
142- /* Schedule recovery if it's not in progress */
143- if (atomic_cmpxchg (& pm -> in_reset , 0 , 1 ) == 0 ) {
144- ivpu_hw_irq_disable (vdev );
145- queue_work (system_long_wq , & pm -> recovery_work );
152+ /* Trigger recovery if it's not in progress */
153+ if (atomic_cmpxchg (& vdev -> pm -> reset_pending , 0 , 1 ) == 0 ) {
154+ ivpu_hw_diagnose_failure (vdev );
155+ ivpu_hw_irq_disable (vdev ); /* Disable IRQ early to protect from IRQ storm */
156+ queue_work (system_long_wq , & vdev -> pm -> recovery_work );
146157 }
147158}
148159
/*
 * Work handler registered for pm->job_timeout_work (see INIT_DELAYED_WORK in
 * ivpu_pm_init). It recovers the owning ivpu_pm_info from the embedded work
 * item via container_of() and reports the timeout by calling
 * ivpu_pm_trigger_recovery(vdev, "TDR").
 *
 * Reading this hunk: the '-' lines (timeout_ms computation, the
 * "TDR detected" message, ivpu_hw_diagnose_failure() and
 * ivpu_pm_schedule_recovery()) are removed; the single '+' line replaces
 * them. ivpu_hw_diagnose_failure() is now called from
 * ivpu_pm_trigger_recovery() itself.
 */
149160static void ivpu_job_timeout_work (struct work_struct * work )
150161{
151162 struct ivpu_pm_info * pm = container_of (work , struct ivpu_pm_info , job_timeout_work .work );
152163 struct ivpu_device * vdev = pm -> vdev ;
153- unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev -> timeout .tdr ;
154164
155- ivpu_err (vdev , "TDR detected, timeout %lu ms" , timeout_ms );
156- ivpu_hw_diagnose_failure (vdev );
157-
158- ivpu_pm_schedule_recovery (vdev );
165+ ivpu_pm_trigger_recovery (vdev , "TDR" );
159166}
160167
161168void ivpu_start_job_timeout_detection (struct ivpu_device * vdev )
@@ -228,6 +235,9 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
228235 bool hw_is_idle = true;
229236 int ret ;
230237
238+ drm_WARN_ON (& vdev -> drm , !xa_empty (& vdev -> submitted_jobs_xa ));
239+ drm_WARN_ON (& vdev -> drm , work_pending (& vdev -> pm -> recovery_work ));
240+
231241 ivpu_dbg (vdev , PM , "Runtime suspend..\n" );
232242
233243 if (!ivpu_hw_is_idle (vdev ) && vdev -> pm -> suspend_reschedule_counter ) {
@@ -310,11 +320,12 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev)
310320{
311321 struct ivpu_device * vdev = pci_get_drvdata (pdev );
312322
313- pm_runtime_get_sync (vdev -> drm .dev );
314-
315323 ivpu_dbg (vdev , PM , "Pre-reset..\n" );
316324 atomic_inc (& vdev -> pm -> reset_counter );
317- atomic_set (& vdev -> pm -> in_reset , 1 );
325+ atomic_set (& vdev -> pm -> reset_pending , 1 );
326+
327+ pm_runtime_get_sync (vdev -> drm .dev );
328+ down_write (& vdev -> pm -> reset_lock );
318329 ivpu_prepare_for_reset (vdev );
319330 ivpu_hw_reset (vdev );
320331 ivpu_pm_prepare_cold_boot (vdev );
@@ -331,9 +342,11 @@ void ivpu_pm_reset_done_cb(struct pci_dev *pdev)
331342 ret = ivpu_resume (vdev );
332343 if (ret )
333344 ivpu_err (vdev , "Failed to set RESUME state: %d\n" , ret );
334- atomic_set (& vdev -> pm -> in_reset , 0 );
345+ up_write (& vdev -> pm -> reset_lock );
346+ atomic_set (& vdev -> pm -> reset_pending , 0 );
335347 ivpu_dbg (vdev , PM , "Post-reset done.\n" );
336348
349+ pm_runtime_mark_last_busy (vdev -> drm .dev );
337350 pm_runtime_put_autosuspend (vdev -> drm .dev );
338351}
339352
@@ -346,7 +359,10 @@ void ivpu_pm_init(struct ivpu_device *vdev)
346359 pm -> vdev = vdev ;
347360 pm -> suspend_reschedule_counter = PM_RESCHEDULE_LIMIT ;
348361
349- atomic_set (& pm -> in_reset , 0 );
362+ init_rwsem (& pm -> reset_lock );
363+ atomic_set (& pm -> reset_pending , 0 );
364+ atomic_set (& pm -> reset_counter , 0 );
365+
350366 INIT_WORK (& pm -> recovery_work , ivpu_pm_recovery_work );
351367 INIT_DELAYED_WORK (& pm -> job_timeout_work , ivpu_job_timeout_work );
352368
0 commit comments