@@ -197,6 +197,13 @@ static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
197197 if (amdgpu_ras_query_error_status (obj -> adev , & info ))
198198 return - EINVAL ;
199199
200+ /* Hardware counters are reset automatically after a query on Vega20 and Arcturus; reset them explicitly on all other ASICs */
201+ if (obj -> adev -> ip_versions [MP0_HWIP ][0 ] != IP_VERSION (11 , 0 , 2 ) &&
202+ obj -> adev -> ip_versions [MP0_HWIP ][0 ] != IP_VERSION (11 , 0 , 4 )) {
203+ if (amdgpu_ras_reset_error_status (obj -> adev , info .head .block ))
204+ dev_warn (obj -> adev -> dev , "Failed to reset error counter and error status" );
205+ }
206+
200207 s = snprintf (val , sizeof (val ), "%s: %lu\n%s: %lu\n" ,
201208 "ue" , info .ue_count ,
202209 "ce" , info .ce_count );
@@ -550,9 +557,10 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
550557 if (amdgpu_ras_query_error_status (obj -> adev , & info ))
551558 return - EINVAL ;
552559
553- if (obj -> adev -> asic_type == CHIP_ALDEBARAN ) {
560+ if (obj -> adev -> ip_versions [MP0_HWIP ][0 ] != IP_VERSION (11 , 0 , 2 ) &&
561+ obj -> adev -> ip_versions [MP0_HWIP ][0 ] != IP_VERSION (11 , 0 , 4 )) {
554562 if (amdgpu_ras_reset_error_status (obj -> adev , info .head .block ))
555- DRM_WARN ( "Failed to reset error counter and error status" );
563+ dev_warn ( obj -> adev -> dev , "Failed to reset error counter and error status" );
556564 }
557565
558566 return sysfs_emit (buf , "%s: %lu\n%s: %lu\n" , "ue" , info .ue_count ,
@@ -1027,9 +1035,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
10271035 }
10281036 }
10291037
1030- if (!amdgpu_persistent_edc_harvesting_supported (adev ))
1031- amdgpu_ras_reset_error_status (adev , info -> head .block );
1032-
10331038 return 0 ;
10341039}
10351040
@@ -1149,6 +1154,12 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
11491154 if (res )
11501155 return res ;
11511156
1157+ if (adev -> ip_versions [MP0_HWIP ][0 ] != IP_VERSION (11 , 0 , 2 ) &&
1158+ adev -> ip_versions [MP0_HWIP ][0 ] != IP_VERSION (11 , 0 , 4 )) {
1159+ if (amdgpu_ras_reset_error_status (adev , info .head .block ))
1160+ dev_warn (adev -> dev , "Failed to reset error counter and error status" );
1161+ }
1162+
11521163 ce += info .ce_count ;
11531164 ue += info .ue_count ;
11541165 }
@@ -1792,6 +1803,12 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
17921803 continue ;
17931804
17941805 amdgpu_ras_query_error_status (adev , & info );
1806+
1807+ if (adev -> ip_versions [MP0_HWIP ][0 ] != IP_VERSION (11 , 0 , 2 ) &&
1808+ adev -> ip_versions [MP0_HWIP ][0 ] != IP_VERSION (11 , 0 , 4 )) {
1809+ if (amdgpu_ras_reset_error_status (adev , info .head .block ))
1810+ dev_warn (adev -> dev , "Failed to reset error counter and error status" );
1811+ }
17951812 }
17961813}
17971814
0 commit comments