Skip to content

Commit 3a4bfa0

Browse files
singhra1 authored and johnharr-intel committed
drm/i915/selftest: Fix workarounds selftest for GuC submission
When GuC submission is enabled, the GuC controls engine resets. Rather than explicitly triggering a reset, the driver must submit a hanging context to GuC and wait for the reset to occur.

Signed-off-by: Rahul Kumar Singh <rahul.kumar.singh@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-28-matthew.brost@intel.com
1 parent 3f5dff6 commit 3a4bfa0

File tree

6 files changed

+203
-34
lines changed

6 files changed

+203
-34
lines changed

drivers/gpu/drm/i915/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
280280
i915-$(CONFIG_DRM_I915_SELFTEST) += \
281281
gem/selftests/i915_gem_client_blt.o \
282282
gem/selftests/igt_gem_utils.o \
283+
selftests/intel_scheduler_helpers.o \
283284
selftests/i915_random.o \
284285
selftests/i915_selftest.o \
285286
selftests/igt_atomic.o \

drivers/gpu/drm/i915/gt/intel_engine_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,7 @@ struct intel_engine_cs {
443443
#define I915_ENGINE_IS_VIRTUAL BIT(5)
444444
#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
445445
#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
446+
#define I915_ENGINE_WANT_FORCED_PREEMPTION BIT(8)
446447
unsigned int flags;
447448

448449
/*

drivers/gpu/drm/i915/gt/selftest_workarounds.c

Lines changed: 96 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "selftests/igt_flush_test.h"
1313
#include "selftests/igt_reset.h"
1414
#include "selftests/igt_spinner.h"
15+
#include "selftests/intel_scheduler_helpers.h"
1516
#include "selftests/mock_drm.h"
1617

1718
#include "gem/selftests/igt_gem_utils.h"
@@ -261,28 +262,34 @@ static int do_engine_reset(struct intel_engine_cs *engine)
261262
return intel_engine_reset(engine, "live_workarounds");
262263
}
263264

265+
/*
 * Reset callback for engines running under GuC submission.
 *
 * Performs no action and always reports success: the reset itself is
 * handled by the GuC rather than being triggered from here.
 */
static int do_guc_reset(struct intel_engine_cs *engine)
{
	return 0;
}
270+
264271
static int
265272
switch_to_scratch_context(struct intel_engine_cs *engine,
266-
struct igt_spinner *spin)
273+
struct igt_spinner *spin,
274+
struct i915_request **rq)
267275
{
268276
struct intel_context *ce;
269-
struct i915_request *rq;
270277
int err = 0;
271278

272279
ce = intel_context_create(engine);
273280
if (IS_ERR(ce))
274281
return PTR_ERR(ce);
275282

276-
rq = igt_spinner_create_request(spin, ce, MI_NOOP);
283+
*rq = igt_spinner_create_request(spin, ce, MI_NOOP);
277284
intel_context_put(ce);
278285

279-
if (IS_ERR(rq)) {
286+
if (IS_ERR(*rq)) {
280287
spin = NULL;
281-
err = PTR_ERR(rq);
288+
err = PTR_ERR(*rq);
282289
goto err;
283290
}
284291

285-
err = request_add_spin(rq, spin);
292+
err = request_add_spin(*rq, spin);
286293
err:
287294
if (err && spin)
288295
igt_spinner_end(spin);
@@ -296,6 +303,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
296303
{
297304
struct intel_context *ce, *tmp;
298305
struct igt_spinner spin;
306+
struct i915_request *rq;
299307
intel_wakeref_t wakeref;
300308
int err;
301309

@@ -316,13 +324,24 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
316324
goto out_spin;
317325
}
318326

319-
err = switch_to_scratch_context(engine, &spin);
327+
err = switch_to_scratch_context(engine, &spin, &rq);
320328
if (err)
321329
goto out_spin;
322330

331+
/* Ensure the spinner hasn't aborted */
332+
if (i915_request_completed(rq)) {
333+
pr_err("%s spinner failed to start\n", name);
334+
err = -ETIMEDOUT;
335+
goto out_spin;
336+
}
337+
323338
with_intel_runtime_pm(engine->uncore->rpm, wakeref)
324339
err = reset(engine);
325340

341+
/* Ensure the reset happens and kills the engine */
342+
if (err == 0)
343+
err = intel_selftest_wait_for_rq(rq);
344+
326345
igt_spinner_end(&spin);
327346

328347
if (err) {
@@ -787,9 +806,27 @@ static int live_reset_whitelist(void *arg)
787806
continue;
788807

789808
if (intel_has_reset_engine(gt)) {
790-
err = check_whitelist_across_reset(engine,
791-
do_engine_reset,
792-
"engine");
809+
if (intel_engine_uses_guc(engine)) {
810+
struct intel_selftest_saved_policy saved;
811+
int err2;
812+
813+
err = intel_selftest_modify_policy(engine, &saved);
814+
if (err)
815+
goto out;
816+
817+
err = check_whitelist_across_reset(engine,
818+
do_guc_reset,
819+
"guc");
820+
821+
err2 = intel_selftest_restore_policy(engine, &saved);
822+
if (err == 0)
823+
err = err2;
824+
} else {
825+
err = check_whitelist_across_reset(engine,
826+
do_engine_reset,
827+
"engine");
828+
}
829+
793830
if (err)
794831
goto out;
795832
}
@@ -1235,31 +1272,40 @@ live_engine_reset_workarounds(void *arg)
12351272
reference_lists_init(gt, lists);
12361273

12371274
for_each_engine(engine, gt, id) {
1275+
struct intel_selftest_saved_policy saved;
1276+
bool using_guc = intel_engine_uses_guc(engine);
12381277
bool ok;
1278+
int ret2;
12391279

12401280
pr_info("Verifying after %s reset...\n", engine->name);
1281+
ret = intel_selftest_modify_policy(engine, &saved);
1282+
if (ret)
1283+
break;
1284+
12411285
ce = intel_context_create(engine);
12421286
if (IS_ERR(ce)) {
12431287
ret = PTR_ERR(ce);
1244-
break;
1288+
goto restore;
12451289
}
12461290

1247-
ok = verify_wa_lists(gt, lists, "before reset");
1248-
if (!ok) {
1249-
ret = -ESRCH;
1250-
goto err;
1251-
}
1291+
if (!using_guc) {
1292+
ok = verify_wa_lists(gt, lists, "before reset");
1293+
if (!ok) {
1294+
ret = -ESRCH;
1295+
goto err;
1296+
}
12521297

1253-
ret = intel_engine_reset(engine, "live_workarounds:idle");
1254-
if (ret) {
1255-
pr_err("%s: Reset failed while idle\n", engine->name);
1256-
goto err;
1257-
}
1298+
ret = intel_engine_reset(engine, "live_workarounds:idle");
1299+
if (ret) {
1300+
pr_err("%s: Reset failed while idle\n", engine->name);
1301+
goto err;
1302+
}
12581303

1259-
ok = verify_wa_lists(gt, lists, "after idle reset");
1260-
if (!ok) {
1261-
ret = -ESRCH;
1262-
goto err;
1304+
ok = verify_wa_lists(gt, lists, "after idle reset");
1305+
if (!ok) {
1306+
ret = -ESRCH;
1307+
goto err;
1308+
}
12631309
}
12641310

12651311
ret = igt_spinner_init(&spin, engine->gt);
@@ -1280,25 +1326,41 @@ live_engine_reset_workarounds(void *arg)
12801326
goto err;
12811327
}
12821328

1283-
ret = intel_engine_reset(engine, "live_workarounds:active");
1284-
if (ret) {
1285-
pr_err("%s: Reset failed on an active spinner\n",
1286-
engine->name);
1287-
igt_spinner_fini(&spin);
1288-
goto err;
1329+
/* Ensure the spinner hasn't aborted */
1330+
if (i915_request_completed(rq)) {
1331+
ret = -ETIMEDOUT;
1332+
goto skip;
1333+
}
1334+
1335+
if (!using_guc) {
1336+
ret = intel_engine_reset(engine, "live_workarounds:active");
1337+
if (ret) {
1338+
pr_err("%s: Reset failed on an active spinner\n",
1339+
engine->name);
1340+
igt_spinner_fini(&spin);
1341+
goto err;
1342+
}
12891343
}
12901344

1345+
/* Ensure the reset happens and kills the engine */
1346+
if (ret == 0)
1347+
ret = intel_selftest_wait_for_rq(rq);
1348+
1349+
skip:
12911350
igt_spinner_end(&spin);
12921351
igt_spinner_fini(&spin);
12931352

12941353
ok = verify_wa_lists(gt, lists, "after busy reset");
1295-
if (!ok) {
1354+
if (!ok)
12961355
ret = -ESRCH;
1297-
goto err;
1298-
}
12991356

13001357
err:
13011358
intel_context_put(ce);
1359+
1360+
restore:
1361+
ret2 = intel_selftest_restore_policy(engine, &saved);
1362+
if (ret == 0)
1363+
ret = ret2;
13021364
if (ret)
13031365
break;
13041366
}

drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,6 +1252,9 @@ static void guc_context_policy_init(struct intel_engine_cs *engine,
12521252
{
12531253
desc->policy_flags = 0;
12541254

1255+
if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
1256+
desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE;
1257+
12551258
/* NB: For both of these, zero means disabled. */
12561259
desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
12571260
desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// SPDX-License-Identifier: MIT
2+
/*
3+
* Copyright © 2021 Intel Corporation
4+
*/
5+
6+
#include <linux/jiffies.h>

//#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "i915_drv.h"
#include "i915_selftest.h"

#include "selftests/intel_scheduler_helpers.h"
12+
13+
/* Value written to engine->props.timeslice_duration_ms by intel_selftest_modify_policy(). */
#define REDUCED_TIMESLICE 5
14+
/* Value written to engine->props.preempt_timeout_ms by intel_selftest_modify_policy(). */
#define REDUCED_PREEMPT 10
15+
/* Timeout used by intel_selftest_wait_for_rq() when waiting on a request. */
#define WAIT_FOR_RESET_TIME 1000
16+
17+
int intel_selftest_modify_policy(struct intel_engine_cs *engine,
18+
struct intel_selftest_saved_policy *saved)
19+
20+
{
21+
int err;
22+
23+
saved->reset = engine->i915->params.reset;
24+
saved->flags = engine->flags;
25+
saved->timeslice = engine->props.timeslice_duration_ms;
26+
saved->preempt_timeout = engine->props.preempt_timeout_ms;
27+
28+
/*
29+
* Enable force pre-emption on time slice expiration
30+
* together with engine reset on pre-emption timeout.
31+
* This is required to make the GuC notice and reset
32+
* the single hanging context.
33+
* Also, reduce the preemption timeout to something
34+
* small to speed the test up.
35+
*/
36+
engine->i915->params.reset = 2;
37+
engine->flags |= I915_ENGINE_WANT_FORCED_PREEMPTION;
38+
engine->props.timeslice_duration_ms = REDUCED_TIMESLICE;
39+
engine->props.preempt_timeout_ms = REDUCED_PREEMPT;
40+
41+
if (!intel_engine_uses_guc(engine))
42+
return 0;
43+
44+
err = intel_guc_global_policies_update(&engine->gt->uc.guc);
45+
if (err)
46+
intel_selftest_restore_policy(engine, saved);
47+
48+
return err;
49+
}
50+
51+
int intel_selftest_restore_policy(struct intel_engine_cs *engine,
52+
struct intel_selftest_saved_policy *saved)
53+
{
54+
/* Restore the original policies */
55+
engine->i915->params.reset = saved->reset;
56+
engine->flags = saved->flags;
57+
engine->props.timeslice_duration_ms = saved->timeslice;
58+
engine->props.preempt_timeout_ms = saved->preempt_timeout;
59+
60+
if (!intel_engine_uses_guc(engine))
61+
return 0;
62+
63+
return intel_guc_global_policies_update(&engine->gt->uc.guc);
64+
}
65+
66+
int intel_selftest_wait_for_rq(struct i915_request *rq)
67+
{
68+
long ret;
69+
70+
ret = i915_request_wait(rq, 0, WAIT_FOR_RESET_TIME);
71+
if (ret < 0)
72+
return ret;
73+
74+
return 0;
75+
}
drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/* SPDX-License-Identifier: MIT */
2+
/*
3+
* Copyright © 2021 Intel Corporation
4+
*/
5+
6+
#ifndef _INTEL_SELFTEST_SCHEDULER_HELPERS_H_
7+
#define _INTEL_SELFTEST_SCHEDULER_HELPERS_H_
8+
9+
#include <linux/types.h>
10+
11+
struct i915_request;
12+
struct intel_engine_cs;
13+
14+
/*
 * Engine settings captured by intel_selftest_modify_policy() and written
 * back by intel_selftest_restore_policy().
 */
struct intel_selftest_saved_policy {
15+
/* Copy of engine->flags. */
u32 flags;
16+
/* Copy of engine->i915->params.reset. */
u32 reset;
17+
/* Copy of engine->props.timeslice_duration_ms. */
u64 timeslice;
18+
/* Copy of engine->props.preempt_timeout_ms. */
u64 preempt_timeout;
19+
};
20+
21+
int intel_selftest_modify_policy(struct intel_engine_cs *engine,
22+
struct intel_selftest_saved_policy *saved);
23+
int intel_selftest_restore_policy(struct intel_engine_cs *engine,
24+
struct intel_selftest_saved_policy *saved);
25+
int intel_selftest_wait_for_rq(struct i915_request *rq);
26+
27+
#endif

0 commit comments

Comments
 (0)