Skip to content

Commit

Permalink
extend lifetimes for hoisted used in loop
Browse files Browse the repository at this point in the history
This makes the register recycling checks a bit more
precise.  At head we never recycle a register that's
holding a hoisted value, which is overly conservative.

We really should never recycle a register that's still
needed.  By extending the lifetime of any hoisted value
that's used in the loop, we prevent that, while still
allowing hoisted values that are only used in hoisted
computation to be reused.

This takes just a small tweak in the JIT code (removing
the !hoisted({x,y,z}) checks), and a somewhat larger
refactoring in the interpreter, making both hoisted and
non-hoisted code go through the same recycling register
assignment flow.

There's one diff in the existing cases where we now
reuse a hoisted register, and I've added a second test
just to make sure it's covered explicitly.

Change-Id: I25b37ab1f1fea3042d7fd167529abc8fed1dddff
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/233239
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
  • Loading branch information
Mike Klein authored and Skia Commit-Bot committed Aug 13, 2019
1 parent 5b2f04c commit f996311
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 53 deletions.
53 changes: 35 additions & 18 deletions resources/SkVMTest.expected
Original file line number Diff line number Diff line change
Expand Up @@ -229,29 +229,29 @@ G8 over G8
v20 = to_i32 v19
store8 arg(1) v20

10 registers, 21 instructions:
9 registers, 21 instructions:
r0 = splat 3B808081 (0.0039215689)
r1 = splat 3F800000 (1)
r2 = sub_f32 r1 r1
r3 = splat 3E59B3D0 (0.21259999)
r4 = splat 3F371759 (0.71520001)
r5 = splat 3D93DD98 (0.0722)
r6 = splat 437F0000 (255)
r7 = splat 3F000000 (0.5)
r1 = sub_f32 r1 r1
r2 = splat 3E59B3D0 (0.21259999)
r3 = splat 3F371759 (0.71520001)
r4 = splat 3D93DD98 (0.0722)
r5 = splat 437F0000 (255)
r6 = splat 3F000000 (0.5)
loop:
r8 = load8 arg(0)
r7 = load8 arg(0)
r7 = to_f32 r7
r7 = mul_f32 r0 r7
r8 = load8 arg(1)
r8 = to_f32 r8
r8 = mul_f32 r0 r8
r9 = load8 arg(1)
r9 = to_f32 r9
r9 = mul_f32 r0 r9
r8 = mad_f32 r9 r2 r8
r9 = mul_f32 r8 r5
r9 = mad_f32 r8 r4 r9
r9 = mad_f32 r8 r3 r9
r9 = mad_f32 r9 r6 r7
r9 = to_i32 r9
store8 arg(1) r9
r7 = mad_f32 r8 r1 r7
r8 = mul_f32 r7 r4
r8 = mad_f32 r7 r3 r8
r8 = mad_f32 r7 r2 r8
r8 = mad_f32 r8 r5 r6
r8 = to_i32 r8
store8 arg(1) r8

G8 over RGBA_8888
38 values:
Expand Down Expand Up @@ -729,3 +729,20 @@ r3 = bit_or r5 r3
r3 = add_i32 r2 r3
store32 arg(1) r3

6 values:
↑ v0 = splat 1 (1.4012985e-45)
↑ v1 = splat 2 (2.8025969e-45)
↑ v2 = add_i32 v0 v1
v3 = load32 arg(0)
v4 = mul_i32 v3 v2
store32 arg(0) v4

2 registers, 6 instructions:
r0 = splat 1 (1.4012985e-45)
r1 = splat 2 (2.8025969e-45)
r1 = add_i32 r0 r1
loop:
r0 = load32 arg(0)
r0 = mul_i32 r0 r1
store32 arg(0) r0

85 changes: 50 additions & 35 deletions src/core/SkVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,18 @@ namespace skvm {
if (inst.y != NA) { inst.hoist &= fProgram[inst.y].hoist; }
if (inst.z != NA) { inst.hoist &= fProgram[inst.z].hoist; }
}

// Any hoisted values used inside the loop need to live forever.
if (!inst.hoist) {
auto make_immortal = [&](Val arg) {
if (fProgram[arg].death != 0) {
fProgram[arg].death = (Val)fProgram.size();
}
};
if (inst.x != NA && fProgram[inst.x].hoist) { make_immortal(inst.x); }
if (inst.y != NA && fProgram[inst.y].hoist) { make_immortal(inst.y); }
if (inst.z != NA && fProgram[inst.z].hoist) { make_immortal(inst.z); }
}
}

return {fProgram, fStrides, debug_name};
Expand Down Expand Up @@ -1107,51 +1119,54 @@ namespace skvm {
// This next bit is a bit more complicated than strictly necessary;
// we could just assign every live instruction to its own register.
//
// But recycling registers in the loop is fairly cheap, and good practice
// for the JITs where minimizing register pressure really is important.
// (Also helps minimize unit test diffs.)
// But recycling registers is fairly cheap, and good practice for the
// JITs where minimizing register pressure really is important.

// Assign a register to each live hoisted instruction. We'll never recycle these.
fRegs = 0;
int live_instructions = 0;
std::vector<Reg> avail;

// Assign this value to a register, recycling them where we can.
auto assign_register = [&](Val id) {
live_instructions++;
const Builder::Instruction& inst = instructions[id];

// If this is a real input and it's lifetime ends at this instruction,
// we can recycle the register it's occupying.
auto maybe_recycle_register = [&](Val input) {
if (input != NA && instructions[input].death == id) {
avail.push_back(reg[input]);
}
};

// Take care to not recycle the same register twice.
if (true ) { maybe_recycle_register(inst.x); }
if (inst.y != inst.x ) { maybe_recycle_register(inst.y); }
if (inst.z != inst.x && inst.z != inst.y) { maybe_recycle_register(inst.z); }

// Allocate a register if we have to, preferring to reuse anything available.
if (avail.empty()) {
reg[id] = fRegs++;
} else {
reg[id] = avail.back();
avail.pop_back();
}
};

// Assign a register to each live hoisted instruction.
for (Val id = 0; id < (Val)instructions.size(); id++) {
const Builder::Instruction& inst = instructions[id];
if (inst.death != 0 && inst.hoist) {
live_instructions++;
reg[id] = fRegs++;
assign_register(id);
}
}

// Assign registers to each live loop instruction, recycling them when we can.
std::vector<Reg> avail;
// Assign registers to each live loop instruction.
for (Val id = 0; id < (Val)instructions.size(); id++) {
const Builder::Instruction& inst = instructions[id];
if (inst.death != 0 && !inst.hoist) {
live_instructions++;

/// If an instruction's input is no longer live, we can recycle its register.
auto maybe_recycle_register = [&](Val input) {
// If this is a real input and it's lifetime ends at this instruction,
// we can recycle the register it's occupying.
if (input != NA
&& !instructions[input].hoist
&& instructions[input].death == id) {
avail.push_back(reg[input]);
}
};
assign_register(id);

// Take care to not recycle the same register twice.
if (true ) { maybe_recycle_register(inst.x); }
if (inst.y != inst.x ) { maybe_recycle_register(inst.y); }
if (inst.z != inst.x && inst.z != inst.y) { maybe_recycle_register(inst.z); }

// Allocate a register if we have to, preferring to reuse anything available.
if (avail.empty()) {
reg[id] = fRegs++;
} else {
reg[id] = avail.back();
avail.pop_back();
}
}
}

Expand Down Expand Up @@ -1359,9 +1374,9 @@ namespace skvm {

// Now make available any registers that are consumed by this instruction.
// (The register pool we can pick dst from is >= the pool for tmp, adding any of these.)
if (x != NA && instructions[x].death == id && !hoisted(x)) { avail |= 1 << r[x]; }
if (y != NA && instructions[y].death == id && !hoisted(y)) { avail |= 1 << r[y]; }
if (z != NA && instructions[z].death == id && !hoisted(z)) { avail |= 1 << r[z]; }
if (x != NA && instructions[x].death == id) { avail |= 1 << r[x]; }
if (y != NA && instructions[y].death == id) { avail |= 1 << r[y]; }
if (z != NA && instructions[z].death == id) { avail |= 1 << r[z]; }
// set_dst() and dst() will work read/write with this perhaps-just-updated avail.

// Some ops may decide dst on their own to best fit the instruction (see Op::mad_f32).
Expand Down
35 changes: 35 additions & 0 deletions tests/SkVMTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,41 @@ DEF_TEST(SkVM, r) {
dump(builder, &buf);
}

{
skvm::Builder b;
skvm::Arg arg = b.varying<int>();

// x and y can both be hoisted,
// and x can die at y, while y lives forever.
skvm::I32 x = b.splat(1),
y = b.add(x, b.splat(2));
b.store32(arg, b.mul(b.load32(arg), y));

skvm::Program program = b.done();
REPORTER_ASSERT(r, program.nregs() == 2);

std::vector<skvm::Builder::Instruction> insts = b.program();
REPORTER_ASSERT(r, insts.size() == 6);
REPORTER_ASSERT(r, insts[0].hoist && insts[0].death == 2);
REPORTER_ASSERT(r, insts[1].hoist && insts[1].death == 2);
REPORTER_ASSERT(r, insts[2].hoist && insts[2].death == 6);
REPORTER_ASSERT(r, !insts[3].hoist);
REPORTER_ASSERT(r, !insts[4].hoist);
REPORTER_ASSERT(r, !insts[5].hoist);

dump(b, &buf);

test_jit_and_interpreter(std::move(program), [&](const skvm::Program& program) {
int arg[] = {0,1,2,3,4,5,6,7,8,9};

program.eval(SK_ARRAY_COUNT(arg), arg);

for (int i = 0; i < (int)SK_ARRAY_COUNT(arg); i++) {
REPORTER_ASSERT(r, arg[i] == i*3);
}
});
}

sk_sp<SkData> blob = buf.detachAsData();
{

Expand Down

0 comments on commit f996311

Please sign in to comment.