Skip to content

Commit

Permalink
amd/compiler: remove killed phi sgpr-operands from live vars between …
Browse files Browse the repository at this point in the history
…p_logical_end and block end.

As SSA elimination inserts parallelcopies at p_logical_end, killed SGPR phi operands might already be dead at that point, so their registers can be re-used by register allocation (RA).
  • Loading branch information
daniel-schuermann committed Jul 9, 2019
1 parent a34cf70 commit 1f6010d
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 2 deletions.
12 changes: 10 additions & 2 deletions src/amd/compiler/aco_live_var_analysis.cpp
Expand Up @@ -33,7 +33,8 @@
namespace aco {
namespace {

void process_live_temps_per_block(Program *program, live& lives, Block* block, std::set<unsigned>& worklist)
void process_live_temps_per_block(Program *program, live& lives, Block* block,
std::set<unsigned>& worklist, std::vector<uint16_t>& phi_sgpr_ops)
{
std::vector<std::pair<uint16_t,uint16_t>>& register_demand = lives.register_demand[block->index];
uint16_t vgpr_demand = 0;
Expand Down Expand Up @@ -71,6 +72,8 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block, s
}
}

sgpr_demand -= phi_sgpr_ops[block->index];

/* traverse the instructions backwards */
for (int idx = block->instructions.size() -1; idx >= 0; idx--)
{
Expand Down Expand Up @@ -124,9 +127,13 @@ void process_live_temps_per_block(Program *program, live& lives, Block* block, s
if (it.second) {
operand.setFirstKill(true);
worklist.insert(preds[i]);
if (insn->opcode == aco_opcode::p_phi && operand.getTemp().type() == sgpr)
phi_sgpr_ops[preds[i]] += operand.size();
}
}
}
} else if (insn->opcode == aco_opcode::p_logical_end) {
sgpr_demand += phi_sgpr_ops[block->index];
} else {
for (unsigned i = 0; i < insn->operandCount(); ++i)
{
Expand Down Expand Up @@ -227,6 +234,7 @@ live live_var_analysis(Program* program,
result.live_out.resize(program->blocks.size());
result.register_demand.resize(program->blocks.size());
std::set<unsigned> worklist;
std::vector<uint16_t> phi_sgpr_ops(program->blocks.size());
uint16_t vgpr_demand = 0;
uint16_t sgpr_demand = 0;

Expand All @@ -237,7 +245,7 @@ live live_var_analysis(Program* program,
std::set<unsigned>::reverse_iterator b_it = worklist.rbegin();
unsigned block_idx = *b_it;
worklist.erase(block_idx);
process_live_temps_per_block(program, result, &program->blocks[block_idx], worklist);
process_live_temps_per_block(program, result, &program->blocks[block_idx], worklist, phi_sgpr_ops);
vgpr_demand = std::max(vgpr_demand, program->blocks[block_idx].vgpr_demand);
sgpr_demand = std::max(sgpr_demand, program->blocks[block_idx].sgpr_demand);
}
Expand Down
34 changes: 34 additions & 0 deletions src/amd/compiler/aco_register_allocation.cpp
Expand Up @@ -1242,6 +1242,40 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
/* Handle all other instructions of the block */
for (; it != block.instructions.end(); ++it) {
aco_ptr<Instruction>& instr = *it;

/* parallelcopies from p_phi are inserted here which means
 * live ranges of killed operands end here as well.
 *
 * At p_logical_end, look at the p_phi instructions at the start of the single
 * logical successor and release the register of every SGPR operand coming
 * from this block that is marked as first-kill, so the register is available
 * again for the remaining instructions of this block. */
if (instr->opcode == aco_opcode::p_logical_end) {
/* no need to process this instruction any further */
/* (only blocks with exactly one logical successor are handled; otherwise
 * the instruction is just forwarded unchanged) */
if (block.logical_succs.size() != 1) {
instructions.emplace_back(std::move(instr));
continue;
}

Block& succ = program->blocks[block.logical_succs[0]];
/* find the position of this block in the successor's logical predecessor
 * list; that position is used as the phi operand index below.
 * NOTE(review): assumes block.index is always present in
 * succ.logical_preds - otherwise idx ends up out of range. TODO confirm. */
unsigned idx = 0;
for (; idx < succ.logical_preds.size(); idx++) {
if (succ.logical_preds[idx] == block.index)
break;
}
/* walk the successor's instructions, stopping at the first one that is
 * neither p_phi nor p_linear_phi */
for (aco_ptr<Instruction>& phi : succ.instructions) {
if (phi->opcode == aco_opcode::p_phi) {
if (phi->getOperand(idx).isTemp() &&
phi->getOperand(idx).getTemp().type() == sgpr &&
phi->getOperand(idx).isFirstKill()) {
/* resolve the operand through read_variable() for this block and
 * look up the register recorded for it in ctx.assignments */
Temp phi_op = read_variable(phi->getOperand(idx).getTemp(), block.index);
PhysReg reg = ctx.assignments[phi_op.id()].first;
assert(register_file[reg] == phi_op.id());
/* NOTE(review): only the entry at 'reg' is cleared. If phi_op spans
 * more than one register (phi_op.size() > 1), the following entries
 * presumably stay marked as occupied - verify whether all
 * phi_op.size() entries should be released here. */
register_file[reg] = 0;
}
} else if (phi->opcode != aco_opcode::p_linear_phi) {
break;
}
}
instructions.emplace_back(std::move(instr));
continue;
}

std::vector<std::pair<Operand, Definition>> parallelcopy;

assert(!is_phi(instr));
Expand Down
34 changes: 34 additions & 0 deletions src/amd/compiler/aco_validate.cpp
Expand Up @@ -269,12 +269,22 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio

bool err = false;
aco::live live_vars = aco::live_var_analysis(program, options);
std::vector<std::vector<Temp>> phi_sgpr_ops(program->blocks.size());

std::map<unsigned, Assignment> assignments;
for (Block& block : program->blocks) {
Location loc;
loc.block = &block;
for (aco_ptr<Instruction>& instr : block.instructions) {
if (instr->opcode == aco_opcode::p_phi) {
for (unsigned i = 0; i < instr->num_operands; i++) {
if (instr->getOperand(i).isTemp() &&
instr->getOperand(i).getTemp().type() == sgpr &&
instr->getOperand(i).isFirstKill())
phi_sgpr_ops[block.logical_preds[i]].emplace_back(instr->getOperand(i).getTemp());
}
}

loc.instr = instr.get();
for (unsigned i = 0; i < instr->num_operands; i++) {
Operand& op = instr->getOperand(i);
Expand Down Expand Up @@ -321,6 +331,9 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio

std::set<Temp> live;
live.insert(live_vars.live_out[block.index].begin(), live_vars.live_out[block.index].end());
/* remove killed p_phi sgpr operands */
for (Temp tmp : phi_sgpr_ops[block.index])
live.erase(tmp);

/* check live out */
for (Temp tmp : live) {
Expand All @@ -337,6 +350,18 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio
for (auto it = block.instructions.rbegin(); it != block.instructions.rend(); ++it) {
aco_ptr<Instruction>& instr = *it;

/* check killed p_phi sgpr operands */
if (instr->opcode == aco_opcode::p_logical_end) {
for (Temp tmp : phi_sgpr_ops[block.index]) {
PhysReg reg = assignments.at(tmp.id()).reg;
for (unsigned i = 0; i < tmp.size(); i++) {
if (regs[reg + i])
err |= ra_fail(output, loc, Location(), "Assignment of element %d of %%%d already taken by %%%d in live-out", i, tmp.id(), regs[reg + i]);
}
live.emplace(tmp);
}
}

for (unsigned i = 0; i < instr->num_definitions; i++) {
Definition& def = instr->getDefinition(i);
if (!def.isTemp())
Expand Down Expand Up @@ -364,6 +389,15 @@ bool validate_ra(Program *program, const struct radv_nir_compiler_options *optio

for (aco_ptr<Instruction>& instr : block.instructions) {
loc.instr = instr.get();

/* Remove killed p_phi SGPR operands from regs once p_logical_end is reached.
 * A temporary can occupy several consecutive registers, so every one of its
 * tmp.size() elements starting at the assigned register has to be cleared
 * (not only the first one); otherwise stale entries would be reported as
 * conflicts by the checks on later instructions. */
if (instr->opcode == aco_opcode::p_logical_end) {
   for (Temp tmp : phi_sgpr_ops[block.index]) {
      PhysReg reg = assignments.at(tmp.id()).reg;
      for (unsigned i = 0; i < tmp.size(); i++)
         regs[reg + i] = 0;
   }
}

if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
for (unsigned i = 0; i < instr->num_operands; i++) {
Operand& op = instr->getOperand(i);
Expand Down

0 comments on commit 1f6010d

Please sign in to comment.