Enable vector instructions for short format in selectEvaluator

This commit enables the use of vector instructions for the short (float) format in the **select** evaluator. Previously, vector instructions were disabled for the short format in the select evaluator, even though they are supported on z14 and newer platforms. The issue was that the condition value was not converted correctly from a GPR to an FPR for the short format.

Changes for enabling vector instructions for short format:
- Use of the LLGFR instruction for the long format, to zero-extend the 32-bit conditionReg to 64 bits
- Use of a separate SLLG instruction for short-format floats, to preserve the float representation in the first 32 bits, as the value is later moved into an FPR
- Addition of mask values in the VFCE instruction to select the element size mask for floats and doubles respectively

Closes: #5002
Signed-off-by: Sarwat Shaheen <sarwat.shaheen@yahoo.com>
eclipse · Nov 23, 2023 · 8bacfaf · 8bacfaf
1 parent 8b19b80
commit 8bacfaf
Showing 1 changed file with 17 additions and 4 deletions.
diff --git a/compiler/z/codegen/ControlFlowEvaluator.cpp b/compiler/z/codegen/ControlFlowEvaluator.cpp
@@ -2669,18 +2669,31 @@ OMR::Z::TreeEvaluator::dselectEvaluator(TR::Node *node, TR::CodeGenerator *cg)
    TR::Register *resultReg = cg->gprClobberEvaluate(trueValueNode);
    TR::Register *conditionReg = cg->evaluate(conditionNode);
    TR::Register *falseValReg = cg->evaluate(falseValueNode);
-   if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z13) && node->getOpCode().isDouble())
+   if ((cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z13) && node->getOpCode().isDouble())
+    || (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z14) && node->getOpCode().isFloat()))
       {
       TR::Register *vectorSelReg = cg->allocateRegister(TR_VRF);
       TR::Register *tempReg = cg->allocateRegister(TR_FPR);
       TR::Register *vzeroReg = cg->allocateRegister(TR_VRF);
-      // Convert 32 Bit register to 64 Bit (Comparison Child of the select node is 32 bit)
-      generateRRInstruction(cg, TR::InstOpCode::LLGFR, node, conditionReg, conditionReg);
+      if (node->getOpCode().isDouble())
+         {
+         // Convert 32 Bit register to 64 Bit for Doubles (Comparison Child of the select node is 32 bit)
+         generateRRInstruction(cg, TR::InstOpCode::LLGFR, node, conditionReg, conditionReg);
+         }
+      else
+         {
+         // Shift left the 32 least significant bits for preserving the float representation as the hardware only operates on the first 32 bits in an FPR
+         generateRSInstruction(cg, TR::InstOpCode::SLLG, node, conditionReg, 32);
+         }
       // convert to floating point
       generateRRInstruction(cg, TR::InstOpCode::LDGR, node, tempReg, conditionReg);
       // generate compare with zero
       generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, vzeroReg, 0, 0);
-      generateVRRcInstruction(cg, TR::InstOpCode::VFCE, node, vectorSelReg, tempReg, vzeroReg, 1, 0, 3);
+      // Mask values used for VFCE instruction:
+      // M4 - Floating-point-format control = getVectorElementSizeMask(node->getSize()) - gets the element size mask for doubles/floats respectively
+      // M5 - Single-Element-Control = 0x8, setting bit 0 to one, controlling the operation to take place only on the zero-indexed element in the vector
+      // M6 - Condition Code Set = 0, the Condition Code is not set and remains unchanged
+      generateVRRcInstruction(cg, TR::InstOpCode::VFCE, node, vectorSelReg, tempReg, vzeroReg, 0, 0x8, getVectorElementSizeMask(node->getSize()));
       // generate select - if condition == 0, vectorSelReg will contain all 1s, so false and true are swapped
       generateVRReInstruction(cg, TR::InstOpCode::VSEL, node, resultReg, falseValReg, resultReg, vectorSelReg);
       cg->stopUsingRegister(tempReg);