UoB-HPC · hal-jones · Apr 15, 2019 · Mar 20, 2019 · Mar 20, 2019 · Mar 21, 2019
diff --git a/src/A64Architecture.cc b/src/A64Architecture.cc
@@ -1,6 +1,7 @@
 #include "A64Architecture.hh"
 
 #include <cassert>
+#include <iomanip>
 #include <iostream>
 
 #include "A64InstructionMetadata.hh"
@@ -40,20 +41,20 @@ uint8_t A64Architecture::predecode(const void* ptr, uint8_t bytesAvailable,
     size_t size = 4;
     uint64_t address = 0;
 
-    // TODO: capture result (success state) and replace instruction with an
-    // "invalid decoding" implementation if not successful
-    cs_disasm_iter(capstoneHandle, &encoding, &size, &address, &rawInsn);
+    bool success =
+        cs_disasm_iter(capstoneHandle, &encoding, &size, &address, &rawInsn);
 
-    auto metadata = A64InstructionMetadata(rawInsn);
+    auto metadata = success ? A64InstructionMetadata(rawInsn)
+                            : A64InstructionMetadata(encoding);
 
     // Cache the metadata
-    metadataCache[insn] = metadata;
+    metadataCache.insert({insn, metadata});
     // Create and cache an instruction using the metadata
-    decodeCache.insert({insn, metadataCache[insn]});
+    decodeCache.insert({insn, metadataCache.find(insn)->second});
   }
 
   // Retrieve the cached instruction
-  std::shared_ptr<A64Instruction> uop =
+  std::shared_ptr<Instruction> uop =
       std::make_shared<A64Instruction>(decodeCache.find(insn)->second);
 
   uop->setInstructionAddress(instructionAddress);
@@ -66,6 +67,62 @@ uint8_t A64Architecture::predecode(const void* ptr, uint8_t bytesAvailable,
   return 4;
 }
 
+/** Report the exception raised by `instruction` on standard output: the
+ * exception type, the address and encoding bytes of the instruction, and
+ * its disassembly (or `<unknown>` for an unallocated encoding).
+ *
+ * `instruction` is downcast without a runtime check, so it must refer to an
+ * A64Instruction. */
+void A64Architecture::handleException(
+    std::shared_ptr<Instruction> instruction) const {
+  A64Instruction* insn = static_cast<A64Instruction*>(instruction.get());
+
+  A64InstructionException exception = insn->getException();
+
+  // Stream manipulators such as std::hex and std::setfill are sticky; record
+  // the current state so it can be reinstated once the report is printed.
+  const std::ios_base::fmtflags savedFlags = std::cout.flags();
+  const char savedFill = std::cout.fill();
+
+  std::cout << "Encountered ";
+  switch (exception) {
+    case A64InstructionException::EncodingUnallocated:
+      std::cout << "illegal instruction";
+      break;
+    case A64InstructionException::ExecutionNotYetImplemented:
+      std::cout << "execution not-yet-implemented";
+      break;
+    default:
+      std::cout << "unknown (id: " << static_cast<unsigned int>(exception)
+                << ")";
+      break;
+  }
+  std::cout << " exception\n";
+
+  std::cout << "  Generated by instruction: \n"
+            << "    0x" << std::hex << std::setfill('0') << std::setw(16)
+            << insn->getInstructionAddress() << ": ";
+
+  auto& metadata = insn->getMetadata();
+  // Print each encoding byte as two hex digits; std::setw only applies to
+  // the next insertion, so it is re-issued for every byte.
+  for (uint8_t byte : metadata.encoding) {
+    std::cout << std::setfill('0') << std::setw(2)
+              << static_cast<unsigned int>(byte) << " ";
+  }
+  std::cout << std::dec << "    ";
+  if (exception == A64InstructionException::EncodingUnallocated) {
+    std::cout << "<unknown>";
+  } else {
+    std::cout << metadata.mnemonic << " " << metadata.operandStr;
+  }
+  std::cout << std::endl;
+
+  // Leave std::cout formatted as the caller had it.
+  std::cout.flags(savedFlags);
+  std::cout.fill(savedFill);
+}
+
 std::vector<RegisterFileStructure> A64Architecture::getRegisterFileStructures()
     const {
   return {

diff --git a/src/A64Architecture.hh b/src/A64Architecture.hh
@@ -28,6 +28,10 @@ class A64Architecture : public Architecture {
   /** Determine whether the specified register can be renamed. */
   bool canRename(Register reg) const override;
 
+  /** Print a description of the exception raised by the supplied instruction
+   * to standard output. */
+  void handleException(std::shared_ptr<Instruction> instruction) const override;
+
  private:
   /** A decoding cache, mapping an instruction word to a previously decoded
    * instruction. Instructions are added to the cache as they're decoded, to

diff --git a/src/A64Instruction.cc b/src/A64Instruction.cc
@@ -14,15 +14,8 @@ A64Instruction::A64Instruction(const A64InstructionMetadata& metadata)
   decode();
 }
 
-void A64Instruction::setInstructionAddress(uint64_t address) {
-  instructionAddress = address;
-}
-void A64Instruction::setBranchPrediction(BranchPrediction prediction) {
-  this->prediction = prediction;
-}
-
-InstructionException A64Instruction::getException() const {
-  return static_cast<InstructionException>(exception);
+A64InstructionException A64Instruction::getException() const {
+  return exception;
 }
 
 void A64Instruction::setSourceRegisters(
@@ -101,11 +94,6 @@ std::vector<RegisterValue> A64Instruction::getData() const {
 
 bool A64Instruction::canExecute() const { return (operandsPending == 0); }
 
-bool A64Instruction::hasExecuted() const { return executed; }
-
-void A64Instruction::setCommitReady() { canCommit_ = true; }
-bool A64Instruction::canCommit() const { return canCommit_; }
-
 const span<RegisterValue> A64Instruction::getResults() const {
   return {const_cast<RegisterValue*>(results.data()), destinationRegisterCount};
 }
@@ -114,10 +102,6 @@ bool A64Instruction::isStore() const { return isStore_; }
 bool A64Instruction::isLoad() const { return isLoad_; }
 bool A64Instruction::isBranch() const { return isBranch_; }
 
-uint64_t A64Instruction::getInstructionAddress() const {
-  return instructionAddress;
-}
-
 void A64Instruction::setMemoryAddresses(
     const std::vector<std::pair<uint64_t, uint8_t>>& addresses) {
   memoryData = std::vector<RegisterValue>(addresses.size());
@@ -132,37 +116,19 @@ A64Instruction::getGeneratedAddresses() const {
 std::tuple<bool, uint64_t> A64Instruction::checkEarlyBranchMisprediction()
     const {
   assert(
-      !executed &&
+      !executed_ &&
       "Early branch misprediction check shouldn't be called after execution");
 
   if (!isBranch()) {
     // Instruction isn't a branch; if predicted as taken, it will require a
     // flush
-    return {prediction.taken, instructionAddress + 4};
+    return {prediction_.taken, instructionAddress_ + 4};
   }
 
   // Not enough information to determine this was a misprediction
   return {false, 0};
 }
 
-bool A64Instruction::wasBranchMispredicted() const {
-  assert(executed &&
-         "Branch misprediction check requires instruction to have executed");
-
-  // Flag as mispredicted if taken state was wrongly predicted, or taken and
-  // predicted target is wrong
-  return (branchTaken != prediction.taken ||
-          (branchTaken && prediction.target != branchAddress));
-}
-uint64_t A64Instruction::getBranchAddress() const { return branchAddress; }
-bool A64Instruction::wasBranchTaken() const { return branchTaken; }
-
-void A64Instruction::setSequenceId(uint64_t seqId) { sequenceId = seqId; };
-uint64_t A64Instruction::getSequenceId() const { return sequenceId; };
-
-void A64Instruction::setFlushed() { flushed = true; }
-bool A64Instruction::isFlushed() const { return flushed; }
-
 uint16_t A64Instruction::getGroup() const {
   if (isBranch()) {
     return A64InstructionGroups::BRANCH;
@@ -177,4 +143,8 @@ uint16_t A64Instruction::getGroup() const {
   return A64InstructionGroups::ARITHMETIC;
 }
 
+const A64InstructionMetadata& A64Instruction::getMetadata() const {
+  return metadata;  // `metadata` is itself a reference member, not a copy
+}
+
 }  // namespace simeng
diff --git a/src/A64Instruction.hh b/src/A64Instruction.hh
@@ -41,17 +41,9 @@ class A64Instruction : public Instruction {
    */
   A64Instruction(const A64InstructionMetadata& metadata);
 
-  /** Supply an instruction address. Performed after construction to prevent
-   * values being cached. */
-  void setInstructionAddress(uint64_t address);
-
-  /** Supply a branch prediction. Performed after construction to prevent values
-   * being cached. */
-  void setBranchPrediction(BranchPrediction prediction);
-
   /** Retrieve the identifier for the first exception that occurred during
-   * decoding or execution. */
-  InstructionException getException() const override;
+   * processing this instruction. */
+  virtual A64InstructionException getException() const;
 
   /** Retrieve the source registers this instruction reads. */
   const span<Register> getOperandRegisters() const override;
@@ -82,17 +74,6 @@ class A64Instruction : public Instruction {
   /** Execute the instruction. */
   void execute() override;
 
-  /** Check whether the instruction has executed and has results ready to
-   * write back. */
-  bool hasExecuted() const override;
-
-  /** Mark the instruction as ready to commit. */
-  void setCommitReady() override;
-
-  /** Check whether the instruction has written its values back and is ready to
-   * commit. */
-  bool canCommit() const override;
-
   /** Retrieve register results. */
   const span<RegisterValue> getResults() const override;
 
@@ -114,15 +95,6 @@ class A64Instruction : public Instruction {
    * instruction. */
   std::tuple<bool, uint64_t> checkEarlyBranchMisprediction() const override;
 
-  /** Check for misprediction. */
-  bool wasBranchMispredicted() const override;
-
-  /** Was the branch taken? */
-  bool wasBranchTaken() const override;
-
-  /** Retrieve branch address. */
-  uint64_t getBranchAddress() const override;
-
   /** Is this a store operation? */
   bool isStore() const override;
 
@@ -132,24 +104,12 @@ class A64Instruction : public Instruction {
   /** Is this a branch operation? */
   bool isBranch() const override;
 
-  /** Get this instruction's instruction memory address. */
-  uint64_t getInstructionAddress() const override;
-
-  /** Set this instruction's sequence ID. */
-  void setSequenceId(uint64_t seqId) override;
-
-  /** Retrieve this instruction's sequence ID. */
-  uint64_t getSequenceId() const override;
-
-  /** Mark this instruction as flushed. */
-  void setFlushed() override;
-
-  /** Check whether this instruction has been flushed. */
-  bool isFlushed() const override;
-
   /** Retrieve the instruction group this instruction belongs to. */
   uint16_t getGroup() const override;
 
+  /** Retrieve the instruction's metadata. */
+  const A64InstructionMetadata& getMetadata() const;
+
   /** A special register value representing the zero register. If passed to
    * `setSourceRegisters`/`setDestinationRegisters`, the value will be
    * automatically supplied as zero. */
@@ -166,9 +126,6 @@ class A64Instruction : public Instruction {
   /** A reference to the decoding metadata for this instruction. */
   const A64InstructionMetadata& metadata;
 
-  /** The location in memory of this instruction was decoded at. */
-  uint64_t instructionAddress;
-
   /** An array of source registers. */
   std::array<Register, MAX_SOURCE_REGISTERS> sourceRegisters;
   /** The number of source registers this instruction reads from. */
@@ -215,12 +172,6 @@ class A64Instruction : public Instruction {
    * determine execution readiness. */
   short operandsPending = 0;
 
-  /** Whether or not this instruction has been executed. */
-  bool executed = false;
-
-  /** Whether or not this instruction is ready to commit. */
-  bool canCommit_ = false;
-
   // Execution
   /** Generate an ExecutionNotYetImplemented exception. */
   void executionNYI();
@@ -247,21 +198,6 @@ class A64Instruction : public Instruction {
    * for sending to memory (according to instruction type). Each entry
    * corresponds to a `memoryAddresses` entry. */
   std::vector<RegisterValue> memoryData;
-
-  // Branches
-  /** The predicted branching result. */
-  BranchPrediction prediction;
-  /** A branching address calculated by this instruction during execution. */
-  uint64_t branchAddress;
-  /** Was the branch taken? */
-  bool branchTaken;
-
-  /** This instruction's sequence ID; a higher ID represents a chronologically
-   * newer instruction. */
-  uint64_t sequenceId;
-
-  /** Has this instruction been flushed? */
-  bool flushed = false;
 };
 
 }  // namespace simeng
diff --git a/src/A64InstructionMetadata.cc b/src/A64InstructionMetadata.cc
@@ -14,6 +14,7 @@ A64InstructionMetadata::A64InstructionMetadata(const cs_insn& insn)
       setsFlags(insn.detail->arm64.update_flags),
       writeback(insn.detail->arm64.writeback),
       operandCount(insn.detail->arm64.op_count) {
+  std::memcpy(encoding, insn.bytes, sizeof(encoding));
   // Copy printed output
   std::strncpy(mnemonic, insn.mnemonic, CS_MNEMONIC_SIZE);
   std::strncpy(operandStr, insn.op_str, sizeof(operandStr));
@@ -28,4 +29,18 @@ A64InstructionMetadata::A64InstructionMetadata(const cs_insn& insn)
               sizeof(cs_arm64_op) * operandCount);
 }
 
+A64InstructionMetadata::A64InstructionMetadata(const uint8_t* invalidEncoding)
+    : id(ARM64_INS_INVALID),  // decode() checks this id to flag the encoding
+      opcode(A64Opcode::AArch64_INSTRUCTION_LIST_END),
+      implicitSourceCount(0),
+      implicitDestinationCount(0),
+      groupCount(0),
+      setsFlags(false),
+      writeback(false),
+      operandCount(0) {  // No operands, groups or implicit registers
+  std::memcpy(encoding, invalidEncoding, sizeof(encoding));  // reads 4 bytes
+  mnemonic[0] = '\0';  // No disassembly text for an invalid encoding
+  operandStr[0] = '\0';
+}
+
 }  // namespace simeng
diff --git a/src/A64InstructionMetadata.hh b/src/A64InstructionMetadata.hh
@@ -13,12 +13,13 @@ namespace A64Opcode {
 /** A simplified A64-only version of the Capstone instruction structure. */
 struct A64InstructionMetadata {
  public:
-  A64InstructionMetadata(){};
-
   /** Constructs a metadata object from a Capstone instruction representation.
    */
   A64InstructionMetadata(const cs_insn& insn);
 
+  /** Constructs an invalid metadata object containing the invalid encoding. */
+  A64InstructionMetadata(const uint8_t* invalidEncoding);
+
   static const size_t MAX_OPERAND_STR_LENGTH =
       sizeof(cs_insn::op_str) / sizeof(char);
   static const size_t MAX_IMPLICIT_SOURCES =
@@ -35,6 +36,9 @@ struct A64InstructionMetadata {
   /** The instruction's opcode. */
   unsigned int opcode;
 
+  /** The instruction's encoding. */
+  uint8_t encoding[4];
+
   /** The instruction's mnemonic. */
   char mnemonic[CS_MNEMONIC_SIZE];
   /** The remainder of the instruction's assembly representation. */

diff --git a/src/A64Instruction_decode.cc b/src/A64Instruction_decode.cc
@@ -111,6 +111,11 @@ const Register& filterZR(const Register& reg) {
  * DECODING LOGIC
  *****************/
 void A64Instruction::decode() {
+  if (metadata.id == ARM64_INS_INVALID) {
+    exception = A64InstructionException::EncodingUnallocated;
+    exceptionEncountered_ = true;
+  }
+
   // Extract implicit writes
   for (size_t i = 0; i < metadata.implicitDestinationCount; i++) {
     destinationRegisters[destinationRegisterCount] = csRegToRegister(
@@ -189,9 +194,11 @@ void A64Instruction::decode() {
 }
 
 void A64Instruction::nyi() {
+  exceptionEncountered_ = true;
   exception = A64InstructionException::EncodingNotYetImplemented;
 }
 void A64Instruction::unallocated() {
+  exceptionEncountered_ = true;
   exception = A64InstructionException::EncodingUnallocated;
 }