Use Dyninst for x86_64 classification of types (#34)

* Refactor getRegisterClassFromType to use Dyninst for type processing. This also adds a first pass at handling some cases for vectors of floating-point types.
* Fix comment
* Add placeholders for classifying types other than scalars
* Tidy up includes in allocators.hpp
* Merge getRegisterString and getRegistersString. This also expands vector register handling.
* Use the new interfaces for classification and allocation.
* Remove unused variables in parse_parameters
* Use exact name matching in get_one. Also, don't copy and edit the vector; just return the found function.
* Generate test cases programmatically. This will allow us to more easily expand test cases later when we add pointers, references, aggregate types, and multiple parameters per function. Also, change the test function linkages to C-style so that exact name matching can be done instead of the broken pattern matching that was done previously in `get_one`.
* Rename `Class` to `classification`
* Combine the x87 register allocation checks
* Use dyninst master for build in main.yaml
* Use dyninst master for style tests
* Use dyninst master for docs tests

Co-authored-by: Vanessasaurus <814322+vsoch@users.noreply.github.com>
buildsi · Jul 6, 2021 · 6c07fc4 · 6c07fc4
1 parent 7aff5b8
commit 6c07fc4
Show file tree

Hide file tree

Showing 9 changed files with 714 additions and 410 deletions.
diff --git a/.github/workflows/documentation.yaml b/.github/workflows/documentation.yaml
@@ -10,7 +10,7 @@ env:
 jobs:
   build:
     name: Build and publish documentation
-    container: ghcr.io/autamus/dyninst
+    container: ghcr.io/autamus/dyninst:master
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2

diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
@@ -9,7 +9,7 @@ env:
 
 jobs:
   build:
-    container: ghcr.io/autamus/dyninst
+    container: ghcr.io/autamus/dyninst:master
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2

diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml
@@ -11,7 +11,7 @@ env:
 
 jobs:
   build:
-    container: ghcr.io/autamus/dyninst
+    container: ghcr.io/autamus/dyninst:master
     runs-on: ubuntu-latest
     steps:
 

diff --git a/source/parser/x86_64/allocators.hpp b/source/parser/x86_64/allocators.hpp
@@ -5,12 +5,13 @@
 
 #pragma once
 
+#include <optional>
 #include <stack>
-#include <vector>
+#include <stdexcept>
+#include <string>
 
-#include "Symtab.h"
+#include "Type.h"
 #include "register_class.hpp"
-#include "smeagle/parameter.h"
 
 namespace smeagle::x86_64 {
 
@@ -53,35 +54,55 @@ namespace smeagle::x86_64 {
     }
 
     // Given two registers, return one combined string
-    std::string getRegistersString(std::pair<RegisterClass, RegisterClass> regClasses,
-                                   st::Type *paramType) {
-      std::string locA = this->getRegisterString(regClasses.first, paramType);
-      std::string locB = this->getRegisterString(regClasses.second, paramType);
-
-      // If B is empty (NO_CLASS) then return A
-      if (locB == "") {
-        return locA;
+    std::string getRegisterString(RegisterClass lo, RegisterClass hi, st::Type *paramType) {
+      if (lo == RegisterClass::NO_CLASS) {
+        throw std::runtime_error{"Can't allocate a {NO_CLASS, *}"};
       }
-      return locA + "|" + locB;
-    }
 
-    // Get a string location from a register class
-    std::string getRegisterString(RegisterClass regClass, st::Type *paramType) {
-      std::optional<std::string> regString;
+      if (lo == RegisterClass::MEMORY) {
+        // goes on the stack
+        return fallocator.nextFramebaseFromType(paramType);
+      }
 
-      // If the class is memory, pass the argument on the stack
-      if (regClass == RegisterClass::NO_CLASS) regString = "";
-      if (regClass == RegisterClass::SSE) regString = this->getNextSseRegister();
-      if (regClass == RegisterClass::INTEGER) regString = this->getNextIntRegister();
-      if (regClass == RegisterClass::MEMORY) regString = std::nullopt;
+      if (lo == RegisterClass::INTEGER) {
+        auto reg = getNextIntRegister();
+        if (!reg) {
+          // Ran out of registers, put it on the stack
+          return fallocator.nextFramebaseFromType(paramType);
+        }
+        return reg.value();
+      }
+
+      if (lo == RegisterClass::SSE) {
+        auto reg = getNextSseRegister();
+        if (!reg) {
+          // Ran out of registers, put it on the stack
+          return fallocator.nextFramebaseFromType(paramType);
+        }
+
+        if (hi == RegisterClass::SSEUP) {
+          // If the class is SSEUP, the eightbyte is passed in the next available eightbyte
+          // chunk of the last used vector register.
+        }
+        return reg.value();
+
+        /* TODO
+         *
+         *  For objects allocated in multiple registers, use the syntax '%r1 | %r2 | ...'
+         *  to denote this. This can only happen for aggregates.
+         *
+         *  Use ymm and zmm for larger vector types and check for aliasing
+         */
+      }
 
-      // If we don't have a value, we need a framebase
-      if (!regString.has_value()) {
-        regString = fallocator.nextFramebaseFromType(paramType);
+      // If the class is X87, X87UP or COMPLEX_X87, it is passed in memory
+      if (lo == RegisterClass::X87 || lo == RegisterClass::COMPLEX_X87
+          || hi == RegisterClass::X87UP) {
+        return fallocator.nextFramebaseFromType(paramType);
       }
 
-      // If we've run out of registers we get to this point
-      return regString.value();
+      // This should never be reached
+      throw std::runtime_error{"Unknown classification"};
     }
 
   private:

diff --git a/source/parser/x86_64/classifiers.hpp b/source/parser/x86_64/classifiers.hpp
@@ -0,0 +1,105 @@
+// Copyright 2013-2021 Lawrence Livermore National Security, LLC and other
+// Spack Project Developers. See the top-level COPYRIGHT file for details.
+//
+// SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+#pragma once
+
+#include <utility>
+
+#include "Type.h"
+#include "register_class.hpp"
+
+namespace smeagle::x86_64 {
+
+  struct classification {
+    RegisterClass lo, hi;
+    int pointer_indirections;
+    std::string name;
+  };
+
+  namespace st = Dyninst::SymtabAPI;
+
+  inline classification classify(st::typeScalar *t, int ptr_cnt) {
+    // size in BITS
+    const auto size = t->getSize() * 8;
+
+    if (ptr_cnt > 0) {
+      /*
+       * A pointer to a type X is converted to three abi_typelocation rules:
+       * 	1) 	A Pointer64 type at the base location and the base direction.
+       *
+       * 	2) 	A recursive conversion of application type X, at location “(base location)”
+       * 		(the base location wrapped in parentheses) and with direction ‘Export’.
+       *
+       * 	3)	A recursive conversion of application type X, at location “(base location)”
+       * 	(the base location wrapped in parentheses) and with direction ‘Import’.
+       *
+       * 	For example, an application type int* at base location %rax and direction ‘Import’
+       * would convert to: abi_typelocation(..., Import, Pointer64, “%rax”). abi_typelocation(...,
+       * Export, Integer32, “(%rax)”). abi_typelocation(..., Import, Integer32, “(%rax)”).
+       *
+       */
+      // TODO Should we integrate the directionality calculation here?
+      return {RegisterClass::INTEGER, RegisterClass::NO_CLASS, ptr_cnt, "Pointer64"};
+    }
+
+    // paramType properties have booleans to indicate types
+    auto const &props = t->properties();
+
+    // Integral types
+    if (props.is_integral || props.is_UTF) {
+      if (size > 128) {
+        return {RegisterClass::SSE, RegisterClass::SSEUP, ptr_cnt,
+                "IntegerVec" + std::to_string(size)};
+      }
+      if (size == 128) {
+        // __int128 is treated as struct{long,long};
+        // This is NOT correct, but we don't handle aggregates yet.
+        // How do we differentiate between __int128 and __m128i?
+        return {RegisterClass::MEMORY, RegisterClass::NO_CLASS, ptr_cnt, "Integer128"};
+      }
+
+      // _Decimal32, _Decimal64, and __m64 are supposed to be SSE.
+      // TODO How can we differentiate them here?
+      return {RegisterClass::INTEGER, RegisterClass::NO_CLASS, ptr_cnt,
+              "Integer" + std::to_string(size)};
+    }
+
+    if (props.is_floating_point) {
+      if (props.is_complex_float) {
+        if (size == 128) {
+          // x87 `complex long double`
+          return {RegisterClass::COMPLEX_X87, RegisterClass::NO_CLASS, ptr_cnt, "CplxFloat128"};
+        }
+        // This is NOT correct.
+        // TODO It should be struct{T r,i;};, but we don't handle aggregates yet
+        std::cout << "CplxFloat: " << t->getName() << " [" << t->getSize() << "]" << std::endl;
+        return {RegisterClass::MEMORY, RegisterClass::NO_CLASS, ptr_cnt,
+                "CplxFloat" + std::to_string(size / 2)};
+      }
+      if (size <= 64) {
+        // 32- or 64-bit floats
+        return {RegisterClass::SSE, RegisterClass::SSEUP, ptr_cnt, "Float" + std::to_string(size)};
+      }
+      if (size == 128) {
+        // x87 `long double` OR __m128[d]
+        // TODO: How do we differentiate the vector type here? Dyninst should help us
+        return {RegisterClass::X87, RegisterClass::X87UP, ptr_cnt, "Float128"};
+      }
+      if (size > 128) {
+        return {RegisterClass::SSE, RegisterClass::SSEUP, ptr_cnt,
+                "FloatVec" + std::to_string(size)};
+      }
+    }
+
+    throw std::runtime_error{"Unknown scalar type"};
+  }
+
+  inline classification classify(st::typeStruct *) { return {}; }
+  inline classification classify(st::typeUnion *) { return {}; }
+  inline classification classify(st::typeArray *) { return {}; }
+  inline classification classify(st::typeEnum *) { return {}; }
+  inline classification classify(st::typeFunction *) { return {}; }
+
+}  // namespace smeagle::x86_64
diff --git a/source/parser/x86_64/x86_64.cpp b/source/parser/x86_64/x86_64.cpp
@@ -15,6 +15,7 @@
 #include "Symtab.h"
 #include "Type.h"
 #include "allocators.hpp"
+#include "classifiers.hpp"
 #include "smeagle/parameter.h"
 #include "type_checker.hpp"
 
@@ -44,7 +45,7 @@ namespace smeagle::x86_64 {
   // Get directionality from argument type
   std::string getDirectionalityFromType(st::Type *paramType) {
     // Remove any top-level typedef
-    // NB: We can't call `dedecorate` here as we need to keep
+    // NB: We can't call `unwrap_underlying_type` here as we need to keep
     //     any reference type for the call to `is_indirect` to work.
     paramType = remove_typedef(paramType);
     auto dataClass = paramType->getDataClass();
@@ -67,85 +68,37 @@ namespace smeagle::x86_64 {
   }
 
   // Get register class given the argument type
-  std::pair<RegisterClass, RegisterClass> getRegisterClassFromType(st::Type *paramType) {
-    // Remove top-level typedef
-    paramType = remove_typedef(paramType);
+  classification getRegisterClassFromType(st::Type *paramType) {
+    auto [base_type, ptr_cnt] = unwrap_underlying_type(paramType);
 
-    // If it's a pointer, remove it
-    if (is_indirect(paramType->getDataClass())) {
-      paramType = deref(paramType);
-      paramType = remove_typedef(paramType);
+    if (auto *t = base_type->getScalarType()) {
+      return classify(t, ptr_cnt);
     }
-
-    // Now get a string version of the type
-    std::string paramTypeString = paramType->getName();
-
-    // Signed and unsigned Bool,char,short,int,long,long long, and pointers
-    std::regex checkinteger("(int|char|short|long|pointer|bool)");
-
-    // Is it a constant?
-    std::regex checkconstant("(const)");
-
-    // float,double,_Decimal32,_Decimal64and__m64are in class SSE.
-    std::regex checksse("(double|decimal|float|Decimal|m64)");
-
-    // __float128 and__m128are split into two halves, least significant SSE,
-    // most significant in SSEUP.
-    std::regex checksseup("(m128|float128)");
-
-    // The 64-bit mantissa of arguments of type long double belongs to classX87
-    // the 16-bit exponent plus 6 bytes of padding belongs to class X87UP
-    std::regex checklongdouble("(long|double)");
-
-    // A variable of type complex long double is classified as type COMPLEX_X87
-    std::regex checkcomplexlong("(complex long double)");
-
-    // We will return a vector of classes
-    std::pair<RegisterClass, RegisterClass> regClasses;
-
-    // Does the type string match one of the types?
-    bool isinteger = (std::regex_search(paramTypeString, checkinteger));
-    bool isconst = (std::regex_search(paramTypeString, checkconstant));
-    bool issse = (std::regex_search(paramTypeString, checksse));
-    bool issseup = (std::regex_search(paramTypeString, checksseup));
-    bool islongdouble = (std::regex_search(paramTypeString, checklongdouble));
-    bool iscomplexlong = (std::regex_search(paramTypeString, checkcomplexlong));
-
-    // A parameter can have more than one class!
-    if (isconst) {
-      regClasses = {RegisterClass::MEMORY, RegisterClass::NO_CLASS};
-    }
-    if (issseup) {
-      regClasses = {RegisterClass::SSE, RegisterClass::SSEUP};
+    if (auto *t = base_type->getStructType()) {
+      // page 18 of abi document
+      return classify(t);
     }
-    if (issse) {
-      regClasses = {RegisterClass::SSE, RegisterClass::NO_CLASS};
+    if (auto *t = base_type->getUnionType()) {
+      return classify(t);
     }
-    if (isinteger) {
-      regClasses = {RegisterClass::INTEGER, RegisterClass::NO_CLASS};
+    if (auto *t = base_type->getArrayType()) {
+      return classify(t);
     }
-    if (iscomplexlong) {
-      regClasses = {RegisterClass::COMPLEX_X87, RegisterClass::NO_CLASS};
+    if (auto *t = base_type->getEnumType()) {
+      return classify(t);
     }
-    if (islongdouble) {
-      regClasses = {RegisterClass::X87, RegisterClass::X87UP};
+    if (auto *t = base_type->getFunctionType()) {
+      // This can only be a function pointer
+      return classify(t);
     }
 
-    // The classification of aggregate (structures and arrays) and union types worksas follows:
-    // TODO need to look for struct / arrays?
-    // page 18 of abi document
-    return regClasses;
+    throw std::runtime_error{"Unknown parameter type" + paramType->getName()};
   }
 
   std::vector<parameter> parse_parameters(st::Symbol *symbol) {
-    // Get the name and type of the symbol
-    std::string sname = symbol->getMangledName();
     st::Function *func = symbol->getFunction();
     std::vector<st::localVar *> params;
 
-    // The function name looks equivalent to the symbol name
-    std::string fname = func->getName();
-
     std::vector<parameter> typelocs;
 
     // Get parameters with types and names
@@ -158,7 +111,7 @@ namespace smeagle::x86_64 {
         st::Type *paramType = param->getType();
 
         // Get register class based on type
-        std::pair<RegisterClass, RegisterClass> regClasses = getRegisterClassFromType(paramType);
+        classification c = getRegisterClassFromType(paramType);
 
         // Get the directionality (export or import) given the type
         std::string direction = getDirectionalityFromType(paramType);
@@ -169,8 +122,8 @@ namespace smeagle::x86_64 {
         // Create a new typelocation to parse later
         parameter p;
         p.name = paramName;
-        p.type = paramType->getName();
-        p.location = allocator.getRegistersString(regClasses, paramType);
+        p.type = c.name;
+        p.location = allocator.getRegisterString(c.lo, c.hi, paramType);
         p.direction = direction;
         typelocs.push_back(p);
       }