Skip to content

Commit

Permalink
Use Dyninst for x86_64 classification of types (#34)
Browse files Browse the repository at this point in the history
* Refactor getRegisterClassFromType to use Dyninst for type processing
This also adds a first pass at handling some cases for vectors of
floating-point types.

* Fix comment
* Add placeholders for classifying types other than scalars
* Tidy up includes in allocators.hpp
* Merge getRegisterString and getRegistersString
This also expands vector register handling.
* Use the new interfaces for classification and allocation.
* Remove unused variables in parse_parameters
* Use exact name matching in get_one
Also, don't copy and edit the vector; just return the found function.
* Generate test cases programmatically

This will allow us to more easily expand test cases later when we add
pointers, references, aggregate types, and multiple parameters per
function. Also, change the test function linkages to C-style so that
exact name matching can be done instead of the broken pattern matching
that was done previously in `get_one`.

* Rename `Class` to `classification`
* Combine the x87 register allocation checks
* Use dyninst master for build in main.yaml
* Use dyninst master for style tests
* Use dyninst master for docs tests

Co-authored-by: Vanessasaurus <814322+vsoch@users.noreply.github.com>
  • Loading branch information
hainest and vsoch committed Jul 6, 2021
1 parent 7aff5b8 commit 6c07fc4
Show file tree
Hide file tree
Showing 9 changed files with 714 additions and 410 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/documentation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ env:
jobs:
build:
name: Build and publish documentation
container: ghcr.io/autamus/dyninst
container: ghcr.io/autamus/dyninst:master
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ env:

jobs:
build:
container: ghcr.io/autamus/dyninst
container: ghcr.io/autamus/dyninst:master
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/style.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ env:

jobs:
build:
container: ghcr.io/autamus/dyninst
container: ghcr.io/autamus/dyninst:master
runs-on: ubuntu-latest
steps:

Expand Down
73 changes: 47 additions & 26 deletions source/parser/x86_64/allocators.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@

#pragma once

#include <optional>
#include <stack>
#include <vector>
#include <stdexcept>
#include <string>

#include "Symtab.h"
#include "Type.h"
#include "register_class.hpp"
#include "smeagle/parameter.h"

namespace smeagle::x86_64 {

Expand Down Expand Up @@ -53,35 +54,55 @@ namespace smeagle::x86_64 {
}

// Given two registers, return one combined string
std::string getRegistersString(std::pair<RegisterClass, RegisterClass> regClasses,
st::Type *paramType) {
std::string locA = this->getRegisterString(regClasses.first, paramType);
std::string locB = this->getRegisterString(regClasses.second, paramType);

// If B is empty (NO_CLASS) then return A
if (locB == "") {
return locA;
std::string getRegisterString(RegisterClass lo, RegisterClass hi, st::Type *paramType) {
if (lo == RegisterClass::NO_CLASS) {
throw std::runtime_error{"Can't allocate a {NO_CLASS, *}"};
}
return locA + "|" + locB;
}

// Get a string location from a register class
std::string getRegisterString(RegisterClass regClass, st::Type *paramType) {
std::optional<std::string> regString;
if (lo == RegisterClass::MEMORY) {
// goes on the stack
return fallocator.nextFramebaseFromType(paramType);
}

// If the class is memory, pass the argument on the stack
if (regClass == RegisterClass::NO_CLASS) regString = "";
if (regClass == RegisterClass::SSE) regString = this->getNextSseRegister();
if (regClass == RegisterClass::INTEGER) regString = this->getNextIntRegister();
if (regClass == RegisterClass::MEMORY) regString = std::nullopt;
if (lo == RegisterClass::INTEGER) {
auto reg = getNextIntRegister();
if (!reg) {
// Ran out of registers, put it on the stack
return fallocator.nextFramebaseFromType(paramType);
}
return reg.value();
}

if (lo == RegisterClass::SSE) {
auto reg = getNextSseRegister();
if (!reg) {
// Ran out of registers, put it on the stack
return fallocator.nextFramebaseFromType(paramType);
}

if (hi == RegisterClass::SSEUP) {
// If the class is SSEUP, the eightbyte is passed in the next available eightbyte
// chunk of the last used vector register.
}
return reg.value();

/* TODO
*
* For objects allocated in multiple registers, use the syntax '%r1 | %r2 | ...'
* to denote this. This can only happen for aggregates.
*
* Use ymm and zmm for larger vector types and check for aliasing
*/
}

// If we don't have a value, we need a framebase
if (!regString.has_value()) {
regString = fallocator.nextFramebaseFromType(paramType);
// If the class is X87, X87UP or COMPLEX_X87, it is passed in memory
if (lo == RegisterClass::X87 || lo == RegisterClass::COMPLEX_X87
|| hi == RegisterClass::X87UP) {
return fallocator.nextFramebaseFromType(paramType);
}

// If we've run out of registers we get to this point
return regString.value();
// This should never be reached
throw std::runtime_error{"Unknown classification"};
}

private:
Expand Down
105 changes: 105 additions & 0 deletions source/parser/x86_64/classifiers.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// Copyright 2013-2021 Lawrence Livermore National Security, LLC and other
// Spack Project Developers. See the top-level COPYRIGHT file for details.
//
// SPDX-License-Identifier: (Apache-2.0 OR MIT)

#pragma once

#include <stdexcept>
#include <string>
#include <utility>

#include "Type.h"
#include "register_class.hpp"

namespace smeagle::x86_64 {

// Outcome of classifying a single parameter type.
struct classification {
  // Register class pair; presumably the classes of the low and high
  // eightbytes of the value -- confirm against the allocator.
  RegisterClass lo;
  RegisterClass hi;
  // Pointer-indirection count recorded by the classifier for this type.
  int pointer_indirections;
  // Size-qualified type label, e.g. "Pointer64" or "Integer32".
  std::string name;
};

namespace st = Dyninst::SymtabAPI;

/**
 * Classify a scalar parameter type into a pair of x86-64 register classes.
 *
 * This function has no side effects: it only inspects `t` and builds the
 * result, so it is safe to call while the caller is emitting its own output.
 *
 * @param t        Scalar type to classify. Its size and property flags
 *                 (`is_integral`, `is_UTF`, `is_floating_point`,
 *                 `is_complex_float`) drive the decision. Must not be null.
 * @param ptr_cnt  Number of pointer indirections around `t`. Any positive
 *                 value short-circuits to a 64-bit pointer classification.
 * @return         The {lo, hi} register classes, `ptr_cnt`, and a
 *                 size-qualified type name such as "Integer32" or "Float64".
 * @throws std::runtime_error ("Unknown scalar type") when `t` is neither
 *                 integral/UTF nor a floating-point type of a handled size
 *                 (floating-point sizes strictly between 64 and 128 bits are
 *                 not handled).
 */
inline classification classify(st::typeScalar *t, int ptr_cnt) {
  // getSize() is scaled by 8 so that every threshold and name below is in BITS.
  const auto size = t->getSize() * 8;

  if (ptr_cnt > 0) {
    /*
     * A pointer to a type X is converted to three abi_typelocation rules:
     *  1) A Pointer64 type at the base location and the base direction.
     *
     *  2) A recursive conversion of application type X, at location "(base location)"
     *     (the base location wrapped in parentheses) and with direction 'Export'.
     *
     *  3) A recursive conversion of application type X, at location "(base location)"
     *     (the base location wrapped in parentheses) and with direction 'Import'.
     *
     * For example, an application type int* at base location %rax and direction 'Import'
     * would convert to: abi_typelocation(..., Import, Pointer64, "%rax"). abi_typelocation(...,
     * Export, Integer32, "(%rax)"). abi_typelocation(..., Import, Integer32, "(%rax)").
     *
     * Only rule (1) is produced here; the pointee is not classified.
     */
    // TODO Should we integrate the directionality calculation here?
    return {RegisterClass::INTEGER, RegisterClass::NO_CLASS, ptr_cnt, "Pointer64"};
  }

  // The type's properties carry boolean flags describing what kind of scalar it is
  auto const &props = t->properties();

  // Integral types
  if (props.is_integral || props.is_UTF) {
    if (size > 128) {
      return {RegisterClass::SSE, RegisterClass::SSEUP, ptr_cnt,
              "IntegerVec" + std::to_string(size)};
    }
    if (size == 128) {
      // __int128 is treated as struct{long,long};
      // This is NOT correct, but we don't handle aggregates yet.
      // How do we differentiate between __int128 and __m128i?
      return {RegisterClass::MEMORY, RegisterClass::NO_CLASS, ptr_cnt, "Integer128"};
    }

    // _Decimal32, _Decimal64, and __m64 are supposed to be SSE.
    // TODO How can we differentiate them here?
    return {RegisterClass::INTEGER, RegisterClass::NO_CLASS, ptr_cnt,
            "Integer" + std::to_string(size)};
  }

  if (props.is_floating_point) {
    if (props.is_complex_float) {
      if (size == 128) {
        // x87 `complex long double`
        return {RegisterClass::COMPLEX_X87, RegisterClass::NO_CLASS, ptr_cnt, "CplxFloat128"};
      }
      // This is NOT correct.
      // TODO It should be struct{T r,i;};, but we don't handle aggregates yet
      // The name reports the size of one component, hence size / 2.
      return {RegisterClass::MEMORY, RegisterClass::NO_CLASS, ptr_cnt,
              "CplxFloat" + std::to_string(size / 2)};
    }
    if (size <= 64) {
      // 32- or 64-bit floats
      // NOTE(review): a value this small occupies a single eightbyte, so `hi`
      // looks like it should be NO_CLASS rather than SSEUP -- confirm against
      // the ABI and the allocator before changing.
      return {RegisterClass::SSE, RegisterClass::SSEUP, ptr_cnt, "Float" + std::to_string(size)};
    }
    if (size == 128) {
      // x87 `long double` OR __m128[d]
      // TODO: How do we differentiate the vector type here? Dyninst should help us
      return {RegisterClass::X87, RegisterClass::X87UP, ptr_cnt, "Float128"};
    }
    if (size > 128) {
      return {RegisterClass::SSE, RegisterClass::SSEUP, ptr_cnt,
              "FloatVec" + std::to_string(size)};
    }
    // Floating-point sizes in (64, 128) fall through to the error below.
  }

  throw std::runtime_error{"Unknown scalar type"};
}

// Placeholder classifiers for the non-scalar type kinds. None of them inspects
// its argument yet; each one hands back a value-initialized `classification`.
inline classification classify(st::typeStruct *) {
  return classification{};
}
inline classification classify(st::typeUnion *) {
  return classification{};
}
inline classification classify(st::typeArray *) {
  return classification{};
}
inline classification classify(st::typeEnum *) {
  return classification{};
}
inline classification classify(st::typeFunction *) {
  return classification{};
}

} // namespace smeagle::x86_64
91 changes: 22 additions & 69 deletions source/parser/x86_64/x86_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "Symtab.h"
#include "Type.h"
#include "allocators.hpp"
#include "classifiers.hpp"
#include "smeagle/parameter.h"
#include "type_checker.hpp"

Expand Down Expand Up @@ -44,7 +45,7 @@ namespace smeagle::x86_64 {
// Get directionality from argument type
std::string getDirectionalityFromType(st::Type *paramType) {
// Remove any top-level typedef
// NB: We can't call `dedecorate` here as we need to keep
// NB: We can't call `unwrap_underlying_type` here as we need to keep
// any reference type for the call to `is_indirect` to work.
paramType = remove_typedef(paramType);
auto dataClass = paramType->getDataClass();
Expand All @@ -67,85 +68,37 @@ namespace smeagle::x86_64 {
}

// Get register class given the argument type
std::pair<RegisterClass, RegisterClass> getRegisterClassFromType(st::Type *paramType) {
// Remove top-level typedef
paramType = remove_typedef(paramType);
classification getRegisterClassFromType(st::Type *paramType) {
auto [base_type, ptr_cnt] = unwrap_underlying_type(paramType);

// If it's a pointer, remove it
if (is_indirect(paramType->getDataClass())) {
paramType = deref(paramType);
paramType = remove_typedef(paramType);
if (auto *t = base_type->getScalarType()) {
return classify(t, ptr_cnt);
}

// Now get a string version of the type
std::string paramTypeString = paramType->getName();

// Signed and unsigned Bool,char,short,int,long,long long, and pointers
std::regex checkinteger("(int|char|short|long|pointer|bool)");

// Is it a constant?
std::regex checkconstant("(const)");

// float, double, _Decimal32, _Decimal64 and __m64 are in class SSE.
std::regex checksse("(double|decimal|float|Decimal|m64)");

// __float128 and __m128 are split into two halves, least significant SSE,
// most significant in SSEUP.
std::regex checksseup("(m128|float128)");

// The 64-bit mantissa of arguments of type long double belongs to class X87
// the 16-bit exponent plus 6 bytes of padding belongs to class X87UP
std::regex checklongdouble("(long|double)");

// A variable of type complex long double is classified as type COMPLEX_X87
std::regex checkcomplexlong("(complex long double)");

// We will return a vector of classes
std::pair<RegisterClass, RegisterClass> regClasses;

// Does the type string match one of the types?
bool isinteger = (std::regex_search(paramTypeString, checkinteger));
bool isconst = (std::regex_search(paramTypeString, checkconstant));
bool issse = (std::regex_search(paramTypeString, checksse));
bool issseup = (std::regex_search(paramTypeString, checksseup));
bool islongdouble = (std::regex_search(paramTypeString, checklongdouble));
bool iscomplexlong = (std::regex_search(paramTypeString, checkcomplexlong));

// A parameter can have more than one class!
if (isconst) {
regClasses = {RegisterClass::MEMORY, RegisterClass::NO_CLASS};
}
if (issseup) {
regClasses = {RegisterClass::SSE, RegisterClass::SSEUP};
if (auto *t = base_type->getStructType()) {
// page 18 of abi document
return classify(t);
}
if (issse) {
regClasses = {RegisterClass::SSE, RegisterClass::NO_CLASS};
if (auto *t = base_type->getUnionType()) {
return classify(t);
}
if (isinteger) {
regClasses = {RegisterClass::INTEGER, RegisterClass::NO_CLASS};
if (auto *t = base_type->getArrayType()) {
return classify(t);
}
if (iscomplexlong) {
regClasses = {RegisterClass::COMPLEX_X87, RegisterClass::NO_CLASS};
if (auto *t = base_type->getEnumType()) {
return classify(t);
}
if (islongdouble) {
regClasses = {RegisterClass::X87, RegisterClass::X87UP};
if (auto *t = base_type->getFunctionType()) {
// This can only be a function pointer
return classify(t);
}

// The classification of aggregate (structures and arrays) and union types works as follows:
// TODO need to look for struct / arrays?
// page 18 of abi document
return regClasses;
throw std::runtime_error{"Unknown parameter type" + paramType->getName()};
}

std::vector<parameter> parse_parameters(st::Symbol *symbol) {
// Get the name and type of the symbol
std::string sname = symbol->getMangledName();
st::Function *func = symbol->getFunction();
std::vector<st::localVar *> params;

// The function name looks equivalent to the symbol name
std::string fname = func->getName();

std::vector<parameter> typelocs;

// Get parameters with types and names
Expand All @@ -158,7 +111,7 @@ namespace smeagle::x86_64 {
st::Type *paramType = param->getType();

// Get register class based on type
std::pair<RegisterClass, RegisterClass> regClasses = getRegisterClassFromType(paramType);
classification c = getRegisterClassFromType(paramType);

// Get the directionality (export or import) given the type
std::string direction = getDirectionalityFromType(paramType);
Expand All @@ -169,8 +122,8 @@ namespace smeagle::x86_64 {
// Create a new typelocation to parse later
parameter p;
p.name = paramName;
p.type = paramType->getName();
p.location = allocator.getRegistersString(regClasses, paramType);
p.type = c.name;
p.location = allocator.getRegisterString(c.lo, c.hi, paramType);
p.direction = direction;
typelocs.push_back(p);
}
Expand Down

0 comments on commit 6c07fc4

Please sign in to comment.