Skip to content

Commit

Permalink
[X86] Sync AESENC/DEC Key Locker builtins with gcc.
Browse files Browse the repository at this point in the history
For the wide builtins, pass a single input pointer and a single output pointer to
the builtins. Emit the GEPs and input loads from CGBuiltin.
  • Loading branch information
topperc committed Oct 4, 2020
1 parent 230c57b commit a02b449
Show file tree
Hide file tree
Showing 5 changed files with 587 additions and 153 deletions.
26 changes: 10 additions & 16 deletions clang/include/clang/Basic/BuiltinsX86.def
Expand Up @@ -1902,22 +1902,16 @@ TARGET_BUILTIN(__builtin_ia32_enqcmds, "Ucv*vC*", "n", "enqcmd")

// KEY LOCKER
TARGET_BUILTIN(__builtin_ia32_loadiwkey, "vV2OiV2OiV2OiUi", "nV:128:", "kl")
TARGET_BUILTIN(__builtin_ia32_encodekey128_u32,
"UiUiV2Oiv*", "nV:128:", "kl")
TARGET_BUILTIN(__builtin_ia32_encodekey256_u32,
"UiUiV2OiV2Oiv*", "nV:128:", "kl")
TARGET_BUILTIN(__builtin_ia32_aesenc128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl")
TARGET_BUILTIN(__builtin_ia32_aesenc256kl, "UcV2Oi*V2OivC*", "nV:128:", "kl")
TARGET_BUILTIN(__builtin_ia32_aesdec128kl, "UcV2Oi*V2OivC*", "nV:128:", "kl")
TARGET_BUILTIN(__builtin_ia32_aesdec256kl, "UcV2Oi*V2OivC*", "nV:128:", "kl")
TARGET_BUILTIN(__builtin_ia32_aesencwide128kl,
"UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl")
TARGET_BUILTIN(__builtin_ia32_aesencwide256kl,
"UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl")
TARGET_BUILTIN(__builtin_ia32_aesdecwide128kl,
"UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl")
TARGET_BUILTIN(__builtin_ia32_aesdecwide256kl,
"UcvC*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2Oi*V2OiV2OiV2OiV2OiV2OiV2OiV2OiV2Oi", "nV:128:", "kl,widekl")
TARGET_BUILTIN(__builtin_ia32_encodekey128_u32, "UiUiV2Oiv*", "nV:128:", "kl")
TARGET_BUILTIN(__builtin_ia32_encodekey256_u32, "UiUiV2OiV2Oiv*", "nV:128:", "kl")
TARGET_BUILTIN(__builtin_ia32_aesenc128kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
TARGET_BUILTIN(__builtin_ia32_aesenc256kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
TARGET_BUILTIN(__builtin_ia32_aesdec128kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
TARGET_BUILTIN(__builtin_ia32_aesdec256kl_u8, "UcV2Oi*V2OivC*", "nV:128:", "kl")
TARGET_BUILTIN(__builtin_ia32_aesencwide128kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")
TARGET_BUILTIN(__builtin_ia32_aesencwide256kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")
TARGET_BUILTIN(__builtin_ia32_aesdecwide128kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")
TARGET_BUILTIN(__builtin_ia32_aesdecwide256kl_u8, "UcV2Oi*V2OiC*vC*", "nV:128:", "kl,widekl")

// SERIALIZE
TARGET_BUILTIN(__builtin_ia32_serialize, "v", "n", "serialize")
Expand Down
114 changes: 53 additions & 61 deletions clang/lib/CodeGen/CGBuiltin.cpp
Expand Up @@ -14070,75 +14070,67 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,

return Builder.CreateExtractValue(Call, 0);
}
case X86::BI__builtin_ia32_aesenc128kl:
case X86::BI__builtin_ia32_aesdec128kl:
case X86::BI__builtin_ia32_aesenc256kl:
case X86::BI__builtin_ia32_aesdec256kl:
case X86::BI__builtin_ia32_aesencwide128kl:
case X86::BI__builtin_ia32_aesdecwide128kl:
case X86::BI__builtin_ia32_aesencwide256kl:
case X86::BI__builtin_ia32_aesdecwide256kl: {
int FirstReturnOp;
int ResultCount;
SmallVector<Value*, 9> InOps;
unsigned ID;

case X86::BI__builtin_ia32_aesenc128kl_u8:
case X86::BI__builtin_ia32_aesdec128kl_u8:
case X86::BI__builtin_ia32_aesenc256kl_u8:
case X86::BI__builtin_ia32_aesdec256kl_u8: {
Intrinsic::ID IID;
switch (BuiltinID) {
default: llvm_unreachable("Unsupported intrinsic!");
case X86::BI__builtin_ia32_aesenc128kl:
case X86::BI__builtin_ia32_aesdec128kl:
case X86::BI__builtin_ia32_aesenc256kl:
case X86::BI__builtin_ia32_aesdec256kl: {
InOps = {Ops[1], Ops[2]};
FirstReturnOp = 0;
ResultCount = 1;
switch (BuiltinID) {
case X86::BI__builtin_ia32_aesenc128kl:
ID = Intrinsic::x86_aesenc128kl;
break;
case X86::BI__builtin_ia32_aesdec128kl:
ID = Intrinsic::x86_aesdec128kl;
break;
case X86::BI__builtin_ia32_aesenc256kl:
ID = Intrinsic::x86_aesenc256kl;
break;
case X86::BI__builtin_ia32_aesdec256kl:
ID = Intrinsic::x86_aesdec256kl;
break;
}
default: llvm_unreachable("Unexpected builtin");
case X86::BI__builtin_ia32_aesenc128kl_u8:
IID = Intrinsic::x86_aesenc128kl;
break;
case X86::BI__builtin_ia32_aesdec128kl_u8:
IID = Intrinsic::x86_aesdec128kl;
break;
case X86::BI__builtin_ia32_aesenc256kl_u8:
IID = Intrinsic::x86_aesenc256kl;
break;
case X86::BI__builtin_ia32_aesdec256kl_u8:
IID = Intrinsic::x86_aesdec256kl;
break;
}
case X86::BI__builtin_ia32_aesencwide128kl:
case X86::BI__builtin_ia32_aesdecwide128kl:
case X86::BI__builtin_ia32_aesencwide256kl:
case X86::BI__builtin_ia32_aesdecwide256kl: {
InOps = {Ops[0], Ops[9], Ops[10], Ops[11], Ops[12], Ops[13],
Ops[14], Ops[15], Ops[16]};
FirstReturnOp = 1;
ResultCount = 8;
switch (BuiltinID) {
case X86::BI__builtin_ia32_aesencwide128kl:
ID = Intrinsic::x86_aesencwide128kl;
break;
case X86::BI__builtin_ia32_aesdecwide128kl:
ID = Intrinsic::x86_aesdecwide128kl;
break;
case X86::BI__builtin_ia32_aesencwide256kl:
ID = Intrinsic::x86_aesencwide256kl;
break;
case X86::BI__builtin_ia32_aesdecwide256kl:
ID = Intrinsic::x86_aesdecwide256kl;
break;
}

Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});

Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
Ops[0]);

return Builder.CreateExtractValue(Call, 0);
}
case X86::BI__builtin_ia32_aesencwide128kl_u8:
case X86::BI__builtin_ia32_aesdecwide128kl_u8:
case X86::BI__builtin_ia32_aesencwide256kl_u8:
case X86::BI__builtin_ia32_aesdecwide256kl_u8: {
Intrinsic::ID IID;
switch (BuiltinID) {
case X86::BI__builtin_ia32_aesencwide128kl_u8:
IID = Intrinsic::x86_aesencwide128kl;
break;
case X86::BI__builtin_ia32_aesdecwide128kl_u8:
IID = Intrinsic::x86_aesdecwide128kl;
break;
case X86::BI__builtin_ia32_aesencwide256kl_u8:
IID = Intrinsic::x86_aesencwide256kl;
break;
case X86::BI__builtin_ia32_aesdecwide256kl_u8:
IID = Intrinsic::x86_aesdecwide256kl;
break;
}

Value *InOps[9];
InOps[0] = Ops[2];
for (int i = 0; i != 8; ++i) {
Value *Ptr = Builder.CreateConstGEP1_32(Ops[1], i);
InOps[i + 1] = Builder.CreateAlignedLoad(Ptr, Align(16));
}

Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), InOps);
Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);

for (int i = 0; i < ResultCount; ++i) {
Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, i + 1),
Ops[FirstReturnOp + i]);
for (int i = 0; i != 8; ++i) {
Value *Extract = Builder.CreateExtractValue(Call, i + 1);
Value *Ptr = Builder.CreateConstGEP1_32(Ops[0], i);
Builder.CreateAlignedStore(Extract, Ptr, Align(16));
}

return Builder.CreateExtractValue(Call, 0);
Expand Down
84 changes: 12 additions & 72 deletions clang/lib/Headers/keylockerintrin.h
Expand Up @@ -211,7 +211,7 @@ _mm_encodekey256_u32(unsigned int __htype, __m128i __key_lo, __m128i __key_hi,
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesenc128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
return __builtin_ia32_aesenc128kl(__odata, __idata, __h);
return __builtin_ia32_aesenc128kl_u8((__v2di *)__odata, (__v2di)__idata, __h);
}

/// The AESENC256KL performs 14 rounds of AES to encrypt the __idata using
Expand Down Expand Up @@ -248,7 +248,7 @@ _mm_aesenc128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesenc256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
return __builtin_ia32_aesenc256kl(__odata, __idata, __h);
return __builtin_ia32_aesenc256kl_u8((__v2di *)__odata, (__v2di)__idata, __h);
}

/// The AESDEC128KL performs 10 rounds of AES to decrypt the __idata using
Expand Down Expand Up @@ -285,7 +285,7 @@ _mm_aesenc256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesdec128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
return __builtin_ia32_aesdec128kl(__odata, __idata, __h);
return __builtin_ia32_aesdec128kl_u8((__v2di *)__odata, (__v2di)__idata, __h);
}

/// The AESDEC256KL performs 14 rounds of AES to decrypt the __idata using
Expand Down Expand Up @@ -322,7 +322,7 @@ _mm_aesdec128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
return __builtin_ia32_aesdec256kl(__odata, __idata, __h);
return __builtin_ia32_aesdec256kl_u8((__v2di *)__odata, (__v2di)__idata, __h);
}

#undef __DEFAULT_FN_ATTRS
Expand Down Expand Up @@ -374,23 +374,8 @@ _mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) {
return __builtin_ia32_aesencwide128kl(__h,
__odata,
__odata + 1,
__odata + 2,
__odata + 3,
__odata + 4,
__odata + 5,
__odata + 6,
__odata + 7,
__idata[0],
__idata[1],
__idata[2],
__idata[3],
__idata[4],
__idata[5],
__idata[6],
__idata[7]);
return __builtin_ia32_aesencwide128kl_u8((__v2di *)__odata,
(const __v2di *)__idata, __h);
}

/// Encrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle
Expand Down Expand Up @@ -429,23 +414,8 @@ _mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void*
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) {
return __builtin_ia32_aesencwide256kl(__h,
__odata,
__odata + 1,
__odata + 2,
__odata + 3,
__odata + 4,
__odata + 5,
__odata + 6,
__odata + 7,
__idata[0],
__idata[1],
__idata[2],
__idata[3],
__idata[4],
__idata[5],
__idata[6],
__idata[7]);
return __builtin_ia32_aesencwide256kl_u8((__v2di *)__odata,
(const __v2di *)__idata, __h);
}

/// Decrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle
Expand Down Expand Up @@ -484,23 +454,8 @@ _mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void*
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) {
return __builtin_ia32_aesdecwide128kl(__h,
__odata,
__odata + 1,
__odata + 2,
__odata + 3,
__odata + 4,
__odata + 5,
__odata + 6,
__odata + 7,
__idata[0],
__idata[1],
__idata[2],
__idata[3],
__idata[4],
__idata[5],
__idata[6],
__idata[7]);
return __builtin_ia32_aesdecwide128kl_u8((__v2di *)__odata,
(const __v2di *)__idata, __h);
}

/// Decrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle
Expand Down Expand Up @@ -539,23 +494,8 @@ _mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void*
/// \endoperation
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_mm_aesdecwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) {
return __builtin_ia32_aesdecwide256kl(__h,
__odata,
__odata + 1,
__odata + 2,
__odata + 3,
__odata + 4,
__odata + 5,
__odata + 6,
__odata + 7,
__idata[0],
__idata[1],
__idata[2],
__idata[3],
__idata[4],
__idata[5],
__idata[6],
__idata[7]);
return __builtin_ia32_aesdecwide256kl_u8((__v2di *)__odata,
(const __v2di *)__idata, __h);
}

#undef __DEFAULT_FN_ATTRS
Expand Down

0 comments on commit a02b449

Please sign in to comment.