Skip to content
Permalink
Browse files

Merge pull request #794 from ThePortlandGroup/nv_stage

Pull 2019-09-03T14-15 Recent NVIDIA Changes
  • Loading branch information...
sscalpone committed Sep 3, 2019
2 parents 7e96858 + 7a7aae1 commit a0a67c574f723ee490b1755299e2bad0e164a832
@@ -105,6 +105,7 @@ vdouble __attribute__((noinline)) atan_d_vec(vdouble const x) {

vdouble result = vsel_vd_vo_vd_vd(f_big, result_f_big, result_not_f_big);

result = vreinterpret_vd_vm(vreinterpret_vm_vd(result) | vreinterpret_vm_vd(ans_sgn));

return result;
}

@@ -92,6 +92,7 @@ double __attribute__((noinline)) atan_d_scalar(double x) {

double result_d = FMA(x2 * xReduced, poly, xReduced);

result_d = copysign(result_d, x);

return result_d;
}

@@ -72,5 +72,8 @@ vfloat __attribute__((noinline)) atan_vec(vfloat const x) {

vfloat result = vsel_vf_vo_vf_vf(x_big, result_x_big, result_not_x_big);

//Make sure atanf(-0.0f) = -0.0f:
result = vreinterpret_vf_vm(vor_vm_vm_vm(vreinterpret_vm_vf(result), vreinterpret_vm_vf(ans_sgn)));

return result;
}
@@ -76,5 +76,8 @@ float __attribute__((noinline)) atan_scalar(const float x) {

float result_d = FMAF(x2 * xReduced, poly, xReduced);

//This fixes atanf(-0.0) = -0.0, but doesn't slow down the code seemingly
result_d = copysignf(result_d, x);

return result_d;
}
@@ -52,7 +52,7 @@ MTHINTRIN(atan , sv8m , avxfma4 , __fs_atan_8_mn , __rs_atan_8_mn
MTHINTRIN(atan , dv4m , avxfma4 , __fd_atan_4_mn , __rd_atan_4_mn , __pd_atan_4_mn ,__math_dispatch_error)

MTHINTRIN(atan , ss , avx2 , __fs_atan_1_avx2 , __fs_atan_1_avx2 , __mth_i_atan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , ds , avx2 , __fd_atan_1_avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , ds , avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , sv4 , avx2 , __fs_atan_4_avx2 , __fs_atan_4_avx2 , __gs_atan_4_p ,__math_dispatch_error)
MTHINTRIN(atan , dv2 , avx2 , __fd_atan_2_avx2 , __fd_atan_2_avx2 , __gd_atan_2_p ,__math_dispatch_error)
MTHINTRIN(atan , sv8 , avx2 , __fs_atan_8_avx2 , __fs_atan_8_avx2 , __gs_atan_8_p ,__math_dispatch_error)
@@ -63,7 +63,7 @@ MTHINTRIN(atan , sv8m , avx2 , __fs_atan_8_mn , __rs_atan_8_mn
MTHINTRIN(atan , dv4m , avx2 , __fd_atan_4_mn , __rd_atan_4_mn , __pd_atan_4_mn ,__math_dispatch_error)

MTHINTRIN(atan , ss , avx512knl , __fs_atan_1_avx2 , __fs_atan_1_avx2 , __mth_i_atan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , ds , avx512knl , __fd_atan_1_avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , ds , avx512knl , __fd_atan_1_avx2 , __mth_i_datan_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , sv4 , avx512knl , __fs_atan_4_avx2 , __fs_atan_4_avx2 , __gs_atan_4_p ,__math_dispatch_error)
MTHINTRIN(atan , dv2 , avx512knl , __fd_atan_2_avx2 , __fd_atan_2_avx2 , __gd_atan_2_p ,__math_dispatch_error)
MTHINTRIN(atan , sv8 , avx512knl , __fs_atan_8_avx2 , __fs_atan_8_avx2 , __gs_atan_8_p ,__math_dispatch_error)
@@ -78,7 +78,7 @@ MTHINTRIN(atan , sv16m, avx512knl , __fs_atan_16_mn , __rs_atan_16_mn
MTHINTRIN(atan , dv8m , avx512knl , __fd_atan_8_mn , __rd_atan_8_mn , __pd_atan_8_mn ,__math_dispatch_error)

MTHINTRIN(atan , ss , avx512 , __fs_atan_1_avx2 , __fs_atan_1_avx2 , __mth_i_atan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , ds , avx512 , __fd_atan_1_avx2 , __fd_atan_1_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , ds , avx512 , __fd_atan_1_avx2 , __mth_i_datan_avx2 , __mth_i_datan_avx2 ,__math_dispatch_error)
MTHINTRIN(atan , sv4 , avx512 , __fs_atan_4_avx2 , __fs_atan_4_avx2 , __gs_atan_4_p ,__math_dispatch_error)
MTHINTRIN(atan , dv2 , avx512 , __fd_atan_2_avx2 , __fd_atan_2_avx2 , __gd_atan_2_p ,__math_dispatch_error)
MTHINTRIN(atan , sv8 , avx512 , __fs_atan_8_avx2 , __fs_atan_8_avx2 , __gs_atan_8_p ,__math_dispatch_error)
@@ -2748,6 +2748,8 @@ semant1(int rednum, SST *top)
* <end stmt> ::= <END stmt> |
*/
case END_STMT1:
if (gbl.rutype == RU_SUBR || gbl.rutype == RU_FUNC)
defer_arg_chk(SPTR_NULL, SPTR_NULL, SPTR_NULL, 0, 0, true);
if (sem.interface && !gbl.rutype)
error(310, 3, gbl.lineno, "Missing ENDINTERFACE statement", CNULL);
else if (sem.which_pass)
@@ -2798,6 +2800,7 @@ semant1(int rednum, SST *top)
* <end stmt> ::= ENDFUNCTION <opt ident> |
*/
case END_STMT3:
defer_arg_chk(SPTR_NULL, SPTR_NULL, SPTR_NULL, 0, 0, true);
submod_proc_endfunc:
fix_iface(gbl.currsub);
if (sem.which_pass && !sem.interface) {
@@ -2889,6 +2892,7 @@ semant1(int rednum, SST *top)
* <end stmt> ::= ENDSUBROUTINE <opt ident> |
*/
case END_STMT6:
defer_arg_chk(SPTR_NULL, SPTR_NULL, SPTR_NULL, 0, 0, true);
fix_iface(gbl.currsub);
if (sem.which_pass && !sem.interface) {
fix_class_args(gbl.currsub);
@@ -1619,6 +1619,8 @@ LOGICAL chk_arguments(int, int, ITEM *, char *, int, int, int, int *);
LOGICAL ignore_tkr(int, int);
LOGICAL ignore_tkr_all(int);
int iface_intrinsic(int);
void defer_arg_chk(SPTR formal, SPTR actual, SPTR subprog,
cmp_interface_flags, int lineno, bool performChk);
/* end semfunc2.c */

/* semgnr.c */
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1994-2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 1994-2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -1986,6 +1986,84 @@ sum_scatter_args(ITEM *list, int cnt)

/*---------------------------------------------------------------------*/

/** \brief Process information for deferred interface argument checking in
* in the compat_arg_lists() function below.
*
* If the performChk argument is false, then we save the information
* (defer the check). If performChk argument is true, then we perform
* the argument checking. Note: If performChk is true, then the other
* arguments are ignored.
*
* \param formal is the symbol table pointer of the dummy/formal argument.
* \param actual is the symbol table pointer of the actual argument.
* \param flags are comparison flags that enable/disable certain checks
* \param lineno is the source line number for the deferred check
* \param performChk is false to defer checks and true to perform the checks.
*/
void
defer_arg_chk(SPTR formal, SPTR actual, SPTR subprog,
cmp_interface_flags flags, int lineno, bool performChk)
{

typedef struct chkList {
char *formal;
SPTR actual;
char *subprog;
cmp_interface_flags flags;
int lineno;
struct chkList * next;
}CHKLIST;

static CHKLIST *list = NULL;
CHKLIST *ptr, *prev;

if (!performChk) {
/* Add a deferred check to the list */
NEW(ptr, CHKLIST, sizeof(CHKLIST));
NEW(ptr->formal, char, strlen(SYMNAME(formal))+1);
strcpy(ptr->formal, SYMNAME(formal));
ptr->actual = actual;
NEW(ptr->subprog, char, strlen(SYMNAME(subprog))+1);
strcpy(ptr->subprog, SYMNAME(subprog));
ptr->flags = flags;
ptr->lineno = lineno;
ptr->next = list;
list = ptr;
} else if (sem.which_pass == 1) {
for(prev = ptr = list; ptr != NULL; ) {
if (strcmp(SYMNAME(gbl.currsub),ptr->subprog) == 0) {
/* perform argument check */
formal = getsym(ptr->formal, strlen(ptr->formal));
if (!compatible_characteristics(formal, ptr->actual, ptr->flags)) {
char details[1000];
sprintf(details, "- arguments of %s and %s do not agree",
SYMNAME(ptr->actual), ptr->formal);
error(74, 3, ptr->lineno, ptr->subprog, details);
}
if (prev == ptr) {
prev = ptr->next;
FREE(ptr->formal);
FREE(ptr->subprog);
FREE(ptr);
list = ptr = prev;
} else {
prev->next = ptr->next;
FREE(ptr->formal);
FREE(ptr->subprog);
FREE(ptr);
ptr = prev->next;
}
} else {
prev = ptr;
ptr = ptr->next;
}
}
}

}



/** \brief For arguments that are subprograms, check that their argument lists
* are compatible.
*/
@@ -1995,23 +2073,40 @@ compat_arg_lists(int formal, int actual)
int paramct;
int fdscptr, adscptr;
int i;
bool func_chk;
cmp_interface_flags flags;

/* TODO: Not checking certain cases for now. */
if (STYPEG(actual) == ST_INTRIN || STYPEG(actual) == ST_GENERIC)
return TRUE;

if (STYPEG(formal) == ST_PROC && STYPEG(actual) == ST_PROC && FVALG(formal) &&
FVALG(actual) &&
!compatible_characteristics(formal, actual,
(IGNORE_ARG_NAMES | RELAX_STYPE_CHK |
RELAX_POINTER_CHK | RELAX_PURE_CHK_2))) {
flags = (IGNORE_ARG_NAMES | RELAX_STYPE_CHK | RELAX_POINTER_CHK |
RELAX_PURE_CHK_2);
func_chk = (STYPEG(formal) == ST_PROC && STYPEG(actual) == ST_PROC &&
FVALG(formal) && FVALG(actual));

if (func_chk && resolve_sym_aliases(SCOPEG(SCOPEG(formal))) == gbl.currsub){
flags |= DEFER_IFACE_CHK;
}

if (func_chk && !compatible_characteristics(formal, actual, flags)) {
return FALSE;
}

if (flags & DEFER_IFACE_CHK) {
/* We are calling an internal subprogram. We need to defer the
* check on the procedure dummy argument until we have seen the
* internal subprogram.
*/
defer_arg_chk(formal, actual, SCOPEG(formal), (flags ^ DEFER_IFACE_CHK),
gbl.lineno, false);
}

fdscptr = DPDSCG(formal);
adscptr = DPDSCG(actual);
if (fdscptr == 0 || adscptr == 0)
if (fdscptr == 0 || adscptr == 0 || (flags & DEFER_IFACE_CHK)) {
return TRUE; /* No dummy parameter descriptor; can't check. */
}
paramct = PARAMCTG(formal);
if (PARAMCTG(actual) != paramct)
return FALSE;
@@ -8521,10 +8521,10 @@ eval_merge(ACL *arg, DTYPE dtype)
return result;
}

/* Compare two constant ACLs. Return x > y or x < y depending on want_greater.
/* Compare two constant ACLs. Return x > y or x < y depending on want_max.
*/
static bool
cmp_acl(DTYPE dtype, ACL *x, ACL *y, bool want_greater, bool back)
cmp_acl(DTYPE dtype, ACL *x, ACL *y, bool want_max, bool back)
{
int cmp;
switch (DTY(dtype)) {
@@ -8535,10 +8535,12 @@ cmp_acl(DTYPE dtype, ACL *x, ACL *y, bool want_greater, bool back)
case TY_BINT:
case TY_SINT:
case TY_INT:
if (back && want_greater) {
cmp = x->conval >= y->conval ? 1 : -1;
if (x->conval == y->conval) {
cmp = 0;
} else if (x->conval > y->conval) {
cmp = 1;
} else {
cmp = x->conval > y->conval ? 1 : -1;
cmp = -1;
}
break;
case TY_REAL:
@@ -8553,9 +8555,9 @@ cmp_acl(DTYPE dtype, ACL *x, ACL *y, bool want_greater, bool back)
return false;
}
if (back) {
return want_greater ? cmp >= 0 : cmp <= 0;
return want_max ? cmp >= 0 : cmp <= 0;
} else {
return want_greater ? cmp > 0 : cmp < 0;
return want_max ? cmp > 0 : cmp < 0;
}
}

@@ -8730,6 +8732,7 @@ do_eval_minval_or_maxval(INDEX *index, DTYPE elem_dt, DTYPE loc_dt, ACL *elems,
if (!want_val) {
for (i = 0; i < locs_size; i++) {
ACL *elem = GET_ACL(15);
BZERO(elem, ACL, 1);
elem->id = AC_CONST;
elem->dtype = loc_dt;
elem->is_const = true;
@@ -2608,7 +2608,8 @@ compatible_characteristics(int psptr, int psptr2, cmp_interface_flags flag)
return false;
}

if (STYPEG(psptr) == ST_PROC && STYPEG(psptr2) == ST_PROC) {
if (STYPEG(psptr) == ST_PROC && STYPEG(psptr2) == ST_PROC &&
(flag & DEFER_IFACE_CHK) == 0) {
if (!cmp_interfaces_strict(psptr, psptr2, (flag | CMP_OPTARG))) {
return false;
}
@@ -2718,15 +2719,15 @@ cmp_interfaces_strict(SPTR sym1, SPTR sym2, cmp_interface_flags flag)

for (j = i = 0; i < paramct; ++i) {
psptr = aux.dpdsc_base[dpdsc + i];
if (CCSYMG(psptr)) {
if (CCSYMG(psptr) && CLASSG(psptr)) {
++j;
}
}
paramct -= j;

for (j = i = 0; i < paramct2; ++i) {
psptr2 = aux.dpdsc_base[dpdsc2 + i];
if (CCSYMG(psptr2)) {
if (CCSYMG(psptr2) && CLASSG(psptr2)) {
++j;
}
}
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2003-2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2003-2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -473,9 +473,11 @@ typedef enum CMP_INTERFACE_FLAGS {
cmp_interfaces_strict() function */
RELAX_PURE_CHK_2 = 0x80, /**< relax pure check on argument #2 of
cmp_interfaces_strict() function */
CMP_SUBMOD_IFACE = 0x100 /**< make sure submodule interface of a procedure
CMP_SUBMOD_IFACE = 0x100, /**< make sure submodule interface of a procedure
defined by a separate module subprogram's
definition matches the declaration */
DEFER_IFACE_CHK = 0x200 /**< defer interface check for procedure dummy
arguments. */
} cmp_interface_flags;

bool compatible_characteristics(int psptr, int psptr2,
@@ -5282,6 +5282,8 @@ but the innermost loop ones.
.XB 0x1000000:
Only find ACIV induction variables for innermost loops.
reserved
.XB 0x2000000:
Assume that complex arrays on GPU are aligned as follows: complex:8-byte dcmplx:16-byte

.XF "202:"
Set number of bigbuffers for multi-buffer memory management for AMD GPU.
@@ -12555,10 +12555,13 @@ INLINE static OPERAND *
cons_expression_metadata_operand(LL_Type *llTy)
{
// FIXME: we don't need to always do this, do we? do a type check here
LL_DebugInfo *di = cpu_llvm_module->debug_info;
unsigned v = lldbg_encode_expression_arg(LL_DW_OP_deref, 0);
LL_MDRef exprMD = lldbg_emit_expression_mdnode(di, 1, v);
return make_mdref_op(exprMD);
if (llTy->data_type == LL_PTR) {
LL_DebugInfo *di = cpu_llvm_module->debug_info;
unsigned v = lldbg_encode_expression_arg(LL_DW_OP_deref, 0);
LL_MDRef exprMD = lldbg_emit_expression_mdnode(di, 1, v);
return make_mdref_op(exprMD);
}
return NULL;
}

INLINE static bool

0 comments on commit a0a67c5

Please sign in to comment.
You can’t perform that action at this time.