Skip to content
Permalink
Browse files

Merge pull request #712 from ThePortlandGroup/nv_stage

Pull 2019-05-03T14-12 Recent NVIDIA Changes
  • Loading branch information...
sscalpone committed May 3, 2019
2 parents 2039fc8 + f718fa7 commit d75521f80dbf9107f81de21e26734b454af8c70e
@@ -14,11 +14,42 @@
.\" * limitations under the License.
.\" *
.\" */
.MS W 972 "The directive #pragma acc mirror is deprecated; use #pragma acc declare create instead"
.MS W 973 "The directive #pragma acc reflected is deprecated; use #pragma acc declare present"
.MS W 974 "The directive #pragma acc region is deprecated; use #pragma acc kernels instead"
.MS W 975 "The directive #pragma acc data region is deprecated; use #pragma acc data instead"
.MS W 976 "The directive #pragma acc for is deprecated; use #pragma acc loop instead"
.MS W 977 "The directive #pragma acc for kernel is deprecated; use #pragma acc loop instead"
.MS W 978 "The clause parallel is deprecated; use clause gang instead"
.MS W 979 "The directive #pragma acc region for is deprecated; use #pragma acc kernels loop instead"
.MS W 980 "The directive #pragma acc region loop is deprecated; use #pragma acc kernels loop instead"
.MS W 981 "The directive #pragma acc kernels for is deprecated; use #pragma acc kernels loop instead"
.MS W 982 "The directive #pragma acc deviceptr is deprecated; use #pragma acc declare deviceptr instead"
.MS W 983 "The directive #pragma acc parallel for is deprecated; use #pragma acc parallel loop instead"
.MS W 984 "The directive #pragma acc scalar region is deprecated; use #pragma acc serial instead"
.MS W 985 "The clause local is deprecated; use clause create instead"
.MS W 986 "The clause cache is deprecated; use directive #pragma acc cache instead"
.MS W 987 "The clause update host is deprecated; use separate update host directive after the region instead"
.MS W 988 "The clause update device is deprecated; use separate update device directive before the region instead"
.MS W 989 "The clause update in is deprecated; use separate update device directeve before the region instead"
.MS W 990 "The clause update out is deprecated; use update self instead"
.MS W 991 "The clause pnot is deprecated; use no_create instead"
.MS W 992 "The clause updatein is deprecated; use update device instead"
.MS W 993 "The clause updateout is deprecated; use update self instead"
.MS W 994 "The directive #pragma acc copy is deprecated; use #pragma acc declare copy instead"
.MS W 995 "The directive #pragma acc copyin is deprecated; use #pragma acc declare copyin instead"
.MS W 996 "The directive #pragma acc copyout is deprecated; use #pragma acc declare copyout instead"
.MS W 997 "The directive #pragma acc device_resident is deprecated; use #pragma acc declare device_resident instead"
.MS W 998 "The directive #pragma acc for host is deprecated; no OpenACC equivalent"
.MS W 999 "The directive #pragma acc loop kernel is deprecated; no OpenACC equivalent"
.MS S 1000 "Call in OpenACC region to procedure '$' which has no acc routine information"
.MS S 1001 "All selected compute capabilities were disabled (see -Minfo)"
.MS S 1002 "Reduction type not supported for this variable datatype - $"
.MS W 1003 "Lambda capture by reference not supported in Accellerated region"
.MS W 1004 "Lambda capture 'this' by reference not supported in Accellerated region"
.MS W 1005 "The clause unroll is deprecated; no OpenACC equivalent"
.MS W 1006 "The clause mirror is deprecated; no OpenACC equivalent"
.MS W 1007 "The clause host is deprecated; no OpenACC equivalent"
.\" Skipping ahead to 1011 to keep the same numbers as the fortran front-end.
.MS S 1011 "Device variable cannot be THREADPRIVATE - $"
.MS S 1012 "Threadprivate variables are not supported in acc routine - $"
@@ -42,8 +73,14 @@
.MS F 1028 "The variable $ doesn't have predefined policy $ available"
.MS F 1029 "The variable $ using policy $ is not a structure-based type"
.MS F 1030 "Policy motion $ is not allowed in $ directive"
.MS W 1031 "The directive #pragma acc create is deprecated; use #pragma acc declare create instead"
.MS W 1032 "The directive #pragma acc present is deprecated; use #pragma acc declare present instead"
.MS W 1033 "The directive #pragma acc link is deprecated; use #pragma acc declare link instead"
.MS F 1034 "Only signed/unsigned 32 bits and 64 bits integer variables are allowed in bound expression. $ is is not such variable"
.MS F 1035 "Only integer sibling members and global variables are allowed in bound expression. $ is is neither of them."
.MS F 1036 "No global variable named $ has been defined"
.MS F 1037 "Default clause can only contain include and exclude keyword."
.MS W 1038 "COPY clause on a dummy variable with INTENT(IN) attribute demoted to COPYIN - $"
.MS S 1039 "OpenACC data clause expected after $"
.MS S 1040 "OpenACC $ data clause may not follow a device_type clause."
.MS S 1041 "OpenACC $ clause may not follow a device_type clause."
@@ -1523,3 +1523,6 @@ A DO CONCURRENT variable with LOCAL_INIT locality must have a host variable of t
.MS S 1210 "Parent module $ must declare a separate module procedure."
.MS S 1211 "Submodule's ancestor module $ must be a nonintrinsic module."
.MS S 1212 "$ was previously declared to be a module procedure."
.MS S 1213 "OpenACC $ data clause may not follow a device_type clause."
.MS S 1214 "PGI Accelerator $ data clause may not follow a device_type clause."
.MS S 1215 "OpenACC data clause expected after $."
@@ -239,7 +239,7 @@ dinit_data(VAR *ivl, ACL *ict, int dtype)
assert(dtype, "dinit_data: no object to initialize", 0, 2);
member = DTY(dtype + 1);
/* for type extension */
if (PARENTG(DTY(dtype + 3))) {
if (PARENTG(DTY(dtype + 3)) && get_seen_contains()) {
member = SYMLKG(member);
}
}
@@ -2530,8 +2530,11 @@ handle_seq_section(int entry, int arr, int loc, int std, int *retval,
} else {
/* right now, no members can be distributed anyway */
arrayalign = ALIGNG(arraysptr);
if (POINTERG(arraysptr) && !arrayalign)
is_seq_pointer = TRUE;
if (POINTERG(arraysptr)) {
is_pointer = TRUE;
if (!arrayalign)
is_seq_pointer = TRUE;
}
}
break;
default:
@@ -42,7 +42,7 @@
/* contents of this file: */

static void add_clause(int, LOGICAL);
static void clause_errchk(BIGINT64, char *);
static bool clause_errchk(BIGINT64, char *);
static void accel_sched_errchk();
static void accel_nosched_errchk();
static void accel_pragmagen(int, int, int);
@@ -5807,14 +5807,19 @@ add_clause(int clause, LOGICAL one_only)
CL_PRESENT(clause) = 1;
}

static void
static bool
clause_errchk(BIGINT64 bt, char *dirname)
{
int i;
bool any = false;

for (i = 0; i < CL_MAXV; i++)
if (CL_PRESENT(i) && !(CL_STMT(i) & bt))
error(533, 3, gbl.lineno, CL_NAME(i), dirname);
if (CL_PRESENT(i)) {
any = true;
if (!(CL_STMT(i) & bt))
error(533, 3, gbl.lineno, CL_NAME(i), dirname);
}
return any;
}

static void
@@ -1317,12 +1317,12 @@ definition on only one side of the conditional.
LLVM - disable extended conditional vectorization in all loops where the
predicate size is different than the computational size.
.XB 0x20:
LLVM - don't allow vectorization if multiple lhs data type sizes exist
LLVM - allow vectorization if multiple lhs data type sizes exist
within the inner loop
.XB 0x40:
LLVM - don't allow vectorization of DBLE ili
.XB 0x80:
LLVM - don't allow vectorization of DFLOAT ili
LLVM - don't allow vectorization of DFLOAT/IKMV ili

.XF "37":
.XB 0x01:
@@ -5274,6 +5274,11 @@ Don't demote address KMUL operations
.XB 0x200000:
in LLVM output, don't output the instruction info (lilix index, opcode)
.XB 0x400000:
.XB 0x800000:
If the number of ACIV induction variables is too large, kill off all
but the innermost loop ones.
.XB 0x1000000:
Only find ACIV induction variables for innermost loops.
reserved

.XF "202:"
@@ -2285,7 +2285,7 @@ exp_end(ILM *ilmp, int curilm, bool is_func)
}

/* emit any mp initialization for the function & its entries */
exp_mp_func_prologue();
exp_mp_func_prologue(true);

if (!XBIT(121, 0x01) || /* -Mnoframe isn't specified */
(flg.debug && !XBIT(123, 0x400)) || /* -debug is set */
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -3099,6 +3099,9 @@ exp_mp_atomic_capture(ILM *ilmp)
cpt.rhs[cnt] = ILI_OF(ILM_OPND(ilmp, 2));
cpt.nme[cnt] = nme[LHS_IDX] = NME_OF(ILM_OPND(ilmp, 1));
cpt.dtype[cnt] = dt_nme(nme[LHS_IDX]);
if (!cpt.dtype[cnt]) {
cpt.dtype[cnt] = get_dtype_from_ilm(ilmp);
}
cpt.mem_order[cnt] = ILM_OPND(ilmp, 3);

/* Don't use CSE for LHS */
@@ -2966,7 +2966,7 @@ clear_tplnk(void)
/** \brief Generate any mp-specific prologue for a function.
*/
void
exp_mp_func_prologue(void)
exp_mp_func_prologue(bool process_tp)
{
SPTR sym;
int ili, tmpthread;
@@ -2980,7 +2980,7 @@ exp_mp_func_prologue(void)
if (CUDAG(GBL_CURRFUNC) == CUDA_GLOBAL || CUDAG(GBL_CURRFUNC) == CUDA_DEVICE)
return;
#endif
if (1) {
if (process_tp) {
for (sym = gbl.threadprivate; sym > NOSYM; sym = TPLNKG(sym)) {
/* For each threadprivate common, must 'declare' the threads'
* copies by calling:
@@ -99,7 +99,7 @@ void clear_tplnk(void);
/**
\brief ...
*/
void exp_mp_func_prologue(void);
void exp_mp_func_prologue(bool);

/**
\brief ...
@@ -2724,7 +2724,7 @@ read_symbol(void)
NEW(agototab, int, agotosz);
agotomax = 0;
}
NEED(agoto, agototab, int, agotosz, agotosz + 32);
NEED(agoto, agototab, int, agotosz, agoto + 32);
agototab[agoto - 1] = newsptr;
if (agoto > agotomax)
agotomax = agoto;
@@ -1126,15 +1126,27 @@ respectively.


.IL KIMV krlnk
Move an Integer64 register to a integer register with truncation.
Used also for K to UI, UK to I, and UK to UI conversions.
Bug fix - don't treat this as a move for purposes of register allocation.
Signed or unsigned integer*8 to signed or unsigned integer*4 type
conversion with truncation. Note, we mustn't treat this as a move for
the purposes of register allocation.
.AT move null ir cse
.CG "mov" 'l'
.SI direct lat(1)
.SI ld direct lat(3)
.SI st direct lat(3)

.IL PKIMVX xmm xmm
Truncate 'N' packed signed or unsigned integer*8 values from 'xmm1'
(an xmm, ymm or zmm register) into 'N' packed signed or unsigned
integer*4 values in 'xmm2' (an xmm or ymm register), where 'N' is 2, 4
or 8 if 'xmm1' is an xmm, ymm or zmm register respectively. This
instruction requires support for AVX-512F and AVX-512VL, and the
source operand must be a register (unusually for AVX instructions the
destination can be a memory operand!).
.AT other null trm
.CG terminal "vpmovqd" avx_only avx_special


.IL IAMV irlnk
Move an integer reg into an address reg.
Sign extension required?
@@ -87,6 +87,7 @@
#define ACCVERYSTRICT XBIT(186,0x200000)

/*
* PGI Compiler Assisted Software Testing (PCAST)
* Compiler Assisted Software Testing (PCAST)
*/

#define XBIT_PCAST XBIT(215, 0x8)
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2006-2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2006-2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -27,6 +27,12 @@

X86TYPE mach, mach_count;

#ifdef TARGET_WIN
#define DONT_GENERATE_AVX512 true /* a temporary restriction */
#else
#define DONT_GENERATE_AVX512 false
#endif

void
set_mach(X86TYPE *mach, int machtype)
{
@@ -181,15 +187,19 @@ set_mach(X86TYPE *mach, int machtype)
break;

case TP_SKYLAKE:
mach->type[MACH_INTEL_SKYLAKE] = 1;
mach->feature[FEATURE_AVX512VL] = 1;
if (! DONT_GENERATE_AVX512) {
mach->type[MACH_INTEL_SKYLAKE] = 1;
mach->feature[FEATURE_AVX512VL] = 1;
}
/* ...and fall through... */

case TP_KNIGHTS_LANDING:
if (machtype == TP_KNIGHTS_LANDING) {
mach->type[MACH_INTEL_KNIGHTS_LANDING] = 1;
if (! DONT_GENERATE_AVX512) {
if (machtype == TP_KNIGHTS_LANDING) {
mach->type[MACH_INTEL_KNIGHTS_LANDING] = 1;
}
mach->feature[FEATURE_AVX512F] = 1;
}
mach->feature[FEATURE_AVX512F] = 1;
/* ...and fall through... */

case TP_HASWELL:

0 comments on commit d75521f

Please sign in to comment.
You can’t perform that action at this time.