Skip to content

Commit

Permalink
Merge pull request #303 from ThePortlandGroup/nv_stage
Browse files Browse the repository at this point in the history
Pull 2017-11-10T16-05 Recent NVIDIA Changes
  • Loading branch information
sscalpone committed Nov 11, 2017
2 parents 7d8327e + 88e109c commit 2b20297
Show file tree
Hide file tree
Showing 14 changed files with 79 additions and 21 deletions.
1 change: 1 addition & 0 deletions runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ add_definitions(
-DMAXCPUSL=8
-DMAXCPUSR=8
-DTARGET_LINUX
-DTARGET_LLVM
-DLINUX
-DPGF90
-DPGFLANG
Expand Down
11 changes: 0 additions & 11 deletions runtime/flang/initpar.c
Original file line number Diff line number Diff line change
Expand Up @@ -852,9 +852,7 @@ f90_compiled_arg()


void
#if defined(TARGET_LLVM)
__attribute__((constructor))
#endif
f90_compiled()
{
#ifndef TARGET_LINUX_ARM
Expand All @@ -872,12 +870,3 @@ f90_compiled()
}
}

#ifndef PGLANG
/**
 * Wrapper function to maintain compatibility with previous PGI products.
 *
 * Forwards directly to f90_compiled(); takes no arguments and returns
 * nothing.
 *
 * The whole definition (including its closing brace) is kept inside the
 * conditional so that nothing is left behind when PGLANG is defined.
 */
void
pgf90_compiled()
{
  f90_compiled();
}
#endif

2 changes: 1 addition & 1 deletion tools/flang1/flang1exe/dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -2748,7 +2748,7 @@ dsym(int sptr)
PARAMVALP(0, 0);
#ifdef PDALNG
putnzint("pdaln", PDALNG(0));
PDALNP(0, 0);
b4P(0, 0);
#endif
putnzint("socptr", SOCPTRG(0));
SOCPTRP(0, 0);
Expand Down
1 change: 0 additions & 1 deletion tools/flang1/flang1exe/exterf.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@

#include "dpm_out.h"

#define MOD_CMN_IDX(a, c) (((a) << 1) | (c))
#define COMPILER_OWNED_MODULE XBIT(58,0x100000)

/* ------------------------------------------------------------------ */
Expand Down
2 changes: 1 addition & 1 deletion tools/flang1/flang1exe/lowersym.c
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ fill_fixed_array_dtype(int dtype)
if (mlpyr > 0) {
ADD_NUMELM(dtype) = mk_cnst(lower_getiszcon(mlpyr));
} else {
ADD_NUMELM(dtype) = 0;
ADD_NUMELM(dtype) = astb.bnd.zero;
}
} /* fill_fixed_array_dtype */

Expand Down
1 change: 1 addition & 0 deletions tools/flang1/flang1exe/semfunc.c
Original file line number Diff line number Diff line change
Expand Up @@ -6173,6 +6173,7 @@ ref_pd(SST *stktop, ITEM *list)
shape1 = A_SHAPEG(ARG_AST(0));
count = SHD_NDIM(shape1); /* rank of array arg */
argt_count = count * 2 + 2;
adjarr = 0;
asumsz = 0;
assumshp = 0;
arg1 = ARG_AST(0);
Expand Down
2 changes: 1 addition & 1 deletion tools/flang1/utils/prstab/prstab.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
#define MAXPROD 4000
#define MAXLST 24000
#define MAXBAS 44000
#define MAXTRN 28000
#define MAXTRN 40000
#define MAXRED 10000
#define MAXSCR 6000
#define MAXHASH 2048
Expand Down
10 changes: 5 additions & 5 deletions tools/flang2/flang2exe/exputil.c
Original file line number Diff line number Diff line change
Expand Up @@ -384,14 +384,14 @@ chk_block(int newili)
}
}

/** \brief Like chk_block, but suppress CAN_THROW flag.
*
/** \brief Like chk_block, but suppress CAN_THROW flag.
*
* When a call can throw and defines two result registers, we have an ad-hoc
* rule that only the second store is marked as "can throw". This utility
* is useful for ensuring that the first store is not marked "can throw".
*/
void
chk_block_suppress_throw(int newili)
chk_block_suppress_throw(int newili)
{
chk_block(newili);
ILT_SET_CAN_THROW(expb.curilt, 0);
Expand Down Expand Up @@ -1181,10 +1181,10 @@ void
exp_add_copy(int lhssptr, int rhssptr)
{
int rhsacon, lhsacon, rhsnme, lhsnme, rhsld, lhsst, sz;
ILI_OP rhsopc, lhsopc;
ILI_OP rhsopc, lhsopc;
MSZ msz;
if (lhssptr == rhssptr)
return;
return;
rhsacon = ad_acon(rhssptr, 0);
sz = size_of(DTYPEG(rhssptr));
if (sz == 8) {
Expand Down
9 changes: 9 additions & 0 deletions tools/flang2/flang2exe/ilmutil.c
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,12 @@ _dumpilms(ILM_T *ilm_base, int check)
case PR_ACCENDSCALARREG:
s = "ACCENDSCALARREG";
break;
case PR_ACCSERIAL:
s = "ACCSERIAL";
break;
case PR_ACCENDSERIAL:
s = "ACCENDSERIAL";
break;
case PR_ACCPARCONSTRUCT:
s = "ACCPARCONSTRUCT";
break;
Expand Down Expand Up @@ -1191,6 +1197,9 @@ _dumpilms(ILM_T *ilm_base, int check)
case PR_ACCWAITDIR:
s = "ACCWAITDIR";
break;
case PR_ACCSLOOP:
s = "ACCSLOOP";
break;
case PR_ACCKLOOP:
s = "ACCKLOOP";
break;
Expand Down
9 changes: 9 additions & 0 deletions tools/flang2/flang2exe/outliner.c
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,15 @@ clone_uplevel(int uplevel_sptr, int uplevel_stblk_sptr)
ilix = ad2ili(IL_LDA, ili, addnme(NT_VAR, arg, 0, (INT)0));
}

/* set alignment of last argument for GPU "align 8". */
if (DTY(uplevel_dtype) == TY_STRUCT)
DTY(uplevel_dtype + 4) = 7;

if (DTY(DTYPEG(new_uplevel)) == TY_STRUCT)
DTY(DTYPEG(new_uplevel) + 4) = 7;



/* For C we have a homed argument, a pointer to a pointer to an uplevel.
* This will dereference the pointer, we do not need to do this for Fortran.
*/
Expand Down
15 changes: 15 additions & 0 deletions tools/flang2/utils/ilitp/aarch64/ilitp.n
Original file line number Diff line number Diff line change
Expand Up @@ -4277,11 +4277,26 @@ End a block of code to run as a scalar kernel on the accelerator
.AT other null trm
.CG notCG

.IL ACCSERIAL lnk
Start a block of code to run as a serial kernel on the accelerator
.AT other null trm
.CG notCG

.IL ACCENDSERIAL
End a block of code to run as a serial kernel on the accelerator
.AT other null trm
.CG notCG

.IL ACCELLP lnk
The following loop is to be targeted for the accelerator
.AT other null trm
.CG notCG

.IL ACCSLOOP lnk
The following loop in a serial region is to be targeted for the accelerator
.AT other null trm
.CG notCG

.IL ACCKLOOP lnk
The following loop in a kernels region is to be targeted for the accelerator
.AT other null trm
Expand Down
15 changes: 15 additions & 0 deletions tools/flang2/utils/ilitp/ppc64le/ilitp.n
Original file line number Diff line number Diff line change
Expand Up @@ -4274,11 +4274,26 @@ End a block of code to run as a scalar kernel on the accelerator
.AT other null trm
.CG notCG

.IL ACCSERIAL lnk
Start a block of code to run as a serial kernel on the accelerator
.AT other null trm
.CG notCG

.IL ACCENDSERIAL
End a block of code to run as a serial kernel on the accelerator
.AT other null trm
.CG notCG

.IL ACCELLP lnk
The following loop is to be targeted for the accelerator
.AT other null trm
.CG notCG

.IL ACCSLOOP lnk
The following loop in a serial region is to be targeted for the accelerator
.AT other null trm
.CG notCG

.IL ACCKLOOP lnk
The following loop in a kernels region is to be targeted for the accelerator
.AT other null trm
Expand Down
18 changes: 18 additions & 0 deletions tools/flang2/utils/ilitp/x86_64/ilitp.n
Original file line number Diff line number Diff line change
Expand Up @@ -5129,12 +5129,30 @@ Link to a list of clauses.
.AT other null trm
.CG notCG

.IL ACCSERIAL lnk
Start a block of code to run as a serial kernel on the accelerator.
Link to a list of clauses.
.AT other null trm
.CG notCG

.IL ACCENDSERIAL
End a block of code to run as a serial kernel on the accelerator.
Takes no operands.
.AT other null trm
.CG notCG

.IL ACCELLP lnk
The following loop is to be targeted for the accelerator.
Link to a list of clauses.
.AT other null trm
.CG notCG

.IL ACCSLOOP lnk
The following loop in a serial region is to be targeted for the accelerator.
Link to a list of clauses.
.AT other null trm
.CG notCG

.IL ACCKLOOP lnk
The following loop in a kernels region is to be targeted for the accelerator.
Link to a list of clauses.
Expand Down
4 changes: 3 additions & 1 deletion tools/shared/pragma.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,10 @@ typedef enum {
PR_ACCSETDIR = 116, /* accelerator set directive */
PR_ACCUSEDEVICEIFP = 117, /* accelerator use device clause combined with if present */
PR_ACCNO_CREATE = 118, /* no_create clause */
PR_ACCSERIAL = 119, /* accelerator serial construct */
PR_ACCENDSERIAL = 120, /* end accelerator serial construct */
PR_ACCSLOOP = 121, /* loop in accelerator serial region */
} PR_PRAGMA;

/* Ignore data movement pragmas */
#define ACC_DATAMOVEMENT_DISABLED XBIT(195, 0x400)

0 comments on commit 2b20297

Please sign in to comment.