Skip to content

Commit

Permalink
Merge pull request #690 from ThePortlandGroup/nv_stage
Browse files Browse the repository at this point in the history
Pull 2019-03-20T10-58 Recent NVIDIA Changes
  • Loading branch information
sscalpone committed Mar 20, 2019
2 parents cc04c6e + 5670a43 commit d5db06d
Show file tree
Hide file tree
Showing 15 changed files with 203 additions and 162 deletions.
28 changes: 22 additions & 6 deletions runtime/flang/format-double.c
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -218,6 +218,22 @@ div_by_billion(uint32_t le_x[32], int *words)
return remainder;
}

static inline uint64_t
double_to_uint64 (double x) {

#if defined(TARGET_LLVM) && defined(TARGET_LINUX_X8664)
/*
* LLVM emulates 'vcvttsd2usi' (a new AVX-512F instruction) with 'vcvttsd2si'
* on non AVX-512F machines to cast double to unsigned long. With -Ktrap=fp
* option, this generates a floating point exception when the converted number
* is >= 9223372036854775808 (1<<63).
*/
if (x >= SIGN_BIT)
return (uint64_t) (x - SIGN_BIT) + SIGN_BIT;
#endif
return (uint64_t) x;
}

/*
* Convert a nonnegative integer represented as a double
* into a sequence of decimal digit characters ('0' to '9').
Expand All @@ -234,7 +250,7 @@ format_int_part(char *buff, int width, double x)
* arithmetic below.
*/
if (x <= MAX_EXACTLY_REPRESENTABLE_UINT64) {
out = reversed_uint64(out, buff, x);
out = reversed_uint64(out, buff, double_to_uint64(x));
if (!out)
return width + 1; /* overflow */
} else {
Expand Down Expand Up @@ -361,7 +377,7 @@ format_fraction(char buff[MAX_FRACTION_SIGNIFICANT_DECIMAL_DIGITS],
return;
}

absx -= (uint64_t) absx;
absx -= double_to_uint64(absx);
if (absx == 0.0) {
fill(buff, '0', width);
return;
Expand Down Expand Up @@ -453,7 +469,7 @@ fraction_digits(char buff[MAX_FRACTION_SIGNIFICANT_DECIMAL_DIGITS],

if (absx >= MIN_ENTIRELY_INTEGER)
return -1;
absx -= (uint64_t) absx;
absx -= double_to_uint64(absx);
if (absx == 0.0)
return -1;

Expand Down Expand Up @@ -657,7 +673,7 @@ F_format(char *output_buffer, int width,
/* |x| is an integer (no bits worth < 2**0) */
fill(frac, '0', frac_digits);
} else {
uint64_t int_absx = (uint64_t) absx;
uint64_t int_absx = double_to_uint64(absx);
int next_digit_for_rounding = 0;
bool is_inexact = false;
format_fraction(frac, &next_digit_for_rounding, &is_inexact,
Expand Down Expand Up @@ -885,7 +901,7 @@ ED_format(char *out_buffer, int width, const struct formatting_control *control,
} else if (frac_part_digits < 0) {
expo = int_part_digits;
is_inexact = absx < MAX_EXACTLY_REPRESENTABLE_UINT64 &&
absx != (uint64_t) absx;
absx != double_to_uint64(absx);
while (int_part_digits > significant_digits) {
is_inexact |= next_digit_for_rounding != 0;
next_digit_for_rounding = payload[--int_part_digits] - '0';
Expand Down
63 changes: 53 additions & 10 deletions runtime/flang/type.c
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010-2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2010-2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -30,6 +30,8 @@ static struct type_desc *I8(__f03_ty_to_id)[];
void ENTF90(SET_INTRIN_TYPE, set_intrin_type)(F90_Desc *dd,
__INT_T intrin_type);

static TYPE_DESC * get_parent_pointer(TYPE_DESC *src_td, __INT_T level);

#define ARG1_PTR 0x1
#define ARG1_ALLOC 0x2
#define ARG2_PTR 0x4
Expand Down Expand Up @@ -135,8 +137,7 @@ ENTF90(EXTENDS_TYPE_OF, extends_type_of)
return GET_DIST_TRUE_LOG;

if (atd->obj.level > btd->obj.level) {
__INT_T offset = (btd->obj.level + 1) * sizeof(__POINT_T);
TYPE_DESC *parent = *((TYPE_DESC **)(((char *)atd) - offset));
TYPE_DESC *parent = get_parent_pointer(atd, btd->obj.level+1);
if (btd == parent)
return GET_DIST_TRUE_LOG;
}
Expand Down Expand Up @@ -245,8 +246,7 @@ ENTF90(KEXTENDS_TYPE_OF, kextends_type_of)
return GET_DIST_TRUE_LOG;

if (atd->obj.level > btd->obj.level) {
__INT_T offset = (btd->obj.level + 1) * sizeof(__POINT_T);
TYPE_DESC *parent = *((TYPE_DESC **)(((char *)atd) - offset));
TYPE_DESC *parent = get_parent_pointer(atd, btd->obj.level+1);
if (btd == parent)
return GET_DIST_TRUE_LOG;
}
Expand Down Expand Up @@ -310,6 +310,50 @@ ENTF90(KGET_OBJECT_SIZE, kget_object_size)(F90_Desc *d)
return (__INT8_T)(td ? td->obj.size : od->size);
}

/** \brief Returns a type descriptor pointer of a specified ancestor of
* a type descriptor.
*
* \param src_td is the type descriptor used to locate the ancestor type
* type descriptor.
* \param level specifies the heirarchical position in the inheritance graph
* of the desired ancestor type descriptor. To find its immediate
* parent, specify a level equal to src_td's level.
*
* \return a type descriptor representing the ancestor or NULL if there is no
* ancestor.
*/
static TYPE_DESC *
get_parent_pointer(TYPE_DESC *src_td, __INT_T level)
{

__INT_T offset, src_td_level;
TYPE_DESC *parent;

if (level <= 0 || src_td == NULL)
return NULL;

src_td_level = src_td->obj.level;
if (src_td_level < 0 || level > src_td_level)
return NULL;

if (src_td->parents != NULL) {
/* The parents field is filled in, so use it to get the desired parent */
offset = (src_td_level - level) * sizeof(__POINT_T);
parent = *((TYPE_DESC **)(((char *)src_td->parents) + offset));
} else {
/* The parents field is not filled in, so find the parent from the
* src_td base pointer. The parents field is not filled in
* when a type descriptor is created with an older compiler.
* Note: This method does not always work if the type descriptor is
* defined in a shared library.
*/
offset = level * sizeof(__POINT_T);
parent = *((TYPE_DESC **)(((char *)src_td) - offset));
}

return parent;

}
static void
process_final_procedures(char *area, F90_Desc *sd)
{
Expand Down Expand Up @@ -408,8 +452,9 @@ process_final_procedures(char *area, F90_Desc *sd)

if (((F90_Desc *)src_td)->tag == __POLY && src_td->obj.level > 0) {
/* process parent finals */
__INT_T offset = (src_td->obj.level) * sizeof(__POINT_T);
TYPE_DESC *parent = *((TYPE_DESC **)(((char *)src_td) - offset));
TYPE_DESC *parent = get_parent_pointer(src_td, src_td->obj.level);



if (rank > 0) {
int i;
Expand Down Expand Up @@ -910,14 +955,12 @@ void I8(__fort_dump_type)(TYPE_DESC *d)
fprintf(__io_stderr(), "Size: %d\n", d->obj.size);
fprintf(__io_stderr(), "Type Descriptor:\n\t'%s'\n", d->name);
if (d->obj.level > 0) {
TYPE_DESC *parent;
__INT_T offset, level;
fprintf(__io_stderr(), "(Child Type)\n");
fprintf(__io_stderr(), "Parent Descriptor%s\n",
(d->obj.level == 1) ? ":" : "s:");
for (level = d->obj.level - 1; level >= 0; --level) {
offset = (level + 1) * sizeof(__POINT_T);
TYPE_DESC *parent = *((TYPE_DESC **)(((char *)d) - offset));
TYPE_DESC *parent = get_parent_pointer(d, level+1);
fprintf(__io_stderr(), "\t'%s'\n", parent->name);
}

Expand Down
4 changes: 2 additions & 2 deletions runtime/flang/type.h
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010-2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2010-2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -120,7 +120,7 @@ struct object_desc {
struct type_desc /* extends(OBJECT_DESC) */ {
OBJECT_DESC obj; /**< parent object_desc */
VTABLE(func_table); /**< pointer to virtual function table */
VTABLE(constructor); /**< reserved */
POINT(TYPE_DESC, parents); /**< pointer to parent type descriptor list */
FINAL_TABLE(finals); /**< pointer to final procedures table */
POINT(LAYOUT_DESC, layout); /**< pointer to layout descriptor */
char name[MAX_TYPE_NAME + 1];/**< null terminated user defined name of type */
Expand Down
8 changes: 0 additions & 8 deletions runtime/libpgmath/lib/common/dispatch.c
Expand Up @@ -53,14 +53,6 @@
*/

#if defined(TARGET_WIN_X8664)
/*
* Defining CPP object macro _NO_CRT_STDIO_INLINE prevents the Visual Studio
* header files from generating local versions of printf(), fprintf() and
* others.
*/

#define _NO_CRT_STDIO_INLINE

/*
* The Windows system header files are missing the argument list in the
* following function declarations. Without the argument list, albeit void,
Expand Down
12 changes: 12 additions & 0 deletions runtime/libpgmath/lib/x86_64/x86id.c
Expand Up @@ -964,8 +964,20 @@ X86IDFN(init_hw_features)(uint32_t old_hw_features)
* Abort and avoid infinite loop since nothing is going to change.
*/

#if defined(TARGET_WIN_X8664) && ! defined(_NO_CRT_STDIO_INLINE)
/*
* Exception! Windows - building x86id.obj for libcpuid.lib:
* It is unclear why fprintf() can't be used when x86id.c is being
* compiled for libcpuid.lib.
*/

printf("Error: %s called twice with hw_features=%#x\n", __func__,
X86IDFN(hw_features));
#else
// All other architectures/platforms/libraries can safely use fprintf().
fprintf(stderr, "Error: %s called twice with hw_features=%#x\n", __func__,
X86IDFN(hw_features));
#endif
exit(EXIT_FAILURE); // XXX XXX - should be __abort(1, "some string");

}/* init_hw_features */
Expand Down
36 changes: 18 additions & 18 deletions tools/flang1/flang1exe/lowerexp.c
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1997-2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 1997-2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -2296,6 +2296,8 @@ lower_function(int ast)
/* prefix: J K */
#define in_J_K 0x0530000
/* prefix: none A D */
#define in_R_D 0x0003300
/* prefix: R D */
#define in_r_D 0x0001300
/* prefix: R D C CD */
#define in_R_D_C_CD 0x0001333
Expand Down Expand Up @@ -2626,6 +2628,9 @@ intrinsic_arg_dtype(int intr, int ast, int args, int nargs)
case I_ANINT:
case I_DNINT:

case I_CEILING:
case I_FLOOR:

case I_CONJG:
case I_DCONJG:

Expand Down Expand Up @@ -2873,8 +2878,6 @@ intrinsic_arg_dtype(int intr, int ast, int args, int nargs)
case I_SIZE:
case I_LBOUND:
case I_UBOUND:
case I_CEILING:
case I_FLOOR:
case I_MODULO:
case I_EXPONENT:
case I_FRACTION:
Expand Down Expand Up @@ -3071,6 +3074,7 @@ lower_intrinsic(int ast)
nargs = A_ARGCNTG(ast);
args = A_ARGSG(ast);
intr = A_OPTYPEG(ast);

if (intr != NEW_INTRIN) {
symfunc = EXTSYMG(intast_sym[intr]);
} else {
Expand Down Expand Up @@ -3108,6 +3112,8 @@ lower_intrinsic(int ast)
case I_DINT:
case I_ANINT:
case I_DNINT:
case I_FLOOR:
case I_CEILING:
nargs = 1;
}
if (argdtype >= 0) {
Expand Down Expand Up @@ -3745,6 +3751,15 @@ lower_intrinsic(int ast)
}
break;

case I_CEILING:
dtype = A_NDTYPEG(ast);
ilm = intrin_name("CEIL", ast, in_R_D);
break;
case I_FLOOR:
dtype = A_NDTYPEG(ast);
ilm = intrin_name("FLOOR", ast, in_R_D);
break;

case I_AINT:
case I_DINT:
dtype = A_NDTYPEG(ast);
Expand Down Expand Up @@ -4059,21 +4074,6 @@ lower_intrinsic(int ast)
A_ILMP(ast, ilm);
return ilm;

case I_CEILING:
case I_FLOOR:
/*
* see semfunc.c for the spelling of the function name.
*/
dtype = A_NDTYPEG(ast);
symfunc = A_SPTRG(A_LOPG(ast));
for (i = 0; i < nargs; ++i) {
ilm = lower_ilm(ARGT_ARG(args, i));
ilm = plower("oi", "DPVAL", ilm);
intrinsic_args[i] = ilm;
}
ilm = plower("onsm", ltyped("FUNC", dtype), nargs, symfunc);
break;

case I_MODULO:
/*
* see semfunc.c for the spelling of the function name.
Expand Down
4 changes: 2 additions & 2 deletions tools/flang1/flang1exe/pointsto.c
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2006-2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2006-2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -4247,7 +4247,7 @@ points_to(void)
if (head.stg_size > 1000000) {
/* abort */
Trace(("pointer target analysis is too expensive, abort\n"));
fini_points_to_anal();
fini_points_to_prop();
return;
}
STG_ALLOC(head, head.stg_size);
Expand Down
2 changes: 2 additions & 0 deletions tools/flang1/flang1exe/semant.c
Expand Up @@ -549,6 +549,7 @@ semant_init(int noparse)
sem.expect_dist_do = FALSE;
sem.expect_acc_do = 0;
sem.collapsed_acc_do = 0;
sem.seq_acc_do = 0;
sem.expect_cuf_do = 0;
sem.close_pdo = FALSE;
sem.is_hpf = FALSE;
Expand Down Expand Up @@ -1045,6 +1046,7 @@ semant1(int rednum, SST *top)
sem.expect_dist_do = FALSE;
sem.expect_acc_do = 0;
sem.collapsed_acc_do = 0;
sem.seq_acc_do = 0;
sem.expect_cuf_do = 0;
sem.collapse = sem.collapse_depth = 0;
}
Expand Down
4 changes: 4 additions & 0 deletions tools/flang1/flang1exe/semant.h
Expand Up @@ -180,6 +180,7 @@ typedef struct { /* DO-IF stack entries */
int count; /* var=triplet control count -- outermost=1 */
int kind; /* temp: 1) curr locality kind; 2) loop component kind */
bool no_default; /* loop has a DEFAULT(NONE) locality spec? */
int popindex; /* do pop the index symbol */
int block_sym; /* loop body block sym */
int syms; /* list of index, local, local_init, and shared syms */
int last_sym; /* last sym in syms list */
Expand Down Expand Up @@ -378,6 +379,7 @@ typedef struct { /* DO-IF stack entries */
#define DI_TOP_LABEL(d) sem.doif_base[d].u.u1.top_label
#define DI_DO_AST(d) sem.doif_base[d].u.u1.ast
#define DI_DOINFO(d) sem.doif_base[d].u.u1.doinfo
#define DI_DO_POPINDEX(d) sem.doif_base[d].u.u1.popindex
#define DI_CONC_SYMAVL(d) sem.doif_base[d].u.u1.symavl
#define DI_CONC_COUNT(d) sem.doif_base[d].u.u1.count
#define DI_CONC_KIND(d) sem.doif_base[d].u.u1.kind
Expand Down Expand Up @@ -1159,6 +1161,7 @@ typedef struct {
int doif_size; /* size in records of DOIF stack area. */
DOIF *doif_base; /* base pointer for DOIF stack area. */
int doif_depth; /* current DO-IF nesting level */
SPTR index_sym_to_pop; /* DO index symbol to pop off hash link at end of loop */
SPTR doconcurrent_symavl; /* stb.stg_avail value at start of do concurrent */
DTYPE doconcurrent_dtype; /* explicit do concurrent index data type */
int eqvlist; /* head of list of equivalences */
Expand Down Expand Up @@ -1300,6 +1303,7 @@ typedef struct {
* needs to be a DO.
*/
int collapsed_acc_do; /* value of collapse clause for acc loop */
int seq_acc_do; /* acc loop with 'seq' clause */
int expect_cuf_do; /* next statement after CUF KERNELS needs to be a DO. */
LOGICAL close_pdo; /* A DO loop for a PDO, PARALLELDO, or DOACROSS
* has been processed and its removal from the
Expand Down

0 comments on commit d5db06d

Please sign in to comment.