Merge pull request #690 from ThePortlandGroup/nv_stage

Pull 2019-03-20T10-58 Recent NVIDIA Changes
flang-compiler · Mar 20, 2019 · d5db06d · d5db06d
2 parents cc04c6e + 5670a43
commit d5db06d
Show file tree

Hide file tree

Showing 15 changed files with 203 additions and 162 deletions.
diff --git a/runtime/flang/format-double.c b/runtime/flang/format-double.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2017-2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -218,6 +218,22 @@ div_by_billion(uint32_t le_x[32], int *words)
   return remainder;
 }
 
+static inline uint64_t
+double_to_uint64 (double x) {
+
+#if defined(TARGET_LLVM) && defined(TARGET_LINUX_X8664)
+/*  
+ *  LLVM emulates 'vcvttsd2usi' (a new AVX-512F instruction) with 'vcvttsd2si' 
+ *  on non AVX-512F machines to cast double to unsigned long. With -Ktrap=fp
+ *  option, this generates a floating point exception when the converted number
+ *  is >= 9223372036854775808 (1<<63).
+ */
+  if (x >= SIGN_BIT)
+    return (uint64_t) (x - SIGN_BIT) + SIGN_BIT;
+#endif
+  return (uint64_t) x;
+}
+
 /*
  *  Convert a nonnegative integer represented as a double
  *  into a sequence of decimal digit characters ('0' to '9').
@@ -234,7 +250,7 @@ format_int_part(char *buff, int width, double x)
    * arithmetic below.
    */
   if (x <= MAX_EXACTLY_REPRESENTABLE_UINT64) {
-    out = reversed_uint64(out, buff, x);
+    out = reversed_uint64(out, buff, double_to_uint64(x));
     if (!out)
       return width + 1; /* overflow */
   } else {
@@ -361,7 +377,7 @@ format_fraction(char buff[MAX_FRACTION_SIGNIFICANT_DECIMAL_DIGITS],
     return;
   }
 
-  absx -= (uint64_t) absx;
+  absx -= double_to_uint64(absx);
   if (absx == 0.0) {
     fill(buff, '0', width);
     return;
@@ -453,7 +469,7 @@ fraction_digits(char buff[MAX_FRACTION_SIGNIFICANT_DECIMAL_DIGITS],
 
   if (absx >= MIN_ENTIRELY_INTEGER)
     return -1;
-  absx -= (uint64_t) absx;
+  absx -= double_to_uint64(absx);
   if (absx == 0.0)
     return -1;
 
@@ -657,7 +673,7 @@ F_format(char *output_buffer, int width,
       /* |x| is an integer (no bits worth < 2**0) */
       fill(frac, '0', frac_digits);
     } else {
-      uint64_t int_absx = (uint64_t) absx;
+      uint64_t int_absx = double_to_uint64(absx);
       int next_digit_for_rounding = 0;
       bool is_inexact = false;
       format_fraction(frac, &next_digit_for_rounding, &is_inexact,
@@ -885,7 +901,7 @@ ED_format(char *out_buffer, int width, const struct formatting_control *control,
     } else if (frac_part_digits < 0) {
       expo = int_part_digits;
       is_inexact = absx < MAX_EXACTLY_REPRESENTABLE_UINT64 &&
-                   absx != (uint64_t) absx;
+                   absx != double_to_uint64(absx);
       while (int_part_digits > significant_digits) {
         is_inexact |= next_digit_for_rounding != 0;
         next_digit_for_rounding = payload[--int_part_digits] - '0';

diff --git a/runtime/flang/type.c b/runtime/flang/type.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2010-2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -30,6 +30,8 @@ static struct type_desc *I8(__f03_ty_to_id)[];
 void ENTF90(SET_INTRIN_TYPE, set_intrin_type)(F90_Desc *dd,
                                               __INT_T intrin_type);
 
+static TYPE_DESC * get_parent_pointer(TYPE_DESC *src_td, __INT_T level);
+
 #define ARG1_PTR 0x1
 #define ARG1_ALLOC 0x2
 #define ARG2_PTR 0x4
@@ -135,8 +137,7 @@ ENTF90(EXTENDS_TYPE_OF, extends_type_of)
     return GET_DIST_TRUE_LOG;
 
   if (atd->obj.level > btd->obj.level) {
-    __INT_T offset = (btd->obj.level + 1) * sizeof(__POINT_T);
-    TYPE_DESC *parent = *((TYPE_DESC **)(((char *)atd) - offset));
+    TYPE_DESC *parent = get_parent_pointer(atd, btd->obj.level+1);
     if (btd == parent)
       return GET_DIST_TRUE_LOG;
   }
@@ -245,8 +246,7 @@ ENTF90(KEXTENDS_TYPE_OF, kextends_type_of)
     return GET_DIST_TRUE_LOG;
 
   if (atd->obj.level > btd->obj.level) {
-    __INT_T offset = (btd->obj.level + 1) * sizeof(__POINT_T);
-    TYPE_DESC *parent = *((TYPE_DESC **)(((char *)atd) - offset));
+    TYPE_DESC *parent = get_parent_pointer(atd, btd->obj.level+1);
     if (btd == parent)
       return GET_DIST_TRUE_LOG;
   }
@@ -310,6 +310,50 @@ ENTF90(KGET_OBJECT_SIZE, kget_object_size)(F90_Desc *d)
   return (__INT8_T)(td ? td->obj.size : od->size);
 }
 
+/** \brief Returns a type descriptor pointer of a specified ancestor of 
+ *         a type descriptor.
+ *
+ *  \param src_td is the type descriptor used to locate the ancestor type
+ *                type descriptor.
+ *  \param level specifies the heirarchical position in the inheritance graph
+ *               of the desired ancestor type descriptor. To find its immediate
+ *               parent, specify a level equal to src_td's level.
+ *
+ *  \return a type descriptor representing the ancestor or NULL if there is no
+ *          ancestor.
+ */
+static TYPE_DESC *
+get_parent_pointer(TYPE_DESC *src_td, __INT_T level)
+{
+
+  __INT_T offset, src_td_level;
+  TYPE_DESC *parent;
+
+  if (level <= 0 || src_td == NULL)
+    return NULL;
+
+  src_td_level = src_td->obj.level;
+  if (src_td_level < 0 || level > src_td_level)
+    return NULL;
+
+  if (src_td->parents != NULL) {
+    /* The parents field is filled in, so use it to get the desired parent */
+    offset = (src_td_level - level) * sizeof(__POINT_T);
+    parent = *((TYPE_DESC **)(((char *)src_td->parents) + offset));
+  } else {
+    /* The parents field is not filled in, so find the parent from the
+     * src_td base pointer. The parents field is not filled in
+     * when a type descriptor is created with an older compiler.
+     * Note: This method does not always work if the type descriptor is
+     * defined in a shared library.
+     */
+    offset = level * sizeof(__POINT_T);
+    parent = *((TYPE_DESC **)(((char *)src_td) - offset));
+  }
+
+  return parent;    
+
+}
 static void
 process_final_procedures(char *area, F90_Desc *sd)
 {
@@ -408,8 +452,9 @@ process_final_procedures(char *area, F90_Desc *sd)
 
   if (((F90_Desc *)src_td)->tag == __POLY && src_td->obj.level > 0) {
     /* process parent finals */
-    __INT_T offset = (src_td->obj.level) * sizeof(__POINT_T);
-    TYPE_DESC *parent = *((TYPE_DESC **)(((char *)src_td) - offset));
+    TYPE_DESC *parent = get_parent_pointer(src_td, src_td->obj.level);
+
+
 
     if (rank > 0) {
       int i;
@@ -910,14 +955,12 @@ void I8(__fort_dump_type)(TYPE_DESC *d)
   fprintf(__io_stderr(), "Size: %d\n", d->obj.size);
   fprintf(__io_stderr(), "Type Descriptor:\n\t'%s'\n", d->name);
   if (d->obj.level > 0) {
-    TYPE_DESC *parent;
     __INT_T offset, level;
     fprintf(__io_stderr(), "(Child Type)\n");
     fprintf(__io_stderr(), "Parent Descriptor%s\n",
             (d->obj.level == 1) ? ":" : "s:");
     for (level = d->obj.level - 1; level >= 0; --level) {
-      offset = (level + 1) * sizeof(__POINT_T);
-      TYPE_DESC *parent = *((TYPE_DESC **)(((char *)d) - offset));
+      TYPE_DESC *parent = get_parent_pointer(d, level+1);
       fprintf(__io_stderr(), "\t'%s'\n", parent->name);
     }
 

diff --git a/runtime/flang/type.h b/runtime/flang/type.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2010-2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -120,7 +120,7 @@ struct object_desc {
 struct type_desc /* extends(OBJECT_DESC) */ {
   OBJECT_DESC obj;             /**< parent object_desc */
   VTABLE(func_table);          /**< pointer to virtual function table */
-  VTABLE(constructor);         /**< reserved */
+  POINT(TYPE_DESC, parents);   /**< pointer to parent type descriptor list */
   FINAL_TABLE(finals);         /**< pointer to final procedures table */
   POINT(LAYOUT_DESC, layout);  /**< pointer to layout descriptor */
   char name[MAX_TYPE_NAME + 1];/**< null terminated user defined name of type */

diff --git a/runtime/libpgmath/lib/common/dispatch.c b/runtime/libpgmath/lib/common/dispatch.c
@@ -53,14 +53,6 @@
  */
 
 #if     defined(TARGET_WIN_X8664)
-/*
- * Defining CPP object macro _NO_CRT_STDIO_INLINE prevents the Visual Studio
- * header files from generating local versions of printf(), fprintf() and
- * others.
- */
-
-#define _NO_CRT_STDIO_INLINE
-
 /*
  * The Windows system header files are missing the argument list in the
  * following function declarations.  Without the argument list, albeit void,

diff --git a/runtime/libpgmath/lib/x86_64/x86id.c b/runtime/libpgmath/lib/x86_64/x86id.c
@@ -964,8 +964,20 @@ X86IDFN(init_hw_features)(uint32_t old_hw_features)
      * Abort and avoid infinite loop since nothing is going to change.
      */
 
+#if defined(TARGET_WIN_X8664) && ! defined(_NO_CRT_STDIO_INLINE)
+    /*
+     * Exception! Windows - building x86id.obj for libcpuid.lib:
+     * It is unclear why fprintf() can't be used when x86id.c is being
+     * compiled for libcpuid.lib.
+     */
+
     printf("Error: %s called twice with hw_features=%#x\n", __func__,
         X86IDFN(hw_features));
+#else
+    // All other architectures/platforms/libraries can safely use fprintf().
+    fprintf(stderr, "Error: %s called twice with hw_features=%#x\n", __func__,
+        X86IDFN(hw_features));
+#endif
     exit(EXIT_FAILURE);     // XXX XXX - should be __abort(1, "some string");
 
 }/* init_hw_features */

diff --git a/tools/flang1/flang1exe/lowerexp.c b/tools/flang1/flang1exe/lowerexp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 1997-2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -2296,6 +2296,8 @@ lower_function(int ast)
 /*                       prefix:  J    K                                   */
 #define in_J_K 0x0530000
 /*                       prefix:  none      A      D                       */
+#define in_R_D 0x0003300
+/*                       prefix:            R      D                       */
 #define in_r_D 0x0001300
 /*                       prefix:            R      D       C     CD        */
 #define in_R_D_C_CD 0x0001333
@@ -2626,6 +2628,9 @@ intrinsic_arg_dtype(int intr, int ast, int args, int nargs)
   case I_ANINT:
   case I_DNINT:
 
+  case I_CEILING:
+  case I_FLOOR:
+
   case I_CONJG:
   case I_DCONJG:
 
@@ -2873,8 +2878,6 @@ intrinsic_arg_dtype(int intr, int ast, int args, int nargs)
   case I_SIZE:
   case I_LBOUND:
   case I_UBOUND:
-  case I_CEILING:
-  case I_FLOOR:
   case I_MODULO:
   case I_EXPONENT:
   case I_FRACTION:
@@ -3071,6 +3074,7 @@ lower_intrinsic(int ast)
   nargs = A_ARGCNTG(ast);
   args = A_ARGSG(ast);
   intr = A_OPTYPEG(ast);
+
   if (intr != NEW_INTRIN) {
     symfunc = EXTSYMG(intast_sym[intr]);
   } else {
@@ -3108,6 +3112,8 @@ lower_intrinsic(int ast)
   case I_DINT:
   case I_ANINT:
   case I_DNINT:
+  case I_FLOOR:
+  case I_CEILING:
     nargs = 1;
   }
   if (argdtype >= 0) {
@@ -3745,6 +3751,15 @@ lower_intrinsic(int ast)
     }
     break;
 
+  case I_CEILING:
+    dtype = A_NDTYPEG(ast);
+    ilm = intrin_name("CEIL", ast, in_R_D);
+    break;
+  case I_FLOOR:
+    dtype = A_NDTYPEG(ast);
+    ilm = intrin_name("FLOOR", ast, in_R_D);
+    break;
+
   case I_AINT:
   case I_DINT:
     dtype = A_NDTYPEG(ast);
@@ -4059,21 +4074,6 @@ lower_intrinsic(int ast)
     A_ILMP(ast, ilm);
     return ilm;
 
-  case I_CEILING:
-  case I_FLOOR:
-    /*
-     * see semfunc.c for the spelling of the function name.
-     */
-    dtype = A_NDTYPEG(ast);
-    symfunc = A_SPTRG(A_LOPG(ast));
-    for (i = 0; i < nargs; ++i) {
-      ilm = lower_ilm(ARGT_ARG(args, i));
-      ilm = plower("oi", "DPVAL", ilm);
-      intrinsic_args[i] = ilm;
-    }
-    ilm = plower("onsm", ltyped("FUNC", dtype), nargs, symfunc);
-    break;
-
   case I_MODULO:
     /*
      * see semfunc.c for the spelling of the function name.

diff --git a/tools/flang1/flang1exe/pointsto.c b/tools/flang1/flang1exe/pointsto.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2006-2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -4247,7 +4247,7 @@ points_to(void)
   if (head.stg_size > 1000000) {
     /* abort */
     Trace(("pointer target analysis is too expensive, abort\n"));
-    fini_points_to_anal();
+    fini_points_to_prop();
     return;
   }
   STG_ALLOC(head, head.stg_size);

diff --git a/tools/flang1/flang1exe/semant.c b/tools/flang1/flang1exe/semant.c
@@ -549,6 +549,7 @@ semant_init(int noparse)
     sem.expect_dist_do = FALSE;
     sem.expect_acc_do = 0;
     sem.collapsed_acc_do = 0;
+    sem.seq_acc_do = 0;
     sem.expect_cuf_do = 0;
     sem.close_pdo = FALSE;
     sem.is_hpf = FALSE;
@@ -1045,6 +1046,7 @@ semant1(int rednum, SST *top)
         sem.expect_dist_do = FALSE;
         sem.expect_acc_do = 0;
         sem.collapsed_acc_do = 0;
+        sem.seq_acc_do = 0;
         sem.expect_cuf_do = 0;
         sem.collapse = sem.collapse_depth = 0;
       }

diff --git a/tools/flang1/flang1exe/semant.h b/tools/flang1/flang1exe/semant.h
@@ -180,6 +180,7 @@ typedef struct { /* DO-IF stack entries */
       int count;       /* var=triplet control count -- outermost=1 */
       int kind;        /* temp: 1) curr locality kind; 2) loop component kind */
       bool no_default; /* loop has a DEFAULT(NONE) locality spec? */
+      int popindex;    /* do pop the index symbol */
       int block_sym;   /* loop body block sym */
       int syms;        /* list of index, local, local_init, and shared syms */
       int last_sym;    /* last sym in syms list */
@@ -378,6 +379,7 @@ typedef struct { /* DO-IF stack entries */
 #define DI_TOP_LABEL(d) sem.doif_base[d].u.u1.top_label
 #define DI_DO_AST(d) sem.doif_base[d].u.u1.ast
 #define DI_DOINFO(d) sem.doif_base[d].u.u1.doinfo
+#define DI_DO_POPINDEX(d) sem.doif_base[d].u.u1.popindex
 #define DI_CONC_SYMAVL(d) sem.doif_base[d].u.u1.symavl
 #define DI_CONC_COUNT(d) sem.doif_base[d].u.u1.count
 #define DI_CONC_KIND(d) sem.doif_base[d].u.u1.kind
@@ -1159,6 +1161,7 @@ typedef struct {
   int doif_size;      /* size in records of DOIF stack area.  */
   DOIF *doif_base;    /* base pointer for DOIF stack area. */
   int doif_depth;     /* current DO-IF nesting level */
+  SPTR index_sym_to_pop;    /* DO index symbol to pop off hash link at end of loop */
   SPTR doconcurrent_symavl; /* stb.stg_avail value at start of do concurrent */
   DTYPE doconcurrent_dtype; /* explicit do concurrent index data type */
   int eqvlist;        /* head of list of equivalences */
@@ -1300,6 +1303,7 @@ typedef struct {
                             * needs to be a DO.
                             */
   int collapsed_acc_do;    /* value of collapse clause for acc loop */
+  int seq_acc_do;    /* acc loop with 'seq' clause */
   int expect_cuf_do; /* next statement after CUF KERNELS needs to be a DO.  */
   LOGICAL close_pdo; /* A DO loop for a PDO, PARALLELDO, or DOACROSS
                       * has been processed and its removal from the