Permalink
Browse files

Initial implementation of OpenMP GPU Offload in Flang.

  • Loading branch information...
gklimowicz committed Nov 16, 2018
1 parent 52275c6 commit cf1869414eb021f6255a340bac803ecd67c5acec
Showing with 6,087 additions and 275 deletions.
  1. +5 −0 CMakeLists.txt
  2. +6 −0 include/flang/Error/errmsg-in.n
  3. +4 −1 include/flang/Error/errmsg.n
  4. +6 −1 tools/flang1/CMakeLists.txt
  5. +12 −0 tools/flang1/flang1exe/ast.c
  6. +15 −0 tools/flang1/flang1exe/astout.c
  7. +7 −0 tools/flang1/flang1exe/dump.c
  8. +7 −0 tools/flang1/flang1exe/func.c
  9. +2 −0 tools/flang1/flang1exe/global.h
  10. +1 −1 tools/flang1/flang1exe/kwddf.h
  11. +1 −1 tools/flang1/flang1exe/lowerexp.c
  12. +100 −1 tools/flang1/flang1exe/lowerilm.c
  13. +20 −14 tools/flang1/flang1exe/main.c
  14. +12 −0 tools/flang1/flang1exe/rest.c
  15. +13 −1 tools/flang1/flang1exe/semant.c
  16. +13 −1 tools/flang1/flang1exe/semant3.c
  17. +283 −47 tools/flang1/flang1exe/semsmp.c
  18. +12 −0 tools/flang1/flang1exe/semutil.c
  19. +103 −1 tools/flang1/utils/ast/ast.n
  20. +2 −1 tools/flang1/utils/ast/astutil.c
  21. +1 −0 tools/flang1/utils/prstab/gram.tki
  22. +3 −3 tools/flang1/utils/prstab/gram.txt
  23. +6 −1 tools/flang2/CMakeLists.txt
  24. +17 −0 tools/flang2/docs/xflag.n
  25. +2 −1 tools/flang2/flang2exe/CMakeLists.txt
  26. +1 −0 tools/flang2/flang2exe/aarch64-Linux/flgdf.h
  27. +1 −0 tools/flang2/flang2exe/assem.h
  28. +3 −1 tools/flang2/flang2exe/cgllvm.h
  29. +236 −47 tools/flang2/flang2exe/cgmain.cpp
  30. +58 −12 tools/flang2/flang2exe/expand.cpp
  31. +99 −11 tools/flang2/flang2exe/expsmp.cpp
  32. +3 −0 tools/flang2/flang2exe/iliutil.cpp
  33. +202 −2 tools/flang2/flang2exe/kmpcutil.cpp
  34. +45 −0 tools/flang2/flang2exe/kmpcutil.h
  35. +130 −8 tools/flang2/flang2exe/ll_structure.cpp
  36. +60 −12 tools/flang2/flang2exe/ll_structure.h
  37. +36 −1 tools/flang2/flang2exe/ll_write.cpp
  38. +4 −0 tools/flang2/flang2exe/ll_write.h
  39. +117 −18 tools/flang2/flang2exe/llassem.cpp
  40. +12 −0 tools/flang2/flang2exe/llassem.h
  41. +17 −5 tools/flang2/flang2exe/llassem_common.cpp
  42. +114 −30 tools/flang2/flang2exe/llutil.cpp
  43. +58 −33 tools/flang2/flang2exe/llutil.h
  44. +126 −14 tools/flang2/flang2exe/main.cpp
  45. +20 −0 tools/flang2/flang2exe/mwd.cpp
  46. +2,066 −0 tools/flang2/flang2exe/ompaccel.cpp
  47. +343 −0 tools/flang2/flang2exe/ompaccel.h
  48. +397 −4 tools/flang2/flang2exe/outliner.cpp
  49. +33 −0 tools/flang2/flang2exe/outliner.h
  50. +1 −0 tools/flang2/flang2exe/ppc64le-Linux/flgdf.h
  51. +1 −1 tools/flang2/flang2exe/ppc64le-Linux/ll_abi.cpp
  52. +961 −0 tools/flang2/flang2exe/tgtutil.cpp
  53. +101 −0 tools/flang2/flang2exe/tgtutil.h
  54. +1 −0 tools/flang2/flang2exe/x86_64-Linux/flgdf.h
  55. +1 −1 tools/flang2/flang2exe/x86_64-Linux/ll_abi.cpp
  56. +34 −0 tools/flang2/utils/ilmtp/aarch64/ilmtp.n
  57. +34 −0 tools/flang2/utils/ilmtp/ppc64le/ilmtp.n
  58. +34 −0 tools/flang2/utils/ilmtp/x86_64/ilmtp.n
  59. +22 −0 tools/flang2/utils/symtab/symtab.n
  60. +7 −0 tools/flang2/utils/upper/upperilm.in
  61. +1 −0 tools/shared/ccffinfo.h
  62. +47 −0 tools/shared/llmputil.h
  63. +8 −0 tools/shared/utils/global.h
@@ -391,6 +391,11 @@ add_subdirectory(tools)
#endif()
#add_subdirectory(examples)
# Opt-in switch for OpenMP accelerator offload support; disabled by default.
option(FLANG_OPENMP_GPU_NVIDIA "Enable OpenMP Accelerator Offload." OFF)
# When the option is on, pass -DOMP_OFFLOAD_LLVM to the compiler so that
# sources guarded by `#ifdef OMP_OFFLOAD_LLVM` are built with offload enabled.
if(FLANG_OPENMP_GPU_NVIDIA)
  add_definitions(-DOMP_OFFLOAD_LLVM)
endif()
if( FLANG_INCLUDE_TESTS )
# if(EXISTS ${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include/gtest/gtest.h)
# add_subdirectory(unittests)
@@ -1508,3 +1508,9 @@ A DO CONCURRENT or FORALL construct or statement may not specify an index name m
.MS S 1059 "The definition of subprogram $ does not have the same number of arguments as its declaration"
.MS S 1060 "The $ of the definition and declaration of subprogram $ must match"
.MS S 1061 "The definition of function return type of $ does not match its declaration type"
.MS S 1200 "OpenMP GPU - [$] is used, it is not implemented yet."
.MS S 1201 "OpenMP GPU - [$] is used with [$], this usage is not implemented yet."
.MS S 1202 "OpenMP GPU - [$] is used independently than [$], this usage is not implemented yet."
.MS S 1203 "OpenMP GPU - Directive target exit data is used, map type [$] cannot be used. It should be one of 'from', 'release', or 'delete'"
.MS S 1204 "OpenMP GPU - Directive target data is used, map type [$] cannot be used. It should be one of 'to', 'from', 'tofrom', 'alloc', 'release', or 'delete'"
.MS S 1205 "OpenMP GPU - Directive target enter data is used, map type [$] cannot be used. It should be one of 'to', or 'alloc'"
@@ -1,5 +1,5 @@
.\"/*
.\" * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
.\" * Copyright (c) 1994-2018, NVIDIA CORPORATION. All rights reserved.
.\" *
.\" * Licensed under the Apache License, Version 2.0 (the "License");
.\" * you may not use this file except in compliance with the License.
@@ -714,3 +714,6 @@ In integer shift, ISHFT(I, SHIFT), absolute value of SHIFT cannot be bigger than
The accelerator feature in the PGI compilers is separately licensed;
that license was not found.
Please update your licenses to use this feature.
.MS S 1200 "OpenMP GPU - [$] is used, it is not implemented yet."
.MS S 1201 "OpenMP GPU - [$] is used with [$], this usage is not implemented yet."
.MS S 1202 "OpenMP GPU - [$] is used independently than [$], this usage is not implemented yet."
@@ -1,5 +1,5 @@
#
# Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -31,6 +31,11 @@ set(FLANG1_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/include)
include_directories(${FLANG1_INCLUDE_DIR})
# Opt-in switch for OpenMP accelerator offload support; disabled by default.
option(FLANG_OPENMP_GPU_NVIDIA "Enable OpenMP Accelerator Offload." OFF)
# When the option is on, pass -DOMP_OFFLOAD_LLVM to the compiler for this
# directory and for the subdirectories added after this point.
if(FLANG_OPENMP_GPU_NVIDIA)
  add_definitions(-DOMP_OFFLOAD_LLVM)
endif()
add_subdirectory(include)
add_subdirectory(utils)
add_subdirectory(flang1exe)
@@ -4971,6 +4971,12 @@ ast_rewrite(int ast)
case A_MP_ATOMICREAD:
case A_MP_ATOMICUPDATE:
case A_MP_ATOMICCAPTURE:
case A_MP_MAP:
case A_MP_EMAP:
case A_MP_TARGETLOOPTRIPCOUNT:
case A_MP_EREDUCTION:
case A_MP_BREDUCTION:
case A_MP_REDUCTIONITEM:
break;
case A_MP_ATOMICWRITE:
rop = ast_rewrite(A_ROPG(ast));
@@ -5827,6 +5833,12 @@ ast_trav_recurse(int ast, int *extra_arg)
case A_MP_TASKREG:
case A_MP_TASKDUP:
case A_MP_ETASKLOOPREG:
case A_MP_MAP:
case A_MP_EMAP:
case A_MP_TARGETLOOPTRIPCOUNT:
case A_MP_EREDUCTION:
case A_MP_BREDUCTION:
case A_MP_REDUCTIONITEM:
break;
case A_MP_BMPSCOPE:
#if DEBUG
@@ -2243,6 +2243,21 @@ print_ast(int ast)
lbuff[0] = '!';
put_string(astb.atypes[atype]);
break;
case A_MP_TARGETLOOPTRIPCOUNT:
put_string("target loop tripcount");
break;
case A_MP_MAP:
put_string("map");
break;
case A_MP_EMAP:
put_string("end map");
break;
case A_MP_BREDUCTION:
put_string("begin reduction");
break;
case A_MP_EREDUCTION:
put_string("end reduction");
break;
case A_MP_CRITICAL:
case A_MP_ENDCRITICAL:
lbuff[0] = '!';
@@ -1685,6 +1685,13 @@ dastreex(int astx, int l, int notlast)
case A_MP_BCOPYPRIVATE:
case A_MP_COPYPRIVATE:
case A_MP_ECOPYPRIVATE:
case A_MP_MAP:
case A_MP_EMAP:
case A_MP_TARGETLOOPTRIPCOUNT:
case A_MP_DISTRIBUTE:
case A_MP_EREDUCTION:
case A_MP_BREDUCTION:
case A_MP_REDUCTIONITEM:
break;
default:
fprintf(gbl.dbgfil, "NO DUMP AVL");
@@ -4135,6 +4135,13 @@ rewrite_calls(void)
a = rewrite_sub_ast(A_ROPG(ast), 0);
A_ROPP(ast, a);
break;
case A_MP_EMAP:
case A_MP_MAP:
case A_MP_TARGETLOOPTRIPCOUNT:
case A_MP_EREDUCTION:
case A_MP_BREDUCTION:
case A_MP_REDUCTIONITEM:
break;
default:
interr("rewrite_subroutine: unknown stmt found", ast, 4);
break;
@@ -143,6 +143,7 @@ typedef struct {
* preprocessed (can be more general if we choose).
*/
LOGICAL denorm; /* enforce denorm for the current subprogram */
LOGICAL inomptarget; /* set if it is OpenMP's target region*/
} GBL;
#undef MAXCPUS
@@ -214,6 +215,7 @@ typedef struct {
LOGICAL defaulthpf;
LOGICAL defaultsequence;
int errorlimit;
LOGICAL omptarget; /* TRUE => allow omp accel directives */
LOGICAL smp; /* TRUE => allow smp directives */
int tpcount;
int tpvalue[TPNVERSION]; /* target processor(s), for unified binary */
@@ -390,7 +390,7 @@ static KWORD t5[] = {
{"linear", TK_LINEAR},
{"link", TK_LINK},
{"local", TK_PRIVATE},
{"map", TK_MAP},
{"map", TK_MP_MAP},
{"mergeable", TK_MERGEABLE},
{"mp_schedtype", TK_MP_SCHEDTYPE},
{"nogroup", TK_NOGROUP},
@@ -4484,7 +4484,7 @@ lower_intrinsic(int ast)
return ilm;
} /* lower_intrinsic */
#if AST_MAX != 159
#if AST_MAX != 165
#error "Need to edit lowerexp.c to add or delete A_... AST types"
#endif
@@ -2853,6 +2853,68 @@ lower_omp_atomic_capture(int ast, int lineno)
plower("oiinnn", "MP_ATOMICCAPTURE", lilm, rilm, mem_order, aop, flag);
}
/*
 * Lower the loop-trip-count node attached to an OpenMP target construct.
 *
 * ast - node providing the DO variable (A_DOVARG) and the three loop control
 *       expressions (A_M1G, A_M2G, A_M3G: initial value, end value, step).
 * std - statement passed through to dotemp() and compute_dotrip().
 *
 * The loop controls are lowered, compute_dotrip() is invoked with a freshly
 * created trip-count temporary, and that temporary is handed to the
 * MP_TARGETLOOPTRIPCOUNT ILM.  If the DO variable is not a plain identifier
 * an error is reported and nothing is emitted.
 */
static void
lower_omp_target_tripcount(int ast, int std)
{
  int var_ast, var_sptr, var_dtype;
  int init_ast, end_ast, step_ast;
  int init_ilm, end_ilm, step_ilm;
  int step_tmp, trip_tmp, trip_dtype;

  var_ast = A_DOVARG(ast);
  if (A_TYPEG(var_ast) != A_ID) {
    lerror("unsupported DO variable");
    return;
  }

  var_sptr = A_SPTRG(var_ast);
  var_dtype = DTYPEG(var_sptr);

  /* a logical induction variable is handled as the same-sized integer */
  if (var_dtype == DT_BLOG) {
    var_dtype = DT_BINT;
  } else if (var_dtype == DT_SLOG) {
    var_dtype = DT_SINT;
  } else if (var_dtype == DT_LOG4) {
    var_dtype = DT_INT4;
  } else if (var_dtype == DT_LOG8) {
    var_dtype = DT_INT8;
  }

  /* KMPC only permits 4 or 8 byte loop inductions */
  if (A_TYPEG(ast) == A_MP_PDO) {
    if (size_of(var_dtype) <= 4)
      var_dtype = DT_INT;
    else
      var_dtype = DT_INT8;
  }

  /*
   * The trip-count temporary is 8 bytes wide only for an 8-byte induction
   * variable, and then only when XBIT(68, 0x1) or XBIT(49, 0x100) is set;
   * in every other case it is a 4-byte integer.
   */
  trip_dtype = DT_INT4;
  if (var_dtype == DT_INT8 && (XBIT(68, 0x1) || XBIT(49, 0x100)))
    trip_dtype = DT_INT8;
  trip_tmp = dotemp('T', trip_dtype, std);
  PTRSAFEP(trip_tmp, 1);

  init_ast = A_M1G(ast);
  end_ast = A_M2G(ast);
  step_ast = A_M3G(ast);

  /* lower the three loop controls in order: initial value, end value, step */
  lower_expression(init_ast);
  init_ilm = lower_ilm(init_ast);
  lower_expression(end_ast);
  end_ilm = lower_ilm(end_ast);
  lower_expression(step_ast);
  step_ilm = lower_ilm(step_ast);

  /* temporary for the step, typed like the (adjusted) induction variable */
  step_tmp = dotemp('i', var_dtype, std);

  compute_dotrip(std, FALSE, init_ilm, end_ilm, step_tmp, step_ilm, var_dtype,
                 trip_tmp);
  plower("oS", "MP_TARGETLOOPTRIPCOUNT", trip_tmp);
}
void
lower_stmt(int std, int ast, int lineno, int label)
{
@@ -5139,10 +5201,47 @@ lower_stmt(int std, int ast, int lineno, int label)
} else {
ilm = plower("oS", "ICON", lowersym.intone);
}
if(flg.omptarget) {
if(A_LOOPTRIPCOUNTG(ast) != 0) {
lower_omp_target_tripcount(A_LOOPTRIPCOUNTG(ast), std);
}
plower("on", "MP_TARGETMODE", A_COMBINEDTYPEG(ast));
}
//pragmatype specifies combined type of target.
ilm = plower("oin", "BTARGET", ilm, flag);
lower_end_stmt(std);
break;
case A_MP_MAP:
lower_start_stmt(lineno, label, TRUE, std);
lop = A_LOPG(ast);
lower_expression(lop);
//todo ompaccel need to pass size and base
flag = A_PRAGMATYPEG(STD_AST(std));
plower("oin", "MP_MAP", lower_base(lop), flag);
lower_end_stmt(std);
break;
case A_MP_BREDUCTION:
lower_start_stmt(lineno, label, TRUE, std);
ilm = plower("o", "MP_BREDUCTION");
lower_end_stmt(std);
break;
case A_MP_EREDUCTION:
lower_start_stmt(lineno, label, TRUE, std);
ilm = plower("o", "MP_EREDUCTION");
lower_end_stmt(std);
break;
case A_MP_REDUCTIONITEM:
lower_start_stmt(lineno, label, TRUE, std);
ilm = plower("ossn", "MP_REDUCTIONITEM", A_SHSYMG(ast), A_PRVSYMG(ast), A_REDOPRG(ast));
lower_end_stmt(std);
break;
case A_MP_EMAP:
lower_start_stmt(lineno, label, TRUE, std);
ilm = plower("o", "MP_EMAP");
lower_end_stmt(std);
break;
case A_MP_ENDTARGET:
lower_start_stmt(lineno, label, TRUE, std);
ilm = plower("o", "ETARGET");
@@ -159,7 +159,7 @@ main(int argc, char *argv[])
getcpu();
init(argc, argv); /* initialize */
if (gbl.fn == NULL)
gbl.fn = gbl.src_file;
gbl.fn = gbl.src_file;
#if DEBUG
if (debugfunconly > 0)
@@ -523,7 +523,7 @@ main(int argc, char *argv[])
xref(); /* write cross reference map */
xtimes[7] += getcpu();
}
skip_compile:
skip_compile:
(void)summary(FALSE, FALSE);
errini();
@@ -659,20 +659,20 @@ init(int argc, char *argv[])
int form; /* 0 = fixed, 1 = form */
int fpp; /* 0 = don't preprocess, 1 = preprocess */
} suffixes[] = {
{".hpf", 0, 0}, {".f", 0, 0}, {".F", 0, 1}, {".f90", 1, 0},
{".F90", 1, 1}, {".f95", 1, 0}, {".F95", 1, 1}, {".for", 0, 0},
{".fpp", 0, 1}, {0, 0, 0},
{".hpf", 0, 0}, {".f", 0, 0}, {".F", 0, 1}, {".f90", 1, 0},
{".F90", 1, 1}, {".f95", 1, 0}, {".F95", 1, 1}, {".for", 0, 0},
{".fpp", 0, 1}, {0, 0, 0},
};
char *followval;
int followindex;
time_t now;
flg.freeform = -1;
file_suffix = ".f90"; /* default suffix for source files */
/*
* initialize error and symbol table modules in case error messages are
* issued:
*/
/*
* initialize error and symbol table modules in case error messages are
* issued:
*/
errini();
gbl.curr_file = NULL;
gbl.fn = NULL;
@@ -706,6 +706,7 @@ init(int argc, char *argv[])
goto empty_cl;
char *tp; /* Target architecture */
char *omptp = NULL; /* OpenMP Target architecture */
int vect_val; /* Vectorizer settings */
char *modexport_val; /* Modexport file name */
char *modindex_val; /* Modindex file name */
@@ -786,6 +787,7 @@ init(int argc, char *argv[])
/* Other flags */
register_boolean_arg(arg_parser, "mp", (bool *)&(flg.smp), false);
register_string_arg(arg_parser, "fopenmp-targets", &omptp, NULL);
register_boolean_arg(arg_parser, "preprocess", &arg_preproc, true);
register_boolean_arg(arg_parser, "reentrant", &arg_reentrant, false);
register_integer_arg(arg_parser, "terse", &(flg.terse), 1);
@@ -877,7 +879,10 @@ init(int argc, char *argv[])
/* Postprocess target architecture */
do_set_tp(tp);
#ifdef OMP_OFFLOAD_LLVM
if(omptp != NULL)
flg.omptarget = TRUE;
#endif
/* Vectorizer settings */
flg.vect |= vect_val;
if (flg.vect & 0x10)
@@ -940,7 +945,7 @@ init(int argc, char *argv[])
if (flg.es && !flg.p)
flg.x[123] |= 0x100;
empty_cl:
empty_cl:
if (sourcefile == NULL) {
if (flg.ipa & 0x0a) {
/* for IPA propagation or when generating static$init, no sourcefile */
@@ -1023,12 +1028,12 @@ init(int argc, char *argv[])
}
/* not found */
error(2, 4, 0, sourcefile, CNULL);
is_open:
is_open:
if (preproc == 1)
fpp_ = TRUE; /* -preproc forces preprocessing */
}
do_curr_file:
do_curr_file:
if (gbl.file_name == NULL)
gbl.file_name = gbl.src_file;
@@ -1249,7 +1254,7 @@ reptime(void)
} else if (gbl.dbgfil)
fprintf(gbl.dbgfil, "%s\n", buf);
xbitcheck:
xbitcheck:
if (!XBIT(0, 1))
return;
fprintf(stderr, " Timing stats:\n");
@@ -1291,6 +1296,7 @@ datastructure_reinit(void)
gbl.p_adjarr = NOSYM;
gbl.p_adjstr = NOSYM;
gbl.denorm = FALSE;
gbl.inomptarget = false;
/* restore opt flag to its original value */
flg.opt = saveoptflag;
flg.vect = savevectflag;
Oops, something went wrong.

0 comments on commit cf18694

Please sign in to comment.