Permalink
Browse files

Merge pull request #358 from ThePortlandGroup/nv_stage

Pull 2018-01-10T16-54 Recent NVIDIA Changes
  • Loading branch information...
sscalpone committed Jan 11, 2018
2 parents 66e17ec + e7d268f commit 33f0f74b09e491af9af12da37b8986d41c5789a4
View
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2010-2018, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -483,22 +483,6 @@ void ENTF90(DEALLOC_POLY_MBR03,
if (!g1 && !I8(__fort_allocated)(cb)) {
continue;
}
if (fd) {
__fort_bcopy(area + ld->offset, (char *)ptr1, sizeof(char *));
db = ptr1[0];
if (ld->tag == 'T' || ld->tag == 'D') {
ENTF90(DEALLOC_POLY_MBR03, dealloc_poly_mbr03)
(fd, stat, db, firsttime, CADR(errmsg), CLEN(errmsg));
if (I8(__fort_allocated)(db)) {
ENTF90(DEALLOC_MBR03, dealloc_mbr03)
(stat, db, firsttime, CADR(errmsg), CLEN(errmsg));
}
}
else if (I8(__fort_allocated)(db)) {
ENTF90(DEALLOC_MBR03, dealloc_mbr03)
(stat, db, firsttime, CADR(errmsg), CLEN(errmsg));
}
}
}
}
ENTF90(DEALLOC_MBR03, dealloc_mbr03)
@@ -529,7 +513,7 @@ void ENTF90(DEALLOC_POLY03, dealloc_poly03)(F90_Desc *sd, __STAT_T *stat,
F90_Desc *fd;
char *ptr1[1] = {0};
char *ptr2[1] = {0};
char *cb, *db;
char *cb;
__LOG_T g1;
for (; ld->tag != 0; ld++) {
@@ -555,18 +539,9 @@ void ENTF90(DEALLOC_POLY03, dealloc_poly03)(F90_Desc *sd, __STAT_T *stat,
continue;
}
if (fd) {
__fort_bcopy(area + ld->offset, (char *)ptr1, sizeof(char *));
db = ptr1[0];
if (ld->tag == 'T' || ld->tag == 'D') {
ENTF90(DEALLOC_POLY_MBR03, dealloc_poly_mbr03)
(fd, stat, db, firsttime, CADR(errmsg), CLEN(errmsg));
if (I8(__fort_allocated)(db)) {
ENTF90(DEALLOC_MBR03, dealloc_mbr03)
(stat, db, firsttime, CADR(errmsg), CLEN(errmsg));
}
} else if (I8(__fort_allocated)(db)) {
ENTF90(DEALLOC_MBR03, dealloc_mbr03)
(stat, db, firsttime, CADR(errmsg), CLEN(errmsg));
if (ld->tag == 'T' && src_td->obj.tag == __POLY) {
ENTF90(DEALLOC_POLY_MBR03, dealloc_poly_mbr03)
(fd, stat, cb, firsttime, CADR(errmsg), CLEN(errmsg));
}
}
}
@@ -1,5 +1,5 @@
#
# Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -25,30 +25,30 @@ ifeq ($(findstring ppc64le, $(UNAME)), ppc64le)
INSN=bl
endif
fs11: run
build: $(SRC)/fs11.f90
-$(RM) fs11.$(EXESUFFIX) core *.d *.mod FOR*.DAT FTN* ftn* fort.*
@echo ------------------------------------ building test $@
-$(CC) -c $(CFLAGS) $(SRC)/check.c -o check.$(OBJX)
-$(FC) $(FFLAGS) $(LDFLAGS) $(SRC)/fs11.f90 -S
-$(FC) $(FFLAGS) $(LDFLAGS) $(SRC)/fs11.f90 -S
-$(FC) -c $(FFLAGS) $(LDFLAGS) $(SRC)/fs11.f90 -o fs11.$(OBJX)
-$(FC) $(FFLAGS) $(LDFLAGS) fs11.$(OBJX) check.$(OBJX) $(LIBS) -o fs11.$(EXESUFFIX)
# rank2 should not be inlined (except with -Minline=reshape).
# Verify that by checking for exactly 3 calls to mmul.
# This check isn't valid for flang because it allows LLVM to inline.
run:
@echo ------------------------------------ executing test fs11
@if [ $(shell grep mmul fs11.s | grep $(INSN)|tr -s ' ' '\n' |grep -c $(INSN)) = "3" ] ; \
then \
fs11.$(EXESUFFIX) ; \
else \
echo 'RESULT: FAIL - mmul not used' ; \
ifneq ($(FC), flang)
@mmul_calls=`grep -c '$(INSN).*mmul' fs11.s`; \
if [ $$mmul_calls -ne 3 ]; then \
echo "RESULT: FAIL - expected exactly 3 calls to mmul, got $$mmul_calls" ; \
exit 1; \
fi
endif
fs11.$(EXESUFFIX)
verify: ;
fs11.run: run
@@ -1,5 +1,5 @@
#
# Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
########## Make rule for test fs12 ########
# Determine call instruction used
INSN=call
INSN=\(call\|jmp\)
ifeq ($(findstring aarch64, $(UNAME)), aarch64)
INSN=bl
endif
@@ -35,16 +35,19 @@ build: $(SRC)/fs12.f90
-$(FC) -c $(FFLAGS) $(LDFLAGS) $(SRC)/fs12.f90 -o fs12.$(OBJX)
-$(FC) $(FFLAGS) $(LDFLAGS) fs12.$(OBJX) check.$(OBJX) $(LIBS) -o fs12.$(EXESUFFIX)
# contig_cpy should not be inlined (except with -Minline=reshape).
# Verify that by checking for exactly 3 calls to f90_mcopy.
# This check isn't valid for flang because it allows LLVM to inline.
run:
@echo ------------------------------------ executing modified test fs12
@if [ $(shell grep f90_mcopy fs12.s | grep -i $(INSN) | tr -s ' ' '\n' | grep -ci $(INSN)) = "3" ] ; \
then \
fs12.$(EXESUFFIX) ; \
else \
echo 'RESULT: FAIL - f90_mcopy not used' ; \
ifneq ($(FC), flang)
@mcopy_calls=`grep -ci '$(INSN).*f90_mcopy' fs12.s`; \
if [ $$mcopy_calls -ne 3 ]; then \
echo "RESULT: FAIL - expected exactly 3 calls to f90_mcopy, got $$mcopy_calls" ; \
exit 1; \
fi
endif
fs12.$(EXESUFFIX)
verify: ;
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1994-2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 1994-2018, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -7366,7 +7366,7 @@ get_arr_temp(DTYPE dtype, LOGICAL nodesc, LOGICAL alloc_deferred)
* o the size is not constant.
*
* If an allocatable temp is needed, its storage class is always
* SC_LOCAL.
* SC_LOCAL or SC_PRIVATE.
*/
needalloc = 0;
if (ADD_DEFER(dtype)) {
@@ -7388,7 +7388,7 @@ get_arr_temp(DTYPE dtype, LOGICAL nodesc, LOGICAL alloc_deferred)
}
}
}
if (needalloc)
if (needalloc && sc != SC_PRIVATE)
sc = SC_LOCAL;
do {
@@ -2666,7 +2666,10 @@ write_instructions(LL_Module *module)
write_type(llvm_info.abi_info->extend_abi_return
? make_lltype_from_dtype(DT_INT)
: llvm_info.return_ll_type);
if ((p->ot_type != OT_NONE) && (p->ll_type->data_type != LL_VOID)) {
/* If a function return type is VOID, we don't have to
* append any operands after LLVM instruction "ret void" */
if (llvm_info.return_ll_type->data_type != LL_VOID &&
(p->ot_type != OT_NONE) && (p->ll_type->data_type != LL_VOID)) {
print_space(1);
write_operand(p, "", FLG_OMIT_OP_TYPE);
assert(p->next == NULL, "write_instructions(), bad next ptr", 0, 4);
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -1454,7 +1454,7 @@ ll_make_outlined_task_call(int func_sptr, int task_sptr)
int altili, ilix;
int arg1, arg2, args[2] = {0};
arg1 = args[1] = ad_icon(0);
arg1 = args[1] = ll_get_gtid_val_ili();
arg2 = args[0] = ad2ili(IL_LDA, ad_acon(task_sptr, 0),
addnme(NT_VAR, task_sptr, 0, (INT)0));
ilix = ll_ad_outlined_func2(0, IL_JSR, func_sptr, 2, args);
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -167,7 +167,7 @@ is_known_bug(ILI_OP opc, int j, ILI_OP j_opc)
return true;
if ((opc == IL_KMUL || opc == IL_IADD || opc == IL_IKMV) && j_opc == IL_ACCLDSYM)
return true;
if ((opc == IL_ACMPZ || opc == IL_ACJMPZ) && o == ILIO_ARLNK && r == ILIA_KR && j == 1)
if ((opc == IL_ACMPZ || opc == IL_ACJMPZ || opc == IL_LDA) && o == ILIO_ARLNK && r == ILIA_KR && j == 1)
return true;
if (opc == IL_IMUL && o == ILIO_IRLNK && r == ILIA_KR && j == 1)
return true;

0 comments on commit 33f0f74

Please sign in to comment.