Permalink
Browse files

Insert more architecture-specific constants and types

  • Loading branch information...
gklimowicz committed Sep 30, 2018
1 parent ea7efc9 commit a8ae1a0a8ee330d68e90cdc03be5232ff305e638
View
@@ -49,7 +49,7 @@
#define F3 % xmm2
#define F4 % xmm3
#else
#elif defined(LINUX_ELF) || defined(TARGET_LINUX_X86) || defined(TARGET_LINUX_X8664)
#define ENT(n) n
#define ALN_WORD .align 4
#define ALN_FUNC .align 16
@@ -71,6 +71,31 @@
#define F3 % xmm2
#define F4 % xmm3
#elif defined(TARGET_OSX_X8664)
#define ENT(n) ASM_CONCAT(_,n)
#define ALN_WORD .align 2
#define ALN_FUNC .align 4
#define ALN_DBLE .align 3
#define ALN_QUAD .align 4
#define ELF_FUNC(s)
#define ELF_OBJ(s)
#define ELF_SIZE(s)
#define AS_VER
#define I1 % rdi
#define I1W % edi
#define I2 % rsi
#define I2W % esi
#define I3 % rdx
#define I3W % edx
#define I4 % rcx
#define F1 % xmm0
#define F2 % xmm1
#define F3 % xmm2
#define F4 % xmm3
#else
#error X8664 TARGET platform not defined.
#error TARGET must be one of TARGET_LINUX_X8664, TARGET_OSX_X8664, or TARGET_WIN_X8664.
#endif
/* macros for handling pic and non-pic code */
View
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 1995-2017, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -563,6 +563,7 @@ __fortio_fmt_g(__BIGREAL_T val, int w, int d, int e, int sf, int type,
{
int sign_char;
int newd;
#if defined(TARGET_X8664)
/*
* the following guarded IF may look like a no-op, but is
* needed when val is a denorm and DAZ is enabled. In this case, the
@@ -580,6 +581,7 @@ __fortio_fmt_g(__BIGREAL_T val, int w, int d, int e, int sf, int type,
((int *)&val)[1] |= 0x80000000;
}
}
#endif
field_overflow = FALSE;
/*
fp_canon(val, type, round);
View
@@ -307,6 +307,10 @@ typedef __INT_T dtype;
* which can be either a 64-bit or 32-bit type depending on DESC_I8
*/
/* Select the type that __NELEM_T expands to, based on the target. */
#if defined(TARGET_X8664)
/* TARGET_X8664 builds: __NELEM_T is __INT8_T. */
#define __NELEM_T __INT8_T
#else
/* Every other target: __NELEM_T falls back to __INT_T. */
#define __NELEM_T __INT_T
#endif
#endif /*_PGHPF_TYPES_H_*/
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 1993-2018, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -384,8 +384,13 @@ Ftn_str_free(char **first)
/*
 * _LONGLONG_T / _ULONGLONG_T typedefs.
 * NOTE(review): __HAVE_LONGLONG_T presumably lets other headers detect that
 * these typedefs already exist -- confirm against its users.
 */
#define __HAVE_LONGLONG_T
#if defined(LINUX8664) || defined(OSX8664)
/*
 * LINUX8664 / OSX8664 builds use plain long.
 * NOTE(review): presumably because long is already 64 bits wide on these
 * targets -- confirm.
 */
typedef long _LONGLONG_T;
typedef unsigned long _ULONGLONG_T;
#else
/* All other builds use long long. */
typedef long long _LONGLONG_T;
typedef unsigned long long _ULONGLONG_T;
#endif
/* ***********************************************************************/
/** \brief
View
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 1997-2018, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -24,8 +24,13 @@
/*
 * _LONGLONG_T / _ULONGLONG_T typedefs.
 * NOTE(review): __HAVE_LONGLONG_T presumably lets other headers detect that
 * these typedefs already exist -- confirm against its users.
 */
#define __HAVE_LONGLONG_T
#if defined(LINUX8664) || defined(OSX8664)
/*
 * LINUX8664 / OSX8664 builds use plain long.
 * NOTE(review): presumably because long is already 64 bits wide on these
 * targets -- confirm.
 */
typedef long _LONGLONG_T;
typedef unsigned long _ULONGLONG_T;
#else
/* All other builds use long long. */
typedef long long _LONGLONG_T;
typedef unsigned long long _ULONGLONG_T;
#endif
/* now defined if BaseTsd10.h included */
typedef int INT64[2];
@@ -44,6 +49,7 @@ typedef union {
_LONGLONG_T lv;
} INT64D;
#if defined(LINUX8664) || defined(OSX8664)
#define __I8RET_T long
#define UTL_I_I64RET(m, l) \
{ \
@@ -52,3 +58,18 @@ typedef union {
I64_LSH(int64d.i) = l; \
return int64d.lv; \
}
#elif defined(WIN64)
/* Someday, should only care if TM_I8 is defined */
#define __I8RET_T long long
#define UTL_I_I64RET(m, l) \
{ \
INT64D int64d; \
I64_MSH(int64d.i) = m; \
I64_LSH(int64d.i) = l; \
return int64d.lv; \
}
#else
#define __I8RET_T void
#define UTL_I_I64RET __utl_i_i64ret
extern VOID UTL_I_I64RET();
#endif
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 1993-2018, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -160,8 +160,13 @@ int a2_len; /* length of a2 */
/*
 * _LONGLONG_T / _ULONGLONG_T typedefs.
 * NOTE(review): __HAVE_LONGLONG_T presumably lets other headers detect that
 * these typedefs already exist -- confirm against its users.
 */
#define __HAVE_LONGLONG_T
#if defined(LINUX8664) || defined(OSX8664)
/*
 * LINUX8664 / OSX8664 builds use plain long.
 * NOTE(review): presumably because long is already 64 bits wide on these
 * targets -- confirm.
 */
typedef long _LONGLONG_T;
typedef unsigned long _ULONGLONG_T;
#else
/* All other builds use long long. */
typedef long long _LONGLONG_T;
typedef unsigned long long _ULONGLONG_T;
#endif
/* ***********************************************************************/
@@ -816,7 +816,11 @@ ENTFTN(SYSCLK, sysclk)(__STAT_T *count, __STAT_T *count_rate,
if (resol == 0) {
int def;
#if defined(TARGET_X8664)
def = 1000000;
#else
def = sizeof(__STAT_T) < 8 ? 1000 : 1000000;
#endif
resol = __fort_getoptn("-system_clock_rate", def);
if (resol <= 0)
__fort_abort("invalid value given for system_clock rate");
@@ -2880,6 +2884,7 @@ ENTF90(TRIMA, trima)
i = CLEN(expr);
while (i > 0) {
if (CADR(expr)[i - 1] != ' ') {
#if defined(TARGET_X8664)
if (i <= 11) {
int *rptr = ((int *)CADR(res));
int *eptr = ((int *)CADR(expr));
@@ -2899,6 +2904,11 @@ ENTF90(TRIMA, trima)
}
rcptr = (char *)rptr;
ecptr = (char *)eptr;
#else
if (i <= 3) {
rcptr = ((char *)CADR(res));
ecptr = ((char *)CADR(expr));
#endif
j = i & 3;
if (j > 2)
*rcptr++ = *ecptr++;
@@ -4823,7 +4833,11 @@ ENTF90(SPACINGD, spacingd)(__REAL8_T *d)
#ifndef DESC_I8
/*
 * SZ_T selection for builds without DESC_I8 (this sits inside the enclosing
 * #ifndef DESC_I8): __INT8_T on TARGET_X8664, __INT4_T on every other target.
 */
#if defined(TARGET_X8664)
typedef __INT8_T SZ_T;
#else
typedef __INT4_T SZ_T;
#endif
#undef _MZERO
#define _MZERO(n, t) \
@@ -17,7 +17,11 @@
!
! Global variables
!
#ifdef TARGET_X8664
integer*8 :: mra, ncb, kab, lda, ldb, ldc
#else
integer :: mra, ncb, kab, lda, ldb, ldc
#endif
complex*16, dimension( lda, * )::a
complex*16, dimension( ldb, * )::b
complex*16, dimension( ldc, * )::c
@@ -26,6 +30,7 @@
!
! local variables
!
#ifdef TARGET_X8664
integer*8 :: colsa, rowsa, rowsb, colsb
integer*8 :: i, j, jb, k, ak, bk, jend
integer*8 :: ar, ar_sav, ac, ac_sav, br, bc
@@ -36,6 +41,18 @@
integer*8 :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end
integer*8 :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end
integer*8 :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav
#else
integer :: colsa, rowsa, rowsb, colsb
integer :: i, j, jb, k, ak, bk, jend
integer :: ar, ar_sav, ac, ac_sav, br, bc
integer :: ndxa, ndxasav
integer :: ndxb, ndxbsav, ndxb0, ndxb1, ndxb2, ndxb3
integer :: colachunk, colachunks, colbchunk, colbchunks
integer :: rowchunk, rowchunks
integer :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end
integer :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end
integer :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav
#endif
integer :: ta, tb
complex*16 :: temp, temp0, temp1, temp2, temp3
real*8 :: temprr0, temprr1, temprr2, temprr3
@@ -52,7 +69,14 @@
complex*16, allocatable, dimension(:) :: buffera, bufferb
!Minimum number of multiplications needed to activate the blocked optimization.
#ifdef TARGET_X8664
integer, parameter :: min_blocked_mult = 15000
#elif TARGET_LINUX_POWER
integer, parameter :: min_blocked_mult = 15000 !Complex calculations not vectorized on OpenPower.
#else
#warning untuned matrix multiplication parameter
integer, parameter :: min_blocked_mult = 15000
#endif
#undef DCMPLX
#define DCMPLX(r,i) cmplx(r,i,kind=8)
@@ -17,7 +17,11 @@
!
! Global variables
!
#ifdef TARGET_X8664
integer*8 :: mra, ncb, kab, lda, ldb, ldc
#else
integer :: mra, ncb, kab, lda, ldb, ldc
#endif
complex*8, dimension( lda, * )::a
complex*8, dimension( ldb, * )::b
complex*8, dimension( ldc, * )::c
@@ -26,6 +30,7 @@
!
! local variables
!
#ifdef TARGET_X8664
integer*8 :: colsa, rowsa, rowsb, colsb
integer*8 :: i, j, jb, k, ak, bk, jend
integer*8 :: ar, ar_sav, ac, ac_sav, br, bc
@@ -36,6 +41,18 @@
integer*8 :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end
integer*8 :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end
integer*8 :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav
#else
integer :: colsa, rowsa, rowsb, colsb
integer :: i, j, jb, k, ak, bk, jend
integer :: ar, ar_sav, ac, ac_sav, br, bc
integer :: ndxa, ndxasav
integer :: ndxb, ndxbsav, ndxb0, ndxb1, ndxb2, ndxb3
integer :: colachunk, colachunks, colbchunk, colbchunks
integer :: rowchunk, rowchunks
integer :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end
integer :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end
integer :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav
#endif
integer :: ta, tb
complex*8 :: temp, temp0, temp1, temp2, temp3
real*4 :: temprr0, temprr1, temprr2, temprr3
@@ -52,5 +69,12 @@
complex*8, allocatable, dimension(:) :: buffera, bufferb
!Minimum number of multiplications needed to activate the blocked optimization.
#ifdef TARGET_X8664
integer, parameter :: min_blocked_mult = 1750
#elif TARGET_LINUX_POWER
integer, parameter :: min_blocked_mult = 1750 !Complex calculations not vectorized on OpenPower.
#else
#warning untuned matrix multiplication parameter
integer, parameter :: min_blocked_mult = 1750
#endif
@@ -21,7 +21,11 @@
!
! Global variables
!
#ifdef TARGET_X8664
integer*8 :: mra, ncb, kab, lda, ldb, ldc
#else
integer :: mra, ncb, kab, lda, ldb, ldc
#endif
real*4, dimension( lda, * )::a
real*4, dimension( ldb, * )::b
real*4, dimension( ldc, * )::c
@@ -30,6 +34,7 @@
!
! local variables
!
#ifdef TARGET_X8664
integer*8 :: colsa, rowsa, rowsb, colsb
integer*8 :: i, j, jb, k, ak, bk, jend
integer*8 :: ar, ar_sav, ac, ac_sav, br, bc
@@ -40,6 +45,18 @@
integer*8 :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end
integer*8 :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end
integer*8 :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav
#else
integer :: colsa, rowsa, rowsb, colsb
integer :: i, j, jb, k, ak, bk, jend
integer :: ar, ar_sav, ac, ac_sav, br, bc
integer :: ndxa, ndxasav
integer :: ndxb, ndxbsav, ndxb0, ndxb1, ndxb2, ndxb3
integer :: colachunk, colachunks, colbchunk, colbchunks
integer :: rowchunk, rowchunks
integer :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end
integer :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end
integer :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav
#endif
real*4 :: temp, temp0, temp1, temp2, temp3
real*4 :: bufatemp, bufbtemp
real*8 :: time_start, time_end, ttime, all_time
@@ -50,4 +67,11 @@
real*4, allocatable, dimension(:) :: buffera, bufferb
!Minimum number of multiplications needed to activate the blocked optimization.
#ifdef TARGET_X8664
integer, parameter :: min_blocked_mult = 5000
#elif TARGET_LINUX_POWER
integer, parameter :: min_blocked_mult = 10000
#else
#warning untuned matrix multiplication parameter
integer, parameter :: min_blocked_mult = 5000
#endif
Oops, something went wrong.

0 comments on commit a8ae1a0

Please sign in to comment.