Skip to content

Commit

Permalink
Drop -ffast-math everywhere
Browse files (browse the repository at this point in the history)
Check the discussion and latest failures with LAPACK using gcc-8.2.0
and `-ffast-math` here:

  #193

Just. Not. Worth. It.
  • Loading branch information
dev-zero committed Feb 17, 2019
1 parent 2b6a144 commit 9843799
Show file tree
Hide file tree
Showing 25 changed files with 37 additions and 37 deletions.
2 changes: 1 addition & 1 deletion arch/CRAY-XC30-gfortran-cuda.psmp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ CPPFLAGS =
CXXFLAGS = -O3 -I${CUDA_PATH}/include -std=c++11
DFLAGS = -D__FFTW3 -D__parallel -D__SCALAPACK -D__HAS_smm_dnn -D__ACC -D__DBCSR_ACC
CFLAGS = $(DFLAGS)
FCFLAGS = $(DFLAGS) -O3 -fopenmp -mavx -funroll-loops -ffast-math -ftree-vectorize \
FCFLAGS = $(DFLAGS) -O3 -fopenmp -mavx -funroll-loops -ftree-vectorize \
-ffree-form -ffree-line-length-512
LDFLAGS = $(FCFLAGS)
NVFLAGS = $(DFLAGS) -O3 -arch sm_35 -Xcompiler='-fopenmp' --std=c++11
Expand Down
2 changes: 1 addition & 1 deletion arch/CRAY-XC30-gfortran-mkl.psmp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ AR = ar -r
CPPFLAGS =
DFLAGS = -D__MKL -D__FFTW3 -D__parallel -D__SCALAPACK -D__HAS_smm_dnn
CFLAGS = $(DFLAGS)
FCFLAGS = $(DFLAGS) -O3 -fopenmp -mavx -funroll-loops -ffast-math -ftree-vectorize \
FCFLAGS = $(DFLAGS) -O3 -fopenmp -mavx -funroll-loops -ftree-vectorize \
-ffree-form -ffree-line-length-512 \
-I${MKLROOT}/include -I${MKLROOT}/include/fftw
LDFLAGS = $(FCFLAGS)
Expand Down
2 changes: 1 addition & 1 deletion arch/CRAY-XC30-gfortran.psmp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ AR = ar -r
CPPFLAGS =
DFLAGS = -D__FFTW3 -D__parallel -D__SCALAPACK -D__HAS_smm_dnn
CFLAGS = $(DFLAGS)
FCFLAGS = $(DFLAGS) -O3 -fopenmp -mavx -funroll-loops -ffast-math -ftree-vectorize \
FCFLAGS = $(DFLAGS) -O3 -fopenmp -mavx -funroll-loops -ftree-vectorize \
-ffree-form -ffree-line-length-512
LDFLAGS = $(FCFLAGS)
LIBS = -lfftw3 -lfftw3_threads
Expand Down
2 changes: 1 addition & 1 deletion arch/CRAY-XK7-gfortran-cuda.psmp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ CPPFLAGS =
CXXFLAGS = -O3 -I${CUDA_PATH}/include -std=c++11
DFLAGS = -D__GEMINI -D__FFTW3 -D__parallel -D__SCALAPACK -D__HAS_smm_dnn -D__ACC -D__DBCSR_ACC
CFLAGS = $(DFLAGS)
FCFLAGS = $(DFLAGS) -O3 -fopenmp -march=bdver1 -funroll-loops -ffast-math -ftree-vectorize \
FCFLAGS = $(DFLAGS) -O3 -fopenmp -march=bdver1 -funroll-loops -ftree-vectorize \
-ffree-form -ffree-line-length-512
LDFLAGS = $(FCFLAGS)
NVFLAGS = $(DFLAGS) -O3 -arch sm_35 -Xcompiler='-fopenmp' --std=c++11
Expand Down
2 changes: 1 addition & 1 deletion arch/Darwin-IntelMacintosh-gfortran.popt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ AR = ar -r
RANLIB = ranlib
DFLAGS = -D__MPI_VERSION=3 -D__parallel -D__SCALAPACK \
-D__NO_STATM_ACCESS -D__ACCELERATE -D__LIBXC -D__LIBINT
FCFLAGS = -I $(LIBXC_INCLUDE_DIR) -O2 -ffast-math -funroll-loops \
FCFLAGS = -I $(LIBXC_INCLUDE_DIR) -O2 -funroll-loops \
-ftree-vectorize -ffree-form $(DFLAGS)
LDFLAGS = $(FCFLAGS)
LIBS = -framework Accelerate -lscalapack \
Expand Down
2 changes: 1 addition & 1 deletion arch/Darwin-IntelMacintosh-gfortran.psmp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ AR = ar -r
RANLIB = ranlib
DFLAGS = -D__MPI_VERSION=3 -D__parallel -D__SCALAPACK \
-D__NO_STATM_ACCESS -D__ACCELERATE -D__LIBXC -D__LIBINT
FCFLAGS = -I $(LIBXC_INCLUDE_DIR) -fopenmp -O2 -ffast-math \
FCFLAGS = -I $(LIBXC_INCLUDE_DIR) -fopenmp -O2 \
-funroll-loops -ftree-vectorize -ffree-form $(DFLAGS)
LDFLAGS = $(FCFLAGS)
LIBS = -framework Accelerate -lscalapack \
Expand Down
2 changes: 1 addition & 1 deletion arch/Darwin-IntelMacintosh-gfortran.sopt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ LD = gfortran
AR = ar -r
RANLIB = ranlib
DFLAGS = -D__NO_STATM_ACCESS -D__ACCELERATE -D__LIBXC -D__LIBINT
FCFLAGS = -I $(LIBXC_INCLUDE_DIR) -O2 -ffast-math -funroll-loops \
FCFLAGS = -I $(LIBXC_INCLUDE_DIR) -O2 -funroll-loops \
-ftree-vectorize -ffree-form $(DFLAGS)
LDFLAGS = $(FCFLAGS)
LIBS = -framework Accelerate \
Expand Down
2 changes: 1 addition & 1 deletion arch/Darwin-IntelMacintosh-gfortran.ssmp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ LD = gfortran
AR = ar -r
RANLIB = ranlib
DFLAGS = -D__NO_STATM_ACCESS -D__ACCELERATE -D__LIBXC -D__LIBINT
FCFLAGS = -I $(LIBXC_INCLUDE_DIR) -fopenmp -ffast-math -funroll-loops \
FCFLAGS = -I $(LIBXC_INCLUDE_DIR) -fopenmp -funroll-loops \
-ftree-vectorize -ffree-form -O2 $(DFLAGS)
LDFLAGS = $(FCFLAGS)
LIBS = -framework Accelerate \
Expand Down
2 changes: 1 addition & 1 deletion arch/FreeBSD-gfortran.popt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@ LD = mpif90
AR = ar -r
CPPFLAGS =
DFLAGS = -D__FFTW3 -D__NO_STATM_ACCESS -D__MPI_VERSION=2 -D__parallel -D__SCALAPACK
FCFLAGS = -O2 -ffast-math -funroll-loops -ftree-vectorize -march=native -ffree-form $(DFLAGS)
FCFLAGS = -O2 -funroll-loops -ftree-vectorize -march=native -ffree-form $(DFLAGS)
LDFLAGS = $(FCFLAGS)
LIBS = -llapack -lblas -lscalapack -lfftw3 -lmpich
2 changes: 1 addition & 1 deletion arch/FreeBSD-gfortran.psmp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@ LD = mpif90
AR = ar -r
CPPFLAGS =
DFLAGS = -D__FFTW3 -D__NO_STATM_ACCESS -D__MPI_VERSION=2 -D__parallel -D__SCALAPACK
FCFLAGS = -O2 -fopenmp -ffast-math -funroll-loops -ftree-vectorize -march=native -ffree-form $(DFLAGS)
FCFLAGS = -O2 -fopenmp -funroll-loops -ftree-vectorize -march=native -ffree-form $(DFLAGS)
LDFLAGS = $(FCFLAGS)
LIBS = -llapack -lblas -lscalapack -lfftw3 -lfftw3_omp -lmpich
2 changes: 1 addition & 1 deletion arch/FreeBSD-gfortran.sopt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ LD = gfortran
AR = ar -r
CPPFLAGS =
DFLAGS = -D__FFTW3 -D__NO_STATM_ACCESS
FCFLAGS = -O2 -ffast-math -funroll-loops -ftree-vectorize -march=native -ffree-form $(DFLAGS)
FCFLAGS = -O2 -funroll-loops -ftree-vectorize -march=native -ffree-form $(DFLAGS)
LDFLAGS = $(FCFLAGS)
LIBS = -llapack -lblas -lfftw3

2 changes: 1 addition & 1 deletion arch/FreeBSD-gfortran.ssmp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ LD = gfortran
AR = ar -r
CPPFLAGS =
DFLAGS = -D__FFTW3 -D__NO_STATM_ACCESS
FCFLAGS = -O2 -fopenmp -ffast-math -funroll-loops -ftree-vectorize -march=native -ffree-form $(DFLAGS)
FCFLAGS = -O2 -fopenmp -funroll-loops -ftree-vectorize -march=native -ffree-form $(DFLAGS)
LDFLAGS = $(FCFLAGS)
LIBS = -llapack -lblas -lfftw3 -lfftw3_omp

2 changes: 1 addition & 1 deletion arch/Linux-x86-64-cuda-ma.psmp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ GPUVER = K20X
CPPFLAGS =
CXXFLAGS = -O3 -I${CUDA_PATH}/include -std=c++11
DFLAGS = -D__GFORTRAN -D__parallel -D__SCALAPACK -D__BLACS -D__FFTSG -D__LIBINT -D__ACC -D__DBCSR_ACC -D__HAS_smm_dnn -D__HWLOC
GFLAGS = -g -fopenmp -O3 -ffast-math $(DFLAGS)
GFLAGS = -g -fopenmp -O3 $(DFLAGS)
FCFLAGS = -ffree-form -fcray-pointer $(GFLAGS)
CFLAGS = $(GFLAGS) -I$(HWLOC_BASE)/include
LDFLAGS = $(FCFLAGS) -mkl=sequential
Expand Down
2 changes: 1 addition & 1 deletion arch/Linux-x86-64-dbcsr-cuda.popt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ GPUVER = K20X
CPPFLAGS =
CXXFLAGS = -O3 -I${CUDA_PATH}/include -std=c++11
DFLAGS = -D__parallel -D__SCALAPACK -D__ACC -D__DBCSR_ACC
FCFLAGS = -g -O3 -ffast-math -ffree-form $(DFLAGS) -I$(GFORTRAN_INC)
FCFLAGS = -g -O3 -ffree-form $(DFLAGS) -I$(GFORTRAN_INC)
LDFLAGS = $(FCFLAGS)
NVFLAGS = $(DFLAGS) -g -O3 -arch sm_35 -Xcompiler='-fopenmp' --std=c++11

Expand Down
2 changes: 1 addition & 1 deletion arch/Linux-x86-64-gfortran.warn
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ WFLAGS = -pedantic\


DFLAGS = -D__LIBINT -D__FFTW3 -D__LIBXC2 -D__LIBINT_MAX_AM=6 -D__LIBDERIV_MAX_AM1=5 -D__ACC -D__DBCSR_ACC -D__PW_CUDA -D__parallel -D__SCALAPACK -D__LIBPEXSI -D__MPI_VERSION=3
FCFLAGS = -fopenmp -I$(CP2KINSTALLDIR)/include -std=f2008 -fimplicit-none -ffree-form -fno-omit-frame-pointer -g -O1 -O3 -march=native -ffast-math $(PROFOPT) $(DFLAGS) $(WFLAGS)
FCFLAGS = -fopenmp -I$(CP2KINSTALLDIR)/include -std=f2008 -fimplicit-none -ffree-form -fno-omit-frame-pointer -g -O1 -O3 -march=native $(PROFOPT) $(DFLAGS) $(WFLAGS)
LDFLAGS = -L$(CP2KINSTALLDIR)/lib/ -L/usr/local/cuda/lib64 $(FCFLAGS)
NVFLAGS = $(DFLAGS) -g -O2 -arch sm_35 -arch sm_35 -Xcompiler='-fopenmp' --std=c++11
CFLAGS = $(DFLAGS) -I$(CP2KINSTALLDIR)/include -fno-omit-frame-pointer -g -O1
Expand Down
2 changes: 1 addition & 1 deletion arch/Linux-x86-64-gfortran_mkl_elpa.popt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ LD = mpif90
AR = ar -r
CPPFLAGS =
DFLAGS = -D__parallel -D__SCALAPACK -D__LIBINT -D__LIBXC2 -D__FFTW3 -D__ELPA=201308
FCFLAGS = -O3 -ffast-math -funroll-loops -ftree-vectorize -march=native -ffree-form $(DFLAGS) -g -I${FFTW3_PATH}/include -I${ELPA_INC}/modules -I${ELPA_INC}/elpa
FCFLAGS = -O3 -funroll-loops -ftree-vectorize -march=native -ffree-form $(DFLAGS) -g -I${FFTW3_PATH}/include -I${ELPA_INC}/modules -I${ELPA_INC}/elpa
LDFLAGS = $(FCFLAGS) -L$(LIBINT_PATH) -L${FFTW3_PATH}/lib64 -L$(INTEL_MKL_LIB) -L$(LIBXC_PATH)/lib64
LIBS = \
-lmkl_scalapack_lp64 -lmkl_blacs_openmpi_lp64 -lmkl_gf_lp64 -lmkl_sequential -lmkl_core \
Expand Down
2 changes: 1 addition & 1 deletion arch/Linux-x86-64-gfortran_opencl_nvidia.sopt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ DFLAGS = -D__FFTW3 \
-D__HAS_ISO_C_BINDING


AFLAGS = $(DFLAGS) -O3 -mtune=native -ffast-math -funroll-loops -ftree-vectorize
AFLAGS = $(DFLAGS) -O3 -mtune=native -funroll-loops -ftree-vectorize
CPPFLAGS = $(DFLAGS) -traditional -C
CFLAGS = $(AFLAGS)
FCFLAGS = $(AFLAGS) -ffree-form -ffree-line-length-none
Expand Down
2 changes: 1 addition & 1 deletion arch/Linux-x86-64-ma.sopt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ LD = $(FC)
AR = ar -r
CPPFLAGS =
DFLAGS = -D__LIBINT -D__HWLOC
GFLAGS = -g -O3 -ffast-math $(DFLAGS)
GFLAGS = -g -O3 $(DFLAGS)
FCFLAGS = -ffree-form -fcray-pointer $(GFLAGS)
CFLAGS = $(GFLAGS) -I$(HWLOC_BASE)/include
LDFLAGS = $(FCFLAGS) -mkl=sequential
Expand Down
4 changes: 2 additions & 2 deletions arch/Linux-x86-64-mingw64-minimal.sopt
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ LD = x86_64-w64-mingw32-gfortran
AR = x86_64-w64-mingw32-ar -r
DFLAGS = -D__NO_STATM_ACCESS -D__NO_IPI_DRIVER -D__MINGW
CPPFLAGS = -P -traditional $(DFLAGS)
CFLAGS = $(DFLAGS) -O2 -ffast-math
FCFLAGS = $(DFLAGS) -O2 -ffast-math -ffree-form -ffree-line-length-none \
CFLAGS = $(DFLAGS) -O2
FCFLAGS = $(DFLAGS) -O2 -ffree-form -ffree-line-length-none \
-ftree-vectorize -funroll-loops -std=f2008
LDFLAGS = $(FCFLAGS) -static
LIBS = $(OPENBLAS_LIBPATH)/libopenblas.a
2 changes: 1 addition & 1 deletion arch/PR59336.sdbg
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ AR = gcc-ar -r

CPPFLAGS =
DFLAGS = -D__FFTW3
FCFLAGS = -fno-prefetch-loop-arrays -O3 -march=native -funroll-loops -ffast-math -ffree-form $(DFLAGS)
FCFLAGS = -fno-prefetch-loop-arrays -O3 -march=native -funroll-loops -ffree-form $(DFLAGS)
LDFLAGS = $(FCFLAGS)
LIBS = -llapack -lblas

2 changes: 1 addition & 1 deletion arch/test_tsan.sdbg
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ AR = ar -r

CPPFLAGS =
DFLAGS = -D__LIBINT
FCFLAGS = -gdwarf-3 -fPIE -pie -fsanitize=thread -fno-omit-frame-pointer -fopenmp -O0 -march=native -ffast-math -ffree-form $(DFLAGS)
FCFLAGS = -gdwarf-3 -fPIE -pie -fsanitize=thread -fno-omit-frame-pointer -fopenmp -O0 -march=native -ffree-form $(DFLAGS)
LDFLAGS = $(FCFLAGS) -L/data/vjoost/libint/sanitize/install/lib/
LIBS = -llapack -lblas -lderiv -lint -lstdc++

2 changes: 1 addition & 1 deletion tools/autotune_grid/config.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Nrun=3

# #### Gfortran flags
FC_comp="ftn -ffree-form -pipe"
FCFLAGS_OPT=" -O3 -ffast-math -funroll-loops -ftree-vectorize -fno-omit-frame-pointer -g -march=ivybridge -fgcse-sm -fgcse-las -fmerge-all-constants "
FCFLAGS_OPT=" -O3 -funroll-loops -ftree-vectorize -fno-omit-frame-pointer -g -march=ivybridge -fgcse-sm -fgcse-las -fmerge-all-constants "
FCFLAGS_NATIVE="-march=native"

# Cray flags
Expand Down
16 changes: 8 additions & 8 deletions tools/toolchain/install_cp2k_toolchain.sh
Original file line number Diff line number Diff line change
Expand Up @@ -922,12 +922,12 @@ done

# setup compiler flags, leading to nice stack traces on crashes but
# still optimised
CFLAGS="-O2 -ftree-vectorize -g -fno-omit-frame-pointer -march=native -ffast-math $TSANFLAGS"
FFLAGS="-O2 -ftree-vectorize -g -fno-omit-frame-pointer -march=native -ffast-math $TSANFLAGS"
F77FLAGS="-O2 -ftree-vectorize -g -fno-omit-frame-pointer -march=native -ffast-math $TSANFLAGS"
F90FLAGS="-O2 -ftree-vectorize -g -fno-omit-frame-pointer -march=native -ffast-math $TSANFLAGS"
FCFLAGS="-O2 -ftree-vectorize -g -fno-omit-frame-pointer -march=native -ffast-math $TSANFLAGS"
CXXFLAGS="-O2 -ftree-vectorize -g -fno-omit-frame-pointer -march=native -ffast-math $TSANFLAGS"
CFLAGS="-O2 -ftree-vectorize -g -fno-omit-frame-pointer -march=native $TSANFLAGS"
FFLAGS="-O2 -ftree-vectorize -g -fno-omit-frame-pointer -march=native $TSANFLAGS"
F77FLAGS="-O2 -ftree-vectorize -g -fno-omit-frame-pointer -march=native $TSANFLAGS"
F90FLAGS="-O2 -ftree-vectorize -g -fno-omit-frame-pointer -march=native $TSANFLAGS"
FCFLAGS="-O2 -ftree-vectorize -g -fno-omit-frame-pointer -march=native $TSANFLAGS"
CXXFLAGS="-O2 -ftree-vectorize -g -fno-omit-frame-pointer -march=native $TSANFLAGS"

export CFLAGS=$(allowed_gcc_flags $CFLAGS)
export FFLAGS=$(allowed_gfortran_flags $FFLAGS)
Expand Down Expand Up @@ -1035,12 +1035,12 @@ LD_arch="IF_MPI(${MPIFC}|${FC})"

# we always want good line information and backtraces
BASEFLAGS="-march=native -fno-omit-frame-pointer -g ${TSANFLAGS}"
OPT_FLAGS="-O3 -funroll-loops -ffast-math"
OPT_FLAGS="-O3 -funroll-loops"
NOOPT_FLAGS="-O1"

# those flags that do not influence code generation are used always, the others if debug
FCDEB_FLAGS="-ffree-form -std=f2008 -fimplicit-none"
FCDEB_FLAGS_DEBUG="-fsanitize=leak -fcheck=all -ffpe-trap=invalid,zero,overflow -finit-derived -finit-real=snan -finit-integer=-42 -fno-fast-math -Werror=realloc-lhs-all -finline-matmul-limit=0"
FCDEB_FLAGS_DEBUG="-fsanitize=leak -fcheck=all -ffpe-trap=invalid,zero,overflow -finit-derived -finit-real=snan -finit-integer=-42 -Werror=realloc-lhs-all -finline-matmul-limit=0"

# code coverage generation flags
COVERAGE_FLAGS="-O1 -coverage -fkeep-static-functions"
Expand Down
10 changes: 5 additions & 5 deletions tools/toolchain/scripts/install_reflapack.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@ case "$with_reflapack" in
cat <<EOF > make.inc
SHELL = /bin/sh
FORTRAN = $FC
OPTS = $FFLAGS -frecursive -fno-fast-math
DRVOPTS = $FFLAGS -frecursive -fno-fast-math
NOOPT = $FFLAGS -O0 -frecursive -fno-fast-math
OPTS = $FFLAGS -frecursive
DRVOPTS = $FFLAGS -frecursive
NOOPT = $FFLAGS -O0 -frecursive
LOADER = $FC
LOADOPTS = $FFLAGS -Wl,--enable-new-dtags -fno-fast-math
LOADOPTS = $FFLAGS -Wl,--enable-new-dtags
TIMER = INT_ETIME
CC = $CC
CFLAGS = $CFLAGS -fno-fast-math
CFLAGS = $CFLAGS
ARCH = ar
ARCHFLAGS= cr
RANLIB = ranlib
Expand Down
2 changes: 1 addition & 1 deletion tools/toolchain/scripts/install_scalapack.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ case "$with_scalapack" in
CDEFS = -DAdd_
FC = ${MPIFC}
CC = ${MPICC}
NOOPT = ${FFLAGS} -O0 -fno-fast-math
NOOPT = ${FFLAGS} -O0
FCFLAGS = ${FFLAGS} ${MATH_CFLAGS}
CCFLAGS = ${CFLAGS} ${MATH_CFLAGS}
FCLOADER = \$(FC)
Expand Down

0 comments on commit 9843799

Please sign in to comment.