Skip to content

Commit

Permalink
move forward kernels in elastic.c into separate functions so that they are vectorized even when using OpenMP
Browse files Browse the repository at this point in the history
  • Loading branch information
ar4 committed Jul 21, 2023
1 parent 3e6de3c commit be15287
Show file tree
Hide file tree
Showing 9 changed files with 317 additions and 242 deletions.
199 changes: 107 additions & 92 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -1,32 +1,32 @@
name: Build and test
on: push
jobs:
Linux-build:
runs-on: ubuntu-latest
container: quay.io/pypa/manylinux2014_x86_64
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Install NVCC
run: |
yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
yum install -y cuda-nvcc-11-1-11.1.105-1 cuda-cudart-devel-11-1-11.1.74-1
- name: Compile
run: |
PATH=$PATH:/usr/local/cuda-11.1/bin
CUDA_HOME=/usr/local/cuda-11.1
CUDA_ROOT=/usr/local/cuda-11.1
CUDA_PATH=/usr/local/cuda-11.1
CUDADIR=/usr/local/cuda-11.1
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.1/lib64
cd src/deepwave
cp /lib64/libgomp.so.1 .
./build_linux.sh
- name: Archive built libraries
uses: actions/upload-artifact@v3
with:
name: linux_libraries
path: src/deepwave/*.so*
# Linux-build:
# runs-on: ubuntu-latest
# container: quay.io/pypa/manylinux2014_x86_64
# steps:
# - name: Checkout
# uses: actions/checkout@v3
# - name: Install NVCC
# run: |
# yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
# yum install -y cuda-nvcc-11-1-11.1.105-1 cuda-cudart-devel-11-1-11.1.74-1
# - name: Compile
# run: |
# PATH=$PATH:/usr/local/cuda-11.1/bin
# CUDA_HOME=/usr/local/cuda-11.1
# CUDA_ROOT=/usr/local/cuda-11.1
# CUDA_PATH=/usr/local/cuda-11.1
# CUDADIR=/usr/local/cuda-11.1
# LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.1/lib64
# cd src/deepwave
# cp /lib64/libgomp.so.1 .
# ./build_linux.sh
# - name: Archive built libraries
# uses: actions/upload-artifact@v3
# with:
# name: linux_libraries
# path: src/deepwave/*.so*
MacOS-build:
runs-on: macos-11
steps:
Expand All @@ -36,79 +36,94 @@ jobs:
uses: actions/setup-python@v3
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install torch
nuget install intelopenmp.devel.osx -DirectDownload -NonInteractive
#ls -R
#python -m pip install torch
- name: Compile
run: |
cd src/deepwave
cp `python -c "import torch; print(torch.__path__[0])"`/lib/libiomp5.dylib .
#cp `python -c "import torch; print(torch.__path__[0])"`/lib/libiomp5.dylib .
mv intelopenmp.devel.osx*/lib/native/osx-x64/libiomp5.dylib .
brew install libomp
./build_macos.sh
- name: Archive built libraries
uses: actions/upload-artifact@v3
with:
name: macos_libraries
path: src/deepwave/*.dylib
Windows-build:
runs-on: windows-2019
defaults:
run:
shell: bash
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v3
- name: Install NVCC
run: |
curl https://developer.download.nvidia.com/compute/cuda/11.1.1/network_installers/cuda_11.1.1_win10_network.exe -o cuda_11.1.1_win10_network.exe
chmod +x ./cuda_11.1.1_win10_network.exe
./cuda_11.1.1_win10_network.exe -s nvcc_11.1 cudart_11.1
echo "CUDA_PATH=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.1" >> $GITHUB_ENV
echo "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.1\\bin" >> $GITHUB_PATH
- name: Setup MSVC
uses: ilammy/msvc-dev-cmd@v1
- name: Compile
run: |
cd src/deepwave
./build_windows.sh
- name: Archive built libraries
uses: actions/upload-artifact@v3
with:
name: windows_libraries
path: src/deepwave/*.dll
Test:
strategy:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
fail-fast: false
runs-on: ${{ matrix.os }}
needs: [Linux-build, MacOS-build, Windows-build]
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Download built Linux libraries
uses: actions/download-artifact@v3
with:
name: linux_libraries
path: src/deepwave/
- name: Download built MacOS libraries
uses: actions/download-artifact@v3
with:
name: macos_libraries
path: src/deepwave/
- name: Download built Windows libraries
uses: actions/download-artifact@v3
with:
name: windows_libraries
path: src/deepwave/
- name: Set up Python
uses: actions/setup-python@v3
- name: Install dependencies
run: |
cd ../../
python -m pip install --upgrade pip
python -m pip install pytest scipy
python -m pip install .
- name: Test with pytest
run: |
pytest
#pytest -s
cd tests
PYTHONVERBOSE=3 python -c "import test_elastic; test_wavefield_decays()"
# - name: Archive built libraries
# uses: actions/upload-artifact@v3
# with:
# name: macos_libraries
# path: src/deepwave/*.dylib
# Windows-build:
# runs-on: windows-2019
# defaults:
# run:
# shell: bash
# steps:
# - name: Checkout
# uses: actions/checkout@v3
# - name: Set up Python
# uses: actions/setup-python@v3
# - name: Install NVCC
# run: |
# curl https://developer.download.nvidia.com/compute/cuda/11.1.1/network_installers/cuda_11.1.1_win10_network.exe -o cuda_11.1.1_win10_network.exe
# chmod +x ./cuda_11.1.1_win10_network.exe
# ./cuda_11.1.1_win10_network.exe -s nvcc_11.1 cudart_11.1
# echo "CUDA_PATH=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.1" >> $GITHUB_ENV
# echo "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.1\\bin" >> $GITHUB_PATH
# - name: Setup MSVC
# uses: ilammy/msvc-dev-cmd@v1
# - name: Compile
# run: |
# cd src/deepwave
# nuget install intelopenmp.devel.win -DirectDownload -NonInteractive
# nuget install intelopenmp.redist.win -DirectDownload -NonInteractive
# mv intelopenmp.devel.win*/lib/native/win-x64/libiomp5md.lib .
# mv intelopenmp.redist.win*/runtimes/win-x86/native/libiomp5md.dll .
# ./build_windows.sh
# - name: Archive built libraries
# uses: actions/upload-artifact@v3
# with:
# name: windows_libraries
# path: src/deepwave/*.dll
# Test:
# strategy:
# matrix:
# os: [ubuntu-latest, macos-latest, windows-latest]
# fail-fast: false
# runs-on: ${{ matrix.os }}
# needs: [Linux-build, MacOS-build, Windows-build]
# steps:
# - name: Checkout
# uses: actions/checkout@v3
# - name: Download built Linux libraries
# uses: actions/download-artifact@v3
# with:
# name: linux_libraries
# path: src/deepwave/
# - name: Download built MacOS libraries
# uses: actions/download-artifact@v3
# with:
# name: macos_libraries
# path: src/deepwave/
# - name: Download built Windows libraries
# uses: actions/download-artifact@v3
# with:
# name: windows_libraries
# path: src/deepwave/
# - name: Set up Python
# uses: actions/setup-python@v3
# - name: Install dependencies
# run: |
# python -m pip install --upgrade pip
# python -m pip install pytest scipy
# python -m pip install .
# - name: Test with pytest
# run: |
# PYTHONVERBOSE=3 pytest
3 changes: 3 additions & 0 deletions src/deepwave/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,11 @@
try:
dll_cpu.omp_get_num_threads
use_openmp = True
import torch
print('USING OPENMP', torch.get_num_threads())
except AttributeError:
use_openmp = False
print('NOT USING OPENMP')
dll_cpu.scalar_iso_2_float_forward.restype = None
dll_cpu.scalar_iso_4_float_forward.restype = None
dll_cpu.scalar_iso_6_float_forward.restype = None
Expand Down
2 changes: 1 addition & 1 deletion src/deepwave/build_linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -e

DW_OMP_NAME=libgomp.so.1
CFLAGS="-Wall -Wextra -pedantic -DDW_USE_OPENMP -fPIC -fopenmp -Ofast -mavx2"
CFLAGS="-Wall -Wextra -pedantic -fPIC -fopenmp -Ofast -mavx2"
CUDAFLAGS="--restrict --use_fast_math -O3 -gencode=arch=compute_52,code=sm_52, -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 --compiler-options -fPIC"
gcc $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=float -c scalar.c -o scalar_cpu_iso_2_float.o
gcc $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=float -c scalar.c -o scalar_cpu_iso_4_float.o
Expand Down
48 changes: 24 additions & 24 deletions src/deepwave/build_macos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -e

DW_OMP_NAME=iomp5
CFLAGS="-Wall -Wextra -pedantic -DDW_USE_OPENMP -fPIC -Ofast -Xpreprocessor -fopenmp -I`brew --prefix libomp`/include"
CFLAGS="-Wall -Wextra -pedantic -fPIC -Ofast -Xpreprocessor -fopenmp -I`brew --prefix libomp`/include"
clang $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=float -c scalar.c -o scalar_cpu_iso_2_float.o
clang $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=float -c scalar.c -o scalar_cpu_iso_4_float.o
clang $CFLAGS -DDW_ACCURACY=6 -DDW_DTYPE=float -c scalar.c -o scalar_cpu_iso_6_float.o
Expand All @@ -26,26 +26,26 @@ clang $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=double -c elastic.c -o elastic_cpu_iso_
clang $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=double -c elastic.c -o elastic_cpu_iso_4_double.o
clang $CFLAGS -dynamiclib scalar_born_cpu_iso_2_float.o scalar_born_cpu_iso_4_float.o scalar_born_cpu_iso_6_float.o scalar_born_cpu_iso_8_float.o scalar_born_cpu_iso_2_double.o scalar_born_cpu_iso_4_double.o scalar_born_cpu_iso_6_double.o scalar_born_cpu_iso_8_double.o scalar_cpu_iso_2_float.o scalar_cpu_iso_4_float.o scalar_cpu_iso_6_float.o scalar_cpu_iso_8_float.o scalar_cpu_iso_2_double.o scalar_cpu_iso_4_double.o scalar_cpu_iso_6_double.o scalar_cpu_iso_8_double.o elastic_cpu_iso_2_float.o elastic_cpu_iso_4_float.o elastic_cpu_iso_2_double.o elastic_cpu_iso_4_double.o -L. -l$DW_OMP_NAME -rpath @loader_path/ -o libdeepwave_cpu_macos_x86_64.dylib
rm *.o
CFLAGS="-Wall -Wextra -pedantic -fPIC -Ofast -arch arm64"
clang $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=float -c scalar.c -o scalar_cpu_iso_2_float.o
clang $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=float -c scalar.c -o scalar_cpu_iso_4_float.o
clang $CFLAGS -DDW_ACCURACY=6 -DDW_DTYPE=float -c scalar.c -o scalar_cpu_iso_6_float.o
clang $CFLAGS -DDW_ACCURACY=8 -DDW_DTYPE=float -c scalar.c -o scalar_cpu_iso_8_float.o
clang $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=double -c scalar.c -o scalar_cpu_iso_2_double.o
clang $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=double -c scalar.c -o scalar_cpu_iso_4_double.o
clang $CFLAGS -DDW_ACCURACY=6 -DDW_DTYPE=double -c scalar.c -o scalar_cpu_iso_6_double.o
clang $CFLAGS -DDW_ACCURACY=8 -DDW_DTYPE=double -c scalar.c -o scalar_cpu_iso_8_double.o
clang $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=float -c scalar_born.c -o scalar_born_cpu_iso_2_float.o
clang $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=float -c scalar_born.c -o scalar_born_cpu_iso_4_float.o
clang $CFLAGS -DDW_ACCURACY=6 -DDW_DTYPE=float -c scalar_born.c -o scalar_born_cpu_iso_6_float.o
clang $CFLAGS -DDW_ACCURACY=8 -DDW_DTYPE=float -c scalar_born.c -o scalar_born_cpu_iso_8_float.o
clang $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=double -c scalar_born.c -o scalar_born_cpu_iso_2_double.o
clang $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=double -c scalar_born.c -o scalar_born_cpu_iso_4_double.o
clang $CFLAGS -DDW_ACCURACY=6 -DDW_DTYPE=double -c scalar_born.c -o scalar_born_cpu_iso_6_double.o
clang $CFLAGS -DDW_ACCURACY=8 -DDW_DTYPE=double -c scalar_born.c -o scalar_born_cpu_iso_8_double.o
clang $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=float -c elastic.c -o elastic_cpu_iso_2_float.o
clang $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=float -c elastic.c -o elastic_cpu_iso_4_float.o
clang $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=double -c elastic.c -o elastic_cpu_iso_2_double.o
clang $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=double -c elastic.c -o elastic_cpu_iso_4_double.o
clang $CFLAGS -shared scalar_born_cpu_iso_2_float.o scalar_born_cpu_iso_4_float.o scalar_born_cpu_iso_6_float.o scalar_born_cpu_iso_8_float.o scalar_born_cpu_iso_2_double.o scalar_born_cpu_iso_4_double.o scalar_born_cpu_iso_6_double.o scalar_born_cpu_iso_8_double.o scalar_cpu_iso_2_float.o scalar_cpu_iso_4_float.o scalar_cpu_iso_6_float.o scalar_cpu_iso_8_float.o scalar_cpu_iso_2_double.o scalar_cpu_iso_4_double.o scalar_cpu_iso_6_double.o scalar_cpu_iso_8_double.o elastic_cpu_iso_2_float.o elastic_cpu_iso_4_float.o elastic_cpu_iso_2_double.o elastic_cpu_iso_4_double.o -o libdeepwave_cpu_macos_arm64.dylib
rm *.o
#CFLAGS="-Wall -Wextra -pedantic -fPIC -Ofast -arch arm64"
#clang $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=float -c scalar.c -o scalar_cpu_iso_2_float.o
#clang $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=float -c scalar.c -o scalar_cpu_iso_4_float.o
#clang $CFLAGS -DDW_ACCURACY=6 -DDW_DTYPE=float -c scalar.c -o scalar_cpu_iso_6_float.o
#clang $CFLAGS -DDW_ACCURACY=8 -DDW_DTYPE=float -c scalar.c -o scalar_cpu_iso_8_float.o
#clang $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=double -c scalar.c -o scalar_cpu_iso_2_double.o
#clang $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=double -c scalar.c -o scalar_cpu_iso_4_double.o
#clang $CFLAGS -DDW_ACCURACY=6 -DDW_DTYPE=double -c scalar.c -o scalar_cpu_iso_6_double.o
#clang $CFLAGS -DDW_ACCURACY=8 -DDW_DTYPE=double -c scalar.c -o scalar_cpu_iso_8_double.o
#clang $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=float -c scalar_born.c -o scalar_born_cpu_iso_2_float.o
#clang $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=float -c scalar_born.c -o scalar_born_cpu_iso_4_float.o
#clang $CFLAGS -DDW_ACCURACY=6 -DDW_DTYPE=float -c scalar_born.c -o scalar_born_cpu_iso_6_float.o
#clang $CFLAGS -DDW_ACCURACY=8 -DDW_DTYPE=float -c scalar_born.c -o scalar_born_cpu_iso_8_float.o
#clang $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=double -c scalar_born.c -o scalar_born_cpu_iso_2_double.o
#clang $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=double -c scalar_born.c -o scalar_born_cpu_iso_4_double.o
#clang $CFLAGS -DDW_ACCURACY=6 -DDW_DTYPE=double -c scalar_born.c -o scalar_born_cpu_iso_6_double.o
#clang $CFLAGS -DDW_ACCURACY=8 -DDW_DTYPE=double -c scalar_born.c -o scalar_born_cpu_iso_8_double.o
#clang $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=float -c elastic.c -o elastic_cpu_iso_2_float.o
#clang $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=float -c elastic.c -o elastic_cpu_iso_4_float.o
#clang $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=double -c elastic.c -o elastic_cpu_iso_2_double.o
#clang $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=double -c elastic.c -o elastic_cpu_iso_4_double.o
#clang $CFLAGS -shared scalar_born_cpu_iso_2_float.o scalar_born_cpu_iso_4_float.o scalar_born_cpu_iso_6_float.o scalar_born_cpu_iso_8_float.o scalar_born_cpu_iso_2_double.o scalar_born_cpu_iso_4_double.o scalar_born_cpu_iso_6_double.o scalar_born_cpu_iso_8_double.o scalar_cpu_iso_2_float.o scalar_cpu_iso_4_float.o scalar_cpu_iso_6_float.o scalar_cpu_iso_8_float.o scalar_cpu_iso_2_double.o scalar_cpu_iso_4_double.o scalar_cpu_iso_6_double.o scalar_cpu_iso_8_double.o elastic_cpu_iso_2_float.o elastic_cpu_iso_4_float.o elastic_cpu_iso_2_double.o elastic_cpu_iso_4_double.o -o libdeepwave_cpu_macos_arm64.dylib
#rm *.o
4 changes: 2 additions & 2 deletions src/deepwave/build_windows.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

set -e

CFLAGS="-Wall -O2 -fp:fast -arch:AVX2"
CFLAGS="-Wall -O2 -fp:fast -arch:AVX2 -openmp"
CUDAFLAGS="--restrict --use_fast_math -O3 -gencode=arch=compute_52,code=sm_52, -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80"
cl $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=float -c scalar.c -Foscalar_cpu_iso_2_float.obj
cl $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=float -c scalar.c -Foscalar_cpu_iso_4_float.obj
Expand All @@ -24,7 +24,7 @@ cl $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=float -c elastic.c -Foelastic_cpu_iso_2_fl
cl $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=float -c elastic.c -Foelastic_cpu_iso_4_float.obj
cl $CFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=double -c elastic.c -Foelastic_cpu_iso_2_double.obj
cl $CFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=double -c elastic.c -Foelastic_cpu_iso_4_double.obj
cl $CFLAGS -LD scalar_born_cpu_iso_2_float.obj scalar_born_cpu_iso_4_float.obj scalar_born_cpu_iso_6_float.obj scalar_born_cpu_iso_8_float.obj scalar_born_cpu_iso_2_double.obj scalar_born_cpu_iso_4_double.obj scalar_born_cpu_iso_6_double.obj scalar_born_cpu_iso_8_double.obj scalar_cpu_iso_2_float.obj scalar_cpu_iso_4_float.obj scalar_cpu_iso_6_float.obj scalar_cpu_iso_8_float.obj scalar_cpu_iso_2_double.obj scalar_cpu_iso_4_double.obj scalar_cpu_iso_6_double.obj scalar_cpu_iso_8_double.obj elastic_cpu_iso_2_float.obj elastic_cpu_iso_4_float.obj elastic_cpu_iso_2_double.obj elastic_cpu_iso_4_double.obj -Felibdeepwave_cpu_windows_x86_64.dll
cl $CFLAGS -LD scalar_born_cpu_iso_2_float.obj scalar_born_cpu_iso_4_float.obj scalar_born_cpu_iso_6_float.obj scalar_born_cpu_iso_8_float.obj scalar_born_cpu_iso_2_double.obj scalar_born_cpu_iso_4_double.obj scalar_born_cpu_iso_6_double.obj scalar_born_cpu_iso_8_double.obj scalar_cpu_iso_2_float.obj scalar_cpu_iso_4_float.obj scalar_cpu_iso_6_float.obj scalar_cpu_iso_8_float.obj scalar_cpu_iso_2_double.obj scalar_cpu_iso_4_double.obj scalar_cpu_iso_6_double.obj scalar_cpu_iso_8_double.obj elastic_cpu_iso_2_float.obj elastic_cpu_iso_4_float.obj elastic_cpu_iso_2_double.obj elastic_cpu_iso_4_double.obj -nodefaultlib:vcomp libiomp5md.lib -Felibdeepwave_cpu_windows_x86_64.dll
nvcc $CUDAFLAGS -DDW_ACCURACY=2 -DDW_DTYPE=float -c scalar.cu -o scalar_cuda_iso_2_float.obj
nvcc $CUDAFLAGS -DDW_ACCURACY=4 -DDW_DTYPE=float -c scalar.cu -o scalar_cuda_iso_4_float.obj
nvcc $CUDAFLAGS -DDW_ACCURACY=6 -DDW_DTYPE=float -c scalar.cu -o scalar_cuda_iso_6_float.obj
Expand Down
6 changes: 0 additions & 6 deletions src/deepwave/common_cpu.h
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
#ifndef DW_COMMON_CPU_H
#define DW_COMMON_CPU_H

//#ifdef DW_USE_OPENMP
//int dw_use_openmp = 1;
//#else
//int dw_use_openmp = 0;
//#endif /* DW_USE_OPENMP */

static void add_sources(DW_DTYPE *__restrict const wf,
DW_DTYPE const *__restrict const f,
int64_t const *__restrict const sources_i,
Expand Down

0 comments on commit be15287

Please sign in to comment.