-
Notifications
You must be signed in to change notification settings - Fork 1
/
conv_kernmul.go
45 lines (36 loc) · 1.76 KB
/
conv_kernmul.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
package cuda
// Kernel multiplication for purely real kernel, symmetric around Y axis (apart from first row).
// Launch configs range over all complex elements of fft input. This could be optimized: range only over kernel.
import (
"github.com/mumax/3/data"
"github.com/mumax/3/util"
)
// kernMulRSymm3D_async launches the kernel multiplication for the 3D demag
// convolution, exploiting full kernel symmetry.
//
// fftM holds the three input components; Kxx, Kyy, Kzz, Kyz, Kxz and Kxy are
// the kernel components. Only component 0 of each slice is handed to the GPU
// kernel. fftM[X] and Kxx are checked (via util.Argument) to have exactly one
// component. The launch configuration is built from the size {Nx, Ny, Nz}.
func kernMulRSymm3D_async(fftM [3]*data.Slice, Kxx, Kyy, Kzz, Kyz, Kxz, Kxy *data.Slice, Nx, Ny, Nz int) {
	singleComp := fftM[X].NComp() == 1 && Kxx.NComp() == 1
	util.Argument(singleComp)

	size := [3]int{Nx, Ny, Nz}
	cfg := make3DConf(size)

	// Device pointers are fetched in the same order the GPU kernel expects them.
	mxPtr, myPtr, mzPtr := fftM[X].DevPtr(0), fftM[Y].DevPtr(0), fftM[Z].DevPtr(0)
	kxxPtr, kyyPtr, kzzPtr := Kxx.DevPtr(0), Kyy.DevPtr(0), Kzz.DevPtr(0)
	kyzPtr, kxzPtr, kxyPtr := Kyz.DevPtr(0), Kxz.DevPtr(0), Kxy.DevPtr(0)

	k_kernmulRSymm3D_async(
		mxPtr, myPtr, mzPtr,
		kxxPtr, kyyPtr, kzzPtr, kyzPtr, kxzPtr, kxyPtr,
		Nx, Ny, Nz, cfg,
	)
}
// kernMulRSymm2Dxy_async launches the kernel multiplication for the 2D demag
// convolution on X and Y, exploiting full kernel symmetry.
//
// fftMx and fftMy are the input components; Kxx, Kyy and Kxy are the kernel
// components. Only component 0 of each slice is handed to the GPU kernel.
// fftMy and Kxx are checked (via util.Argument) to have exactly one component.
// The launch configuration is built from the size {Nx, Ny, 1}.
func kernMulRSymm2Dxy_async(fftMx, fftMy, Kxx, Kyy, Kxy *data.Slice, Nx, Ny int) {
	singleComp := fftMy.NComp() == 1 && Kxx.NComp() == 1
	util.Argument(singleComp)

	size := [3]int{Nx, Ny, 1}
	cfg := make3DConf(size)

	mxPtr, myPtr := fftMx.DevPtr(0), fftMy.DevPtr(0)
	kxxPtr, kyyPtr, kxyPtr := Kxx.DevPtr(0), Kyy.DevPtr(0), Kxy.DevPtr(0)

	k_kernmulRSymm2Dxy_async(
		mxPtr, myPtr,
		kxxPtr, kyyPtr, kxyPtr,
		Nx, Ny, cfg,
	)
}
// kernMulRSymm2Dz_async launches the kernel multiplication for the 2D demag
// convolution on Z, exploiting full kernel symmetry.
//
// fftMz is the input component and Kzz the kernel component; both are checked
// (via util.Argument) to have exactly one component, and component 0 of each
// is handed to the GPU kernel. The launch configuration is built from the
// size {Nx, Ny, 1}.
func kernMulRSymm2Dz_async(fftMz, Kzz *data.Slice, Nx, Ny int) {
	singleComp := fftMz.NComp() == 1 && Kzz.NComp() == 1
	util.Argument(singleComp)

	size := [3]int{Nx, Ny, 1}
	cfg := make3DConf(size)

	mzPtr := fftMz.DevPtr(0)
	kzzPtr := Kzz.DevPtr(0)

	k_kernmulRSymm2Dz_async(mzPtr, kzzPtr, Nx, Ny, cfg)
}
// kernMulC_async launches the kernel multiplication for a general 1D
// convolution. Unlike the kernMulRSymm* variants it does not assume any
// kernel symmetry. Used for MFM images.
//
// fftM is the input and K the kernel; both are checked (via util.Argument) to
// have exactly one component, and component 0 of each is handed to the GPU
// kernel. The launch configuration is built from the size {Nx, Ny, 1}.
func kernMulC_async(fftM, K *data.Slice, Nx, Ny int) {
	singleComp := fftM.NComp() == 1 && K.NComp() == 1
	util.Argument(singleComp)

	size := [3]int{Nx, Ny, 1}
	cfg := make3DConf(size)

	mPtr := fftM.DevPtr(0)
	kPtr := K.DevPtr(0)

	k_kernmulC_async(mPtr, kPtr, Nx, Ny, cfg)
}