/
cconv.go
110 lines (96 loc) · 2.78 KB
/
cconv.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
package conv2
// The flags chosen below optimize for the following platforms:
// - amd64: Compiler host
// - arm: Raspberry Pi Zero (W)
// - arm64: Raspberry Pi 4
//
// To show the flags which -march=native would produce, run
//
// gcc -march=native -E -v - </dev/null 2>&1 | grep cc1
//
// For more details, see
// - https://gist.github.com/fm4dd/c663217935dc17f0fc73c9c81b0aa845
// - https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html
// - https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html
// - https://gcc.gnu.org/onlinedocs/gcc/AArch64-Options.html
// #cgo CFLAGS: -Wall -Werror -Wextra -pedantic -std=c99
// #cgo CFLAGS: -O2
//
// #cgo amd64 CFLAGS: -march=native
// #cgo amd64 CFLAGS: -fopenmp
// #cgo amd64 LDFLAGS: -fopenmp
//
// #cgo arm CFLAGS: -mfloat-abi=hard -mfpu=vfp -marm -march=armv6kz+fp
//
// #cgo arm64 CFLAGS: -mcpu=cortex-a72 -mtune=cortex-a72
// #cgo arm64 CFLAGS: -fopenmp
// #cgo arm64 LDFLAGS: -fopenmp
//
// #include "cconv.h"
import "C"
import (
"gonum.org/v1/gonum/mat"
)
// FullFillC computes the full 2-dimensional convolution of f and g.
//
// The work is delegated to the C implementation through cgo.
// The result matches scipy.signal.convolve2d(f, g, mode="full", boundary="fill", fillvalue=0);
// see https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.convolve2d.html.
//
// The returned matrix is newly allocated and has rows(f)+rows(g)-1 rows and
// cols(f)+cols(g)-1 columns.
func FullFillC(f, g *mat.Dense) *mat.Dense {
	fRows, fCols := f.Dims()
	gRows, gCols := g.Dims()

	outCols := fCols + gCols - 1
	outRows := fRows + gRows - 1
	result := mat.NewDense(outRows, outCols, nil)

	// Grab the raw backing storage once per matrix; the C routine works on
	// the flat data slices together with their row strides.
	fRaw := f.RawMatrix()
	gRaw := g.RawMatrix()
	resRaw := result.RawMatrix()

	// Taking the address of element 0 panics if a backing slice is empty.
	C.FullFillC(
		C.int(fRows),
		C.int(fCols),
		C.int(gRows),
		C.int(gCols),
		C.int(outCols),
		C.int(outRows),
		C.int(fRaw.Stride),
		C.int(gRaw.Stride),
		C.int(resRaw.Stride),
		(*C.float64)(&fRaw.Data[0]),
		(*C.float64)(&gRaw.Data[0]),
		(*C.float64)(&resRaw.Data[0]),
	)
	return result
}
// ValidFillC returns the 2-dimensional convolution of f and g.
//
// Implemented in C (cgo).
// It is equivalent to scipy.signal.convolve2d(f, g, mode="valid", boundary="fill", fillvalue=0).
// See https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.convolve2d.html.
//
// The inputs may be passed in either order: if g exceeds f in any dimension
// the two are swapped so that the larger matrix is treated as f. The returned
// matrix is newly allocated and has rows(f)-rows(g)+1 rows and
// cols(f)-cols(g)+1 columns (after the swap, if any).
//
// ValidFillC panics if neither input is at least as large as the other in
// both dimensions.
func ValidFillC(f, g *mat.Dense) *mat.Dense {
	dy, dx := f.Dims()
	dv, du := g.Dims()

	// Make sure f is the larger input. Both dimensions must be considered:
	// comparing only the column count would miss the case where the column
	// counts are equal but g has more rows, and would then reject a
	// perfectly valid pair of inputs.
	if dv > dy || du > dx {
		f, g = g, f
		dy, dx, dv, du = dv, du, dy, dx
	}
	// If g still exceeds f in some dimension, the inputs are incomparable
	// (each one is larger than the other along one axis).
	if dv > dy || du > dx {
		panic("none of the inputs is at least as large as the other in every dimension")
	}

	n, m := dx-du+1, dy-dv+1
	out := mat.NewDense(m, n, nil)

	// The C routine works on the flat backing slices and their row strides.
	fRaw := f.RawMatrix()
	gRaw := g.RawMatrix()
	outRaw := out.RawMatrix()

	// Taking the address of element 0 panics if a backing slice is empty.
	C.ValidFillC(
		C.int(dv),
		C.int(du),
		C.int(n),
		C.int(m),
		C.int(fRaw.Stride),
		C.int(gRaw.Stride),
		C.int(outRaw.Stride),
		(*C.float64)(&fRaw.Data[0]),
		(*C.float64)(&gRaw.Data[0]),
		(*C.float64)(&outRaw.Data[0]),
	)
	return out
}