-
Notifications
You must be signed in to change notification settings - Fork 1
/
nn.go
158 lines (140 loc) · 4.82 KB
/
nn.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
package nn
import (
"bufio"
"encoding/json"
"os"
"strings"
"unsafe"
)
// Package nn is a Neural Network interface layer
// To load a model, use the nnload package.
// DetectionResult holds the results of an NN object detection run.
// It is serialized to JSON using the field tags below.
type DetectionResult struct {
CameraID int64 `json:"cameraID"` // NOTE(review): presumably the ID of the camera that produced the image - confirm
ImageWidth int `json:"imageWidth"` // NOTE(review): presumably the width of the image that was analyzed - confirm
ImageHeight int `json:"imageHeight"` // NOTE(review): presumably the height of the image that was analyzed - confirm
Objects []ObjectDetection `json:"objects"` // Detected objects
}
// DetectionParams holds the tunable parameters of an NN object detection run.
type DetectionParams struct {
	ProbabilityThreshold float32 // Value between 0 and 1. Lower values will find more objects. Zero value will use the default.
	NmsThreshold         float32 // Value between 0 and 1. Lower values will merge more objects together into one. Zero value will use the default.
	Unclipped            bool    // If true, don't clip boxes to the neural network boundaries
}

// NewDetectionParams returns a freshly allocated DetectionParams populated
// with the default thresholds (probability 0.5, NMS 0.45) and clipping enabled.
func NewDetectionParams() *DetectionParams {
	params := new(DetectionParams)
	params.ProbabilityThreshold = 0.5
	params.NmsThreshold = 0.45
	// Unclipped keeps its zero value (false): boxes are clipped by default.
	return params
}
// ImageCrop describes a rectangular window into an image.
// In C this would be represented as a pointer and a stride, but since that is
// not memory safe, the whole pixel buffer is carried along with the crop
// rectangle. Once we get into the C world for NN inference, strides etc. can
// be used.
type ImageCrop struct {
	NChan       int    // Number of channels (eg 3 for RGB)
	Pixels      []byte // The whole image
	ImageWidth  int    // The width of the original image, held in Pixels
	ImageHeight int    // The height of the original image, held in Pixels
	CropX       int    // Origin of crop X
	CropY       int    // Origin of crop Y
	CropWidth   int    // The width of this crop
	CropHeight  int    // The height of this crop
}

// Pointer returns the address of the first byte of the crop's top-left pixel
// inside Pixels. It panics if Pixels is empty.
func (c ImageCrop) Pointer() unsafe.Pointer {
	// Byte offset of pixel (CropX, CropY): whole rows above it, plus the
	// pixels to its left, each NChan bytes wide.
	offset := (c.CropY*c.ImageWidth + c.CropX) * c.NChan
	return unsafe.Add(unsafe.Pointer(&c.Pixels[0]), offset)
}

// Stride returns the number of bytes between the starts of two consecutive
// rows of the underlying (uncropped) image.
func (c ImageCrop) Stride() int {
	return c.NChan * c.ImageWidth
}

// Crop returns a crop of the crop: (x1, y1) and (x2, y2) are relative to the
// origin of the existing crop. It panics if the resulting rectangle has a
// negative origin or size, or extends past the bounds of the underlying image.
func (c ImageCrop) Crop(x1, y1, x2, y2 int) ImageCrop {
	left, top := c.CropX+x1, c.CropY+y1
	width, height := x2-x1, y2-y1

	switch {
	case left < 0, top < 0, width < 0, height < 0:
		panic("Crop out of bounds")
	case left+width > c.ImageWidth, top+height > c.ImageHeight:
		panic("Crop out of bounds")
	}

	// c is a copy (value receiver), so it can be adjusted and returned
	// without touching the caller's crop.
	c.CropX, c.CropY = left, top
	c.CropWidth, c.CropHeight = width, height
	return c
}

// WholeImage returns a 'crop' that covers the entire image.
func WholeImage(nchan int, pixels []byte, width, height int) ImageCrop {
	var whole ImageCrop
	whole.NChan = nchan
	whole.Pixels = pixels
	whole.ImageWidth, whole.ImageHeight = width, height
	// The crop origin stays at (0, 0) and the crop spans the full image.
	whole.CropWidth, whole.CropHeight = width, height
	return whole
}
// ThreadingMode selects whether the NN library may use one or several threads
// while running inference.
type ThreadingMode int
// Available threading modes.
const (
ThreadingModeSingle ThreadingMode = iota // Force the NN library to run inference on a single thread
ThreadingModeParallel // Allow the NN library to run multiple threads while executing a model
)
// ObjectDetector is given an image, and returns zero or more detected objects.
type ObjectDetector interface {
// Close closes the detector. You MUST call this when finished, because the
// detector is backed by a C++ object.
Close()
// DetectObjects returns a list of objects detected in img.
// The image is expected to be 24-bit RGB, so img.NChan is expected to be 3.
// You can create a default DetectionParams with NewDetectionParams().
DetectObjects(img ImageCrop, params *DetectionParams) ([]ObjectDetection, error)
// Config returns the model config.
// Callers assume that the ModelConfig will remain constant, so don't change it
// once the detector has been created.
Config() *ModelConfig
}
// ModelConfig describes an NN model. It is stored as a JSON file alongside
// the weights of the model.
type ModelConfig struct {
	Architecture string   `json:"architecture"` // eg "yolov8"
	Width        int      `json:"width"`        // eg 320
	Height       int      `json:"height"`       // eg 256
	Classes      []string `json:"classes"`      // eg ["person", "bicycle", "car", ...]
}

// LoadModelConfig reads the JSON file at filename and decodes it into a new
// ModelConfig. If the file cannot be read or does not contain valid JSON for
// a ModelConfig, it returns nil and the underlying error.
func LoadModelConfig(filename string) (*ModelConfig, error) {
	raw, err := os.ReadFile(filename)
	if err != nil {
		return nil, err
	}

	var cfg ModelConfig
	if err := json.Unmarshal(raw, &cfg); err != nil {
		return nil, err
	}
	return &cfg, nil
}
// LoadClassFile reads a text file that holds one class name per line.
//
// Each line is trimmed of surrounding whitespace and blank lines are skipped.
// On success the returned slice is non-nil, even if the file contains no
// class names. An error is returned if the file cannot be opened, or if
// reading it fails part-way (for example bufio.ErrTooLong when a line exceeds
// the scanner's token limit); in that case no classes are returned.
func LoadClassFile(filename string) ([]string, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	classes := []string{}
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		if line := strings.TrimSpace(scanner.Text()); line != "" {
			classes = append(classes, line)
		}
	}
	// Scan returns false on both EOF and failure. Without this check a read
	// error would silently produce a truncated class list.
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return classes, nil
}