Skip to content

Commit a7db77b

Browse files
committed
Access features through direct memory access for speed
1 parent 5452b7f commit a7db77b

File tree

1 file changed

+28
-1
lines changed

1 file changed

+28
-1
lines changed

TinyYOLO-CoreML/TinyYOLO-CoreML/YOLO.swift

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,18 +44,41 @@ class YOLO {
4444
// values for each grid cell, i.e. 125 channels. The total features array
4545
// contains 125x13x13 elements.
4646

47+
// NOTE: It turns out that accessing the elements in the multi-array as
48+
// `features[[channel, cy, cx] as [NSNumber]].floatValue` is quite slow.
49+
// It's much faster to read the features through a raw pointer; the cast below assumes the array stores Doubles.
50+
let featurePointer = UnsafeMutablePointer<Double>(OpaquePointer(features.dataPointer))
51+
let channelStride = features.strides[0].intValue
52+
let yStride = features.strides[1].intValue
53+
let xStride = features.strides[2].intValue
54+
55+
func offset(_ channel: Int, _ x: Int, _ y: Int) -> Int {
56+
return channel*channelStride + y*yStride + x*xStride
57+
}
58+
4759
for cy in 0..<gridHeight {
4860
for cx in 0..<gridWidth {
4961
for b in 0..<boxesPerCell {
5062

5163
// For the first bounding box (b=0) we have to read channels 0-24,
5264
// for b=1 we have to read channels 25-49, and so on.
5365
let channel = b*(numClasses + 5)
66+
67+
// The slow way:
68+
/*
5469
let tx = features[[channel , cy, cx] as [NSNumber]].floatValue
5570
let ty = features[[channel + 1, cy, cx] as [NSNumber]].floatValue
5671
let tw = features[[channel + 2, cy, cx] as [NSNumber]].floatValue
5772
let th = features[[channel + 3, cy, cx] as [NSNumber]].floatValue
5873
let tc = features[[channel + 4, cy, cx] as [NSNumber]].floatValue
74+
*/
75+
76+
// The fast way:
77+
let tx = Float(featurePointer[offset(channel , cx, cy)])
78+
let ty = Float(featurePointer[offset(channel + 1, cx, cy)])
79+
let tw = Float(featurePointer[offset(channel + 2, cx, cy)])
80+
let th = Float(featurePointer[offset(channel + 3, cx, cy)])
81+
let tc = Float(featurePointer[offset(channel + 4, cx, cy)])
5982

6083
// The predicted tx and ty coordinates are relative to the location
6184
// of the grid cell; we use the logistic sigmoid to constrain these
@@ -80,7 +103,11 @@ class YOLO {
80103
// so we can interpret these numbers as percentages.
81104
var classes = [Float](repeating: 0, count: numClasses)
82105
for c in 0..<numClasses {
83-
classes[c] = features[[channel + 5 + c, cy, cx] as [NSNumber]].floatValue
106+
// The slow way:
107+
//classes[c] = features[[channel + 5 + c, cy, cx] as [NSNumber]].floatValue
108+
109+
// The fast way:
110+
classes[c] = Float(featurePointer[offset(channel + 5 + c, cx, cy)])
84111
}
85112
classes = softmax(classes)
86113

0 commit comments

Comments
 (0)