@@ -44,18 +44,41 @@ class YOLO {
4444 // values for each grid cell, i.e. 125 channels. The total features array
4545 // contains 125x13x13 elements.
4646
47+ // NOTE: Accessing the elements of the multi-array through the subscript
48+ // `features[[channel, cy, cx] as [NSNumber]].floatValue` is slow.
49+ // Reading them through a raw pointer is much faster; the cast below assumes the multi-array stores Double elements.
50+ let featurePointer = UnsafeMutablePointer < Double > ( OpaquePointer ( features. dataPointer) )
51+ let channelStride = features. strides [ 0 ] . intValue
52+ let yStride = features. strides [ 1 ] . intValue
53+ let xStride = features. strides [ 2 ] . intValue
54+
/// Returns the flat index into `featurePointer` for the element at the
/// given channel and grid cell, using the strides read from `features.strides`.
///
/// Note the argument order is (channel, x, y), whereas the multi-array
/// subscript order is [channel, y, x].
func offset(_ channelIndex: Int, _ column: Int, _ row: Int) -> Int {
  let channelTerm = channelIndex * channelStride
  let rowTerm = row * yStride
  let columnTerm = column * xStride
  return channelTerm + rowTerm + columnTerm
}
58+
4759 for cy in 0 ..< gridHeight {
4860 for cx in 0 ..< gridWidth {
4961 for b in 0 ..< boxesPerCell {
5062
5163 // For the first bounding box (b=0) we have to read channels 0-24,
5264 // for b=1 we have to read channels 25-49, and so on.
5365 let channel = b* ( numClasses + 5 )
66+
67+ // The slow way:
68+ /*
5469 let tx = features[[channel , cy, cx] as [NSNumber]].floatValue
5570 let ty = features[[channel + 1, cy, cx] as [NSNumber]].floatValue
5671 let tw = features[[channel + 2, cy, cx] as [NSNumber]].floatValue
5772 let th = features[[channel + 3, cy, cx] as [NSNumber]].floatValue
5873 let tc = features[[channel + 4, cy, cx] as [NSNumber]].floatValue
74+ */
75+
76+ // The fast way:
77+ let tx = Float ( featurePointer [ offset ( channel , cx, cy) ] )
78+ let ty = Float ( featurePointer [ offset ( channel + 1 , cx, cy) ] )
79+ let tw = Float ( featurePointer [ offset ( channel + 2 , cx, cy) ] )
80+ let th = Float ( featurePointer [ offset ( channel + 3 , cx, cy) ] )
81+ let tc = Float ( featurePointer [ offset ( channel + 4 , cx, cy) ] )
5982
6083 // The predicted tx and ty coordinates are relative to the location
6184 // of the grid cell; we use the logistic sigmoid to constrain these
@@ -80,7 +103,11 @@ class YOLO {
80103 // so we can interpret these numbers as percentages.
81104 var classes = [ Float] ( repeating: 0 , count: numClasses)
82105 for c in 0 ..< numClasses {
83- classes [ c] = features [ [ channel + 5 + c, cy, cx] as [ NSNumber ] ] . floatValue
106+ // The slow way:
107+ //classes[c] = features[[channel + 5 + c, cy, cx] as [NSNumber]].floatValue
108+
109+ // The fast way:
110+ classes [ c] = Float ( featurePointer [ offset ( channel + 5 + c, cx, cy) ] )
84111 }
85112 classes = softmax ( classes)
86113
0 commit comments