@@ -75,7 +75,11 @@ class ViewController: UIViewController {
7575 }
7676
7777 request = VNCoreMLRequest ( model: visionModel, completionHandler: visionRequestDidComplete)
78- request. imageCropAndScaleOption = . centerCrop
78+
79+ // NOTE: If you choose another crop/scale option, then you must also
80+ // change how the BoundingBox objects get scaled when they are drawn.
81+ // Currently they assume the full input image is used.
82+ request. imageCropAndScaleOption = . scaleFill
7983 }
8084
8185 func setUpCamera( ) {
@@ -119,7 +123,7 @@ class ViewController: UIViewController {
119123 // MARK: - Doing inference
120124
121125 func predict( image: UIImage ) {
122- if let pixelBuffer = image. pixelBuffer ( width: 416 , height: 416 ) {
126+ if let pixelBuffer = image. pixelBuffer ( width: YOLO . inputWidth , height: YOLO . inputHeight ) {
123127 predict ( pixelBuffer: pixelBuffer)
124128 }
125129 }
@@ -145,34 +149,38 @@ class ViewController: UIViewController {
145149 // Resize the input to 416x416 and give it to our model.
146150 if let boundingBoxes = try ? yolo. predict ( image: resizedPixelBuffer) {
147151 let elapsed = CACurrentMediaTime ( ) - startTime
148-
149- DispatchQueue . main. async {
150- // For debugging, to make sure the resized CVPixelBuffer is correct.
151- //var debugImage: CGImage?
152- //VTCreateCGImageFromCVPixelBuffer(resizedPixelBuffer, nil, &debugImage)
153- //self.debugImageView.image = UIImage(cgImage: debugImage!)
154-
155- self . show ( predictions: boundingBoxes)
156- self . timeLabel. text = String ( format: " Elapsed %.5f seconds (%.2f FPS) " , elapsed, 1 / elapsed)
157- }
152+ showOnMainThread ( boundingBoxes, elapsed)
158153 }
159154 }
160155
161156 func predictUsingVision( pixelBuffer: CVPixelBuffer ) {
162- // TODO: This ought to work but the request contains no results!
163- // For some reason Vision does not understand it needs to return
164- // a VNCoreMLFeatureValueObservation object.
157+ // Measure how long it takes to predict a single video frame.
158+ startTime = CACurrentMediaTime ( )
159+
160+ // Vision will automatically resize the input image.
165161 let handler = VNImageRequestHandler ( cvPixelBuffer: pixelBuffer)
166162 try ? handler. perform ( [ request] )
167163 }
168164
169165 func visionRequestDidComplete( request: VNRequest , error: Error ? ) {
170- if let observations = request. results as? [ VNCoreMLFeatureValueObservation ] {
171- print ( observations. count )
166+ if let observations = request. results as? [ VNCoreMLFeatureValueObservation ] ,
167+ let features = observations. first ? . featureValue . multiArrayValue {
172168
173- DispatchQueue . main. async {
174- // TODO: show the results
175- }
169+ let boundingBoxes = yolo. computeBoundingBoxes ( features: features)
170+ let elapsed = CACurrentMediaTime ( ) - startTime
171+ showOnMainThread ( boundingBoxes, elapsed)
172+ }
173+ }
174+
175+ func showOnMainThread( _ boundingBoxes: [ YOLO . Prediction ] , _ elapsed: CFTimeInterval ) {
176+ DispatchQueue . main. async {
177+ // For debugging, to make sure the resized CVPixelBuffer is correct.
178+ //var debugImage: CGImage?
179+ //VTCreateCGImageFromCVPixelBuffer(resizedPixelBuffer, nil, &debugImage)
180+ //self.debugImageView.image = UIImage(cgImage: debugImage!)
181+
182+ self . show ( predictions: boundingBoxes)
183+ self . timeLabel. text = String ( format: " Elapsed %.5f seconds (%.2f FPS) " , elapsed, 1 / elapsed)
176184 }
177185 }
178186
@@ -188,8 +196,8 @@ class ViewController: UIViewController {
188196 // and bottom.
189197 let width = view. bounds. width
190198 let height = width * 4 / 3
191- let scaleX = width / 416
192- let scaleY = height / 416
199+ let scaleX = width / CGFloat ( YOLO . inputWidth )
200+ let scaleY = height / CGFloat ( YOLO . inputHeight )
193201 let top = ( view. bounds. height - height) / 2
194202
195203 // Translate and scale the rectangle to our own coordinate system.
@@ -218,8 +226,8 @@ extension ViewController: VideoCaptureDelegate {
218226
219227 // Perform the prediction on VideoCapture's queue.
220228 if let pixelBuffer = pixelBuffer {
221- predict ( pixelBuffer: pixelBuffer)
222- // predictUsingVision(pixelBuffer: pixelBuffer)
229+ // predict(pixelBuffer: pixelBuffer)
230+ predictUsingVision ( pixelBuffer: pixelBuffer)
223231 }
224232 }
225233}
0 commit comments