Skip to content

Commit 5452b7f

Browse files
committed
Running the model with Vision works now in beta 3
1 parent 6f9ebe2 commit 5452b7f

1 file changed

Lines changed: 32 additions & 24 deletions

File tree

TinyYOLO-CoreML/TinyYOLO-CoreML/ViewController.swift

Lines changed: 32 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,11 @@ class ViewController: UIViewController {
7575
}
7676

7777
request = VNCoreMLRequest(model: visionModel, completionHandler: visionRequestDidComplete)
78-
request.imageCropAndScaleOption = .centerCrop
78+
79+
// NOTE: If you choose another crop/scale option, then you must also
80+
// change how the BoundingBox objects get scaled when they are drawn.
81+
// Currently they assume the full input image is used.
82+
request.imageCropAndScaleOption = .scaleFill
7983
}
8084

8185
func setUpCamera() {
@@ -119,7 +123,7 @@ class ViewController: UIViewController {
119123
// MARK: - Doing inference
120124

121125
func predict(image: UIImage) {
122-
if let pixelBuffer = image.pixelBuffer(width: 416, height: 416) {
126+
if let pixelBuffer = image.pixelBuffer(width: YOLO.inputWidth, height: YOLO.inputHeight) {
123127
predict(pixelBuffer: pixelBuffer)
124128
}
125129
}
@@ -145,34 +149,38 @@ class ViewController: UIViewController {
145149
// Resize the input to 416x416 and give it to our model.
146150
if let boundingBoxes = try? yolo.predict(image: resizedPixelBuffer) {
147151
let elapsed = CACurrentMediaTime() - startTime
148-
149-
DispatchQueue.main.async {
150-
// For debugging, to make sure the resized CVPixelBuffer is correct.
151-
//var debugImage: CGImage?
152-
//VTCreateCGImageFromCVPixelBuffer(resizedPixelBuffer, nil, &debugImage)
153-
//self.debugImageView.image = UIImage(cgImage: debugImage!)
154-
155-
self.show(predictions: boundingBoxes)
156-
self.timeLabel.text = String(format: "Elapsed %.5f seconds (%.2f FPS)", elapsed, 1/elapsed)
157-
}
152+
showOnMainThread(boundingBoxes, elapsed)
158153
}
159154
}
160155

161156
func predictUsingVision(pixelBuffer: CVPixelBuffer) {
162-
// TODO: This ought to work but the request contains no results!
163-
// For some reason Vision does not understand it needs to return
164-
// a VNCoreMLFeatureValueObservation object.
157+
// Measure how long it takes to predict a single video frame.
158+
startTime = CACurrentMediaTime()
159+
160+
// Vision will automatically resize the input image.
165161
let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer)
166162
try? handler.perform([request])
167163
}
168164

169165
func visionRequestDidComplete(request: VNRequest, error: Error?) {
170-
if let observations = request.results as? [VNCoreMLFeatureValueObservation] {
171-
print(observations.count)
166+
if let observations = request.results as? [VNCoreMLFeatureValueObservation],
167+
let features = observations.first?.featureValue.multiArrayValue {
172168

173-
DispatchQueue.main.async {
174-
// TODO: show the results
175-
}
169+
let boundingBoxes = yolo.computeBoundingBoxes(features: features)
170+
let elapsed = CACurrentMediaTime() - startTime
171+
showOnMainThread(boundingBoxes, elapsed)
172+
}
173+
}
174+
175+
func showOnMainThread(_ boundingBoxes: [YOLO.Prediction], _ elapsed: CFTimeInterval) {
176+
DispatchQueue.main.async {
177+
// For debugging, to make sure the resized CVPixelBuffer is correct.
178+
//var debugImage: CGImage?
179+
//VTCreateCGImageFromCVPixelBuffer(resizedPixelBuffer, nil, &debugImage)
180+
//self.debugImageView.image = UIImage(cgImage: debugImage!)
181+
182+
self.show(predictions: boundingBoxes)
183+
self.timeLabel.text = String(format: "Elapsed %.5f seconds (%.2f FPS)", elapsed, 1/elapsed)
176184
}
177185
}
178186

@@ -188,8 +196,8 @@ class ViewController: UIViewController {
188196
// and bottom.
189197
let width = view.bounds.width
190198
let height = width * 4 / 3
191-
let scaleX = width / 416
192-
let scaleY = height / 416
199+
let scaleX = width / CGFloat(YOLO.inputWidth)
200+
let scaleY = height / CGFloat(YOLO.inputHeight)
193201
let top = (view.bounds.height - height) / 2
194202

195203
// Translate and scale the rectangle to our own coordinate system.
@@ -218,8 +226,8 @@ extension ViewController: VideoCaptureDelegate {
218226

219227
// Perform the prediction on VideoCapture's queue.
220228
if let pixelBuffer = pixelBuffer {
221-
predict(pixelBuffer: pixelBuffer)
222-
//predictUsingVision(pixelBuffer: pixelBuffer)
229+
//predict(pixelBuffer: pixelBuffer)
230+
predictUsingVision(pixelBuffer: pixelBuffer)
223231
}
224232
}
225233
}

0 commit comments

Comments
 (0)