// coreml-survival-guide/MobileNetV2+SSDLite/ObjectDetection/ObjectDetection/ViewController.swift

import CoreMedia
import CoreML
import UIKit
import Vision

class ViewController: UIViewController {
  @IBOutlet var videoPreview: UIView!

  var videoCapture: VideoCapture!

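  // The frame currently being processed. predict(sampleBuffer:) uses this to
  // drop new camera frames while Vision is still working on the previous one.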
  var currentBuffer: CVPixelBuffer?

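  // MobileNetV2_SSDLite is the model class that Xcode generates automatically
  // from the .mlmodel file when it is added to the project.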
  let coreMLModel = MobileNetV2_SSDLite()

  lazy var visionModel: VNCoreMLModel = {
    do {
      return try VNCoreMLModel(for: coreMLModel.model)
    } catch {
      fatalError("Failed to create VNCoreMLModel: \(error)")
    }
  }()

  lazy var visionRequest: VNCoreMLRequest = {
    let request = VNCoreMLRequest(model: visionModel, completionHandler: {
      [weak self] request, error in
      self?.processObservations(for: request, error: error)
    })

    // NOTE: If you use another crop/scale option, you must also change
    // how the BoundingBoxView objects get scaled when they are drawn.
    // Currently they assume the full input image is used.
    request.imageCropAndScaleOption = .scaleFill
    return request
  }()

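  // A fixed pool of views for drawing detections. The views are created once
  // and reused on every frame; any that aren't needed are simply hidden.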
  let maxBoundingBoxViews = 10
  var boundingBoxViews = [BoundingBoxView]()
  var colors: [String: UIColor] = [:]

  override func viewDidLoad() {
    super.viewDidLoad()
    setUpBoundingBoxViews()
    setUpCamera()
  }

  func setUpBoundingBoxViews() {
    for _ in 0..<maxBoundingBoxViews {
      boundingBoxViews.append(BoundingBoxView())
    }

    // The label names are stored inside the MLModel's metadata.
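    // The "classes" entry is a single comma-separated string, e.g.
    // "person,bicycle,car,..." for a model trained on COCO.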
    guard let userDefined = coreMLModel.model.modelDescription.metadata[MLModelMetadataKey.creatorDefinedKey] as? [String: String],
          let allLabels = userDefined["classes"] else {
      fatalError("Missing metadata")
    }
    let labels = allLabels.components(separatedBy: ",")

    // Assign random colors to the classes.
    for label in labels {
      colors[label] = UIColor(red: CGFloat.random(in: 0...1),
                              green: CGFloat.random(in: 0...1),
                              blue: CGFloat.random(in: 0...1),
                              alpha: 1)
    }
  }

  func setUpCamera() {
    videoCapture = VideoCapture()
    videoCapture.delegate = self

    videoCapture.setUp(sessionPreset: .hd1280x720) { success in
      if success {
        // Add the video preview into the UI.
        if let previewLayer = self.videoCapture.previewLayer {
          self.videoPreview.layer.addSublayer(previewLayer)
          self.resizePreviewLayer()
        }

        // Add the bounding box layers to the UI, on top of the video preview.
        for box in self.boundingBoxViews {
          box.addToLayer(self.videoPreview.layer)
        }

        // Once everything is set up, we can start capturing live video.
        self.videoCapture.start()
      }
    }
  }

  override func viewWillLayoutSubviews() {
    super.viewWillLayoutSubviews()
    resizePreviewLayer()
  }

  func resizePreviewLayer() {
    videoCapture.previewLayer?.frame = videoPreview.bounds
  }

  func predict(sampleBuffer: CMSampleBuffer) {
    if currentBuffer == nil, let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) {
      currentBuffer = pixelBuffer

      // Get additional info from the camera.
      var options: [VNImageOption: Any] = [:]
      if let cameraIntrinsicMatrix = CMGetAttachment(sampleBuffer, key: kCMSampleBufferAttachmentKey_CameraIntrinsicMatrix, attachmentModeOut: nil) {
        options[.cameraIntrinsics] = cameraIntrinsicMatrix
      }

      let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: .up, options: options)
      do {
        try handler.perform([self.visionRequest])
      } catch {
        print("Failed to perform Vision request: \(error)")
      }
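      // Release the buffer so the next camera frame can be processed.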
      currentBuffer = nil
    }
  }

  func processObservations(for request: VNRequest, error: Error?) {
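    // Vision invokes the completion handler on a background queue, so hop to
    // the main thread before touching any UIKit views.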
    DispatchQueue.main.async {
      if let results = request.results as? [VNRecognizedObjectObservation] {
        self.show(predictions: results)
      } else {
        self.show(predictions: [])
      }
    }
  }

  func show(predictions: [VNRecognizedObjectObservation]) {
    for i in 0..<boundingBoxViews.count {
      if i < predictions.count {
        let prediction = predictions[i]

        /*
         The predicted bounding box is in normalized image coordinates, with
         the origin in the lower-left corner.

         Scale the bounding box to the coordinate system of the video preview,
         which is as wide as the screen and has a 16:9 aspect ratio. The video
         preview also may be letterboxed at the top and bottom.

         Based on code from https://github.com/Willjay90/AppleFaceDetection

         NOTE: If you use a different .imageCropAndScaleOption, or a different
         video resolution, then you also need to change the math here!
        */
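        // Worked example (assuming a hypothetical 375×812-pt portrait view):
        // width = 375, height = 375 * 16/9 ≈ 666.7, offsetY ≈ 72.7. A box with
        // normalized y = 0.5 and height = 0.2 then lands with its top edge at
        // 666.7 * (1 - 0.5 - 0.2) + 72.7 ≈ 272.7 pt from the top of the view.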
        let width = view.bounds.width
        let height = width * 16 / 9
        let offsetY = (view.bounds.height - height) / 2
        let scale = CGAffineTransform.identity.scaledBy(x: width, y: height)
        let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: -height - offsetY)
        let rect = prediction.boundingBox.applying(scale).applying(transform)

        // The labels array is a list of VNClassificationObservation objects,
        // with the highest scoring class first in the list.
        let bestClass = prediction.labels[0].identifier
        let confidence = prediction.labels[0].confidence

        // Show the bounding box.
        let label = String(format: "%@ %.1f", bestClass, confidence * 100)
        let color = colors[bestClass] ?? UIColor.red
        boundingBoxViews[i].show(frame: rect, label: label, color: color)
      } else {
        boundingBoxViews[i].hide()
      }
    }
  }
}

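// VideoCapture is the app's camera helper (an AVCaptureSession wrapper); it
// calls this delegate method for every frame it captures.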
extension ViewController: VideoCaptureDelegate {
  func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame sampleBuffer: CMSampleBuffer) {
    predict(sampleBuffer: sampleBuffer)
  }
}
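
// A minimal sketch of the delegate protocol this extension assumes; the real
// definition lives elsewhere in the project (presumably VideoCapture.swift):
//
//   protocol VideoCaptureDelegate: AnyObject {
//     func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame sampleBuffer: CMSampleBuffer)
//   }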