Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
325 lines (230 sloc) 13.5 KB
//
// ViewController.swift
// SwiftMetalForOSX
//
// Created by Amund Tveit on 10/06/15.
// Copyright © 2015 Amund Tveit. All rights reserved.
//
import Cocoa
import Metal
@available(OSX 10.11, *)
class ViewController: MetalViewController {
/// Runs one Metal benchmark as soon as the view has loaded, prints the total
/// wall-clock time for the whole job, and then terminates the process.
override func viewDidLoad() {
    super.viewDidLoad()
    let start = NSDate()

    // Size of the scalar (Float) benchmark; the vector benchmarks divide it by
    // their width (N/2, N/4, N/16) when sizing their inputs.
    let N = 32000000

    // Alternative benchmarks, kept for reference (enable one at a time):
    //let in1 = createInputVector(N)
    //let out1 = multWithMetal(in1, in2: in1)
    //let in1f2 = createVector2Array(N/2)
    //let out2 = multWithMetalF2(in1f2, in2: in1f2)
    //let in1f4 = createVector4Array(N/4)
    //let out4 = multWithMetalF4(in1f4, in2: in1f4)

    // Active benchmark: the Vector16 variant over N/16 elements.
    let L = N/16
    let in1f16 = createVector16Array(L)
    // The same array is passed as both operands; the product itself is not
    // inspected here, only the elapsed time is reported.
    _ = multWithMetalF16(in1f16, in2: in1f16)

    print("Time to run entire job: \(NSDate().timeIntervalSinceDate(start))")
    // Quit right after the measurement instead of entering the UI run loop.
    exit(0)
}
/// Runs the "doubler" compute shader over `inputVector` and returns the
/// contents of the output buffer once the GPU has finished.
///
/// - Parameter inputVector: Values uploaded to the buffer bound at index 0.
/// - Returns: `inputVector.count` floats copied back from the buffer bound at index 1.
func doublerWithMetal(inputVector:[Float]) -> [Float] {
    print("doublerWithMetal Float")
    setupMetal()
    let (_, computePipelineState, _) = setupShaderInMetalPipeline("doubler")
    let inputMetalBuffer = createMetalBuffer(inputVector)
    var outputVector = [Float](count: inputVector.count, repeatedValue: 0.0)
    let outputMetalBuffer = createMetalBuffer(outputVector)

    // Create Metal Compute Command Encoder and add input and output buffers to it
    metalComputeCommandEncoder = metalCommandBuffer.computeCommandEncoder()
    metalComputeCommandEncoder.setBuffer(inputMetalBuffer, offset: 0, atIndex: 0)
    metalComputeCommandEncoder.setBuffer(outputMetalBuffer, offset: 0, atIndex: 1)
    // Set the shader function that Metal will use
    metalComputeCommandEncoder.setComputePipelineState(computePipelineState)

    // Use the pipeline's threadExecutionWidth as the threadgroup width.
    let threadExecutionWidth = computePipelineState.threadExecutionWidth
    let threadsPerGroup = MTLSize(width: threadExecutionWidth, height: 1, depth: 1)
    // Ceiling division: enough groups to cover every element, without the
    // extra, fully out-of-range group that (count + width) / width dispatched
    // whenever count was an exact multiple of width.
    // NOTE(review): when count is not a multiple of the width, the last group
    // still contains threads past the end of the buffers — confirm the shader
    // guards its index.
    let groupCount = (inputVector.count + threadExecutionWidth - 1) / threadExecutionWidth
    let numThreadgroups = MTLSize(width: groupCount, height: 1, depth: 1)
    metalComputeCommandEncoder.dispatchThreadgroups(numThreadgroups, threadsPerThreadgroup: threadsPerGroup)
    // Finalize configuration
    metalComputeCommandEncoder.endEncoding()
    print("outputVector before job is running: \(outputVector.count)")

    // Start the job and block until the GPU is done; only this span is timed.
    let start = NSDate()
    metalCommandBuffer.commit()
    metalCommandBuffer.waitUntilCompleted()
    print("Time to run network: \(NSDate().timeIntervalSinceDate(start))")

    // Copy the result out of the Metal buffer into the Swift array. The NSData
    // wrapper neither copies nor frees the buffer's memory.
    let data = NSData(bytesNoCopy: outputMetalBuffer.contents(),
        length: outputVector.count*sizeof(Float), freeWhenDone: false)
    data.getBytes(&outputVector, length:inputVector.count * sizeof(Float))
    return outputVector
}
/// Runs the "multvfloat16" compute shader on two `Vector16` arrays and returns
/// the contents of the output buffer once the GPU has finished.
/// Presumably the shader computes an element-wise product; its source is not
/// visible from this file.
///
/// - Parameters:
///   - in1: First operand, bound at buffer index 0.
///   - in2: Second operand, bound at buffer index 1.
/// - Returns: The array read back from the buffer bound at index 2.
func multWithMetalF16(in1:[Vector16], in2:[Vector16]) -> [Vector16] {
    print("multWithMetal Float16")
    setupMetal()
    let (_, computePipelineState, _) = setupShaderInMetalPipeline("multvfloat16")
    let in1Metal = createVector16MetalBuffer(in1, metalDevice: metalDevice)
    let in2Metal = createVector16MetalBuffer(in2, metalDevice: metalDevice)
    var outputVector = createVector16Array(in1.count)
    let outputMetalBuffer = createVector16MetalBuffer(outputVector, metalDevice: metalDevice)

    // Create Metal Compute Command Encoder and add input and output buffers to it
    metalComputeCommandEncoder = metalCommandBuffer.computeCommandEncoder()
    metalComputeCommandEncoder.setBuffer(in1Metal, offset: 0, atIndex: 0)
    metalComputeCommandEncoder.setBuffer(in2Metal, offset: 0, atIndex: 1)
    metalComputeCommandEncoder.setBuffer(outputMetalBuffer, offset: 0, atIndex: 2)
    // Set the shader function that Metal will use
    metalComputeCommandEncoder.setComputePipelineState(computePipelineState)

    // Use the pipeline's threadExecutionWidth as the threadgroup width.
    let threadExecutionWidth = computePipelineState.threadExecutionWidth
    let threadsPerGroup = MTLSize(width: threadExecutionWidth, height: 1, depth: 1)
    // NOTE(review): integer division drops the remainder, so when in1.count is
    // not a multiple of the width the trailing elements are never dispatched.
    // Rounding up is only safe if the shader bounds-checks its index — confirm.
    let numThreadgroups = MTLSize(width: (in1.count)/threadExecutionWidth, height: 1, depth: 1)
    metalComputeCommandEncoder.dispatchThreadgroups(numThreadgroups, threadsPerThreadgroup: threadsPerGroup)
    // Finalize configuration
    metalComputeCommandEncoder.endEncoding()
    print("outputVector before job is running: \(outputVector.count)")

    // Start the job and block until the GPU is done; only commit + wait is timed.
    metalCommandBuffer.enqueue()
    let start = NSDate()
    metalCommandBuffer.commit()
    metalCommandBuffer.waitUntilCompleted()
    print("Time to run network: \(NSDate().timeIntervalSinceDate(start))")

    // Copy the result out of the Metal buffer into the Swift array. The NSData
    // wrapper neither copies nor frees the buffer's memory.
    let data = NSData(bytesNoCopy: outputMetalBuffer.contents(),
        length: outputVector.count*sizeof(Vector16), freeWhenDone: false)
    // The byte count must use the Vector16 element size; sizing it with
    // Vector2 copied back only a fraction of the result.
    data.getBytes(&outputVector, length:in1.count * sizeof(Vector16))
    return outputVector
}
/// Runs the "multvfloat2" compute shader on two `Vector2` arrays and returns
/// the contents of the output buffer after the GPU has finished.
/// Presumably the shader computes an element-wise product; its source is not
/// visible from this file.
///
/// - Parameters:
///   - in1: First operand, bound at buffer index 0.
///   - in2: Second operand, bound at buffer index 1.
/// - Returns: The array read back from the buffer bound at index 2.
func multWithMetalF2(in1:[Vector2], in2:[Vector2]) -> [Vector2] {
    print("multWithMetal Float2")
    setupMetal()
    let pipeline = setupShaderInMetalPipeline("multvfloat2").1
    let elementCount = in1.count

    // Upload both operands and allocate the destination array and buffer.
    let lhsBuffer = createVector2MetalBuffer(in1, metalDevice: metalDevice)
    let rhsBuffer = createVector2MetalBuffer(in2, metalDevice: metalDevice)
    var result = createVector2Array(elementCount)
    let resultBuffer = createVector2MetalBuffer(result, metalDevice: metalDevice)

    // Encode the compute pass: operands at indices 0 and 1, destination at 2.
    metalComputeCommandEncoder = metalCommandBuffer.computeCommandEncoder()
    metalComputeCommandEncoder.setBuffer(lhsBuffer, offset: 0, atIndex: 0)
    metalComputeCommandEncoder.setBuffer(rhsBuffer, offset: 0, atIndex: 1)
    metalComputeCommandEncoder.setBuffer(resultBuffer, offset: 0, atIndex: 2)
    metalComputeCommandEncoder.setComputePipelineState(pipeline)

    // One threadgroup per threadExecutionWidth elements.
    // NOTE(review): the division truncates, so a trailing partial group is
    // never dispatched when the count is not a multiple of the width.
    let groupWidth = pipeline.threadExecutionWidth
    metalComputeCommandEncoder.dispatchThreadgroups(
        MTLSize(width: elementCount / groupWidth, height: 1, depth: 1),
        threadsPerThreadgroup: MTLSize(width: groupWidth, height: 1, depth: 1))
    metalComputeCommandEncoder.endEncoding()
    print("outputVector before job is running: \(result.count)")

    // Submit and block until completion; only commit + wait is timed.
    metalCommandBuffer.enqueue()
    let submittedAt = NSDate()
    metalCommandBuffer.commit()
    metalCommandBuffer.waitUntilCompleted()
    print("Time to run network: \(NSDate().timeIntervalSinceDate(submittedAt))")

    // Read the GPU result back through a non-owning NSData view of the buffer.
    let gpuBytes = NSData(bytesNoCopy: resultBuffer.contents(),
        length: result.count * sizeof(Vector2), freeWhenDone: false)
    gpuBytes.getBytes(&result, length: elementCount * sizeof(Vector2))
    return result
}
/// Runs the "multvfloat4" compute shader on two `Vector4` arrays and returns
/// the contents of the output buffer after the GPU has finished.
/// Presumably the shader computes an element-wise product; its source is not
/// visible from this file.
///
/// - Parameters:
///   - in1: First operand, bound at buffer index 0.
///   - in2: Second operand, bound at buffer index 1.
/// - Returns: The array read back from the buffer bound at index 2.
func multWithMetalF4(in1:[Vector4], in2:[Vector4]) -> [Vector4] {
    print("multWithMetal Float4")
    setupMetal()
    let pipeline = setupShaderInMetalPipeline("multvfloat4").1
    let elementCount = in1.count

    // Upload both operands and allocate the destination array and buffer.
    let lhsBuffer = createVector4MetalBuffer(in1, metalDevice: metalDevice)
    let rhsBuffer = createVector4MetalBuffer(in2, metalDevice: metalDevice)
    var result = createVector4Array(elementCount)
    let resultBuffer = createVector4MetalBuffer(result, metalDevice: metalDevice)

    // Encode the compute pass: operands at indices 0 and 1, destination at 2.
    metalComputeCommandEncoder = metalCommandBuffer.computeCommandEncoder()
    metalComputeCommandEncoder.setBuffer(lhsBuffer, offset: 0, atIndex: 0)
    metalComputeCommandEncoder.setBuffer(rhsBuffer, offset: 0, atIndex: 1)
    metalComputeCommandEncoder.setBuffer(resultBuffer, offset: 0, atIndex: 2)
    metalComputeCommandEncoder.setComputePipelineState(pipeline)

    // One threadgroup per threadExecutionWidth elements.
    // NOTE(review): the division truncates, so a trailing partial group is
    // never dispatched when the count is not a multiple of the width.
    let groupWidth = pipeline.threadExecutionWidth
    metalComputeCommandEncoder.dispatchThreadgroups(
        MTLSize(width: elementCount / groupWidth, height: 1, depth: 1),
        threadsPerThreadgroup: MTLSize(width: groupWidth, height: 1, depth: 1))
    metalComputeCommandEncoder.endEncoding()
    print("outputVector before job is running: \(result.count)")

    // Submit and block until completion; only commit + wait is timed.
    metalCommandBuffer.enqueue()
    let submittedAt = NSDate()
    metalCommandBuffer.commit()
    metalCommandBuffer.waitUntilCompleted()
    print("Time to run network: \(NSDate().timeIntervalSinceDate(submittedAt))")

    // Read the GPU result back through a non-owning NSData view of the buffer.
    let gpuBytes = NSData(bytesNoCopy: resultBuffer.contents(),
        length: result.count * sizeof(Vector4), freeWhenDone: false)
    gpuBytes.getBytes(&result, length: elementCount * sizeof(Vector4))
    return result
}
/// Runs the "multvfloat" compute shader on two `Float` arrays and returns the
/// contents of the output buffer after the GPU has finished.
/// Presumably the shader computes an element-wise product; its source is not
/// visible from this file.
///
/// - Parameters:
///   - in1: First operand, bound at buffer index 0.
///   - in2: Second operand, bound at buffer index 1.
/// - Returns: `in1.count` floats read back from the buffer bound at index 2.
func multWithMetal(in1:[Float], in2:[Float]) -> [Float] {
    setupMetal()
    let pipeline = setupShaderInMetalPipeline("multvfloat").1
    let elementCount = in1.count

    // Upload both operands and allocate a zero-filled destination.
    let lhsBuffer = createMetalBuffer(in1)
    let rhsBuffer = createMetalBuffer(in2)
    var result = [Float](count: elementCount, repeatedValue: 0.0)
    let resultBuffer = createMetalBuffer(result)

    // Encode the compute pass: operands at indices 0 and 1, destination at 2.
    metalComputeCommandEncoder = metalCommandBuffer.computeCommandEncoder()
    metalComputeCommandEncoder.setBuffer(lhsBuffer, offset: 0, atIndex: 0)
    metalComputeCommandEncoder.setBuffer(rhsBuffer, offset: 0, atIndex: 1)
    metalComputeCommandEncoder.setBuffer(resultBuffer, offset: 0, atIndex: 2)
    metalComputeCommandEncoder.setComputePipelineState(pipeline)

    // One threadgroup per threadExecutionWidth elements.
    // NOTE(review): the division truncates, so a trailing partial group is
    // never dispatched when the count is not a multiple of the width.
    let groupWidth = pipeline.threadExecutionWidth
    metalComputeCommandEncoder.dispatchThreadgroups(
        MTLSize(width: elementCount / groupWidth, height: 1, depth: 1),
        threadsPerThreadgroup: MTLSize(width: groupWidth, height: 1, depth: 1))
    metalComputeCommandEncoder.endEncoding()
    print("outputVector before job is running: \(result.count)")

    // Submit and block until completion; only commit + wait is timed.
    metalCommandBuffer.enqueue()
    let submittedAt = NSDate()
    metalCommandBuffer.commit()
    metalCommandBuffer.waitUntilCompleted()
    print("Time to run network: \(NSDate().timeIntervalSinceDate(submittedAt))")

    // Read the GPU result back through a non-owning NSData view of the buffer.
    let gpuBytes = NSData(bytesNoCopy: resultBuffer.contents(),
        length: result.count * sizeof(Float), freeWhenDone: false)
    gpuBytes.getBytes(&result, length: elementCount * sizeof(Float))
    return result
}
/// Builds an array of `N` floats in which every element equals its own index.
///
/// - Parameter N: Number of elements to generate; must not be negative.
/// - Returns: `[0.0, 1.0, ..., Float(N - 1)]`, or an empty array when `N` is 0.
func createInputVector(N: Int) -> [Float] {
    // Map the index range directly instead of zero-filling the array and then
    // overwriting every slot in a second pass.
    return (0..<N).map { Float($0) }
}
}