From a5c3f325fcce5720d3ecca5b7770e97df06323e0 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Tue, 5 Mar 2024 01:11:08 +0530 Subject: [PATCH 01/11] Updated swift c module name --- mediapipe/tasks/cc/genai/inference/c/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediapipe/tasks/cc/genai/inference/c/BUILD b/mediapipe/tasks/cc/genai/inference/c/BUILD index 12a69fea43..ee40408cd6 100644 --- a/mediapipe/tasks/cc/genai/inference/c/BUILD +++ b/mediapipe/tasks/cc/genai/inference/c/BUILD @@ -21,7 +21,7 @@ cc_library( name = "libllm_inference_engine_cpu", srcs = ["llm_inference_engine_cpu.cc"], hdrs = ["llm_inference_engine.h"], - tags = ["swift_module=LlmInferenceEngineC"], + tags = ["swift_module=MediaPipeTasksGenAIC"], deps = [ "//mediapipe/framework/port:file_helpers", "//mediapipe/framework/port:ret_check", From 59fe2c224db66510ee019799dfff6d08e0f20d84 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Tue, 5 Mar 2024 01:11:44 +0530 Subject: [PATCH 02/11] Updated mediapipe/tasks/ios/genai/core/BUILD --- mediapipe/tasks/ios/genai/core/BUILD | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/mediapipe/tasks/ios/genai/core/BUILD b/mediapipe/tasks/ios/genai/core/BUILD index 41561cfc68..4879654218 100644 --- a/mediapipe/tasks/ios/genai/core/BUILD +++ b/mediapipe/tasks/ios/genai/core/BUILD @@ -12,20 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("@build_bazel_rules_swift//swift:swift.bzl", "swift_library") - licenses(["notice"]) package(default_visibility = ["//mediapipe/tasks:internal"]) -swift_library( - name = "LlmTaskRunner", - srcs = [ - "sources/GenAiInferenceError.swift", - "sources/LlmTaskRunner.swift", - ], - module_name = "LlmTaskRunner", - deps = [ - "//mediapipe/tasks/cc/genai/inference/c:libllm_inference_engine_cpu", - ], -) +exports_files(["sources/LlmTaskRunner.swift", "sources/GenAiInferenceError.swift"]) From 58d80fa72144e9dba623e803e25e7cce16b47281 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Tue, 5 Mar 2024 01:12:09 +0530 Subject: [PATCH 03/11] Added new error types to GenAiInferenceError --- .../core/sources/GenAiInferenceError.swift | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/mediapipe/tasks/ios/genai/core/sources/GenAiInferenceError.swift b/mediapipe/tasks/ios/genai/core/sources/GenAiInferenceError.swift index 20c62bdab5..80830a4df1 100644 --- a/mediapipe/tasks/ios/genai/core/sources/GenAiInferenceError.swift +++ b/mediapipe/tasks/ios/genai/core/sources/GenAiInferenceError.swift @@ -16,15 +16,22 @@ import Foundation /// Errors thrown by MediaPipe GenAI Tasks. public enum GenAiInferenceError: Error { - case invalidResponseError + case invalidResponse + case illegalMethodCall + case modelNotFound } extension GenAiInferenceError: LocalizedError { /// A localized description of the `GenAiInferenceError`. public var errorDescription: String? { switch self { - case .invalidResponseError: + case .invalidResponse: return "The response returned by the model is invalid." + case .illegalMethodCall: + return + "You cannot invoke `generateResponse` while another response generation invocation is in progress." + case .modelNotFound: + return "No file found at the `modelPath` you provided." 
} } } @@ -37,8 +44,12 @@ extension GenAiInferenceError: CustomNSError { public var errorCode: Int { switch self { - case .invalidResponseError: + case .invalidResponse: return 0 + case .illegalMethodCall: + return 1 + case .modelNotFound: + return 2 } } } From 4abe2e79963afac8fd66c79faf4ee4e913fedaf3 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Tue, 5 Mar 2024 01:13:48 +0530 Subject: [PATCH 04/11] Updated iOS LlmTaskRunner to be initialized from a config struct --- .../genai/core/sources/LlmTaskRunner.swift | 241 ++++++++++++++++-- mediapipe/tasks/ios/genai/inference/BUILD | 7 +- 2 files changed, 231 insertions(+), 17 deletions(-) diff --git a/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift b/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift index 7cc0c9c59b..04f8c88b9d 100644 --- a/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift +++ b/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift @@ -13,24 +13,65 @@ // limitations under the License. import Foundation -import LlmInferenceEngineC +import MediaPipeTasksGenAIC /// This class is used to create and call appropriate methods on the C `LlmInferenceEngine_Session` /// to initialize, execute and terminate any MediaPipe `LlmInference` task. -public final class LlmTaskRunner { - fileprivate typealias CLlmSession = UnsafeMutableRawPointer +/// Note: Tasks should not attempt to clear undeleted caches on initialization since the user can create +/// multiple instances of the task and there is no way of knowing whether they are still +/// active. Deleting caches of active task instances will result in crashes when the C++ +/// functions are invoked. +/// Instead tasks can encapsulate `clearAllCachedFiles()` to provide a function to delete +/// any undeleted caches when the user wishes to. +final class LlmTaskRunner { + private typealias CLlmSession = UnsafeMutableRawPointer + + private static let cacheSuffix = ".cache" + private static let globalCacheDirectory = FileManager.default.temporaryDirectory + .versionIndependentAppending(component: "mediapipe.genai.inference.cache") + private static let cacheDirectory = LlmTaskRunner.globalCacheDirectory + .versionIndependentAppending(component: "\(UUID().uuidString)") private let cLlmSession: CLlmSession + private let modelCacheFile: URL /// Creates a new instance of `LlmTaskRunner` with the given session config. /// /// - Parameters: /// - sessionConfig: C session config of type `LlmSessionConfig`. - public init(sessionConfig: LlmSessionConfig) { - /// No safe guards for session creation since the C APIs only throw fatal errors. - /// `LlmInferenceEngine_CreateSession()` will always return an llm session if the call + init(config: Config) throws { + guard FileManager.default.fileExists(atPath: config.modelPath), + let modelName = config.modelPath.components(separatedBy: "/").last + else { + throw GenAiInferenceError.modelNotFound + } + + /// Adding a `UUID` prefix to the cache path to prevent the app from crashing if a model cache + /// is already found in the temporary directory. + /// Cache will be deleted when the task runner is de-allocated. Preferring deletion on + /// de-allocation to deleting all caches on initialization to prevent model caches of + /// other task runners from being de-allocated prematurely during their lifetime. + /// + /// Note: No safe guards for session creation since the C APIs only throw fatal errors. + /// `LlmInferenceEngine_CreateSession()` will always return an llm session if the call /// completes.
- self.cLlmSession = withUnsafePointer(to: sessionConfig) { LlmInferenceEngine_CreateSession($0) } + cLlmSession = LlmTaskRunner.cacheDirectory.path.withCString { cCacheDir in + return config.modelPath.withCString { cModelPath in + let cSessionConfig = LlmSessionConfig( + model_path: cModelPath, + cache_dir: cCacheDir, + sequence_batch_size: Int(config.sequenceBatchSize), + num_decode_steps_per_sync: Int(config.numberOfDecodeStepsPerSync), + max_tokens: Int(config.maxTokens), + topk: Int(config.topk), + temperature: config.temperature, + random_seed: config.randomSeed) + return withUnsafePointer(to: cSessionConfig) { LlmInferenceEngine_CreateSession($0) } + } + } + + modelCacheFile = LlmTaskRunner.cacheDirectory.versionIndependentAppending( + component: "\(modelName)\(LlmTaskRunner.cacheSuffix)") } /// Invokes the C inference engine with the given input text to generate an array of `String` @@ -39,7 +80,7 @@ public final class LlmTaskRunner { /// - Parameters: /// - inputText: A `String` that is used to query the LLM. /// - Throws: An error if the LLM's response is invalid. - public func predict(inputText: String) throws -> [String] { + func predict(inputText: String) throws -> [String] { /// No safe guards for the call since the C++ APIs only throw fatal errors. /// `LlmInferenceEngine_Session_PredictSync()` will always return a `LlmResponseContext` if the /// call completes. @@ -53,25 +94,195 @@ public final class LlmTaskRunner { } } - /// Throw an error if the response array is `NULL`. + guard let responseStrings = LlmTaskRunner.responseStrings(from: responseContext) else { + throw GenAiInferenceError.invalidResponse + } + + return responseStrings + } + + func predict( + inputText: String, progress: @escaping (_ partialResult: [String]?, _ error: Error?) -> Void, + completion: @escaping (() -> Void) + ) { + + /// `strdup(inputText)` prevents input text from being deallocated as long as callbacks are + /// being invoked. `CallbackInfo` takes care of freeing the memory of `inputText` when it is + /// deallocated. + let callbackInfo = CallbackInfo( + inputText: strdup(inputText), progress: progress, completion: completion) + let callbackContext = UnsafeMutableRawPointer(Unmanaged.passRetained(callbackInfo).toOpaque()) + + LlmInferenceEngine_Session_PredictAsync(cLlmSession, callbackContext, callbackInfo.inputText) { + context, responseContext in + guard let cContext = context else { + return + } + + /// `takeRetainedValue()` decrements the reference count incremented by `passRetained()`. Only + /// take a retained value if the LLM has finished generating responses to prevent the context + /// from being deallocated in between response generation. + let cCallbackInfo = + responseContext.done + ? Unmanaged<CallbackInfo>.fromOpaque(cContext).takeRetainedValue() + : Unmanaged<CallbackInfo>.fromOpaque(cContext).takeUnretainedValue() + + if let responseStrings = LlmTaskRunner.responseStrings(from: responseContext) { + cCallbackInfo.progress(responseStrings, nil) + } else { + cCallbackInfo.progress(nil, GenAiInferenceError.invalidResponse) + } + + /// Call completion callback if LLM has generated its last response. + if responseContext.done { + cCallbackInfo.completion() + } + } + } + + /// Clears all cached files created by `LlmInference` to prevent exponential growth of your app + /// size. Please ensure that this method is not called during the lifetime of any instances of + /// `LlmTaskRunner`.
+ static func clearAllCachedFiles() throws { + // Delete directory + try FileManager.default.removeItem(at: LlmTaskRunner.globalCacheDirectory) + } + + deinit { + LlmInferenceEngine_Session_Delete(cLlmSession) + + /// Responsibly deleting the model cache. + /// Performing on current thread since only one file needs to be deleted. + /// + /// Note: Implementation will have to be updated if C++ core changes the cache prefix. + /// + /// Note: `deinit` does not get invoked in the following circumstances: + /// 1. If a crash occurs before the task runner is de-allocated. + /// 2. If an instance of the task is created from `main()` and the app is terminated. + /// For example, if the task is an instance variable of the main `ViewController` which doesn't + /// get destroyed until the app quits. + /// Task interfaces that use the task runner should additionally provide a function that + /// encapsulates `LlmTaskRunner.clearAllCachedFiles()` to clean up any undeleted caches to + /// avoid exponential growth in app size. OS clears these directories only if the device runs + /// out of storage space. + /// Tasks should not attempt to clear undeleted caches on initialization since the user can create + /// multiple instances of the task and there is no way of knowing whether they are still + /// active. Deleting caches of active task instances will result in crashes when the C++ + /// functions are invoked. + do { + try FileManager.default.removeItem(at: modelCacheFile) + } catch { + // Could not delete file. Common cause: file not found. + } + } +} + +extension LlmTaskRunner { + /// Configuration for setting up a `LlmTaskRunner`. + struct Config { + /// The absolute path to the model asset bundle stored locally on the device. + let modelPath: String + + let sequenceBatchSize: UInt + + let numberOfDecodeStepsPerSync: UInt + + /// The total length of the kv-cache. In other words, this is the total number of input + output + /// tokens the model needs to handle. + let maxTokens: UInt + + /// The top K number of tokens to be sampled from for each decoding step. A value of 1 means + /// greedy decoding. Defaults to 40. + let topk: UInt + + /// The randomness when decoding the next token. A value of 0.0f means greedy decoding. Defaults + /// to 0.8. + let temperature: Float + + /// The random seed for sampling tokens. + let randomSeed: Int + + /// Creates a new instance of `Config` with the provided values. + /// + /// - Parameters: + /// - modelPath: The absolute path to a model asset bundle stored locally on the device. + /// - sequenceBatchSize: Sequence batch size for encoding. Used by GPU only. Number of + /// input tokens to process at a time for batch processing. Setting this value to 1 means both + /// the encoding and decoding share the same graph of sequence length of 1. Setting this value + /// to 0 means the batch size will be optimized + /// programmatically. + /// - numberOfDecodeStepsPerSync: Number of decode steps per sync. Used by GPU only. + /// The default value is 3. + /// - maxTokens: Maximum number of tokens for input and output. + /// - topk: Top K number of tokens to be sampled from for each decoding step. + /// - temperature: Randomness when decoding the next token, 0.0f means greedy decoding. + /// - randomSeed: Random seed for sampling tokens.
+ init( + modelPath: String, sequenceBatchSize: UInt, numberOfDecodeStepsPerSync: UInt, maxTokens: UInt, + topk: UInt, temperature: Float, randomSeed: Int + ) { + self.modelPath = modelPath + self.sequenceBatchSize = sequenceBatchSize + self.numberOfDecodeStepsPerSync = numberOfDecodeStepsPerSync + self.maxTokens = maxTokens + self.topk = topk + self.temperature = temperature + self.randomSeed = randomSeed + } + } +} + +private extension LlmTaskRunner { + /// A wrapper class whose object will be used as the C++ callback context. + /// The progress and completion callbacks cannot be invoked without a context. + class CallbackInfo { + typealias ProgressCallback = (_ partialResult: [String]?, _ error: Error?) -> Void + typealias CompletionCallback = () -> Void + + let inputText: UnsafeMutablePointer<CChar>? + let progress: ProgressCallback + let completion: CompletionCallback + + init( + inputText: UnsafeMutablePointer<CChar>?, progress: @escaping (ProgressCallback), + completion: @escaping (CompletionCallback) + ) { + self.inputText = inputText + self.progress = progress + self.completion = completion + } + + deinit { + free(inputText) + } + } +} + +private extension LlmTaskRunner { + class func responseStrings(from responseContext: LlmResponseContext) -> [String]? { guard let cResponseArray = responseContext.response_array else { - throw GenAiInferenceError.invalidResponseError + return nil } var responseStrings: [String] = [] for responseIndex in 0..<Int(responseContext.response_count) { guard let cResponseString = cResponseArray[responseIndex] else { return nil } responseStrings.append(String(cString: cResponseString)) } return responseStrings } } +extension URL { + func versionIndependentAppending(component: String) -> URL { + if #available(iOS 16, *) { + return self.appending(component: component) + } else { + return self.appendingPathComponent(component) + } } - } diff --git a/mediapipe/tasks/ios/genai/inference/BUILD b/mediapipe/tasks/ios/genai/inference/BUILD index 9ba524262d..83ce1a47ad 100644 --- a/mediapipe/tasks/ios/genai/inference/BUILD +++ b/mediapipe/tasks/ios/genai/inference/BUILD @@ -22,9 +22,12 @@ swift_library( name = "LlmInference", srcs = [ "sources/LlmInference.swift", + "//mediapipe/tasks/ios/genai/core:sources/LlmTaskRunner.swift", + "//mediapipe/tasks/ios/genai/core:sources/GenAiInferenceError.swift", ], + copts = ["-no-verify-emitted-module-interface"], + module_name = "MediaPipeTasksGenAI", generated_header_name = "LlmInference-Swift.h", generates_header = 1, - module_name = "LlmInference", - deps = ["//mediapipe/tasks/ios/genai/core:LlmTaskRunner"], + deps = ["//mediapipe/tasks/cc/genai/inference/c:libllm_inference_engine_cpu",], ) From d4e7b402405e353a119ebbd98ed5033f2ae7705b Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Tue, 5 Mar 2024 01:14:13 +0530 Subject: [PATCH 05/11] Updated interface of iOS LlmInference --- .../inference/sources/LlmInference.swift | 150 ++++++++++++++---- 1 file changed, 117 insertions(+), 33 deletions(-) diff --git a/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift b/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift index d066af9012..778ad3f9c2 100644 --- a/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift +++ b/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift @@ -13,42 +13,55 @@ // limitations under the License. import Foundation -import LlmInferenceEngineC -import LlmTaskRunner /// A MediaPipe task that performs inference using a given Large Language Model. /// /// Note: Inherits from `NSObject` for Objective C interoperability.
-@objc(MPPLLMInference) public final class LlmInference: NSObject { - private static let numberOfDecodeStepsPerSync = 3 - private static let sequenceBatchSize = 0 +@objc(MPPLlmInference) public final class LlmInference: NSObject { + private static let numberOfDecodeStepsPerSync: UInt = 3 + private static let sequenceBatchSize: UInt = 0 + private static let responseGenerationInProgressQueueName = + "com.google.mediapipe.genai.isResponseGenerationInProgressQueue" private let llmTaskRunner: LlmTaskRunner + private let responseGenerationInProgressQueue = DispatchQueue( + label: LlmInference.responseGenerationInProgressQueueName, + attributes: .concurrent) + + /// Tracks whether a response generation is in progress. + /// Readers writers lock to prevent race condition as this variable can be accessed from multiple + /// threads. + private var responseGenerationInProgressInternal = false + private var responseGenerationInProgress: Bool { + get { + responseGenerationInProgressQueue.sync { + return self.responseGenerationInProgressInternal + } + } + set { + responseGenerationInProgressQueue.async(flags: .barrier) { + self.responseGenerationInProgressInternal = newValue + } + } + } + /// Creates a new instance of `LlmInference` with the given options. /// /// - Parameters: /// - options: The options of type `LlmInference.Options` to use for configuring the /// `LlmInference`. - @objc public init(options: Options) { - let modelPath = strdup(options.modelPath) - let cacheDirectory = strdup(FileManager.default.temporaryDirectory.path) - - defer { - free(modelPath) - free(cacheDirectory) - } - - let sessionConfig = LlmSessionConfig( - model_path: modelPath, - cache_dir: cacheDirectory, - sequence_batch_size: LlmInference.sequenceBatchSize, - num_decode_steps_per_sync: LlmInference.numberOfDecodeStepsPerSync, - max_tokens: options.maxTokens, + @objc public init(options: Options) throws { + let taskRunnerConfig = LlmTaskRunner.Config( + modelPath: options.modelPath, + sequenceBatchSize: LlmInference.sequenceBatchSize, + numberOfDecodeStepsPerSync: LlmInference.numberOfDecodeStepsPerSync, + maxTokens: options.maxTokens, topk: options.topk, temperature: options.temperature, - random_seed: options.randomSeed) - llmTaskRunner = LlmTaskRunner(sessionConfig: sessionConfig) + randomSeed: options.randomSeed) + + llmTaskRunner = try LlmTaskRunner(config: taskRunnerConfig) super.init() } @@ -58,9 +71,9 @@ import LlmTaskRunner /// /// - Parameters: /// - modelPath: The absolute path to a model asset bundle stored locally on the device. - @objc public convenience init(modelPath: String) { + @objc public convenience init(modelPath: String) throws { let options = Options(modelPath: modelPath) - self.init(options: options) + try self.init(options: options) } /// Generates a response based on the input text. @@ -69,16 +82,85 @@ import LlmTaskRunner /// - inputText: A `String` that is used to query the LLM. /// - Throws: An error if the LLM's response is invalid. @objc public func generateResponse(inputText: String) throws -> String { + + /// Disallow response generation if another response generation call is already in progress. 
+ try shouldContinueWithResponseGeneration() + let tokens = try llmTaskRunner.predict(inputText: inputText) + + responseGenerationInProgress = false + guard let humanReadableLlmResponse = LlmInference.humanReadableString(llmResponses: tokens) else { - throw GenAiInferenceError.invalidResponseError + throw GenAiInferenceError.invalidResponse } return humanReadableLlmResponse } - private static func humanReadableString( + /// Generates a response based on the input text asynchronously. The `progess` callback returns + /// the partial responses from the LLM or any errors. `completion` callback is invoked once the + /// LLM is done generating responses. + /// + /// - Parameters: + /// - progess: A callback invoked when a partial response is available from the LLM. + /// - completion: A callback invoked when the LLM finishes response generation. + /// - Throws: An error if the LLM's response is invalid. + @objc public func generateResponse( + inputText: String, + progress: @escaping (_ partialResponse: String?, _ error: Error?) -> Void, + completion: @escaping (() -> Void) + ) throws { + /// Disallow response generation if another response generation call is already in progress. + try shouldContinueWithResponseGeneration() + + /// Used to make a decision about whitespace stripping. + var receivedFirstToken = true + + llmTaskRunner.predict( + inputText: inputText, + progress: { partialResponseStrings, error in + + guard let responseStrings = partialResponseStrings, + let humanReadableLlmResponse = LlmInference.humanReadableString( + llmResponses: responseStrings, stripLeadingWhitespaces: receivedFirstToken) + else { + progress(nil, GenAiInferenceError.invalidResponse) + return + } + + /// Reset state after first response is processed. + receivedFirstToken = false + + progress(humanReadableLlmResponse, nil) + }, + completion: { [weak self] in + self?.responseGenerationInProgress = false + completion() + }) + } + + /// Clears all cached files created by `LlmInference` to prevent exponential growth of your app + /// size. Please ensure that this method is not called during the lifetime of any instances of + /// `LlmInference`. If the cache is deleted while an instance of `LlmInference` is in scope, + /// calling one of its methods will result in undefined behaviour and may lead to a crash. + /// + /// This method blocks the thread on which it runs. Invoke this function from a background thread + /// to avoid blocking the thread.x + public class func clearAllCachedFiles() throws { + try LlmTaskRunner.clearAllCachedFiles() + } + + /// Throw error if response generation is in progress or update response generation state. + private func shouldContinueWithResponseGeneration() throws { + if responseGenerationInProgress { + throw GenAiInferenceError.illegalMethodCall + } + + responseGenerationInProgress = true + } + + private class func humanReadableString( llmResponses: [String], stripLeadingWhitespaces: Bool = true ) -> String? { guard let llmResponse = llmResponses.first else { @@ -100,11 +182,11 @@ extension LlmInference { /// The total length of the kv-cache. In other words, this is the total number of input + output /// tokens the model needs to handle. - @objc public var maxTokens: Int = 512 + @objc public var maxTokens: UInt = 512 /// The top K number of tokens to be sampled from for each decoding step. A value of 1 means /// greedy decoding. Defaults to 40. - @objc public var topk: Int = 40 + @objc public var topk: UInt = 40 /// The randomness when decoding the next token. 
A value of 0.0f means greedy decoding. Defaults /// to 0.8. @@ -123,16 +205,18 @@ extension LlmInference { self.modelPath = modelPath super.init() } + } } /// An extension to `String` to add some utility functions. -extension String { - fileprivate static let tokenSplitter = "▁" /// Note this is NOT an underscore: ▁(U+2581) - fileprivate static let newLine = "<0x0A>" - fileprivate static let eod = "\\[eod\\]" +fileprivate extension String { + private static let tokenSplitter = "▁" + /// Note this is NOT an underscore: ▁(U+2581) + private static let newLine = "<0x0A>" + private static let eod = "\\[eod\\]" - fileprivate func humanReadableString(stripLeadingWhitespaces: Bool = true) -> String? { + func humanReadableString(stripLeadingWhitespaces: Bool = true) -> String? { var humanReadableString = self.replacingOccurrences(of: String.tokenSplitter, with: " ") .replacingOccurrences(of: String.newLine, with: "\n") humanReadableString = From 281907abf88e4027c413a0d0aa8f46a76458de93 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Tue, 5 Mar 2024 01:15:55 +0530 Subject: [PATCH 06/11] Added framework build scripts for iOS Gen AI frameworks --- mediapipe/tasks/ios/BUILD | 42 ++++++++++++++++++++++ mediapipe/tasks/ios/build_ios_framework.sh | 16 +++++++-- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/mediapipe/tasks/ios/BUILD b/mediapipe/tasks/ios/BUILD index ad3d6d1c17..b91466d503 100644 --- a/mediapipe/tasks/ios/BUILD +++ b/mediapipe/tasks/ios/BUILD @@ -91,6 +91,7 @@ MEDIAPIPE_TASKS_COMMON_DEPS = OBJC_TASK_COMMON_DEPS + TENSORFLOW_LITE_C_DEPS + [ strip_api_include_path_prefix( name = "strip_api_include_path", hdr_labels = [ + "//mediapipe/tasks/cc/genai/inference/c:llm_inference_engine.h", "//mediapipe/tasks/ios/common:sources/MPPCommon.h", "//mediapipe/tasks/ios/components/containers:sources/MPPCategory.h", "//mediapipe/tasks/ios/components/containers:sources/MPPClassificationResult.h", @@ -321,3 +322,44 @@ apple_static_xcframework( # built as static libraries and force loaded. deps = MEDIAPIPE_TASKS_COMMON_DEPS + select(OPENCV_DEPS), ) + +apple_static_xcframework( + name = "MediaPipeTasksGenAI_framework", + avoid_deps = ["//mediapipe/tasks/cc/genai/inference/c:libllm_inference_engine_cpu",], + bundle_name = "MediaPipeTasksGenAI", + ios = { + "simulator": [ + "arm64", + "x86_64", + ], + "device": ["arm64"], + }, + minimum_os_versions = { + "ios": MPP_TASK_MINIMUM_OS_VERSION, + }, + deps = [ + "//mediapipe/tasks/ios/genai/inference:LlmInference", + "//mediapipe/tasks/cc/genai/inference/c:libllm_inference_engine_cpu", + ], +) + +apple_static_xcframework( + name = "MediaPipeTasksGenAIC_framework", + bundle_name = "MediaPipeTasksGenAIC", + ios = { + "simulator": [ + "arm64", + "x86_64", + ], + "device": ["arm64"], + }, + minimum_os_versions = { + "ios": MPP_TASK_MINIMUM_OS_VERSION, + }, + public_hdrs = [ + ":llm_inference_engine.h", + ], + deps = [ + "//mediapipe/tasks/cc/genai/inference/c:libllm_inference_engine_cpu", + ], +) diff --git a/mediapipe/tasks/ios/build_ios_framework.sh b/mediapipe/tasks/ios/build_ios_framework.sh index ddb186ee93..4a86375ee0 100755 --- a/mediapipe/tasks/ios/build_ios_framework.sh +++ b/mediapipe/tasks/ios/build_ios_framework.sh @@ -56,8 +56,12 @@ case $FRAMEWORK_NAME in ;; "MediaPipeTasksText") ;; + "MediaPipeTasksGenAIC") + ;; + "MediaPipeTasksGenAI") + ;; *) - echo "Wrong framework name. The following framework names are allowed: MediaPipeTasksText, MediaPipeTasksVision, MediaPipeTasksCommon" + echo "Wrong framework name. 
The following framework names are allowed: MediaPipeTasksText, MediaPipeTasksVision, MediaPipeTasksCommon, MediaPipeTasksGenAI, MediaPipeTasksGenAIC" exit 1 ;; esac @@ -90,7 +94,6 @@ EOF function build_ios_frameworks_and_libraries { local TARGET_PREFIX="//mediapipe/tasks/ios" FULL_FRAMEWORK_TARGET="${TARGET_PREFIX}:${FRAMEWORK_NAME}_framework" - FULL_GRAPH_LIBRARY_TARGET="${TARGET_PREFIX}:${FRAMEWORK_NAME}_GraphLibrary" # .bazelrc sets --apple_generate_dsym=true by default which bloats the libraries to sizes of # the order of GBs. All iOS framework and library build commands for distribution via @@ -99,6 +102,15 @@ function build_ios_frameworks_and_libraries { # Build Task Library xcframework. local FRAMEWORK_CQUERY_COMMAND="-c opt --config=ios_sim_device_fat --apple_generate_dsym=false --define OPENCV=source ${FULL_FRAMEWORK_TARGET}" + + case $FRAMEWORK_NAME in + "MediaPipeTasksGenAI" | "MediaPipeTasksGenAIC") + FRAMEWORK_CQUERY_COMMAND="-c opt --config=ios_sim_device_fat --apple_generate_dsym=false ${FULL_FRAMEWORK_TARGET}" + ;; + *) + ;; + esac + ${BAZEL} build ${FRAMEWORK_CQUERY_COMMAND} IOS_FRAMEWORK_PATH="$(get_output_file_path "${FRAMEWORK_CQUERY_COMMAND}")" From 92b069407196afa57c23a40978416801f7baeee8 Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Tue, 5 Mar 2024 19:56:47 +0530 Subject: [PATCH 07/11] Updated function that clears cache in LlmInference to be a non-blocking call --- .../genai/core/sources/LlmTaskRunner.swift | 14 ++++++++++--- .../inference/sources/LlmInference.swift | 20 +++++++++++++------ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift b/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift index 1b496332c6..77c0664253 100644 --- a/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift +++ b/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift @@ -35,6 +35,7 @@ final class LlmTaskRunner { private let cLlmSession: CLlmSession private let modelCacheFile: URL + /// Creates a new instance of `LlmTaskRunner` with the given session config. /// /// - Parameters: @@ -144,14 +145,21 @@ final class LlmTaskRunner { /// Clears all cached files created by `LlmInference` to prevent exponential growth of your app /// size. Please ensure that this method is not called during the lifetime of any instances of /// `LlmTaskRunner`. - static func clearAllCachedFiles() throws { + class func clearAllCachedFiles() { // Delete directory - try FileManager.default.removeItem(at: LlmTaskRunner.globalCacheDirectory) + do { + try FileManager.default.removeItem(at: LlmTaskRunner.globalCacheDirectory) + print("Success on deleting") + } + catch { + print("Error in deleting") + /// Errors thrown are not relevant to the user. They are usually not found errors.
/// diff --git a/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift b/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift index 2ca29abfc6..0e91fff8aa 100644 --- a/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift +++ b/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift @@ -20,11 +20,16 @@ import Foundation @objc(MPPLlmInference) public final class LlmInference: NSObject { private static let numberOfDecodeStepsPerSync: UInt = 3 private static let sequenceBatchSize: UInt = 0 + private static let cacheCleanupQueueName = "com.google.mediapipe.genai.cacheCleanupQueue.\(UUID().uuidString)" private static let responseGenerationInProgressQueueName = - "com.google.mediapipe.genai.isResponseGenerationInProgressQueue" + "com.google.mediapipe.genai.isResponseGenerationInProgressQueue.\(UUID().uuidString)" + /// Serial queue for cache cleanup. + private static let cacheCleanupQueue = DispatchQueue( + label: cacheCleanupQueueName) private let llmTaskRunner: LlmTaskRunner + /// Concurrent queue to implement readers-writers lock on `responseGenerationInProgress`. private let responseGenerationInProgressQueue = DispatchQueue( label: LlmInference.responseGenerationInProgressQueueName, attributes: .concurrent) @@ -144,11 +149,14 @@ import Foundation /// size. Please ensure that this method is not called during the lifetime of any instances of /// `LlmInference`. If the cache is deleted while an instance of `LlmInference` is in scope, /// calling one of its methods will result in undefined behaviour and may lead to a crash. - /// - /// This method blocks the thread on which it runs. Invoke this function from a background thread - /// to avoid blocking the thread.x - public class func clearAllCachedFiles() throws { - try LlmTaskRunner.clearAllCachedFiles() + public class func clearAllCachedFiles(completion: @escaping(() -> Void)) { + /// Asynchronously deleting the files to prevent blocking the current thread as there may be + /// multiple undeleted weight caches. Choosing a serial queue to let callers wait until the + /// previous call for deletion is completed. + cacheCleanupQueue.async { + LlmTaskRunner.clearAllCachedFiles() + completion() + } } /// Throw error if response generation is in progress or update response generation state. From 05ab9b405162daf9f1ef5a5e7d57fc29706d3b7e Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Tue, 5 Mar 2024 20:01:25 +0530 Subject: [PATCH 08/11] Removed logs in iOS LlmTaskRunner --- mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift | 2 -- 1 file changed, 2 deletions(-) diff --git a/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift b/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift index 77c0664253..c50be05f2d 100644 --- a/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift +++ b/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift @@ -149,10 +149,8 @@ final class LlmTaskRunner { // Delete directory do { try FileManager.default.removeItem(at: LlmTaskRunner.globalCacheDirectory) - print("Success on deleting") } catch { - print("Error in deleting") /// Errors thrown are not relevant to the user. They are usually not found errors.
} } From dcc2b1b17a9504a21dd51c2d257dab72f6e3f37a Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Tue, 5 Mar 2024 20:02:12 +0530 Subject: [PATCH 09/11] Updated scope of string extension in iOS LlmInference --- mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift b/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift index 0e91fff8aa..5c2a8ac3e3 100644 --- a/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift +++ b/mediapipe/tasks/ios/genai/inference/sources/LlmInference.swift @@ -218,7 +218,7 @@ extension LlmInference { } /// An extension to `String` to add some utility functions. -extension String { +fileprivate extension String { private static let tokenSplitter = "▁" /// Note this is NOT an underscore: ▁(U+2581) private static let newLine = "<0x0A>" From 850f26931d9d338900306d9729c8d77a9ff0996d Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Tue, 5 Mar 2024 20:09:25 +0530 Subject: [PATCH 10/11] Removed duplicate entry of header file in tasks/ios/BUILD --- mediapipe/tasks/ios/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/mediapipe/tasks/ios/BUILD b/mediapipe/tasks/ios/BUILD index 41f04ab2b8..9bb2c7c999 100644 --- a/mediapipe/tasks/ios/BUILD +++ b/mediapipe/tasks/ios/BUILD @@ -91,7 +91,6 @@ MEDIAPIPE_TASKS_COMMON_DEPS = OBJC_TASK_COMMON_DEPS + TENSORFLOW_LITE_C_DEPS + [ strip_api_include_path_prefix( name = "strip_api_include_path", hdr_labels = [ - "//mediapipe/tasks/cc/genai/inference/c:llm_inference_engine.h", "//mediapipe/tasks/ios/common:sources/MPPCommon.h", "//mediapipe/tasks/ios/components/containers:sources/MPPCategory.h", "//mediapipe/tasks/ios/components/containers:sources/MPPClassificationResult.h", From 49c2a2fc6919f109eba74e283cfdca5494f16fea Mon Sep 17 00:00:00 2001 From: Prianka Liz Kariat Date: Tue, 5 Mar 2024 23:32:34 +0530 Subject: [PATCH 11/11] Removed unwanted comments from iOS LlmTaskRunner --- mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift | 1 - 1 file changed, 1 deletion(-) diff --git a/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift b/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift index c50be05f2d..93199b362b 100644 --- a/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift +++ b/mediapipe/tasks/ios/genai/core/sources/LlmTaskRunner.swift @@ -273,7 +273,6 @@ private extension LlmTaskRunner { var responseStrings: [String] = [] for responseIndex in 0..
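Usage sketch (illustrative; not part of the committed patches above): the following shows how the Swift API introduced by this series might be exercised from an app, assuming the `MediaPipeTasksGenAI` module and the `LlmInference` surface visible in the diffs (`init(options:)`, `generateResponse(inputText:)`, the streaming variant with `progress`/`completion` callbacks, and `clearAllCachedFiles(completion:)`); the model path and prompt strings are placeholders.

import MediaPipeTasksGenAI

func runLlmInference(modelPath: String) throws {
  // Configure the task; these values mirror the defaults declared in `LlmInference.Options`.
  let options = LlmInference.Options(modelPath: modelPath)
  options.maxTokens = 512
  options.topk = 40
  options.temperature = 0.8

  // Initialization now throws (e.g. `GenAiInferenceError.modelNotFound`) instead of trapping.
  let llmInference = try LlmInference(options: options)

  // Synchronous generation.
  let response = try llmInference.generateResponse(inputText: "Write a haiku about Swift.")
  print(response)

  // Streaming generation; calling this again before `completion` fires throws `.illegalMethodCall`.
  try llmInference.generateResponse(
    inputText: "Now explain the haiku.",
    progress: { partialResponse, error in
      if let partialResponse = partialResponse { print(partialResponse, terminator: "") }
      if let error = error { print("Response generation failed: \(error)") }
    },
    completion: { print("\nDone.") })
}

// Per the doc comments in PATCH 07, call this only when no `LlmInference` instance is alive.
func clearGenAiCaches() {
  LlmInference.clearAllCachedFiles {
    print("Deleted cached model files.")
  }
}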