From fdea53f759cb7a8e6167af8740ca9cfc850bfae9 Mon Sep 17 00:00:00 2001 From: Google APIs Date: Tue, 14 Nov 2023 10:20:51 -0800 Subject: [PATCH] feat: add ComputeTokens and CountTokens API PiperOrigin-RevId: 582364082 --- google/cloud/aiplatform/v1/BUILD.bazel | 2 + google/cloud/aiplatform/v1/aiplatform_v1.yaml | 9 ++ .../aiplatform/v1/llm_utility_service.proto | 100 ++++++++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 google/cloud/aiplatform/v1/llm_utility_service.proto diff --git a/google/cloud/aiplatform/v1/BUILD.bazel b/google/cloud/aiplatform/v1/BUILD.bazel index 7a0c5467db47b..7b0141d35e2ac 100644 --- a/google/cloud/aiplatform/v1/BUILD.bazel +++ b/google/cloud/aiplatform/v1/BUILD.bazel @@ -76,6 +76,7 @@ proto_library( "job_service.proto", "job_state.proto", "lineage_subgraph.proto", + "llm_utility_service.proto", "machine_resources.proto", "manual_batch_tuning_parameters.proto", "match_service.proto", @@ -216,6 +217,7 @@ java_gapic_test( "com.google.cloud.aiplatform.v1.IndexEndpointServiceClientTest", "com.google.cloud.aiplatform.v1.IndexServiceClientTest", "com.google.cloud.aiplatform.v1.JobServiceClientTest", + "com.google.cloud.aiplatform.v1.LlmUtilityServiceClientTest", "com.google.cloud.aiplatform.v1.MetadataServiceClientTest", "com.google.cloud.aiplatform.v1.MigrationServiceClientTest", "com.google.cloud.aiplatform.v1.ModelServiceClientTest", diff --git a/google/cloud/aiplatform/v1/aiplatform_v1.yaml b/google/cloud/aiplatform/v1/aiplatform_v1.yaml index 57d710fc705c9..75b34fa95902e 100644 --- a/google/cloud/aiplatform/v1/aiplatform_v1.yaml +++ b/google/cloud/aiplatform/v1/aiplatform_v1.yaml @@ -14,6 +14,7 @@ apis: - name: google.cloud.aiplatform.v1.IndexEndpointService - name: google.cloud.aiplatform.v1.IndexService - name: google.cloud.aiplatform.v1.JobService +- name: google.cloud.aiplatform.v1.LlmUtilityService - name: google.cloud.aiplatform.v1.MatchService - name: google.cloud.aiplatform.v1.MetadataService - name: 
google.cloud.aiplatform.v1.MigrationService @@ -610,6 +611,14 @@ authentication: canonical_scopes: |- https://www.googleapis.com/auth/cloud-platform, https://www.googleapis.com/auth/cloud-platform.read-only + - selector: google.cloud.aiplatform.v1.LlmUtilityService.ComputeTokens + oauth: + canonical_scopes: |- + https://www.googleapis.com/auth/cloud-platform + - selector: google.cloud.aiplatform.v1.LlmUtilityService.CountTokens + oauth: + canonical_scopes: |- + https://www.googleapis.com/auth/cloud-platform - selector: google.cloud.aiplatform.v1.MatchService.FindNeighbors oauth: canonical_scopes: |- diff --git a/google/cloud/aiplatform/v1/llm_utility_service.proto b/google/cloud/aiplatform/v1/llm_utility_service.proto new file mode 100644 index 0000000000000..3cd98bd083715 --- /dev/null +++ b/google/cloud/aiplatform/v1/llm_utility_service.proto @@ -0,0 +1,100 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+syntax = "proto3";
+
+package google.cloud.aiplatform.v1;
+
+import "google/api/annotations.proto";
+import "google/api/client.proto";
+import "google/api/field_behavior.proto";
+import "google/api/resource.proto";
+import "google/cloud/aiplatform/v1/prediction_service.proto";
+import "google/protobuf/struct.proto";
+
+option csharp_namespace = "Google.Cloud.AIPlatform.V1";
+option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
+option java_multiple_files = true;
+option java_outer_classname = "LlmUtilityServiceProto";
+option java_package = "com.google.cloud.aiplatform.v1";
+option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
+option ruby_package = "Google::Cloud::AIPlatform::V1";
+
+// Service for LLM related utility functions.
+service LlmUtilityService {
+  option (google.api.default_host) = "aiplatform.googleapis.com";
+  option (google.api.oauth_scopes) =
+      "https://www.googleapis.com/auth/cloud-platform";
+
+  // Perform a token counting.
+  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
+    option (google.api.http) = {
+      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:countTokens"
+      body: "*"
+      additional_bindings {
+        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:countTokens"
+        body: "*"
+      }
+    };
+    option (google.api.method_signature) = "endpoint,instances";
+  }
+
+  // Return a list of tokens based on the input text.
+  rpc ComputeTokens(ComputeTokensRequest) returns (ComputeTokensResponse) {
+    option (google.api.http) = {
+      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:computeTokens"
+      body: "*"
+      additional_bindings {
+        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:computeTokens"
+        body: "*"
+      }
+    };
+    option (google.api.method_signature) = "endpoint,instances";
+  }
+}
+
+// Request message for ComputeTokens RPC call.
+message ComputeTokensRequest {
+  // Required. The name of the Endpoint requested to get lists of tokens and
+  // token ids.
+  string endpoint = 1 [
+    (google.api.field_behavior) = REQUIRED,
+    (google.api.resource_reference) = {
+      type: "aiplatform.googleapis.com/Endpoint"
+    }
+  ];
+
+  // Required. The instances that are the input to token computing API call.
+  // Schema is identical to the prediction schema of the text model, even for
+  // the non-text models, like chat models, or Codey models.
+  repeated google.protobuf.Value instances = 2
+      [(google.api.field_behavior) = REQUIRED];
+}
+
+// Tokens info with a list of tokens and the corresponding list of token ids.
+message TokensInfo {
+  // A list of tokens from the input.
+  repeated bytes tokens = 1;
+
+  // A list of token ids from the input.
+  repeated int64 token_ids = 2;
+}
+
+// Response message for ComputeTokens RPC call.
+message ComputeTokensResponse {
+  // Lists of tokens info from the input. A ComputeTokensRequest could have
+  // multiple instances with a prompt in each instance. We also need to return
+  // lists of tokens info for the request with multiple instances.
+  repeated TokensInfo tokens_info = 1;
+}