Skip to content

Commit

Permalink
feat: Add rest binding for tuned models
Browse files Browse the repository at this point in the history
feat: Add question_answering and fact_verification task types for AQA
feat: Add output dimensionality for embeddings
docs: Lots of small fixes

PiperOrigin-RevId: 624205405
  • Loading branch information
Google APIs authored and Copybara-Service committed Apr 12, 2024
1 parent 13edbc1 commit f758dd6
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 23 deletions.
41 changes: 28 additions & 13 deletions google/ai/generativelanguage/v1/generative_service.proto
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ service GenerativeService {
option (google.api.http) = {
post: "/v1/{model=models/*}:generateContent"
body: "*"
additional_bindings {
post: "/v1/{model=tunedModels/*}:generateContent"
body: "*"
}
};
option (google.api.method_signature) = "model,contents";
}
Expand Down Expand Up @@ -105,6 +109,12 @@ enum TaskType {

// Specifies that the embeddings will be used for clustering.
CLUSTERING = 5;

// Specifies that the given text will be used for question answering.
QUESTION_ANSWERING = 6;

// Specifies that the given text will be used for fact verification.
FACT_VERIFICATION = 7;
}

// Request to generate a completion from the model.
Expand Down Expand Up @@ -152,7 +162,7 @@ message GenerateContentRequest {
message GenerationConfig {
// Optional. Number of generated responses to return.
//
// This value must be between [1, 8], inclusive. If unset, this will default
// Currently, this value can only be set to 1. If unset, this will default
// to 1.
optional int32 candidate_count = 1 [(google.api.field_behavior) = OPTIONAL];

Expand All @@ -164,18 +174,16 @@ message GenerationConfig {

// Optional. The maximum number of tokens to include in a candidate.
//
// If unset, this will default to output_token_limit specified in the `Model`
// specification.
// Note: The default value varies by model, see the `Model.output_token_limit`
// attribute of the `Model` returned from the `getModel` function.
optional int32 max_output_tokens = 4 [(google.api.field_behavior) = OPTIONAL];

// Optional. Controls the randomness of the output.
//
// Note: The default value varies by model, see the `Model.temperature`
// attribute of the `Model` returned the `getModel` function.
// attribute of the `Model` returned from the `getModel` function.
//
// Values can range from [0.0,1.0],
// inclusive. A value closer to 1.0 will produce responses that are more
// varied and creative, while a value closer to 0.0 will typically result in
// more straightforward responses from the model.
// Values can range from [0.0, 2.0].
optional float temperature = 5 [(google.api.field_behavior) = OPTIONAL];

// Optional. The maximum cumulative probability of tokens to consider when
Expand All @@ -189,18 +197,19 @@ message GenerationConfig {
// of tokens based on the cumulative probability.
//
// Note: The default value varies by model, see the `Model.top_p`
// attribute of the `Model` returned the `getModel` function.
// attribute of the `Model` returned from the `getModel` function.
optional float top_p = 6 [(google.api.field_behavior) = OPTIONAL];

// Optional. The maximum number of tokens to consider when sampling.
//
// The model uses combined Top-k and nucleus sampling.
//
// Models use nucleus sampling or combined Top-k and nucleus sampling.
// Top-k sampling considers the set of `top_k` most probable tokens.
// Defaults to 40.
// Models running with nucleus sampling don't allow top_k setting.
//
// Note: The default value varies by model, see the `Model.top_k`
// attribute of the `Model` returned the `getModel` function.
// attribute of the `Model` returned from the `getModel` function. Empty
// `top_k` field in `Model` indicates the model doesn't apply top-k sampling
// and doesn't allow setting `top_k` on requests.
optional int32 top_k = 7 [(google.api.field_behavior) = OPTIONAL];
}

Expand Down Expand Up @@ -330,6 +339,12 @@ message EmbedContentRequest {
// Note: Specifying a `title` for `RETRIEVAL_DOCUMENT` provides better quality
// embeddings for retrieval.
optional string title = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Reduced dimension for the output embedding. If set,
// excessive values in the output embedding are truncated from the end.
// Supported by `models/text-embedding-latest`.
optional int32 output_dimensionality = 5
[(google.api.field_behavior) = OPTIONAL];
}

// A list of floats representing an embedding.
Expand Down
15 changes: 7 additions & 8 deletions google/ai/generativelanguage/v1/generativelanguage_v1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,12 @@ apis:
documentation:
summary: |-
The Gemini API allows developers to build generative AI applications using
Gemini models. Gemini is our most capable model in the world, built from
the ground up to be multimodal. It can generalize and seamlessly
understand, operate across, and combine different types of information.
You can use the Gemini
API for use cases like reasoning across text and images, content
generation, dialogue agents, summarization and classification systems, and
more.
Gemini models. Gemini is our most capable model, built from the ground up
to be multimodal. It can generalize and seamlessly understand, operate
    across, and combine different types of information, including language,
images, audio, video, and code. You can use the Gemini API for use cases
like reasoning across text and images, content generation, dialogue
agents, summarization and classification systems, and more.
http:
rules:
Expand All @@ -33,7 +32,7 @@ http:

publishing:
new_issue_uri: https://github.com/google/generative-ai-python/issues/new
documentation_uri: https://developers.generativeai.google/
documentation_uri: https://ai.google.dev/docs
api_short_name: generativelanguage
github_label: 'api: ai'
doc_tag_prefix: generativelanguage
Expand Down
2 changes: 2 additions & 0 deletions google/ai/generativelanguage/v1/model.proto
Original file line number Diff line number Diff line change
Expand Up @@ -97,5 +97,7 @@ message Model {
// Top-k sampling considers the set of `top_k` most probable tokens.
// This value specifies the default to be used by the backend while making the
// call to the model.
// If empty, indicates the model doesn't use top-k sampling, and `top_k` isn't
// allowed as a generation parameter.
optional int32 top_k = 11;
}
4 changes: 2 additions & 2 deletions google/ai/generativelanguage/v1/safety.proto
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ enum HarmCategory {
// Negative or harmful comments targeting identity and/or protected attribute.
HARM_CATEGORY_DEROGATORY = 1;

// Content that is rude, disrepspectful, or profane.
// Content that is rude, disrespectful, or profane.
HARM_CATEGORY_TOXICITY = 2;

// Describes scenarios depictng violence against an individual or group, or
// Describes scenarios depicting violence against an individual or group, or
// general descriptions of gore.
HARM_CATEGORY_VIOLENCE = 3;

Expand Down

0 comments on commit f758dd6

Please sign in to comment.