gemini-cli-extensions · omkargaikwad23 · May 5, 2026 · May 4, 2026 · May 4, 2026 · May 4, 2026
@@ -83,4 +83,8 @@
 
 - name: 'release-please:force-run'
   color: bdca82
-  description: Manually trigger the release please workflow on a PR.
+  description: Manually trigger the release please workflow on a PR.
+
+- name: 'ci:run-evals'
+  color: 4285f4
+  description: Manually trigger the evaluation CI pipeline on a PR.
@@ -0,0 +1,106 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+options:
+  logging: CLOUD_LOGGING_ONLY
+
+steps:
+
+  # --- Evaluation Step ---
+  - name: 'us-central1-docker.pkg.dev/cloud-db-nl2sql/evalbench/eval_server:latest'
+    entrypoint: 'bash'
+    # Exposes the MSSQL_DB_PASSWORD and GITHUB_TOKEN secrets from Secret Manager as environment variables
+    secretEnv: ['MSSQL_DB_PASSWORD', 'GITHUB_TOKEN']
+    args:
+      - '-c'
+      - |
+        set -e
+
+        echo "Fetching PR data from GitHub API..."
+
+        # Fetch PR data and status code
+        HTTP_STATUS=$(curl -s -o pr_data.json -w "%{http_code}" -H "Authorization: token $$GITHUB_TOKEN" \
+          "https://api.github.com/repos/$REPO_FULL_NAME/pulls/$_PR_NUMBER")
+
+        if [ "$$HTTP_STATUS" -ne 200 ]; then
+          echo "Error fetching PR data: HTTP $$HTTP_STATUS"
+          cat pr_data.json
+          exit 1
+        fi
+
+        PR_DATA=$(cat pr_data.json)
+
+        # Extract labels and title from PR data (Use $$ to escape bash variables)
+        PR_LABELS=$(echo "$$PR_DATA" | jq -r '[.labels[].name] | join(",")')
+        PR_TITLE=$(echo "$$PR_DATA" | jq -r '.title')
+
+        # Check if execution labels are present
+        if [[ "$$PR_LABELS" != *"autorelease: pending"* && "$$PR_LABELS" != *"ci:run-evals"* ]]; then
+          echo "PR does not have 'autorelease: pending' or 'ci:run-evals' label. Skipping execution."
+          exit 0
+        fi
+        echo "Execution label detected. Processing release version context..."
+
+        # Determine the release version: parsed from the PR title on release-please branches, otherwise a placeholder built from the PR number
+        if [[ "$_HEAD_BRANCH" == release-please-* ]]; then
+          if [[ "$$PR_TITLE" =~ release\ ([0-9]+\.[0-9]+\.[0-9]+) ]]; then
+            export RELEASE_VERSION="$${BASH_REMATCH[1]}"
+          else
+            export RELEASE_VERSION="pr-$_PR_NUMBER-release-unknown"
+          fi
+        else
+          export RELEASE_VERSION="pr-$_PR_NUMBER-ci-run-evals"
+        fi
+
+        # Workaround for evalbench bug: settings are only applied if path basename matches extension ID
+        ln -s /workspace /workspace/cloud-sql-sqlserver
+        cd /evalbench
+
+        # evalbench specific environment variables
+        export EVAL_GCP_PROJECT_ID=$PROJECT_ID
+        export EVAL_GCP_PROJECT_REGION=$_CLOUD_SQL_REGION
+        export EVAL_REPORTING_PROJECT=$_EVAL_REPORTING_PROJECT
+        export GOOGLE_CLOUD_PROJECT=$PROJECT_ID
+
+        # Cloud SQL SQL Server specific environment variables
+        export CLOUD_SQL_MSSQL_PROJECT=$PROJECT_ID
+        export CLOUD_SQL_MSSQL_INSTANCE=$_CLOUD_SQL_INSTANCE
+        export CLOUD_SQL_MSSQL_REGION=$_CLOUD_SQL_REGION
+        export CLOUD_SQL_MSSQL_DATABASE=$_CLOUD_SQL_DATABASE
+        export CLOUD_SQL_MSSQL_USER=$_CLOUD_SQL_USER
+        export CLOUD_SQL_MSSQL_IP_TYPE=$_CLOUD_SQL_IP_TYPE
+
+        # Maps the decrypted MSSQL_DB_PASSWORD to the exact variable expected by gemini_cli and extension skills
+        export CLOUD_SQL_MSSQL_PASSWORD=$$MSSQL_DB_PASSWORD
+
+        # Combine CI metadata with run config
+        cat /workspace/evals/ci_metadata.yaml >> /workspace/evals/run_config.yaml
+
+        # Substitute environment variables in model_config.yaml, run_config.yaml and dataset.json
+        python3 /workspace/evals/substitute_env.py
+
+        cd /evalbench
+        export PYTHONPATH=./evalbench:./evalbench/evalproto
+        export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+
+        echo "Launching Standalone Evaluation..."
+        python3 evalbench/evalbench.py --experiment_config=/workspace/evals/run_config.yaml
+
+
+availableSecrets:
+  secretManager:
+  - versionName: projects/$PROJECT_ID/secrets/MSSQL_DB_PASSWORD/versions/latest
+    env: 'MSSQL_DB_PASSWORD'
+  - versionName: projects/$PROJECT_ID/secrets/GITHUB_TOKEN/versions/latest
+    env: 'GITHUB_TOKEN'
@@ -0,0 +1,22 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+############################################################
+### CI Metadata (Repository Specific)
+### Note: These fields are used for version tracking in BQ
+### and are not part of the core Evalbench schema.
+############################################################
+
+extension_id: cloud-sql-sqlserver
+# Placeholder is expanded by evals/substitute_env.py once this file has been
+# appended to run_config.yaml; quoted so the value always loads as a string.
+release_version: "${RELEASE_VERSION}"
@@ -0,0 +1,45 @@
+{
+  "scenarios": [
+    {
+      "id": "cloud-sql-debug-instance",
+      "starting_prompt": "Check on my databases in project ${GOOGLE_CLOUD_PROJECT}.",
+      "conversation_plan": "Ask the agent to list all Cloud SQL instances in the project. Once all instances are listed, if '${CLOUD_SQL_MSSQL_INSTANCE}' exists, get its details and validate it is RUNNABLE.",
+      "expected_trajectory": [
+        "list_instances",
+        "get_instance"
+      ],
+      "env": {
+        "GOOGLE_CLOUD_PROJECT": "${GOOGLE_CLOUD_PROJECT}"
+      },
+      "kind": "tools",
+      "max_turns": 3
+    },
+    {
+      "id": "cloud-sql-schema-tables-explore",
+      "starting_prompt": "I want to understand the structure of my database.",
+      "conversation_plan": "First, ask the agent to list the databases in the instance. After the agent provides the databases, ask it to list the tables specifically for that database.",
+      "expected_trajectory": [
+        "list_databases",
+        "list_tables"
+      ],
+      "env": {
+        "GOOGLE_CLOUD_PROJECT": "${GOOGLE_CLOUD_PROJECT}"
+      },
+      "kind": "tools",
+      "max_turns": 3
+    },
+    {
+      "id": "cloud-sql-performance-check",
+      "starting_prompt": "Our database performance seems degraded.",
+      "conversation_plan": "Start by asking the agent to check the CPU utilization system metrics for the database instance to see if it's overloaded.",
+      "expected_trajectory": [
+        "get_system_metrics"
+      ],
+      "env": {
+        "GOOGLE_CLOUD_PROJECT": "${GOOGLE_CLOUD_PROJECT}"
+      },
+      "kind": "tools",
+      "max_turns": 3
+    }
+  ]
+}
@@ -0,0 +1,18 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+generator: gcp_vertex_gemini
+vertex_model: gemini-2.5-pro
+base_prompt: ""
+execs_per_minute: 5
@@ -0,0 +1,33 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+gemini_cli_version: "@google/gemini-cli@latest"
+generator: gemini_cli
+env:
+  GOOGLE_CLOUD_PROJECT: "${GOOGLE_CLOUD_PROJECT}"
+  GOOGLE_CLOUD_LOCATION: "global"
+  GOOGLE_GENAI_USE_VERTEXAI: "true"
+  GEMINI_CLI_TRUST_WORKSPACE: "true"
+setup:
+  extensions:
+    # Points to the symlink created in cloudbuild.yaml to match the extension ID
+    "/workspace/cloud-sql-sqlserver":
+      settings:
+        CLOUD_SQL_MSSQL_PROJECT: "${CLOUD_SQL_MSSQL_PROJECT}"
+        CLOUD_SQL_MSSQL_INSTANCE: "${CLOUD_SQL_MSSQL_INSTANCE}"
+        CLOUD_SQL_MSSQL_REGION: "${CLOUD_SQL_MSSQL_REGION}"
+        CLOUD_SQL_MSSQL_DATABASE: "${CLOUD_SQL_MSSQL_DATABASE}"
+        CLOUD_SQL_MSSQL_USER: "${CLOUD_SQL_MSSQL_USER}"
+        CLOUD_SQL_MSSQL_PASSWORD: '${CLOUD_SQL_MSSQL_PASSWORD}'
+        CLOUD_SQL_MSSQL_IP_TYPE: "${CLOUD_SQL_MSSQL_IP_TYPE}"
@@ -0,0 +1,41 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+dataset_config: /workspace/evals/dataset.json
+dataset_format: gemini-cli-format
+
+orchestrator: geminicli
+model_config: /workspace/evals/model_config.yaml
+simulated_user_model_config: /workspace/evals/gemini_2.5_pro_model.yaml
+
+scorers:
+  # Qualitative (Judge-based)
+  goal_completion:
+    model_config: /workspace/evals/gemini_2.5_pro_model.yaml
+  behavioral_metrics:
+    model_config: /workspace/evals/gemini_2.5_pro_model.yaml
+  skills_best_practices:
+    model_config: /workspace/evals/gemini_2.5_pro_model.yaml
+    skills_dir: /workspace/cloud-sql-sqlserver/skills
+
+  # Performance
+  turn_count: {}
+  end_to_end_latency: {}
+  tool_call_latency: {}
+  token_consumption: {}
+  skills_trajectory: {}
+
+reporting:
+  bigquery:
+    gcp_project_id: "${EVAL_REPORTING_PROJECT}"
@@ -0,0 +1,18 @@
+import os
+import re
+
+def main():
+    yaml_paths = ['/workspace/evals/model_config.yaml', '/workspace/evals/run_config.yaml', '/workspace/evals/dataset.json']
+    for yaml_path in yaml_paths:
+        if os.path.exists(yaml_path):
+            with open(yaml_path, 'r') as f:
+                content = f.read()
+            content = re.sub(r'\${(\w+)}', lambda m: os.environ.get(m.group(1), m.group(0)), content)
+            with open(yaml_path, 'w') as f:
+                f.write(content)
+            print(f"Successfully substituted environment variables in {yaml_path}")
+        else:
+            print(f"File not found: {yaml_path}")
+
+if __name__ == '__main__':
+    main()