From ce5b17084947daa43e4c6214410969bfd171a24d Mon Sep 17 00:00:00 2001
From: Vikrant Puppala <vikrant.puppala@databricks.com>
Date: Tue, 26 May 2026 09:13:58 +0000
Subject: [PATCH 1/2] [PECOBLR-2461] fix(tests/e2e): de-collide MST table names
 across concurrent CI jobs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`_unique_table_name` derived the table name purely from the test node id,
so two CI runs racing on the same warehouse + catalog (e.g. PR + push to
main landing within seconds, as happened in run 26410038645) would both
target `peco.default.mst_pysql_<test_name>` and step on each other's
CREATE / DROP / DML. This caused the three "flaky" failures we kept
seeing in `test_transactions.py`:

  - test_multi_table_commit       → TRANSACTION_ROLLBACK_REQUIRED_AFTER_ABORT
  - test_executemany_rollback_in_txn → TABLE_OR_VIEW_NOT_FOUND
  - test_write_conflict_single_table → TABLE_OR_VIEW_ALREADY_EXISTS

The companion helper `_unique_table_name_raw` already appended a uuid4
suffix for exactly this reason; the fixture helper was an oversight from
#775. Add the same suffix here, reserving room so the result still fits
in the 80-char cap.

Co-authored-by: Isaac
Signed-off-by: Vikrant Puppala <vikrant.puppala@databricks.com>
---
 tests/e2e/test_transactions.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/tests/e2e/test_transactions.py b/tests/e2e/test_transactions.py
index e91afc0dd..fd4fd73ab 100644
--- a/tests/e2e/test_transactions.py
+++ b/tests/e2e/test_transactions.py
@@ -35,10 +35,17 @@
 
 
 def _unique_table_name(request):
-    """Derive a unique Delta table name from the test node id."""
+    """Derive a unique Delta table name from the test node id.
+
+    The uuid suffix keeps tables unique across concurrent CI jobs that
+    share the same warehouse/catalog — without it, two runs racing on
+    the same test name collide on CREATE/DROP.
+    """
     node_id = request.node.name
     sanitized = re.sub(r"[^a-z0-9_]", "_", node_id.lower())
-    return f"mst_pysql_{sanitized}"[:80]
+    suffix = uuid.uuid4().hex[:8]
+    # Reserve room for the 9-char "_{suffix}" tail so total stays <= 80.
+    return f"mst_pysql_{sanitized}"[:71] + f"_{suffix}"
 
 
 def _unique_table_name_raw(suffix):

From 665b37010fa61cc46cb0c412b48c94c5d2a103b0 Mon Sep 17 00:00:00 2001
From: Vikrant Puppala <vikrant.puppala@databricks.com>
Date: Tue, 26 May 2026 09:40:16 +0000
Subject: [PATCH 2/2] also de-collide UC Volume tests and add PR-branch
 concurrency cancellation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While verifying the transactions fix, a second flake surfaced — the same
cross-CI race pattern, this time on a UC Volume file path:

  tests/e2e/test_driver.py::TestPySQLCoreSuite::
    test_uc_volume_put_fails_if_file_exists_and_overwrite_not_set
  → Failed: DID NOT RAISE <class 'databricks.sql.exc.ServerOperationError'>

Two tests in `tests/e2e/common/uc_volume_tests.py` PUT/GET/REMOVE against
the hardcoded path `/Volumes/{catalog}/{schema}/e2etests/file1.csv`. When
two CI runs overlap (as happened on this PR after the force-push for DCO
sign-off), one run's REMOVE deletes the other run's file between its two
PUTs, so the expected `FILE_IN_STAGING_PATH_ALREADY_EXISTS` never fires.

Suffix the volume path with `uuid4().hex[:8]` in the two affected tests
(test_uc_volume_life_cycle and test_uc_volume_put_fails_if_file_exists_
and_overwrite_not_set). The other UC Volume tests that reference the same
path are exercising client-side / server-parse failure paths that never
touch the file — leaving those alone keeps the diff focused.

Also add a `concurrency` block to the E2E workflow:

  - On pull_request, cancel-in-progress: a new push / force-push on a PR
    cancels the previous run on that ref. Eliminates the entire class of
    "force-pushed during CI → two runs racing on shared warehouse state"
    failures.
  - On push to main, runs are NOT cancelled — each merge commit keeps its
    own clean CI signal so a regression on commit N can't be hidden by
    commit N+1 landing seconds later. Concurrent main runs can still
    collide on shared state, but the uuid-suffix conventions in the e2e
    tests are what keep that isolated.

Signed-off-by: Vikrant Puppala <vikrant.puppala@databricks.com>
---
 .github/workflows/code-coverage.yml | 13 +++++++++++++
 tests/e2e/common/uc_volume_tests.py | 24 ++++++++++++++++--------
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml
index 9f578ec9f..c5da8ab2d 100644
--- a/.github/workflows/code-coverage.yml
+++ b/.github/workflows/code-coverage.yml
@@ -11,6 +11,19 @@ on:
   pull_request:
   workflow_dispatch:
 
+# Serialise E2E runs per ref so a force-push (or a fast follow-up commit)
+# on a PR cancels the previous run instead of racing it against shared
+# warehouse state (Delta tables, UC Volume files, etc.).
+#
+# Pushes to main are NOT cancelled — each merge commit needs its own clean
+# CI signal so a regression on commit N doesn't get hidden by commit N+1
+# arriving seconds later. (Concurrent main runs can still collide on shared
+# state, but that's the cost of preserving per-commit signal; the
+# uuid-suffix conventions in the e2e tests are what keep them isolated.)
+concurrency:
+  group: e2e-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
 jobs:
   test-with-coverage:
     runs-on:
diff --git a/tests/e2e/common/uc_volume_tests.py b/tests/e2e/common/uc_volume_tests.py
index 5b4086f91..cf8797c63 100644
--- a/tests/e2e/common/uc_volume_tests.py
+++ b/tests/e2e/common/uc_volume_tests.py
@@ -1,5 +1,6 @@
 import os
 import tempfile
+from uuid import uuid4
 
 import pytest
 import databricks.sql as sql
@@ -40,12 +41,16 @@ def test_uc_volume_life_cycle(self, catalog, schema):
         with open(fh, "wb") as fp:
             fp.write(original_text)
 
+        # Unique per-run path so concurrent CI jobs sharing the same volume
+        # don't step on each other's PUT/GET/REMOVE.
+        volume_path = f"/Volumes/{catalog}/{schema}/e2etests/life_cycle_{uuid4().hex[:8]}.csv"
+
         with self.connection(
             extra_params={"staging_allowed_local_path": temp_path}
         ) as conn:
 
             cursor = conn.cursor()
-            query = f"PUT '{temp_path}' INTO '/Volumes/{catalog}/{schema}/e2etests/file1.csv' OVERWRITE"
+            query = f"PUT '{temp_path}' INTO '{volume_path}' OVERWRITE"
             cursor.execute(query)
 
         # GET should succeed
@@ -56,7 +61,7 @@ def test_uc_volume_life_cycle(self, catalog, schema):
             extra_params={"staging_allowed_local_path": new_temp_path}
         ) as conn:
             cursor = conn.cursor()
-            query = f"GET '/Volumes/{catalog}/{schema}/e2etests/file1.csv' TO '{new_temp_path}'"
+            query = f"GET '{volume_path}' TO '{new_temp_path}'"
             cursor.execute(query)
 
         with open(new_fh, "rb") as fp:
@@ -66,7 +71,7 @@ def test_uc_volume_life_cycle(self, catalog, schema):
 
         # REMOVE should succeed
 
-        remove_query = f"REMOVE '/Volumes/{catalog}/{schema}/e2etests/file1.csv'"
+        remove_query = f"REMOVE '{volume_path}'"
 
         # Use minimal retry settings to fail fast
         extra_params = {
@@ -84,7 +89,7 @@ def test_uc_volume_life_cycle(self, catalog, schema):
                 Error, match="Staging operation over HTTP was unsuccessful: 404"
             ):
                 cursor = conn.cursor()
-                query = f"GET '/Volumes/{catalog}/{schema}/e2etests/file1.csv' TO '{new_temp_path}'"
+                query = f"GET '{volume_path}' TO '{new_temp_path}'"
                 cursor.execute(query)
 
         os.remove(temp_path)
@@ -151,19 +156,22 @@ def test_uc_volume_put_fails_if_file_exists_and_overwrite_not_set(
         with open(fh, "wb") as fp:
             fp.write(original_text)
 
+        # Unique per-run path so a concurrent CI job's REMOVE doesn't delete
+        # our file between the two PUTs and silently turn the expected
+        # FILE_IN_STAGING_PATH_ALREADY_EXISTS into a successful PUT.
+        volume_path = f"/Volumes/{catalog}/{schema}/e2etests/put_conflict_{uuid4().hex[:8]}.csv"
+
         def perform_put():
             with self.connection(
                 extra_params={"staging_allowed_local_path": temp_path}
             ) as conn:
                 cursor = conn.cursor()
-                query = f"PUT '{temp_path}' INTO '/Volumes/{catalog}/{schema}/e2etests/file1.csv'"
+                query = f"PUT '{temp_path}' INTO '{volume_path}'"
                 cursor.execute(query)
 
         def perform_remove():
             try:
-                remove_query = (
-                    f"REMOVE '/Volumes/{catalog}/{schema}/e2etests/file1.csv'"
-                )
+                remove_query = f"REMOVE '{volume_path}'"
 
                 with self.connection(
                     extra_params={"staging_allowed_local_path": "/"}