From 8edeb4d1ed648d6d53ac5f535983fb4e5065a85a Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Fri, 24 Oct 2025 12:40:46 +0500 Subject: [PATCH 1/2] Set transaction_per_migration=True --- src/dstack/_internal/server/migrations/env.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/dstack/_internal/server/migrations/env.py b/src/dstack/_internal/server/migrations/env.py index 4259ec9fa..0b2f73a19 100644 --- a/src/dstack/_internal/server/migrations/env.py +++ b/src/dstack/_internal/server/migrations/env.py @@ -36,7 +36,6 @@ def run_migrations_offline(): literal_binds=True, dialect_opts={"paramstyle": "named"}, ) - with context.begin_transaction(): context.run_migrations() @@ -61,12 +60,21 @@ def run_migrations(connection: Connection): # https://alembic.sqlalchemy.org/en/latest/batch.html#dealing-with-referencing-foreign-keys if connection.dialect.name == "sqlite": connection.execute(text("PRAGMA foreign_keys=OFF;")) + elif connection.dialect.name == "postgresql": + # lock_timeout is needed so that migrations that acquire locks + # do not wait for locks forever, blocking live queries. + # Better to fail and retry a deployment. + connection.execute(text("SET lock_timeout='10s';")) connection.commit() context.configure( connection=connection, target_metadata=target_metadata, compare_type=True, render_as_batch=True, + # Running each migration in a separate transaction. + # Running all migrations in one transaction may lead to deadlocks in HA deployments + # because lock ordering is not respected across all migrations. 
+ transaction_per_migration=True, ) with context.begin_transaction(): context.run_migrations() From d9a4c6fbdc6b6cd79b6289160471e77647f6ff5e Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Fri, 24 Oct 2025 14:24:53 +0500 Subject: [PATCH 2/2] Document Server upgrades --- docs/docs/guides/server-deployment.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/docs/docs/guides/server-deployment.md b/docs/docs/guides/server-deployment.md index 80f34a335..4976bf060 100644 --- a/docs/docs/guides/server-deployment.md +++ b/docs/docs/guides/server-deployment.md @@ -400,6 +400,28 @@ export DSTACK_DB_MAX_OVERFLOW=80 You have to ensure your Postgres installation supports that many connections by configuring [`max_connections`](https://www.postgresql.org/docs/current/runtime-config-connection.html#GUC-MAX-CONNECTIONS) and/or using connection pooler. +## Server upgrades + +When upgrading the `dstack` server, follow these guidelines to ensure a smooth transition and minimize downtime. + +### Before upgrading + +1. **Check the changelog**: Review the [release notes :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/releases){:target="_blank"} for breaking changes, new features, and migration notes. +2. **Review backward compatibility**: Understand the [backward compatibility](#backward-compatibility) policy. +3. **Back up your data**: Ensure you always create a backup before upgrading. + +### Best practices + +- **Test in staging**: Always test upgrades in a non-production environment first. +- **Monitor logs**: Watch server logs during and after the upgrade for any errors or warnings. +- **Keep backups**: Retain backups for at least a few days after a successful upgrade. + +### Troubleshooting + +**Deadlock when upgrading a multi-replica PostgreSQL deployment** + +If a deployment is stuck due to a deadlock when applying DB migrations, try scaling server replicas to 1 and retry the deployment multiple times. 
Some releases may not support rolling deployments; this is always noted in the release notes. If you think there is a bug, please [file an issue](https://github.com/dstackai/dstack/issues). + ## FAQs ??? info "Can I run multiple replicas of dstack server?"