From dfd6ed052483e628038ceebd4f4738fc8c3ad50f Mon Sep 17 00:00:00 2001
From: David Peter <sharkdp@users.noreply.github.com>
Date: Mon, 8 Dec 2025 11:44:20 +0100
Subject: [PATCH] [ty] mdtests with external dependencies (#20904)

## Summary

This PR adds the possibility to write mdtests that specify external
dependencies in a `project` section of TOML blocks. For example, here is
a test that makes sure that we understand Pydantic's dataclass-transform
setup:

````markdown
```toml
[environment]
python-version = "3.12"
python-platform = "linux"

[project]
dependencies = ["pydantic==2.12.2"]
```

```py
from pydantic import BaseModel

class User(BaseModel):
    id: int
    name: str

user = User(id=1, name="Alice")
reveal_type(user.id)  # revealed: int
reveal_type(user.name)  # revealed: str

# error: [missing-argument] "No argument provided for required parameter `name`"
invalid_user = User(id=2)
```
````

## How?

Using the `python-version` and the `dependencies` fields from the
Markdown section, we generate a `pyproject.toml` file, write it to a
temporary directory, and use `uv sync` to install the dependencies into
a virtual environment. We then copy the Python source files from that
venv's `site-packages` folder to a corresponding directory structure in
the in-memory filesystem. Finally, we configure the search paths
accordingly, and run the mdtest as usual.

I fully understand that there are valid concerns here:
* Doesn't this require network access? (yes, it does)
* Is this fast enough? (`uv` caching makes this almost unnoticeable,
actually)
* Is this deterministic? ~~(probably not, package resolution can depend
on the platform you're on)~~ (yes, hopefully)

For these reasons, this first version is opt-in when run locally. ~~We don't even
run these tests in CI (even though they worked fine in a previous
iteration of this PR).~~ You need to set `MDTEST_EXTERNAL=1`, or use the
new `-e/--enable-external` command line option of the `mdtest.py`
runner. For example:
```bash
# Skip mdtests with external dependencies (default):
uv run crates/ty_python_semantic/mdtest.py

# Run all mdtests, including those with external dependencies:
uv run crates/ty_python_semantic/mdtest.py -e

# Only run the `pydantic` tests. Use `-e` to make sure it is not skipped:
uv run crates/ty_python_semantic/mdtest.py -e pydantic
```

## Why?

I believe that this can be a useful addition to our testing strategy,
which lies somewhere between ecosystem tests and normal mdtests.
Ecosystem tests cover much more code, but they have the disadvantage
that we only see second- or third-order effects via diagnostic diffs. If
we unexpectedly gain or lose type coverage somewhere, we might not even
notice (assuming the gradual guarantee holds, and ecosystem code is
mostly correct). Another disadvantage of ecosystem checks is that they
only test checked-in code that is usually correct. However, we also want
to test what happens on wrong code, like the code that is momentarily
written in an editor, before fixing it. On the other end of the spectrum
we have normal mdtests, which have the disadvantage that they do not
reflect the reality of complex real-world code. We experience this
whenever we're surprised by an ecosystem report on a PR.

That said, these tests should not be seen as a replacement for either of
these things. For example, we should still strive to write detailed
self-contained mdtests for user-reported issues. But we might use this
new layer for regression tests, or simply as a debugging tool. It can
also serve as a tool to document our support for popular third-party
libraries.

## Test Plan

* I've been locally using this for a couple of weeks now.
* `uv run crates/ty_python_semantic/mdtest.py -e`
---
 .github/workflows/ci.yaml                     |   2 +
 Cargo.lock                                    |   1 +
 crates/ty_python_semantic/mdtest.py           |  15 +-
 .../resources/mdtest/external/README.md       |   4 +
 .../resources/mdtest/external/attrs.md        |  78 ++++++++
 .../resources/mdtest/external/numpy.md        |  23 +++
 .../resources/mdtest/external/pydantic.md     |  48 +++++
 .../resources/mdtest/external/pytest.md       |  27 +++
 .../resources/mdtest/external/sqlalchemy.md   | 124 ++++++++++++
 .../resources/mdtest/external/sqlmodel.md     |  30 +++
 .../resources/mdtest/external/strawberry.md   |  27 +++
 crates/ty_test/Cargo.toml                     |   1 +
 crates/ty_test/README.md                      |  38 ++++
 crates/ty_test/src/config.rs                  |  24 +++
 crates/ty_test/src/external_dependencies.rs   | 186 ++++++++++++++++++
 crates/ty_test/src/lib.rs                     |  65 +++++-
 16 files changed, 684 insertions(+), 9 deletions(-)
 create mode 100644 crates/ty_python_semantic/resources/mdtest/external/README.md
 create mode 100644 crates/ty_python_semantic/resources/mdtest/external/attrs.md
 create mode 100644 crates/ty_python_semantic/resources/mdtest/external/numpy.md
 create mode 100644 crates/ty_python_semantic/resources/mdtest/external/pydantic.md
 create mode 100644 crates/ty_python_semantic/resources/mdtest/external/pytest.md
 create mode 100644 crates/ty_python_semantic/resources/mdtest/external/sqlalchemy.md
 create mode 100644 crates/ty_python_semantic/resources/mdtest/external/sqlmodel.md
 create mode 100644 crates/ty_python_semantic/resources/mdtest/external/strawberry.md
 create mode 100644 crates/ty_test/src/external_dependencies.rs

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 83b6b83bc37ad..993ecfac97f63 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -24,6 +24,8 @@ env:
   PACKAGE_NAME: ruff
   PYTHON_VERSION: "3.14"
   NEXTEST_PROFILE: ci
+  # Enable mdtests that require external dependencies
+  MDTEST_EXTERNAL: "1"
 
 jobs:
   determine_changes:
diff --git a/Cargo.lock b/Cargo.lock
index 6bc8bf881c33c..6bde255074268 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4557,6 +4557,7 @@ dependencies = [
  "anyhow",
  "camino",
  "colored 3.0.0",
+ "dunce",
  "insta",
  "memchr",
  "path-slash",
diff --git a/crates/ty_python_semantic/mdtest.py b/crates/ty_python_semantic/mdtest.py
index c3260637cca0a..2acc6f452b1ec 100644
--- a/crates/ty_python_semantic/mdtest.py
+++ b/crates/ty_python_semantic/mdtest.py
@@ -37,14 +37,16 @@ class MDTestRunner:
     mdtest_executable: Path | None
     console: Console
     filters: list[str]
+    enable_external: bool
 
-    def __init__(self, filters: list[str] | None = None) -> None:
+    def __init__(self, filters: list[str] | None, enable_external: bool) -> None:
         self.mdtest_executable = None
         self.console = Console()
         self.filters = [
             f.removesuffix(".md").replace("/", "_").replace("-", "_")
             for f in (filters or [])
         ]
+        self.enable_external = enable_external
 
     def _run_cargo_test(self, *, message_format: Literal["human", "json"]) -> str:
         return subprocess.check_output(
@@ -120,6 +122,7 @@ def _run_mdtest(
                 CLICOLOR_FORCE="1",
                 INSTA_FORCE_PASS="1",
                 INSTA_OUTPUT="none",
+                MDTEST_EXTERNAL="1" if self.enable_external else "0",
             ),
             capture_output=capture_output,
             text=True,
@@ -266,11 +269,19 @@ def main() -> None:
         nargs="*",
         help="Partial paths or mangled names, e.g., 'loops/for.md' or 'loops_for'",
     )
+    parser.add_argument(
+        "--enable-external",
+        "-e",
+        action="store_true",
+        help="Enable tests with external dependencies",
+    )
 
     args = parser.parse_args()
 
     try:
-        runner = MDTestRunner(filters=args.filters)
+        runner = MDTestRunner(
+            filters=args.filters, enable_external=args.enable_external
+        )
         runner.watch()
     except KeyboardInterrupt:
         print()
diff --git a/crates/ty_python_semantic/resources/mdtest/external/README.md b/crates/ty_python_semantic/resources/mdtest/external/README.md
new file mode 100644
index 0000000000000..a54c31c862b4e
--- /dev/null
+++ b/crates/ty_python_semantic/resources/mdtest/external/README.md
@@ -0,0 +1,4 @@
+# mdtests with external dependencies
+
+This directory contains mdtests that make use of external packages. See the mdtest `README.md` for
+more information.
diff --git a/crates/ty_python_semantic/resources/mdtest/external/attrs.md b/crates/ty_python_semantic/resources/mdtest/external/attrs.md
new file mode 100644
index 0000000000000..3b4bc342a6fbd
--- /dev/null
+++ b/crates/ty_python_semantic/resources/mdtest/external/attrs.md
@@ -0,0 +1,78 @@
+# attrs
+
+```toml
+[environment]
+python-version = "3.13"
+python-platform = "linux"
+
+[project]
+dependencies = ["attrs==25.4.0"]
+```
+
+## Basic class (`attr`)
+
+```py
+import attr
+
+@attr.s
+class User:
+    id: int = attr.ib()
+    name: str = attr.ib()
+
+user = User(id=1, name="John Doe")
+
+reveal_type(user.id)  # revealed: int
+reveal_type(user.name)  # revealed: str
+```
+
+## Basic class (`define`)
+
+```py
+from attrs import define, field
+
+@define
+class User:
+    id: int = field()
+    internal_name: str = field(alias="name")
+
+user = User(id=1, name="John Doe")
+reveal_type(user.id)  # revealed: int
+reveal_type(user.internal_name)  # revealed: str
+```
+
+## Usage of `field` parameters
+
+```py
+from attrs import define, field
+
+@define
+class Product:
+    id: int = field(init=False)
+    name: str = field()
+    price_cent: int = field(kw_only=True)
+
+reveal_type(Product.__init__)  # revealed: (self: Product, name: str, *, price_cent: int) -> None
+```
+
+## Dedicated support for the `default` decorator?
+
+We currently do not support this:
+
+```py
+from attrs import define, field
+
+@define
+class Person:
+    id: int = field()
+    name: str = field()
+
+    # error: [call-non-callable] "Object of type `_MISSING_TYPE` is not callable"
+    @id.default
+    def _default_id(self) -> int:
+        raise NotImplementedError
+
+# error: [missing-argument] "No argument provided for required parameter `id`"
+person = Person(name="Alice")
+reveal_type(person.id)  # revealed: int
+reveal_type(person.name)  # revealed: str
+```
diff --git a/crates/ty_python_semantic/resources/mdtest/external/numpy.md b/crates/ty_python_semantic/resources/mdtest/external/numpy.md
new file mode 100644
index 0000000000000..39bfa6d1106d8
--- /dev/null
+++ b/crates/ty_python_semantic/resources/mdtest/external/numpy.md
@@ -0,0 +1,23 @@
+# numpy
+
+```toml
+[environment]
+python-version = "3.13"
+python-platform = "linux"
+
+[project]
+dependencies = ["numpy==2.3.0"]
+```
+
+## Basic usage
+
+```py
+import numpy as np
+
+xs = np.array([1, 2, 3])
+reveal_type(xs)  # revealed: ndarray[tuple[Any, ...], dtype[Any]]
+
+xs = np.array([1.0, 2.0, 3.0], dtype=np.float64)
+# TODO: should be `ndarray[tuple[Any, ...], dtype[float64]]`
+reveal_type(xs)  # revealed: ndarray[tuple[Any, ...], dtype[Unknown]]
+```
diff --git a/crates/ty_python_semantic/resources/mdtest/external/pydantic.md b/crates/ty_python_semantic/resources/mdtest/external/pydantic.md
new file mode 100644
index 0000000000000..6fb82840f5788
--- /dev/null
+++ b/crates/ty_python_semantic/resources/mdtest/external/pydantic.md
@@ -0,0 +1,48 @@
+# Pydantic
+
+```toml
+[environment]
+python-version = "3.12"
+python-platform = "linux"
+
+[project]
+dependencies = ["pydantic==2.12.2"]
+```
+
+## Basic model
+
+```py
+from pydantic import BaseModel
+
+class User(BaseModel):
+    id: int
+    name: str
+
+reveal_type(User.__init__)  # revealed: (self: User, *, id: int, name: str) -> None
+
+user = User(id=1, name="John Doe")
+reveal_type(user.id)  # revealed: int
+reveal_type(user.name)  # revealed: str
+
+# error: [missing-argument] "No argument provided for required parameter `name`"
+invalid_user = User(id=2)
+```
+
+## Usage of `Field`
+
+```py
+from pydantic import BaseModel, Field
+
+class Product(BaseModel):
+    id: int = Field(init=False)
+    name: str = Field(..., kw_only=False, min_length=1)
+    internal_price_cent: int = Field(..., gt=0, alias="price_cent")
+
+reveal_type(Product.__init__)  # revealed: (self: Product, name: str = Any, *, price_cent: int = Any) -> None
+
+product = Product("Laptop", price_cent=999_00)
+
+reveal_type(product.id)  # revealed: int
+reveal_type(product.name)  # revealed: str
+reveal_type(product.internal_price_cent)  # revealed: int
+```
diff --git a/crates/ty_python_semantic/resources/mdtest/external/pytest.md b/crates/ty_python_semantic/resources/mdtest/external/pytest.md
new file mode 100644
index 0000000000000..823ef4d162d99
--- /dev/null
+++ b/crates/ty_python_semantic/resources/mdtest/external/pytest.md
@@ -0,0 +1,27 @@
+# pytest
+
+```toml
+[environment]
+python-version = "3.13"
+python-platform = "linux"
+
+[project]
+dependencies = ["pytest==9.0.1"]
+```
+
+## `pytest.fail`
+
+Make sure that we recognize `pytest.fail` calls as terminal:
+
+```py
+import pytest
+
+def some_runtime_condition() -> bool:
+    return True
+
+def test_something():
+    if not some_runtime_condition():
+        pytest.fail("Runtime condition failed")
+
+        no_error_here_this_is_unreachable
+```
diff --git a/crates/ty_python_semantic/resources/mdtest/external/sqlalchemy.md b/crates/ty_python_semantic/resources/mdtest/external/sqlalchemy.md
new file mode 100644
index 0000000000000..0ac9c4c219827
--- /dev/null
+++ b/crates/ty_python_semantic/resources/mdtest/external/sqlalchemy.md
@@ -0,0 +1,124 @@
+# SQLAlchemy
+
+```toml
+[environment]
+python-version = "3.13"
+python-platform = "linux"
+
+[project]
+dependencies = ["SQLAlchemy==2.0.44"]
+```
+
+## Basic model
+
+Here, we mostly make sure that ty understands SQLAlchemy's dataclass-transformer setup:
+
+```py
+from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
+
+class Base(DeclarativeBase):
+    pass
+
+class User(Base):
+    __tablename__ = "user"
+
+    id: Mapped[int] = mapped_column(primary_key=True, init=False)
+    internal_name: Mapped[str] = mapped_column(alias="name")
+
+user = User(name="John Doe")
+reveal_type(user.id)  # revealed: int
+reveal_type(user.internal_name)  # revealed: str
+```
+
+Unfortunately, SQLAlchemy overrides `__init__` and explicitly accepts all combinations of keyword
+arguments. This is why we currently cannot flag invalid constructor calls:
+
+```py
+reveal_type(User.__init__)  # revealed: def __init__(self, **kw: Any) -> Unknown
+
+# TODO: this should ideally be an error
+invalid_user = User(invalid_arg=42)
+```
+
+## Queries
+
+First, the basic setup:
+
+```py
+from datetime import datetime
+
+from sqlalchemy import select, Integer, Text, Boolean, DateTime
+from sqlalchemy.orm import Session
+from sqlalchemy.orm import DeclarativeBase
+from sqlalchemy.orm import Mapped, mapped_column
+from sqlalchemy import create_engine
+
+engine = create_engine("sqlite://example.db")
+session = Session(engine)
+```
+
+Now we can declare a simple model:
+
+```py
+class Base(DeclarativeBase):
+    pass
+
+class User(Base):
+    __tablename__ = "users"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True)
+    name: Mapped[str] = mapped_column(Text)
+    is_admin: Mapped[bool] = mapped_column(Boolean, default=False)
+```
+
+And perform simple queries:
+
+```py
+stmt = select(User)
+reveal_type(stmt)  # revealed: Select[tuple[User]]
+
+users = session.scalars(stmt).all()
+reveal_type(users)  # revealed: Sequence[User]
+
+for row in session.execute(stmt):
+    reveal_type(row)  # revealed: Row[tuple[User]]
+
+stmt = select(User).where(User.name == "Alice")
+alice = session.scalars(stmt).first()
+reveal_type(alice)  # revealed: User | None
+
+stmt = select(User).where(User.is_admin == True).order_by(User.name).limit(10)
+admin_users = session.scalars(stmt).all()
+reveal_type(admin_users)  # revealed: Sequence[User]
+```
+
+This also works with the legacy `query` API:
+
+```py
+users_legacy = session.query(User).all()
+reveal_type(users_legacy)  # revealed: list[User]
+```
+
+We can also specify particular columns to select:
+
+```py
+stmt = select(User.id, User.name)
+# TODO: should be `Select[tuple[int, str]]`
+reveal_type(stmt)  # revealed: Select[tuple[Unknown, Unknown]]
+
+for row in session.execute(stmt):
+    # TODO: should be `Row[Tuple[int, str]]`
+    reveal_type(row)  # revealed: Row[tuple[Unknown, Unknown]]
+```
+
+And similarly with the legacy `query` API:
+
+```py
+query = session.query(User.id, User.name)
+# TODO: should be `RowReturningQuery[tuple[int, str]]`
+reveal_type(query)  # revealed: RowReturningQuery[tuple[Unknown, Unknown]]
+
+for row in query.all():
+    # TODO: should be `Row[Tuple[int, str]]`
+    reveal_type(row)  # revealed: Row[tuple[Unknown, Unknown]]
+```
diff --git a/crates/ty_python_semantic/resources/mdtest/external/sqlmodel.md b/crates/ty_python_semantic/resources/mdtest/external/sqlmodel.md
new file mode 100644
index 0000000000000..7dafa336db2fe
--- /dev/null
+++ b/crates/ty_python_semantic/resources/mdtest/external/sqlmodel.md
@@ -0,0 +1,30 @@
+# SQLModel
+
+```toml
+[environment]
+python-version = "3.13"
+python-platform = "linux"
+
+[project]
+dependencies = ["sqlmodel==0.0.27"]
+```
+
+## Basic model
+
+```py
+from sqlmodel import SQLModel
+
+class User(SQLModel):
+    id: int
+    name: str
+
+user = User(id=1, name="John Doe")
+reveal_type(user.id)  # revealed: int
+reveal_type(user.name)  # revealed: str
+
+# TODO: this should not mention `__pydantic_self__`, and have proper parameters defined by the fields
+reveal_type(User.__init__)  # revealed: def __init__(__pydantic_self__, **data: Any) -> None
+
+# TODO: this should be an error
+User()
+```
diff --git a/crates/ty_python_semantic/resources/mdtest/external/strawberry.md b/crates/ty_python_semantic/resources/mdtest/external/strawberry.md
new file mode 100644
index 0000000000000..849b50aa74d29
--- /dev/null
+++ b/crates/ty_python_semantic/resources/mdtest/external/strawberry.md
@@ -0,0 +1,27 @@
+# Strawberry GraphQL
+
+```toml
+[environment]
+python-version = "3.13"
+python-platform = "linux"
+
+[project]
+dependencies = ["strawberry-graphql==0.283.3"]
+```
+
+## Basic model
+
+```py
+import strawberry
+
+@strawberry.type
+class User:
+    id: int
+    role: str = strawberry.field(default="user")
+
+reveal_type(User.__init__)  # revealed: (self: User, *, id: int, role: str = Any) -> None
+
+user = User(id=1)
+reveal_type(user.id)  # revealed: int
+reveal_type(user.role)  # revealed: str
+```
diff --git a/crates/ty_test/Cargo.toml b/crates/ty_test/Cargo.toml
index f300b614a0d6d..a7c18b9d002d7 100644
--- a/crates/ty_test/Cargo.toml
+++ b/crates/ty_test/Cargo.toml
@@ -25,6 +25,7 @@ ty_vendored = { workspace = true }
 
 anyhow = { workspace = true }
 camino = { workspace = true }
+dunce = { workspace = true }
 colored = { workspace = true }
 insta = { workspace = true, features = ["filters"] }
 memchr = { workspace = true }
diff --git a/crates/ty_test/README.md b/crates/ty_test/README.md
index ecf4614d94d14..b31a45e87774f 100644
--- a/crates/ty_test/README.md
+++ b/crates/ty_test/README.md
@@ -316,6 +316,44 @@ To enable logging in an mdtest, set `log = true` at the top level of the TOML bl
 See [`MarkdownTestConfig`](https://github.com/astral-sh/ruff/blob/main/crates/ty_test/src/config.rs)
 for the full list of supported configuration options.
 
+### Testing with external dependencies
+
+Tests can specify external Python dependencies using a `[project]` section in the TOML configuration.
+This allows testing code that uses third-party libraries like `pydantic`, `numpy`, etc.
+
+It is recommended to specify exact versions of packages to ensure reproducibility. Tests with
+external dependencies must specify `python-platform` and should specify `python-version`, since
+both are used during package resolution.
+
+````markdown
+```toml
+[environment]
+python-version = "3.13"
+python-platform = "linux"
+
+[project]
+dependencies = ["pydantic==2.12.2"]
+```
+
+```py
+import pydantic
+
+# use pydantic in the test
+```
+````
+
+When a test has dependencies, the test framework:
+
+1. Creates a `pyproject.toml` in a temporary directory.
+1. Runs `uv sync` to install the dependencies.
+1. Copies the Python source files (`.py`/`.pyi`) of the installed packages from the virtual
+    environment's `site-packages` directory into the test's in-memory filesystem.
+1. Configures the type checker to use these packages.
+
+**Note**: Tests with dependencies are skipped unless `MDTEST_EXTERNAL=1` is set. They require `uv`
+to be installed and available in your `PATH`. Dependencies are installed fresh for each such test,
+so tests with many dependencies may be slower to run.
+
 ### Specifying a custom typeshed
 
 Some tests will need to override the default typeshed with custom files. The `[environment]`
diff --git a/crates/ty_test/src/config.rs b/crates/ty_test/src/config.rs
index 6fe3a17fd0537..6154208d69f26 100644
--- a/crates/ty_test/src/config.rs
+++ b/crates/ty_test/src/config.rs
@@ -4,8 +4,12 @@
 //!
 //! ```toml
 //! log = true # or log = "ty=WARN"
+//!
 //! [environment]
 //! python-version = "3.10"
+//!
+//! [project]
+//! dependencies = ["pydantic==2.12.2"]
 //! ```
 
 use anyhow::Context;
@@ -25,6 +29,9 @@ pub(crate) struct MarkdownTestConfig {
     ///
     /// Defaults to the case-sensitive [`ruff_db::system::InMemorySystem`].
     pub(crate) system: Option<SystemKind>,
+
+    /// Project configuration for installing external dependencies.
+    pub(crate) project: Option<Project>,
 }
 
 impl MarkdownTestConfig {
@@ -51,6 +58,10 @@ impl MarkdownTestConfig {
     pub(crate) fn python(&self) -> Option<&SystemPath> {
         self.environment.as_ref()?.python.as_deref()
     }
+
+    pub(crate) fn dependencies(&self) -> Option<&[String]> {
+        self.project.as_ref()?.dependencies.as_deref() // `None` if not configured
+    }
 }
 
 #[derive(Deserialize, Debug, Default, Clone)]
@@ -116,3 +127,16 @@ pub(crate) enum SystemKind {
     /// This system should only be used when testing system or OS specific behavior.
     Os,
 }
+
+/// The `[project]` table of an mdtest TOML block, for tests that need external dependencies.
+#[derive(Deserialize, Debug, Default, Clone)]
+#[serde(rename_all = "kebab-case", deny_unknown_fields)]
+pub(crate) struct Project {
+    /// Python package requirements, written as in a `pyproject.toml` `dependencies` array.
+    ///
+    /// These will be installed using `uv sync` into a temporary virtual environment.
+    /// Only `.py`/`.pyi` files from its site-packages are then copied into the test's filesystem.
+    ///
+    /// Example: `dependencies = ["pydantic==2.12.2"]`
+    pub(crate) dependencies: Option<Vec<String>>,
+}
diff --git a/crates/ty_test/src/external_dependencies.rs b/crates/ty_test/src/external_dependencies.rs
new file mode 100644
index 0000000000000..38a14d78c9782
--- /dev/null
+++ b/crates/ty_test/src/external_dependencies.rs
@@ -0,0 +1,186 @@
+use crate::db::Db;
+
+use anyhow::{Context, Result, anyhow, bail};
+use ruff_db::system::{DbWithWritableSystem as _, OsSystem, SystemPath};
+use ruff_python_ast::PythonVersion;
+use ty_python_semantic::{PythonEnvironment, PythonPlatform, SysPrefixPathOrigin};
+
+/// Installs `dependencies` with `uv sync` into a temporary on-disk venv, then mirrors its
+/// site-packages into `db` under `dest_venv_path`. Returns an error if any of these steps fails.
+pub(crate) fn setup_venv(
+    db: &mut Db,
+    dependencies: &[String],
+    python_version: PythonVersion,
+    python_platform: &PythonPlatform,
+    dest_venv_path: &SystemPath,
+) -> Result<()> {
+    // Create a temporary directory for the project (deleted again when `temp_dir` is dropped)
+    let temp_dir = tempfile::Builder::new()
+        .prefix("mdtest-venv-")
+        .tempdir()
+        .context("Failed to create temporary directory for mdtest virtual environment")?;
+
+    // Canonicalize here to fix problems with `.strip_prefix()` later on Windows
+    let temp_dir_path = dunce::canonicalize(temp_dir.path())
+        .context("Failed to canonicalize temporary directory path")?;
+
+    let temp_path = SystemPath::from_std_path(&temp_dir_path)
+        .ok_or_else(|| {
+            anyhow!(
+                "Temporary directory path is not valid UTF-8: {}",
+                temp_dir_path.display()
+            )
+        })?
+        .to_path_buf();
+
+    // Generate a minimal pyproject.toml (escaping dependencies so quoted markers stay valid TOML)
+    let pyproject_toml = format!(
+        r#"[project]
+name = "mdtest-deps"
+version = "0.1.0"
+requires-python = "~={python_version}.0"
+dependencies = [
+{deps}
+]
+"#,
+        python_version = python_version,
+        deps = dependencies
+            .iter()
+            .map(|dep| format!("    \"{}\",", dep.replace('\\', "\\\\").replace('"', "\\\"")))
+            .collect::<Vec<_>>()
+            .join("\n")
+    );
+
+    std::fs::write(
+        temp_path.join("pyproject.toml").as_std_path(),
+        pyproject_toml,
+    )
+    .context("Failed to write pyproject.toml")?;
+
+    // Convert PythonPlatform to uv's platform format; unknown identifiers are passed through as-is
+    let uv_platform = match python_platform {
+        PythonPlatform::Identifier(id) => match id.as_str() {
+            "win32" => "windows",
+            "darwin" => "macos",
+            "linux" => "linux",
+            other => other,
+        },
+        PythonPlatform::All => {
+            bail!("For an mdtest with external dependencies, a Python platform must be specified");
+        }
+    };
+
+    // Run `uv sync` in the temporary project directory to install dependencies into its `.venv`
+    let uv_sync_output = std::process::Command::new("uv")
+        .args(["sync", "--python-platform", uv_platform])
+        .current_dir(temp_path.as_std_path())
+        .output()
+        .context("Failed to run `uv sync`. Is `uv` installed?")?;
+
+    if !uv_sync_output.status.success() {
+        let stderr = String::from_utf8_lossy(&uv_sync_output.stderr);
+        bail!(
+            "`uv sync` failed with exit code {:?}:\n{}",
+            uv_sync_output.status.code(),
+            stderr
+        );
+    }
+
+    let venv_path = temp_path.join(".venv");
+
+    copy_site_packages_to_db(db, &venv_path, dest_venv_path, python_version)
+}
+
+/// Copy the site-packages directory from a real virtual environment to the in-memory filesystem of `db`.
+///
+/// Only the first discovered site-packages directory is used; its Python source files are copied
+/// recursively to the same venv-relative location under `dest_venv_path`.
+fn copy_site_packages_to_db(
+    db: &mut Db,
+    venv_path: &SystemPath,
+    dest_venv_path: &SystemPath,
+    _python_version: PythonVersion, // currently unused
+) -> Result<()> {
+    // Discover the site-packages directory in the virtual environment
+    let system = OsSystem::new(venv_path);
+    let env = PythonEnvironment::new(venv_path, SysPrefixPathOrigin::LocalVenv, &system)
+        .context("Failed to create Python environment for temporary virtual environment")?;
+
+    let site_packages_paths = env
+        .site_packages_paths(&system)
+        .with_context(|| format!("Failed to discover site-packages in '{venv_path}'"))?;
+
+    let site_packages_path = site_packages_paths
+        .into_iter()
+        .next()
+        .ok_or_else(|| anyhow!("No site-packages directory found in '{venv_path}'"))?;
+
+    // Create the destination directory structure, mirroring the path relative to the venv root
+    let relative_site_packages = site_packages_path.strip_prefix(venv_path).map_err(|_| {
+        anyhow!("site-packages path '{site_packages_path}' is not under venv path '{venv_path}'")
+    })?;
+    let dest_site_packages = dest_venv_path.join(relative_site_packages);
+    db.create_directory_all(&dest_site_packages)
+        .context("Failed to create site-packages directory in database")?;
+
+    // Recursively copy all Python source files from site-packages
+    copy_directory_recursive(db, &site_packages_path, &dest_site_packages)?;
+
+    Ok(())
+}
+
+fn copy_directory_recursive(db: &mut Db, src: &SystemPath, dest: &SystemPath) -> Result<()> {
+    use std::fs; // `src` is read from the real filesystem; `dest` is written through `db`
+
+    for entry in fs::read_dir(src.as_std_path())
+        .with_context(|| format!("Failed to read directory {src}"))?
+    {
+        let entry = entry.with_context(|| format!("Failed to read directory entry in {src}"))?;
+        let entry_path = entry.path();
+        let file_type = entry
+            .file_type()
+            .with_context(|| format!("Failed to get file type for {}", entry_path.display()))?;
+
+        let src_path = SystemPath::from_std_path(&entry_path)
+            .ok_or_else(|| anyhow!("Path {} is not valid UTF-8", entry_path.display()))?;
+
+        let file_name = entry.file_name();
+        let file_name_str = file_name.to_str().ok_or_else(|| {
+            anyhow!(
+                "File name {} is not valid UTF-8",
+                file_name.to_string_lossy()
+            )
+        })?;
+
+        let dest_path = dest.join(file_name_str);
+
+        if file_type.is_dir() {
+            // Skip `__pycache__` and `*.dist-info` directories, including everything below them
+            if file_name_str == "__pycache__" || file_name_str.ends_with(".dist-info") {
+                continue;
+            }
+
+            db.create_directory_all(&dest_path)
+                .with_context(|| format!("Failed to create directory {dest_path}"))?;
+
+            copy_directory_recursive(db, src_path, &dest_path)?;
+        } else if file_type.is_file() {
+            let is_python_source = entry_path.extension().is_some_and(|ext| {
+                ext.eq_ignore_ascii_case("py") || ext.eq_ignore_ascii_case("pyi")
+            });
+
+            if !is_python_source {
+                // Skip everything except `.py`/`.pyi` files (binaries, data files, etc.)
+                continue;
+            }
+
+            let contents = fs::read_to_string(src_path.as_std_path())
+                .with_context(|| format!("Failed to read file {src_path}"))?;
+
+            db.write_file(&dest_path, contents)
+                .with_context(|| format!("Failed to write file {dest_path}"))?;
+        } // other entries (e.g. symlinks; `DirEntry::file_type` does not follow them) are ignored
+    }
+
+    Ok(())
+}
diff --git a/crates/ty_test/src/lib.rs b/crates/ty_test/src/lib.rs
index feb38bdf663b1..ad492617392d3 100644
--- a/crates/ty_test/src/lib.rs
+++ b/crates/ty_test/src/lib.rs
@@ -28,6 +28,7 @@ mod assertion;
 mod config;
 mod db;
 mod diagnostic;
+mod external_dependencies;
 mod matcher;
 mod parser;
 
@@ -70,16 +71,21 @@ pub fn run(
             Log::Filter(filter) => setup_logging_with_filter(filter),
         });
 
-        let failures = run_test(&mut db, relative_fixture_path, snapshot_path, &test);
-        let inconsistencies = run_module_resolution_consistency_test(&db);
-        let this_test_failed = failures.is_err() || inconsistencies.is_err();
+        let result = run_test(&mut db, relative_fixture_path, snapshot_path, &test);
+        let inconsistencies = if result.as_ref().is_ok_and(|t| t.has_been_skipped()) {
+            Ok(())
+        } else {
+            run_module_resolution_consistency_test(&db)
+        };
+
+        let this_test_failed = result.is_err() || inconsistencies.is_err();
         any_failures = any_failures || this_test_failed;
 
         if this_test_failed && output_format.is_cli() {
             println!("\n{}\n", test.name().bold().underline());
         }
 
-        if let Err(failures) = failures {
+        if let Err(failures) = result {
             let md_index = LineIndex::from_source_text(&source);
 
             for test_failures in failures {
@@ -212,12 +218,24 @@ impl OutputFormat {
     }
 }
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum TestOutcome {
+    Success, // the test ran and produced no failures
+    Skipped, // the test has external dependencies and `MDTEST_EXTERNAL` is not set to `1`
+}
+
+impl TestOutcome {
+    const fn has_been_skipped(self) -> bool {
+        matches!(self, TestOutcome::Skipped)
+    }
+}
+
 fn run_test(
     db: &mut db::Db,
     relative_fixture_path: &Utf8Path,
     snapshot_path: &Utf8Path,
     test: &parser::MarkdownTest,
-) -> Result<(), Failures> {
+) -> Result<TestOutcome, Failures> {
     // Initialize the system and remove all files and directories to reset the system to a clean state.
     match test.configuration().system.unwrap_or_default() {
         SystemKind::InMemory => {
@@ -248,6 +266,27 @@ fn run_test(
     let custom_typeshed_path = test.configuration().typeshed();
     let python_version = test.configuration().python_version().unwrap_or_default();
 
+    // Setup virtual environment with dependencies if specified
+    let venv_for_external_dependencies = SystemPathBuf::from("/.venv");
+    if let Some(dependencies) = test.configuration().dependencies() {
+        if !std::env::var("MDTEST_EXTERNAL").is_ok_and(|v| v == "1") {
+            return Ok(TestOutcome::Skipped);
+        }
+
+        let python_platform = test.configuration().python_platform().expect(
+            "Tests with external dependencies must specify `python-platform` in the configuration",
+        );
+
+        external_dependencies::setup_venv(
+            db,
+            dependencies,
+            python_version,
+            &python_platform,
+            &venv_for_external_dependencies,
+        )
+        .expect("Failed to setup in-memory virtual environment with dependencies");
+    }
+
     let mut typeshed_files = vec![];
     let mut has_custom_versions_file = false;
 
@@ -350,7 +389,19 @@ fn run_test(
 
     let configuration = test.configuration();
 
-    let site_packages_paths = if let Some(python) = configuration.python() {
+    let site_packages_paths = if configuration.dependencies().is_some() {
+        // If dependencies were specified, use the venv we just set up
+        let environment = PythonEnvironment::new(
+            &venv_for_external_dependencies,
+            SysPrefixPathOrigin::PythonCliFlag,
+            db.system(),
+        )
+        .expect("Python environment to point to a valid path");
+        environment
+            .site_packages_paths(db.system())
+            .expect("Python environment to be valid")
+            .into_vec()
+    } else if let Some(python) = configuration.python() {
         let environment =
             PythonEnvironment::new(python, SysPrefixPathOrigin::PythonCliFlag, db.system())
                 .expect("Python environment to point to a valid path");
@@ -551,7 +602,7 @@ fn run_test(
     }
 
     if failures.is_empty() {
-        Ok(())
+        Ok(TestOutcome::Success)
     } else {
         Err(failures)
     }