From ddc531400944ee1d820428a5f175118e10d92489 Mon Sep 17 00:00:00 2001
From: Casey Clements
Date: Tue, 3 Mar 2026 17:05:06 -0500
Subject: [PATCH 01/30] feat: Add MongoDB offline store (ibis-based PIT join,
 v1 alpha)

- MongoDBSource: DataSource backed by a MongoDB collection, schema sampled
  via $sample aggregation (default N=100)
- MongoDBOfflineStoreConfig: connection_string + default database
- MongoDBOfflineStore: delegates to ibis PIT join engine via in-memory
  memtable approach
- SavedDatasetMongoDBStorage: persist training datasets to MongoDB
- _build_data_source_reader/_build_data_source_writer closures capture
  config (connection_string, database) for MongoDB access

Signed-off-by: Casey Clements
---
 .../contrib/mongodb_offline_store/__init__.py |   1 +
 .../contrib/mongodb_offline_store/mongodb.py  | 224 ++++++++++++++
 .../mongodb_offline_store/mongodb_source.py   | 276 ++++++++++++++++++
 3 files changed, 501 insertions(+)
 create mode 100644 sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/__init__.py
 create mode 100644 sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py
 create mode 100644 sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_source.py

diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/__init__.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/__init__.py
new file mode 100644
index 0000000000..8b13789179
--- /dev/null
+++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/__init__.py
@@ -0,0 +1 @@
+
diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py
new file mode 100644
index 0000000000..482f2cdc88
--- /dev/null
+++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py
@@ -0,0 +1,224 @@
+# Copyright 2025 The Feast Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import warnings
+from datetime import datetime
+from typing import Any, Callable, List, Optional, Union
+
+import ibis
+import pandas as pd
+from ibis.expr.types import Table
+from pydantic import StrictStr
+
+try:
+    from pymongo import MongoClient
+except ImportError:
+    MongoClient = None  # type: ignore[assignment,misc]
+
+from feast.data_source import DataSource
+from feast.errors import (
+    FeastExtrasDependencyImportError,
+    SavedDatasetLocationAlreadyExists,
+)
+from feast.feature_view import FeatureView
+from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_source import (
+    MongoDBSource,
+)
+from feast.infra.offline_stores.ibis import (
+    get_historical_features_ibis,
+    pull_all_from_table_or_query_ibis,
+    pull_latest_from_table_or_query_ibis,
+)
+from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob
+from feast.infra.registry.base_registry import BaseRegistry
+from feast.repo_config import FeastConfigBaseModel, RepoConfig
+
+# Print RuntimeWarning only once per process.
+warnings.simplefilter("once", RuntimeWarning) + + +class MongoDBOfflineStoreConfig(FeastConfigBaseModel): + """Configuration for the MongoDB offline store.""" + + type: StrictStr = "feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb.MongoDBOfflineStore" + """Offline store type selector""" + + connection_string: StrictStr = "mongodb://localhost:27017" + """MongoDB connection URI""" + + database: StrictStr = "feast" + """Default MongoDB database name""" + + +class MongoDBOfflineStore(OfflineStore): + """Offline store backed by MongoDB, using ibis for point-in-time joins.""" + + @staticmethod + def pull_latest_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + timestamp_field: str, + created_timestamp_column: Optional[str], + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + assert isinstance(data_source, MongoDBSource) + warnings.warn( + "MongoDB offline store is in alpha. API may change without notice.", + RuntimeWarning, + ) + return pull_latest_from_table_or_query_ibis( + config=config, + data_source=data_source, + join_key_columns=join_key_columns, + feature_name_columns=feature_name_columns, + timestamp_field=timestamp_field, + created_timestamp_column=created_timestamp_column, + start_date=start_date, + end_date=end_date, + data_source_reader=_build_data_source_reader(config), + data_source_writer=_build_data_source_writer(config), # type: ignore[arg-type] + ) + + @staticmethod + def get_historical_features( + config: RepoConfig, + feature_views: List[FeatureView], + feature_refs: List[str], + entity_df: Union[pd.DataFrame, str], + registry: BaseRegistry, + project: str, + full_feature_names: bool = False, + ) -> RetrievalJob: + warnings.warn( + "MongoDB offline store is in alpha. API may change without notice.", + RuntimeWarning, + ) + return get_historical_features_ibis( + config=config, + feature_views=feature_views, + feature_refs=feature_refs, + entity_df=entity_df, + registry=registry, + project=project, + full_feature_names=full_feature_names, + data_source_reader=_build_data_source_reader(config), + data_source_writer=_build_data_source_writer(config), # type: ignore[arg-type] + ) + + @staticmethod + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + timestamp_field: str, + created_timestamp_column: Optional[str] = None, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + ) -> RetrievalJob: + assert isinstance(data_source, MongoDBSource) + warnings.warn( + "MongoDB offline store is in alpha. API may change without notice.", + RuntimeWarning, + ) + return pull_all_from_table_or_query_ibis( + config=config, + data_source=data_source, + join_key_columns=join_key_columns, + feature_name_columns=feature_name_columns, + timestamp_field=timestamp_field, + created_timestamp_column=created_timestamp_column, + start_date=start_date, + end_date=end_date, + data_source_reader=_build_data_source_reader(config), + data_source_writer=_build_data_source_writer(config), # type: ignore[arg-type] + ) + + +def _build_data_source_reader(config: RepoConfig) -> Callable[[DataSource, str], Table]: + """Return a closure that fetches a MongoDB collection as an ibis in-memory table.""" + + def reader(data_source: DataSource, repo_path: str) -> Table: + if MongoClient is None: + raise FeastExtrasDependencyImportError( + "mongodb", "pymongo is not installed." 
+ ) + assert isinstance(data_source, MongoDBSource) + connection_string = config.offline_store.connection_string + db_name = data_source.database or config.offline_store.database + client: Any = MongoClient(connection_string, tz_aware=True) + try: + docs = list(client[db_name][data_source.collection].find({}, {"_id": 0})) + finally: + client.close() + + df = pd.DataFrame(docs) + if df.empty: + return ibis.memtable(df) + + # Ensure datetime-like columns are timezone-aware UTC pandas timestamps. + for col in df.columns: + if pd.api.types.is_datetime64_any_dtype(df[col]): + if df[col].dt.tz is None: + df[col] = pd.to_datetime(df[col], utc=True) + elif df[col].dtype == object and len(df[col].dropna()) > 0: + sample = df[col].dropna().iloc[0] + if isinstance(sample, datetime): + try: + df[col] = pd.to_datetime(df[col], utc=True) + except Exception: + pass + + return ibis.memtable(df) + + return reader + + +def _build_data_source_writer( + config: RepoConfig, +) -> Callable[[Table, DataSource, str, str, bool], None]: + """Return a closure that writes an ibis table to a MongoDB collection.""" + + def writer( + table: Table, + data_source: DataSource, + repo_path: str, + mode: str = "append", + allow_overwrite: bool = False, + ) -> None: + if MongoClient is None: + raise FeastExtrasDependencyImportError( + "mongodb", "pymongo is not installed." + ) + assert isinstance(data_source, MongoDBSource) + connection_string = config.offline_store.connection_string + db_name = data_source.database or config.offline_store.database + location = f"{db_name}.{data_source.collection}" + client: Any = MongoClient(connection_string) + try: + coll = client[db_name][data_source.collection] + if mode == "overwrite": + if not allow_overwrite and coll.estimated_document_count() > 0: + raise SavedDatasetLocationAlreadyExists(location=location) + coll.drop() + records = table.to_pyarrow().to_pylist() + if records: + coll.insert_many(records) + finally: + client.close() + + return writer diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_source.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_source.py new file mode 100644 index 0000000000..825f2910f7 --- /dev/null +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_source.py @@ -0,0 +1,276 @@ +# Copyright 2025 The Feast Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +from datetime import datetime +from typing import Any, Callable, Dict, Iterable, Optional, Tuple + +try: + from pymongo import MongoClient +except ImportError: + MongoClient = None # type: ignore[assignment,misc] + +from feast.data_source import DataSource +from feast.errors import DataSourceNoNameException, FeastExtrasDependencyImportError +from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto +from feast.protos.feast.core.SavedDataset_pb2 import ( + SavedDatasetStorage as SavedDatasetStorageProto, +) +from feast.repo_config import RepoConfig +from feast.saved_dataset import SavedDatasetStorage +from feast.value_type import ValueType + + +def _infer_python_type_str(value: Any) -> Optional[str]: + """Infer a Feast-compatible type string from a Python value returned by pymongo.""" + if value is None: + return None + if isinstance(value, bool): + return "bool" + if isinstance(value, int): + return "int" + if isinstance(value, float): + return "float" + if isinstance(value, str): + return "str" + if isinstance(value, bytes): + return "bytes" + if isinstance(value, datetime): + return "datetime" + if isinstance(value, list): + if not value: + return "list[str]" + elem_type = _infer_python_type_str(value[0]) + if elem_type: + return f"list[{elem_type}]" + return "list[str]" + return None + + +def mongodb_to_feast_value_type(type_str: str) -> ValueType: + """Map a Python type string (from pymongo) to a Feast ValueType.""" + _MAP: Dict[str, ValueType] = { + "str": ValueType.STRING, + "int": ValueType.INT64, + "float": ValueType.DOUBLE, + "bool": ValueType.BOOL, + "bytes": ValueType.BYTES, + "datetime": ValueType.UNIX_TIMESTAMP, + "list[str]": ValueType.STRING_LIST, + "list[int]": ValueType.INT64_LIST, + "list[float]": ValueType.DOUBLE_LIST, + "list[bool]": ValueType.BOOL_LIST, + "list[bytes]": ValueType.BYTES_LIST, + "list[datetime]": ValueType.UNIX_TIMESTAMP_LIST, + } + return _MAP.get(type_str, ValueType.UNKNOWN) + + +class MongoDBOptions: + """Options for a MongoDB data source (database + collection).""" + + def __init__(self, database: str, collection: str): + self._database = database + self._collection = collection + + def to_proto(self) -> DataSourceProto.CustomSourceOptions: + return DataSourceProto.CustomSourceOptions( + configuration=json.dumps( + {"database": self._database, "collection": self._collection} + ).encode() + ) + + @classmethod + def from_proto( + cls, options_proto: DataSourceProto.CustomSourceOptions + ) -> "MongoDBOptions": + config = json.loads(options_proto.configuration.decode("utf8")) + return cls(database=config["database"], collection=config["collection"]) + + +class MongoDBSource(DataSource): + """A MongoDB collection as a Feast offline data source.""" + + def source_type(self) -> DataSourceProto.SourceType.ValueType: + return DataSourceProto.CUSTOM_SOURCE + + def __init__( + self, + name: Optional[str] = None, + database: Optional[str] = None, + collection: Optional[str] = None, + timestamp_field: Optional[str] = "", + created_timestamp_column: Optional[str] = "", + field_mapping: Optional[Dict[str, str]] = None, + description: Optional[str] = "", + tags: Optional[Dict[str, str]] = None, + owner: Optional[str] = "", + schema_sample_size: int = 100, + ): + if name is None and collection is None: + raise DataSourceNoNameException() + name = name or collection + assert name + + self._mongodb_options = MongoDBOptions( + database=database or "", + collection=collection or name, + ) + self._schema_sample_size = 
schema_sample_size + + super().__init__( + name=name, + timestamp_field=timestamp_field, + created_timestamp_column=created_timestamp_column, + field_mapping=field_mapping, + description=description, + tags=tags, + owner=owner, + ) + + def __hash__(self): + return super().__hash__() + + def __eq__(self, other): + if not isinstance(other, MongoDBSource): + raise TypeError( + "Comparisons should only involve MongoDBSource class objects." + ) + return ( + super().__eq__(other) + and self._mongodb_options._database == other._mongodb_options._database + and self._mongodb_options._collection == other._mongodb_options._collection + and self.timestamp_field == other.timestamp_field + and self.created_timestamp_column == other.created_timestamp_column + and self.field_mapping == other.field_mapping + ) + + @property + def database(self) -> str: + return self._mongodb_options._database + + @property + def collection(self) -> str: + return self._mongodb_options._collection + + @staticmethod + def from_proto(data_source: DataSourceProto) -> "MongoDBSource": + assert data_source.HasField("custom_options") + options = json.loads(data_source.custom_options.configuration) + return MongoDBSource( + name=data_source.name, + database=options["database"], + collection=options["collection"], + field_mapping=dict(data_source.field_mapping), + timestamp_field=data_source.timestamp_field, + created_timestamp_column=data_source.created_timestamp_column, + description=data_source.description, + tags=dict(data_source.tags), + owner=data_source.owner, + ) + + def _to_proto_impl(self) -> DataSourceProto: + data_source_proto = DataSourceProto( + name=self.name, + type=DataSourceProto.CUSTOM_SOURCE, + data_source_class_type="feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_source.MongoDBSource", + field_mapping=self.field_mapping, + custom_options=self._mongodb_options.to_proto(), + description=self.description, + tags=self.tags, + owner=self.owner, + ) + data_source_proto.timestamp_field = self.timestamp_field + data_source_proto.created_timestamp_column = self.created_timestamp_column + return data_source_proto + + def validate(self, config: RepoConfig): + pass + + @staticmethod + def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]: + return mongodb_to_feast_value_type + + def get_table_query_string(self) -> str: + return f"{self._mongodb_options._database}.{self._mongodb_options._collection}" + + def get_table_column_names_and_types( + self, config: RepoConfig + ) -> Iterable[Tuple[str, str]]: + if MongoClient is None: + raise FeastExtrasDependencyImportError( + "mongodb", "pymongo is not installed." 
+ ) + connection_string = config.offline_store.connection_string + db_name = self.database or config.offline_store.database + client: Any = MongoClient(connection_string, tz_aware=True) + try: + docs = list( + client[db_name][self.collection].aggregate( + [{"$sample": {"size": self._schema_sample_size}}] + ) + ) + finally: + client.close() + + field_type_counts: Dict[str, Dict[str, int]] = {} + for doc in docs: + for field, value in doc.items(): + if field == "_id": + continue + type_str = _infer_python_type_str(value) + if type_str is None: + continue + field_type_counts.setdefault(field, {}) + field_type_counts[field][type_str] = ( + field_type_counts[field].get(type_str, 0) + 1 + ) + + return [ + (field, max(counts, key=lambda t: counts[t])) + for field, counts in field_type_counts.items() + ] + + +class SavedDatasetMongoDBStorage(SavedDatasetStorage): + """Persists a Feast SavedDataset into a MongoDB collection.""" + + _proto_attr_name = "custom_storage" + + mongodb_options: MongoDBOptions + + def __init__(self, database: str, collection: str): + self.mongodb_options = MongoDBOptions( + database=database, + collection=collection, + ) + + @staticmethod + def from_proto( + storage_proto: SavedDatasetStorageProto, + ) -> "SavedDatasetMongoDBStorage": + options = json.loads(storage_proto.custom_storage.configuration) + return SavedDatasetMongoDBStorage( + database=options["database"], + collection=options["collection"], + ) + + def to_proto(self) -> SavedDatasetStorageProto: + return SavedDatasetStorageProto(custom_storage=self.mongodb_options.to_proto()) + + def to_data_source(self) -> DataSource: + return MongoDBSource( + database=self.mongodb_options._database, + collection=self.mongodb_options._collection, + ) From 8b7f7105b679dfb928026982f188524b274f2823 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Wed, 4 Mar 2026 12:35:19 -0500 Subject: [PATCH 02/30] refactor: improve MongoDB offline store code quality - Update copyright headers to 2026 - Move mongodb_to_feast_value_type to feast/type_map.py, consistent with pg_type_to_feast_value_type and cb_columnar_type_to_feast_value_type - Add docstrings to MongoDBOptions.to_proto/from_proto, MongoDBSource class, and get_table_column_names_and_types - Replace dead 'assert name' with cast(str, ...) 
for type-checker safety - Add explanatory comment to validate() stub - Remove module-level warnings.simplefilter('once', RuntimeWarning), which was a process-wide side effect; per-call warnings.warn is enough - Convert all assert isinstance(data_source, MongoDBSource) guards to ValueError with descriptive messages in both public API methods and the reader/writer closures - Fix bug: add tz_aware=True to MongoClient in the writer closure, matching the reader, to ensure consistent timezone-aware datetime handling across read and write paths Signed-off-by: Casey Clements --- .../contrib/mongodb_offline_store/mongodb.py | 31 ++++++++--- .../mongodb_offline_store/mongodb_source.py | 55 +++++++++++-------- sdk/python/feast/type_map.py | 24 ++++++++ 3 files changed, 77 insertions(+), 33 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py index 482f2cdc88..23b1295286 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py @@ -1,4 +1,4 @@ -# Copyright 2025 The Feast Authors +# Copyright 2026 The Feast Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -44,9 +44,6 @@ from feast.infra.registry.base_registry import BaseRegistry from feast.repo_config import FeastConfigBaseModel, RepoConfig -# Print RuntimeWarning only once per process. -warnings.simplefilter("once", RuntimeWarning) - class MongoDBOfflineStoreConfig(FeastConfigBaseModel): """Configuration for the MongoDB offline store.""" @@ -75,7 +72,11 @@ def pull_latest_from_table_or_query( start_date: datetime, end_date: datetime, ) -> RetrievalJob: - assert isinstance(data_source, MongoDBSource) + if not isinstance(data_source, MongoDBSource): + raise ValueError( + f"MongoDBOfflineStore expected a MongoDBSource, " + f"got {type(data_source).__name__!r}." + ) warnings.warn( "MongoDB offline store is in alpha. API may change without notice.", RuntimeWarning, @@ -130,7 +131,11 @@ def pull_all_from_table_or_query( start_date: Optional[datetime] = None, end_date: Optional[datetime] = None, ) -> RetrievalJob: - assert isinstance(data_source, MongoDBSource) + if not isinstance(data_source, MongoDBSource): + raise ValueError( + f"MongoDBOfflineStore expected a MongoDBSource, " + f"got {type(data_source).__name__!r}." + ) warnings.warn( "MongoDB offline store is in alpha. API may change without notice.", RuntimeWarning, @@ -157,7 +162,11 @@ def reader(data_source: DataSource, repo_path: str) -> Table: raise FeastExtrasDependencyImportError( "mongodb", "pymongo is not installed." ) - assert isinstance(data_source, MongoDBSource) + if not isinstance(data_source, MongoDBSource): + raise ValueError( + f"MongoDBOfflineStore reader expected a MongoDBSource, " + f"got {type(data_source).__name__!r}." + ) connection_string = config.offline_store.connection_string db_name = data_source.database or config.offline_store.database client: Any = MongoClient(connection_string, tz_aware=True) @@ -204,11 +213,15 @@ def writer( raise FeastExtrasDependencyImportError( "mongodb", "pymongo is not installed." ) - assert isinstance(data_source, MongoDBSource) + if not isinstance(data_source, MongoDBSource): + raise ValueError( + f"MongoDBOfflineStore writer expected a MongoDBSource, " + f"got {type(data_source).__name__!r}." 
+ ) connection_string = config.offline_store.connection_string db_name = data_source.database or config.offline_store.database location = f"{db_name}.{data_source.collection}" - client: Any = MongoClient(connection_string) + client: Any = MongoClient(connection_string, tz_aware=True) try: coll = client[db_name][data_source.collection] if mode == "overwrite": diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_source.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_source.py index 825f2910f7..ee55fe24e6 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_source.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_source.py @@ -1,4 +1,4 @@ -# Copyright 2025 The Feast Authors +# Copyright 2026 The Feast Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ import json from datetime import datetime -from typing import Any, Callable, Dict, Iterable, Optional, Tuple +from typing import Any, Callable, Dict, Iterable, Optional, Tuple, cast try: from pymongo import MongoClient @@ -29,6 +29,7 @@ ) from feast.repo_config import RepoConfig from feast.saved_dataset import SavedDatasetStorage +from feast.type_map import mongodb_to_feast_value_type from feast.value_type import ValueType @@ -58,25 +59,6 @@ def _infer_python_type_str(value: Any) -> Optional[str]: return None -def mongodb_to_feast_value_type(type_str: str) -> ValueType: - """Map a Python type string (from pymongo) to a Feast ValueType.""" - _MAP: Dict[str, ValueType] = { - "str": ValueType.STRING, - "int": ValueType.INT64, - "float": ValueType.DOUBLE, - "bool": ValueType.BOOL, - "bytes": ValueType.BYTES, - "datetime": ValueType.UNIX_TIMESTAMP, - "list[str]": ValueType.STRING_LIST, - "list[int]": ValueType.INT64_LIST, - "list[float]": ValueType.DOUBLE_LIST, - "list[bool]": ValueType.BOOL_LIST, - "list[bytes]": ValueType.BYTES_LIST, - "list[datetime]": ValueType.UNIX_TIMESTAMP_LIST, - } - return _MAP.get(type_str, ValueType.UNKNOWN) - - class MongoDBOptions: """Options for a MongoDB data source (database + collection).""" @@ -85,6 +67,7 @@ def __init__(self, database: str, collection: str): self._collection = collection def to_proto(self) -> DataSourceProto.CustomSourceOptions: + """Serialize database and collection names as JSON into a CustomSourceOptions proto.""" return DataSourceProto.CustomSourceOptions( configuration=json.dumps( {"database": self._database, "collection": self._collection} @@ -95,12 +78,28 @@ def to_proto(self) -> DataSourceProto.CustomSourceOptions: def from_proto( cls, options_proto: DataSourceProto.CustomSourceOptions ) -> "MongoDBOptions": + """Deserialize a CustomSourceOptions proto back into a MongoDBOptions instance.""" config = json.loads(options_proto.configuration.decode("utf8")) return cls(database=config["database"], collection=config["collection"]) class MongoDBSource(DataSource): - """A MongoDB collection as a Feast offline data source.""" + """A MongoDB collection used as a Feast offline data source. + + ``name`` is the logical Feast name for this source. If omitted, it defaults + to the value of ``collection``. At least one of ``name`` or ``collection`` + must be supplied. + + ``database`` is the MongoDB database that contains the collection. 
When + omitted it falls back to ``MongoDBOfflineStoreConfig.database`` at query + time, so a single store-level default can be shared across many sources. + + ``schema_sample_size`` controls how many documents are randomly sampled + when Feast infers the collection schema (used by ``feast apply`` and + ``get_table_column_names_and_types``). Increase it for collections with + highly variable document shapes; decrease it to speed up ``feast apply`` + at the cost of schema coverage. + """ def source_type(self) -> DataSourceProto.SourceType.ValueType: return DataSourceProto.CUSTOM_SOURCE @@ -120,8 +119,8 @@ def __init__( ): if name is None and collection is None: raise DataSourceNoNameException() - name = name or collection - assert name + # At least one of name / collection is non-None; cast to satisfy the type checker. + name = cast(str, name or collection) self._mongodb_options = MongoDBOptions( database=database or "", @@ -196,6 +195,8 @@ def _to_proto_impl(self) -> DataSourceProto: return data_source_proto def validate(self, config: RepoConfig): + # No upfront schema validation is required for MongoDB; the connection + # is exercised lazily when features are actually retrieved. pass @staticmethod @@ -208,6 +209,12 @@ def get_table_query_string(self) -> str: def get_table_column_names_and_types( self, config: RepoConfig ) -> Iterable[Tuple[str, str]]: + """Sample documents from the collection to infer field names and their Feast type strings. + + Uses ``$sample`` to fetch up to ``schema_sample_size`` documents, then + picks the most-frequent Python type observed per field. The ``_id`` + field is always excluded. + """ if MongoClient is None: raise FeastExtrasDependencyImportError( "mongodb", "pymongo is not installed." diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 5e77f532c9..b383963c0c 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -1762,6 +1762,30 @@ def cb_columnar_type_to_feast_value_type(type_str: str) -> ValueType: return value +def mongodb_to_feast_value_type(type_str: str) -> ValueType: + """Map a Python type string (as inferred from pymongo documents) to a Feast ValueType. + + The type strings are produced by + ``feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_source._infer_python_type_str``. + Unrecognised strings are mapped to ``ValueType.UNKNOWN``. + """ + type_map: Dict[str, ValueType] = { + "str": ValueType.STRING, + "int": ValueType.INT64, + "float": ValueType.DOUBLE, + "bool": ValueType.BOOL, + "bytes": ValueType.BYTES, + "datetime": ValueType.UNIX_TIMESTAMP, + "list[str]": ValueType.STRING_LIST, + "list[int]": ValueType.INT64_LIST, + "list[float]": ValueType.DOUBLE_LIST, + "list[bool]": ValueType.BOOL_LIST, + "list[bytes]": ValueType.BYTES_LIST, + "list[datetime]": ValueType.UNIX_TIMESTAMP_LIST, + } + return type_map.get(type_str, ValueType.UNKNOWN) + + def convert_scalar_column( series: pd.Series, value_type: ValueType, target_pandas_type: str ) -> pd.Series: From 62695aa3be52916e09aa98192e9cba19475af5fa Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Mon, 9 Mar 2026 10:13:47 -0400 Subject: [PATCH 03/30] Started work on full Mongo/MQL implementation. 
Kept MongoDBOfflineStoreIbis and MongoDBOfflineStoreNative Signed-off-by: Casey Clements --- .../contrib/mongodb_offline_store/mongodb.py | 421 +++++++++++++++++- 1 file changed, 413 insertions(+), 8 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py index 23b1295286..89794e3ba8 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py @@ -13,11 +13,12 @@ # limitations under the License. import warnings -from datetime import datetime -from typing import Any, Callable, List, Optional, Union +from datetime import datetime, timezone +from typing import Any, Callable, Dict, List, Optional, Union import ibis import pandas as pd +import pyarrow from ibis.expr.types import Table from pydantic import StrictStr @@ -40,15 +41,23 @@ pull_all_from_table_or_query_ibis, pull_latest_from_table_or_query_ibis, ) -from feast.infra.offline_stores.offline_store import OfflineStore, RetrievalJob +from feast.infra.offline_stores.offline_store import ( + OfflineStore, + RetrievalJob, + RetrievalMetadata, +) +from feast.infra.offline_stores.offline_utils import ( + infer_event_timestamp_from_entity_df, +) from feast.infra.registry.base_registry import BaseRegistry from feast.repo_config import FeastConfigBaseModel, RepoConfig +from feast.saved_dataset import SavedDatasetStorage -class MongoDBOfflineStoreConfig(FeastConfigBaseModel): - """Configuration for the MongoDB offline store.""" +class MongoDBOfflineStoreIbisConfig(FeastConfigBaseModel): + """Configuration for the MongoDB Ibis-backed offline store.""" - type: StrictStr = "feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb.MongoDBOfflineStore" + type: StrictStr = "feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb.MongoDBOfflineStoreIbis" """Offline store type selector""" connection_string: StrictStr = "mongodb://localhost:27017" @@ -58,8 +67,8 @@ class MongoDBOfflineStoreConfig(FeastConfigBaseModel): """Default MongoDB database name""" -class MongoDBOfflineStore(OfflineStore): - """Offline store backed by MongoDB, using ibis for point-in-time joins.""" +class MongoDBOfflineStoreIbis(OfflineStore): + """Offline store backed by MongoDB, using Ibis for point-in-time joins.""" @staticmethod def pull_latest_from_table_or_query( @@ -235,3 +244,399 @@ def writer( client.close() return writer + + +# --------------------------------------------------------------------------- +# Native MQL implementation +# --------------------------------------------------------------------------- + + +class MongoDBOfflineStoreNativeConfig(FeastConfigBaseModel): + """Configuration for the MongoDB native-MQL offline store.""" + + type: StrictStr = "feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb.MongoDBOfflineStoreNative" + """Offline store type selector""" + + connection_string: StrictStr = "mongodb://localhost:27017" + """MongoDB connection URI""" + + database: StrictStr = "feast" + """Default MongoDB database name""" + + +def _fetch_collection_as_arrow( + connection_string: str, + db_name: str, + collection: str, + pipeline: Optional[List[Dict]] = None, +) -> pyarrow.Table: + """Run an aggregation pipeline (or full scan) via PyMongo and return a pyarrow Table. + + If *pipeline* is None the entire collection is scanned (``_id`` excluded). 
+ The ``_id`` field is stripped from every result document before conversion. + """ + if MongoClient is None: + raise FeastExtrasDependencyImportError("mongodb", "pymongo is not installed.") + client: Any = MongoClient(connection_string, tz_aware=True) + try: + if pipeline is not None: + docs = list(client[db_name][collection].aggregate(pipeline)) + else: + docs = list(client[db_name][collection].find({}, {"_id": 0})) + finally: + client.close() + + if not docs: + return pyarrow.table({}) + + for doc in docs: + doc.pop("_id", None) + + return pyarrow.Table.from_pylist(docs) + + +class MongoDBNativeRetrievalJob(RetrievalJob): + """A RetrievalJob whose results come from a lazy PyMongo query callable. + + The callable is only executed when the caller materialises the job (e.g. + ``to_df()``, ``to_arrow()``, ``persist()``). + """ + + def __init__( + self, + query_fn: Callable[[], pyarrow.Table], + full_feature_names: bool, + on_demand_feature_views: List, + metadata: Optional[RetrievalMetadata], + config: RepoConfig, + ) -> None: + super().__init__() + self._query_fn = query_fn + self._full_feature_names = full_feature_names + self._on_demand_feature_views = on_demand_feature_views or [] + self._metadata = metadata + self._config = config + + def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: + return self._query_fn() + + def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: + return self._to_arrow_internal().to_pandas() + + @property + def full_feature_names(self) -> bool: + return self._full_feature_names + + @property + def on_demand_feature_views(self) -> List: + return self._on_demand_feature_views + + @property + def metadata(self) -> Optional[RetrievalMetadata]: + return self._metadata + + def persist( + self, + storage: SavedDatasetStorage, + allow_overwrite: bool = False, + timeout: Optional[int] = None, + ) -> None: + if MongoClient is None: + raise FeastExtrasDependencyImportError( + "mongodb", "pymongo is not installed." + ) + data_source = storage.to_data_source() + if not isinstance(data_source, MongoDBSource): + raise ValueError( + f"MongoDBNativeRetrievalJob.persist expected a MongoDBSource storage, " + f"got {type(data_source).__name__!r}." + ) + table = self._to_arrow_internal() + connection_string = self._config.offline_store.connection_string + db_name = data_source.database or self._config.offline_store.database + location = f"{db_name}.{data_source.collection}" + client: Any = MongoClient(connection_string, tz_aware=True) + try: + coll = client[db_name][data_source.collection] + if not allow_overwrite and coll.estimated_document_count() > 0: + raise SavedDatasetLocationAlreadyExists(location=location) + coll.drop() + records = table.to_pylist() + if records: + coll.insert_many(records) + finally: + client.close() + + +class MongoDBOfflineStoreNative(OfflineStore): + """Offline store backed by MongoDB using native MQL aggregation pipelines. + + Compared with :class:`MongoDBOfflineStoreIbis`, this implementation avoids + the Ibis dependency entirely. 
The four main workflows map to:
+
+    * ``offline_write_batch`` – Arrow → ``insert_many``
+    * ``pull_latest_from_table_or_query`` – ``$match`` → ``$sort`` → ``$group``
+    * ``pull_all_from_table_or_query`` – ``$match`` → ``$project``
+    * ``get_historical_features`` – per-collection fetch + ``merge_asof``
+    """
+
+    @staticmethod
+    def offline_write_batch(
+        config: RepoConfig,
+        feature_view: FeatureView,
+        table: pyarrow.Table,
+        progress: Optional[Callable[[int], Any]],
+    ) -> None:
+        if MongoClient is None:
+            raise FeastExtrasDependencyImportError(
+                "mongodb", "pymongo is not installed."
+            )
+        data_source = feature_view.batch_source
+        if not isinstance(data_source, MongoDBSource):
+            raise ValueError(
+                f"MongoDBOfflineStoreNative.offline_write_batch expected a MongoDBSource, "
+                f"got {type(data_source).__name__!r}."
+            )
+        connection_string = config.offline_store.connection_string
+        db_name = data_source.database or config.offline_store.database
+        records = table.to_pylist()
+        client: Any = MongoClient(connection_string, tz_aware=True)
+        try:
+            coll = client[db_name][data_source.collection]
+            if records:
+                coll.insert_many(records)
+            if progress:
+                progress(len(records))
+        finally:
+            client.close()
+
+    @staticmethod
+    def pull_latest_from_table_or_query(
+        config: RepoConfig,
+        data_source: DataSource,
+        join_key_columns: List[str],
+        feature_name_columns: List[str],
+        timestamp_field: str,
+        created_timestamp_column: Optional[str],
+        start_date: datetime,
+        end_date: datetime,
+    ) -> RetrievalJob:
+        if not isinstance(data_source, MongoDBSource):
+            raise ValueError(
+                f"MongoDBOfflineStoreNative expected a MongoDBSource, "
+                f"got {type(data_source).__name__!r}."
+            )
+        warnings.warn(
+            "MongoDB offline store (native) is in alpha. API may change without notice.",
+            RuntimeWarning,
+        )
+        start_utc = start_date.astimezone(tz=timezone.utc)
+        end_utc = end_date.astimezone(tz=timezone.utc)
+        connection_string = config.offline_store.connection_string
+        db_name = data_source.database or config.offline_store.database
+        collection = data_source.collection
+
+        sort_spec: Dict = {timestamp_field: -1}
+        if created_timestamp_column:
+            sort_spec[created_timestamp_column] = -1
+
+        group_id = {k: f"${k}" for k in join_key_columns}
+        group_stage: Dict = {
+            "_id": group_id, # todo this isn't correct.
or i don't follow + **{f: {"$first": f"${f}"} for f in feature_name_columns}, + timestamp_field: {"$first": f"${timestamp_field}"}, + } + if created_timestamp_column: + group_stage[created_timestamp_column] = { + "$first": f"${created_timestamp_column}" + } + + project_stage: Dict = { + "_id": 0, + **{k: f"$_id.{k}" for k in join_key_columns}, # todo here too + **{f: 1 for f in feature_name_columns}, + timestamp_field: 1, + } + if created_timestamp_column: + project_stage[created_timestamp_column] = 1 + + pipeline = [ + {"$match": {timestamp_field: {"$gte": start_utc, "$lte": end_utc}}}, + {"$sort": sort_spec}, + {"$group": group_stage}, + {"$project": project_stage}, + ] + + def _run() -> pyarrow.Table: + return _fetch_collection_as_arrow( + connection_string, db_name, collection, pipeline + ) + + return MongoDBNativeRetrievalJob( + query_fn=_run, + full_feature_names=False, + on_demand_feature_views=[], + metadata=None, + config=config, + ) + + @staticmethod + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + timestamp_field: str, + created_timestamp_column: Optional[str] = None, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + ) -> RetrievalJob: + if not isinstance(data_source, MongoDBSource): + raise ValueError( + f"MongoDBOfflineStoreNative expected a MongoDBSource, " + f"got {type(data_source).__name__!r}." + ) + warnings.warn( + "MongoDB offline store (native) is in alpha. API may change without notice.", + RuntimeWarning, + ) + connection_string = config.offline_store.connection_string + db_name = data_source.database or config.offline_store.database + collection = data_source.collection + + fields = join_key_columns + feature_name_columns + [timestamp_field] + if created_timestamp_column: + fields.append(created_timestamp_column) + + match_filter: Dict = {} + if start_date or end_date: + ts_filter: Dict = {} + if start_date: + ts_filter["$gte"] = start_date.astimezone(tz=timezone.utc) + if end_date: + ts_filter["$lte"] = end_date.astimezone(tz=timezone.utc) + match_filter[timestamp_field] = ts_filter + + pipeline = [ + {"$match": match_filter}, + {"$project": {"_id": 0, **{f: 1 for f in fields}}}, + ] + + def _run() -> pyarrow.Table: + return _fetch_collection_as_arrow( + connection_string, db_name, collection, pipeline + ) + + return MongoDBNativeRetrievalJob( + query_fn=_run, + full_feature_names=False, + on_demand_feature_views=[], + metadata=None, + config=config, + ) + + @staticmethod + def get_historical_features( + config: RepoConfig, + feature_views: List[FeatureView], + feature_refs: List[str], + entity_df: Union[pd.DataFrame, str], + registry: BaseRegistry, + project: str, + full_feature_names: bool = False, + ) -> RetrievalJob: + if isinstance(entity_df, str): + raise ValueError( + "MongoDBOfflineStoreNative does not support SQL entity_df strings. " + "Pass a pandas DataFrame instead." + ) + warnings.warn( + "MongoDB offline store (native) is in alpha. 
API may change without notice.", # todo change wording: alpha -> preview + RuntimeWarning, + ) + connection_string = config.offline_store.connection_string + default_db = config.offline_store.database + + entity_schema = dict(zip(entity_df.columns, entity_df.dtypes)) + event_timestamp_col = infer_event_timestamp_from_entity_df(entity_schema) + + # Map "feature_view:feature" refs → {fv_name: [feature, ...]} + fv_to_features: Dict[str, List[str]] = {} + for ref in feature_refs: + fv_name, feat_name = ref.split(":", 1) + fv_to_features.setdefault(fv_name, []).append(feat_name) + + fv_by_name = {fv.name: fv for fv in feature_views} + + def _run() -> pyarrow.Table: + result = entity_df.copy() + # Ensure the entity timestamp is tz-aware UTC for merge_asof + if result[event_timestamp_col].dt.tz is None: + result[event_timestamp_col] = pd.to_datetime( + result[event_timestamp_col], utc=True + ) + result = result.sort_values(event_timestamp_col) + + for fv_name, features in fv_to_features.items(): + fv = fv_by_name[fv_name] + source = fv.batch_source + if not isinstance(source, MongoDBSource): + raise ValueError( + f"MongoDBOfflineStoreNative: feature view {fv_name!r} has " + f"a non-MongoDBSource batch source ({type(source).__name__!r})." + ) + db_name = source.database or default_db + ts_field = source.timestamp_field + join_keys = [e.name for e in fv.entity_columns] + + arrow_table = _fetch_collection_as_arrow( + connection_string, db_name, source.collection + ) + if arrow_table.num_rows == 0: + for f in features: + col = f"{fv_name}__{f}" if full_feature_names else f + result[col] = None + continue + + feature_df = arrow_table.to_pandas() + # Ensure tz-aware UTC + if feature_df[ts_field].dt.tz is None: + feature_df[ts_field] = pd.to_datetime( + feature_df[ts_field], utc=True + ) + feature_df = feature_df.sort_values(ts_field) + + col_rename = { + f: (f"{fv_name}__{f}" if full_feature_names else f) + for f in features + } + cols_to_select = join_keys + features + [ts_field] + feature_df = feature_df[cols_to_select].rename(columns=col_rename) + out_features = list(col_rename.values()) + + merged = pd.merge_asof( + result, + feature_df, + left_on=event_timestamp_col, + right_on=ts_field, + by=join_keys, + direction="backward", + ) + # Apply TTL: null out features whose timestamp is too far in the past + if fv.ttl: + cutoff = merged[event_timestamp_col] - fv.ttl + too_old = merged[ts_field] < cutoff + for col in out_features: + merged.loc[too_old, col] = None + + result = merged.drop(columns=[ts_field], errors="ignore") + + return pyarrow.Table.from_pandas(result, preserve_index=False) + + return MongoDBNativeRetrievalJob( + query_fn=_run, + full_feature_names=full_feature_names, + on_demand_feature_views=[], + metadata=None, + config=config, + ) From 812d03d4583e56b745d53ddd5263a1ff12930ee9 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Tue, 17 Mar 2026 11:14:25 -0400 Subject: [PATCH 04/30] refactor: rename alpha to preview, clarify MQL pipeline comments Signed-off-by: Casey Clements --- .../contrib/mongodb_offline_store/mongodb.py | 36 +++++++++++-------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py index 89794e3ba8..ee37b11c41 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py @@ -87,7 +87,7 
@@ def pull_latest_from_table_or_query( f"got {type(data_source).__name__!r}." ) warnings.warn( - "MongoDB offline store is in alpha. API may change without notice.", + "MongoDB offline store is in preview. API may change without notice.", RuntimeWarning, ) return pull_latest_from_table_or_query_ibis( @@ -114,7 +114,7 @@ def get_historical_features( full_feature_names: bool = False, ) -> RetrievalJob: warnings.warn( - "MongoDB offline store is in alpha. API may change without notice.", + "MongoDB offline store is in preview. API may change without notice.", RuntimeWarning, ) return get_historical_features_ibis( @@ -146,7 +146,7 @@ def pull_all_from_table_or_query( f"got {type(data_source).__name__!r}." ) warnings.warn( - "MongoDB offline store is in alpha. API may change without notice.", + "MongoDB offline store is in preview. API may change without notice.", RuntimeWarning, ) return pull_all_from_table_or_query_ibis( @@ -178,7 +178,7 @@ def reader(data_source: DataSource, repo_path: str) -> Table: ) connection_string = config.offline_store.connection_string db_name = data_source.database or config.offline_store.database - client: Any = MongoClient(connection_string, tz_aware=True) + client: Any = MongoClient(connection_string) try: docs = list(client[db_name][data_source.collection].find({}, {"_id": 0})) finally: @@ -188,17 +188,17 @@ def reader(data_source: DataSource, repo_path: str) -> Table: if df.empty: return ibis.memtable(df) - # Ensure datetime-like columns are timezone-aware UTC pandas timestamps. + # Localize naive datetime columns to UTC. MongoDB stores all dates as UTC, + # and with tz_aware=False (default), pymongo returns naive datetime objects. + # We convert them to timezone-aware UTC timestamps for pyarrow compatibility. for col in df.columns: - if pd.api.types.is_datetime64_any_dtype(df[col]): - if df[col].dt.tz is None: - df[col] = pd.to_datetime(df[col], utc=True) - elif df[col].dtype == object and len(df[col].dropna()) > 0: + if df[col].dtype == object and len(df[col].dropna()) > 0: sample = df[col].dropna().iloc[0] if isinstance(sample, datetime): try: df[col] = pd.to_datetime(df[col], utc=True) - except Exception: + except (ValueError, TypeError): + # Skip columns that can't be converted (e.g., mixed types) pass return ibis.memtable(df) @@ -427,7 +427,7 @@ def pull_latest_from_table_or_query( f"got {type(data_source).__name__!r}." ) warnings.warn( - "MongoDB offline store (native) is in alpha. API may change without notice.", + "MongoDB offline store (native) is in preview. API may change without notice.", RuntimeWarning, ) start_utc = start_date.astimezone(tz=timezone.utc) @@ -436,13 +436,17 @@ def pull_latest_from_table_or_query( db_name = data_source.database or config.offline_store.database collection = data_source.collection + # Sort by timestamp descending so $first in $group gets the latest document sort_spec: Dict = {timestamp_field: -1} if created_timestamp_column: sort_spec[created_timestamp_column] = -1 + # Group by entity/join keys. _id becomes a subdocument like {driver_id: 1}. + # $first grabs values from the first document in each group (the latest, + # due to prior $sort). group_id = {k: f"${k}" for k in join_key_columns} group_stage: Dict = { - "_id": group_id, # todo this isn't correct. 
or i don't follow
+            "_id": group_id,
             **{f: {"$first": f"${f}"} for f in feature_name_columns},
             timestamp_field: {"$first": f"${timestamp_field}"},
         }
@@ -451,9 +455,11 @@ def pull_latest_from_table_or_query(
                 "$first": f"${created_timestamp_column}"
             }
 
+        # Project to flatten the output: extract join keys from _id subdocument,
+        # include feature columns directly. Excludes the _id field from output.
         project_stage: Dict = {
             "_id": 0,
-            **{k: f"$_id.{k}" for k in join_key_columns}, # todo here too
+            **{k: f"$_id.{k}" for k in join_key_columns},
             **{f: 1 for f in feature_name_columns},
             timestamp_field: 1,
         }
@@ -497,7 +503,7 @@ def pull_all_from_table_or_query(
             f"got {type(data_source).__name__!r}."
         )
         warnings.warn(
-            "MongoDB offline store (native) is in alpha. API may change without notice.",
+            "MongoDB offline store (native) is in preview. API may change without notice.",
             RuntimeWarning,
         )
         connection_string = config.offline_store.connection_string
@@ -551,7 +557,7 @@ def get_historical_features(
             "Pass a pandas DataFrame instead."
         )
         warnings.warn(
-            "MongoDB offline store (native) is in alpha. API may change without notice.", # todo change wording: alpha -> preview
+            "MongoDB offline store (native) is in preview. API may change without notice.",
             RuntimeWarning,
        )
         connection_string = config.offline_store.connection_string
         default_db = config.offline_store.database

From c3401ea2524cee785d9dd96a070ea539e0816726 Mon Sep 17 00:00:00 2001
From: Casey Clements
Date: Tue, 17 Mar 2026 11:27:44 -0400
Subject: [PATCH 05/30] Added unit tests for offline store retrieval,
 requiring docker and pymongo, skipping when unavailable.

Signed-off-by: Casey Clements
---
 .../contrib/test_mongodb_offline_retrieval.py | 388 ++++++++++++++++++
 1 file changed, 388 insertions(+)
 create mode 100644 sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py

diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py b/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py
new file mode 100644
index 0000000000..cd83e33c0d
--- /dev/null
+++ b/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py
@@ -0,0 +1,388 @@
+"""
+Unit tests for MongoDB offline store (Ibis-based implementation).
+
+Docker-dependent tests are marked with ``@_requires_docker`` and are skipped when
+Docker is unavailable.
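+
+For example, assuming a standard pytest setup at the repo root, this module can
+be run on its own with::
+
+    pytest sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py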
+""" + +from datetime import datetime, timedelta +from typing import Generator +from unittest.mock import MagicMock + +import pandas as pd +import pytest +import pytz + +pytest.importorskip("pymongo") + +from pymongo import MongoClient +from testcontainers.mongodb import MongoDbContainer + +from feast import Entity, FeatureView, Field +from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb import ( + MongoDBOfflineStoreIbis, + MongoDBOfflineStoreIbisConfig, +) +from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_source import ( + MongoDBSource, +) +from feast.repo_config import RepoConfig +from feast.types import Float64, Int64 +from feast.value_type import ValueType + +# Check if Docker is available +docker_available = False +try: + import docker + + try: + client = docker.from_env() + client.ping() + docker_available = True + except Exception: + pass +except ImportError: + pass + +_requires_docker = pytest.mark.skipif( + not docker_available, + reason="Docker is not available or not running.", +) + + +@pytest.fixture(scope="module") +def mongodb_container() -> Generator[MongoDbContainer, None, None]: + """Start a MongoDB container for testing.""" + container = MongoDbContainer( + "mongo:latest", + username="test", + password="test", # pragma: allowlist secret + ).with_exposed_ports(27017) + container.start() + yield container + container.stop() + + +@pytest.fixture +def mongodb_connection_string(mongodb_container: MongoDbContainer) -> str: + """Get MongoDB connection string from the container.""" + exposed_port = mongodb_container.get_exposed_port(27017) + return f"mongodb://test:test@localhost:{exposed_port}" # pragma: allowlist secret + + +@pytest.fixture +def repo_config(mongodb_connection_string: str) -> RepoConfig: + """Create a RepoConfig with MongoDB offline store.""" + return RepoConfig( + project="test_project", + registry="memory://", + provider="local", + offline_store=MongoDBOfflineStoreIbisConfig( + connection_string=mongodb_connection_string, + database="feast_test", + ), + online_store={"type": "sqlite"}, + entity_key_serialization_version=3, + ) + + +@pytest.fixture +def sample_data(mongodb_connection_string: str) -> datetime: + """Insert sample driver stats data into MongoDB. + + Returns the 'now' timestamp used as the latest event_timestamp. + + Note: The collection name 'driver_stats' is defined in the MongoDBSource + (see driver_source fixture), not in the RepoConfig. RepoConfig provides + connection_string and database; the source defines the collection. 
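+
+    A minimal sketch of how the two halves combine (names below are taken
+    from the fixtures in this module)::
+
+        MongoDBOfflineStoreIbisConfig(connection_string=..., database="feast_test")
+        MongoDBSource(name="driver_stats", collection="driver_stats", ...)
+        # together these read from / write to feast_test.driver_stats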
+ """ + client: MongoClient = MongoClient(mongodb_connection_string) + db = client["feast_test"] + collection = db["driver_stats"] + collection.drop() + + now = datetime.now(tz=pytz.UTC) + docs = [ + { + "driver_id": 1, + "conv_rate": 0.5, + "acc_rate": 0.9, + "event_timestamp": now - timedelta(hours=2), + }, + { + "driver_id": 1, + "conv_rate": 0.6, + "acc_rate": 0.85, + "event_timestamp": now - timedelta(hours=1), + }, + {"driver_id": 1, "conv_rate": 0.7, "acc_rate": 0.8, "event_timestamp": now}, + { + "driver_id": 2, + "conv_rate": 0.3, + "acc_rate": 0.95, + "event_timestamp": now - timedelta(hours=2), + }, + # Driver 2 has no "now" timestamp - only data from 2 hours ago + # This tests that pull_latest correctly handles entities with different latest timestamps + ] + collection.insert_many(docs) + client.close() + return now + + +@pytest.fixture +def driver_source() -> MongoDBSource: + """Create a MongoDBSource for driver stats.""" + return MongoDBSource( + name="driver_stats", + database="feast_test", + collection="driver_stats", + timestamp_field="event_timestamp", + ) + + +@pytest.fixture +def driver_fv(driver_source: MongoDBSource) -> FeatureView: + """Create a FeatureView for driver stats. + + The ttl (time-to-live) parameter defines how far back in time Feast will look + for feature values during point-in-time joins. If a feature's event_timestamp + is older than (entity_timestamp - ttl), that feature value is considered stale + and will be returned as NULL. + + This is different from MongoDB TTL indexes which automatically delete documents + after a period of time. Feast TTL is a query-time filter, not a storage policy. + """ + driver_entity = Entity( + name="driver_id", join_keys=["driver_id"], value_type=ValueType.INT64 + ) + return FeatureView( + name="driver_stats", + entities=[driver_entity], + schema=[ + # Include entity column in schema so entity_columns is populated + Field(name="driver_id", dtype=Int64), + Field(name="conv_rate", dtype=Float64), + Field(name="acc_rate", dtype=Float64), + ], + source=driver_source, + ttl=timedelta(days=1), + ) + + +@_requires_docker +def test_pull_latest_from_table_or_query( + repo_config: RepoConfig, sample_data: datetime, driver_source: MongoDBSource +) -> None: + """Test pulling latest features per entity from MongoDB. + + This test verifies that pull_latest returns only the most recent feature + values for each entity (driver_id), even when entities have different + latest timestamps. Driver 1 has data at now, but driver 2's latest data + is from 2 hours ago. 
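+
+    A sketch of the expected output, derived from the sample_data fixture
+    (one row per driver; column order is not guaranteed)::
+
+        driver_id  conv_rate  acc_rate  event_timestamp
+        1          0.7        0.8       now
+        2          0.3        0.95      now - 2h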
+ """ + now = sample_data + job = MongoDBOfflineStoreIbis.pull_latest_from_table_or_query( + config=repo_config, + data_source=driver_source, + join_key_columns=["driver_id"], + feature_name_columns=["conv_rate", "acc_rate"], + timestamp_field="event_timestamp", + created_timestamp_column=None, + start_date=now - timedelta(days=1), + end_date=now + timedelta(hours=1), + ) + + df = job.to_df() + + # Validate DataFrame structure + assert isinstance(df, pd.DataFrame) + assert set(df.columns) == {"driver_id", "conv_rate", "acc_rate", "event_timestamp"} + assert len(df) == 2 # Two unique drivers + + # Extract rows for each driver + driver1_rows = df[df["driver_id"] == 1] + driver2_rows = df[df["driver_id"] == 2] + + # Each driver should have exactly one row (the latest) + assert len(driver1_rows) == 1 + assert len(driver2_rows) == 1 + + driver1 = driver1_rows.iloc[0] + driver2 = driver2_rows.iloc[0] + + # Validate types + assert isinstance(driver1["conv_rate"], float) + assert isinstance(driver1["acc_rate"], float) + + # Driver 1's latest values (from "now") + assert driver1["conv_rate"] == pytest.approx(0.7) + assert driver1["acc_rate"] == pytest.approx(0.8) + + # Driver 2's latest values (from 2 hours ago - driver 2 has no "now" data) + # This demonstrates that pull_latest correctly handles entities with + # different "latest" timestamps + assert driver2["conv_rate"] == pytest.approx(0.3) + assert driver2["acc_rate"] == pytest.approx(0.95) + + +@_requires_docker +def test_get_historical_features_pit_join( + repo_config: RepoConfig, sample_data: datetime, driver_fv: FeatureView +) -> None: + """Test point-in-time join retrieves correct feature values. + + Point-in-time (PIT) join ensures that for each entity row, we get the + feature values that were valid AT THAT POINT IN TIME - not future data + that would cause data leakage in ML training. 
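+
+    Timeline sketch for driver 1, using the write times from the sample_data
+    fixture::
+
+        writes:  conv_rate=0.5 @ now-2h   0.6 @ now-1h   0.7 @ now
+        ask "as of now-1h30m" -> 0.5  (latest write at or before that time)
+        ask "as of now-30m"   -> 0.6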
+ """ + now = sample_data + + # Entity dataframe: request features at specific timestamps + # Each row says "give me driver X's features as they were at time T" + entity_df = pd.DataFrame( + { + "driver_id": [1, 1, 2], + "event_timestamp": [ + now + - timedelta( + hours=1, minutes=30 + ), # Should get conv_rate=0.5 (before 0.6 was written) + now + - timedelta( + minutes=30 + ), # Should get conv_rate=0.6 (before 0.7 was written) + now + - timedelta(hours=1), # Should get conv_rate=0.3 (only data available) + ], + } + ) + + job = MongoDBOfflineStoreIbis.get_historical_features( + config=repo_config, + feature_views=[driver_fv], + feature_refs=["driver_stats:conv_rate", "driver_stats:acc_rate"], + entity_df=entity_df, + registry=MagicMock(), + project=repo_config.project, + full_feature_names=False, + ) + + result_df = job.to_df() + assert isinstance(result_df, pd.DataFrame) + assert len(result_df) == 3 + + # Sort by driver_id and event_timestamp for predictable assertions + result_df = result_df.sort_values(["driver_id", "event_timestamp"]).reset_index( + drop=True + ) + + # Driver 1, first request (1.5 hours ago) → should get value from 2 hours ago + assert result_df.loc[0, "conv_rate"] == pytest.approx(0.5) + + # Driver 1, second request (30 min ago) → should get value from 1 hour ago + assert result_df.loc[1, "conv_rate"] == pytest.approx(0.6) + + # Driver 2, request (1 hour ago) → should get value from 2 hours ago + assert result_df.loc[2, "conv_rate"] == pytest.approx(0.3) + + +@_requires_docker +def test_pull_all_from_table_or_query( + repo_config: RepoConfig, sample_data: datetime, driver_source: MongoDBSource +) -> None: + """Test pulling all features within a time range (no deduplication).""" + now = sample_data + job = MongoDBOfflineStoreIbis.pull_all_from_table_or_query( + config=repo_config, + data_source=driver_source, + join_key_columns=["driver_id"], + feature_name_columns=["conv_rate", "acc_rate"], + timestamp_field="event_timestamp", + created_timestamp_column=None, + start_date=now - timedelta(hours=1, minutes=30), + end_date=now + timedelta(hours=1), + ) + + df = job.to_df() + assert isinstance(df, pd.DataFrame) + # Should get 2 rows: driver 1 (1hr ago, now) + # Excludes: driver 1 row from 2 hours ago (before start_date) + # driver 2 row from 2 hours ago (before start_date) + assert len(df) == 2 + + +@_requires_docker +def test_ttl_excludes_stale_features( + repo_config: RepoConfig, + mongodb_connection_string: str, + driver_source: MongoDBSource, +) -> None: + """Test that TTL causes stale feature values to be returned as NULL. + + Feast TTL (time-to-live) is a query-time filter: if a feature's event_timestamp + is older than (entity_timestamp - ttl), that feature is considered stale. + This is different from MongoDB TTL indexes which delete documents. 
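+
+    Worked example with the documents inserted below (ttl = 1 day, entity
+    timestamp = now, so the staleness cutoff is now - 1 day)::
+
+        driver 1: event_timestamp = now - 1h >= cutoff -> conv_rate 0.9 kept
+        driver 2: event_timestamp = now - 2d <  cutoff -> conv_rate is NULL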
+ """ + # Insert data with a very old timestamp + client: MongoClient = MongoClient(mongodb_connection_string) + db = client["feast_test"] + collection = db["driver_stats_ttl_test"] + collection.drop() + + now = datetime.now(tz=pytz.UTC) + docs = [ + # Fresh data (within TTL) + {"driver_id": 1, "conv_rate": 0.9, "event_timestamp": now - timedelta(hours=1)}, + # Stale data (outside 1-day TTL when queried from "now") + {"driver_id": 2, "conv_rate": 0.5, "event_timestamp": now - timedelta(days=2)}, + ] + collection.insert_many(docs) + client.close() + + # Create source and feature view with 1-day TTL + ttl_source = MongoDBSource( + name="driver_stats_ttl_test", + database="feast_test", + collection="driver_stats_ttl_test", + timestamp_field="event_timestamp", + ) + driver_entity = Entity( + name="driver_id", join_keys=["driver_id"], value_type=ValueType.INT64 + ) + ttl_fv = FeatureView( + name="driver_stats_ttl_test", + entities=[driver_entity], + schema=[ + Field(name="driver_id", dtype=Int64), + Field(name="conv_rate", dtype=Float64), + ], + source=ttl_source, + ttl=timedelta(days=1), # Features older than 1 day are stale + ) + + # Request features "as of now" for both drivers + entity_df = pd.DataFrame( + { + "driver_id": [1, 2], + "event_timestamp": [now, now], + } + ) + + job = MongoDBOfflineStoreIbis.get_historical_features( + config=repo_config, + feature_views=[ttl_fv], + feature_refs=["driver_stats_ttl_test:conv_rate"], + entity_df=entity_df, + registry=MagicMock(), + project=repo_config.project, + full_feature_names=False, + ) + + result_df = job.to_df().sort_values("driver_id").reset_index(drop=True) + + # Driver 1: fresh data within TTL → should have value + assert result_df.loc[0, "conv_rate"] == pytest.approx(0.9) + + # Driver 2: stale data outside TTL → should be NULL + assert pd.isna(result_df.loc[1, "conv_rate"]) From ec2e7ba7b1828ef04f6abedc9981c0747e06c19e Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Tue, 17 Mar 2026 12:42:09 -0400 Subject: [PATCH 06/30] Added test of multiple feature views and compound join keys Signed-off-by: Casey Clements --- .../contrib/test_mongodb_offline_retrieval.py | 262 +++++++++++++++++- 1 file changed, 261 insertions(+), 1 deletion(-) diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py b/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py index cd83e33c0d..225d18d3e9 100644 --- a/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py +++ b/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py @@ -27,7 +27,7 @@ MongoDBSource, ) from feast.repo_config import RepoConfig -from feast.types import Float64, Int64 +from feast.types import Float64, Int64, String from feast.value_type import ValueType # Check if Docker is available @@ -386,3 +386,263 @@ def test_ttl_excludes_stale_features( # Driver 2: stale data outside TTL → should be NULL assert pd.isna(result_df.loc[1, "conv_rate"]) + + +@_requires_docker +def test_multiple_feature_views( + repo_config: RepoConfig, mongodb_connection_string: str +) -> None: + """Test joining features from multiple MongoDB collections/FeatureViews. + + This simulates a real-world scenario where features come from different + data sources (e.g., driver stats from one collection, vehicle stats from another). 
+ """ + client: MongoClient = MongoClient(mongodb_connection_string) + db = client["feast_test"] + + # Collection 1: Driver stats + driver_collection = db["driver_stats_multi"] + driver_collection.drop() + now = datetime.now(tz=pytz.UTC) + driver_docs = [ + {"driver_id": 1, "rating": 4.8, "event_timestamp": now - timedelta(hours=1)}, + {"driver_id": 2, "rating": 4.5, "event_timestamp": now - timedelta(hours=1)}, + ] + driver_collection.insert_many(driver_docs) + + # Collection 2: Vehicle stats (same driver_id, different features) + vehicle_collection = db["vehicle_stats_multi"] + vehicle_collection.drop() + vehicle_docs = [ + { + "driver_id": 1, + "vehicle_age": 2, + "mileage": 50000, + "event_timestamp": now - timedelta(hours=1), + }, + { + "driver_id": 2, + "vehicle_age": 5, + "mileage": 120000, + "event_timestamp": now - timedelta(hours=1), + }, + ] + vehicle_collection.insert_many(vehicle_docs) + client.close() + + # Create sources for each collection + driver_source = MongoDBSource( + name="driver_stats_multi", + database="feast_test", + collection="driver_stats_multi", + timestamp_field="event_timestamp", + ) + vehicle_source = MongoDBSource( + name="vehicle_stats_multi", + database="feast_test", + collection="vehicle_stats_multi", + timestamp_field="event_timestamp", + ) + + # Create entities and feature views + driver_entity = Entity( + name="driver_id", join_keys=["driver_id"], value_type=ValueType.INT64 + ) + + driver_fv = FeatureView( + name="driver_stats_multi", + entities=[driver_entity], + schema=[ + Field(name="driver_id", dtype=Int64), + Field(name="rating", dtype=Float64), + ], + source=driver_source, + ttl=timedelta(days=1), + ) + + vehicle_fv = FeatureView( + name="vehicle_stats_multi", + entities=[ + driver_entity + ], # todo these two FeatureViews have the same entities list [driver_entity] + schema=[ + Field(name="driver_id", dtype=Int64), + Field(name="vehicle_age", dtype=Int64), + Field(name="mileage", dtype=Int64), + ], + source=vehicle_source, + ttl=timedelta(days=1), + ) + + # Entity dataframe requesting features for both drivers + entity_df = pd.DataFrame( + { + "driver_id": [1, 2], + "event_timestamp": [now, now], + } + ) + + # Request features from BOTH feature views + job = MongoDBOfflineStoreIbis.get_historical_features( + config=repo_config, + feature_views=[driver_fv, vehicle_fv], + feature_refs=[ + "driver_stats_multi:rating", + "vehicle_stats_multi:vehicle_age", + "vehicle_stats_multi:mileage", + ], + entity_df=entity_df, + registry=MagicMock(), + project=repo_config.project, + full_feature_names=False, + ) + + result_df = job.to_df().sort_values("driver_id").reset_index(drop=True) + + # Verify we got features from both collections joined correctly + assert len(result_df) == 2 + assert set(result_df.columns) >= {"driver_id", "rating", "vehicle_age", "mileage"} + + # Driver 1 + assert result_df.loc[0, "rating"] == pytest.approx(4.8) + assert result_df.loc[0, "vehicle_age"] == 2 + assert result_df.loc[0, "mileage"] == 50000 + + # Driver 2 + assert result_df.loc[1, "rating"] == pytest.approx(4.5) + assert result_df.loc[1, "vehicle_age"] == 5 + assert result_df.loc[1, "mileage"] == 120000 + + +@_requires_docker +def test_compound_join_keys( + repo_config: RepoConfig, mongodb_connection_string: str +) -> None: + """Test with compound/composite join keys (multiple entity columns). + + This tests scenarios where entities are identified by multiple keys, + e.g., (user_id, device_id) or (store_id, product_id). 
+ """ + client: MongoClient = MongoClient(mongodb_connection_string) + db = client["feast_test"] + + # Create collection with compound key (user_id + device_id) + collection = db["user_device_features"] + collection.drop() + now = datetime.now(tz=pytz.UTC) + + # Same user_id can have different device_ids with different features + docs = [ + { + "user_id": 1, + "device_id": "mobile", + "app_opens": 50, + "event_timestamp": now - timedelta(hours=2), + }, + { + "user_id": 1, + "device_id": "mobile", + "app_opens": 55, + "event_timestamp": now - timedelta(hours=1), + }, + { + "user_id": 1, + "device_id": "desktop", + "app_opens": 10, + "event_timestamp": now - timedelta(hours=1), + }, + { + "user_id": 2, + "device_id": "mobile", + "app_opens": 100, + "event_timestamp": now - timedelta(hours=1), + }, + { + "user_id": 2, + "device_id": "tablet", + "app_opens": 25, + "event_timestamp": now - timedelta(hours=1), + }, + ] + collection.insert_many(docs) + client.close() + + # Create source + source = MongoDBSource( + name="user_device_features", + database="feast_test", + collection="user_device_features", + timestamp_field="event_timestamp", + ) + + # Create entities with compound keys + user_entity = Entity( + name="user_id", join_keys=["user_id"], value_type=ValueType.INT64 + ) + device_entity = Entity( + name="device_id", join_keys=["device_id"], value_type=ValueType.STRING + ) + + fv = FeatureView( + name="user_device_features", + entities=[user_entity, device_entity], + schema=[ + Field(name="user_id", dtype=Int64), + Field(name="device_id", dtype=String), + Field(name="app_opens", dtype=Int64), + ], + source=source, + ttl=timedelta(days=1), + ) + + # Test pull_latest: should get one row per unique (user_id, device_id) combination + job = MongoDBOfflineStoreIbis.pull_latest_from_table_or_query( + config=repo_config, + data_source=source, + join_key_columns=["user_id", "device_id"], + feature_name_columns=["app_opens"], + timestamp_field="event_timestamp", + created_timestamp_column=None, + start_date=now - timedelta(days=1), + end_date=now + timedelta(hours=1), + ) + + df = job.to_df() + assert len(df) == 4 # 4 unique (user_id, device_id) combinations + + # Verify user 1, mobile got the LATEST value (55, not 50) + user1_mobile = df[(df["user_id"] == 1) & (df["device_id"] == "mobile")] + assert len(user1_mobile) == 1 + assert user1_mobile.iloc[0]["app_opens"] == 55 + + # Test get_historical_features with compound keys + entity_df = pd.DataFrame( + { + "user_id": [1, 1, 2], + "device_id": ["mobile", "desktop", "tablet"], + "event_timestamp": [now, now, now], + } + ) + + job = MongoDBOfflineStoreIbis.get_historical_features( + config=repo_config, + feature_views=[fv], + feature_refs=["user_device_features:app_opens"], + entity_df=entity_df, + registry=MagicMock(), + project=repo_config.project, + full_feature_names=False, + ) + + result_df = job.to_df() + assert len(result_df) == 3 + + # Sort for predictable assertions + result_df = result_df.sort_values(["user_id", "device_id"]).reset_index(drop=True) + + # user 1, desktop + assert result_df.loc[0, "app_opens"] == 10 + # user 1, mobile (latest value) + assert result_df.loc[1, "app_opens"] == 55 + # user 2, tablet + assert result_df.loc[2, "app_opens"] == 25 From a4d2886138e2efc756e766660ef683c6d42f3c2f Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Tue, 17 Mar 2026 13:58:58 -0400 Subject: [PATCH 07/30] Initial implementation of native single-collection offline store Signed-off-by: Casey Clements --- 
.../mongodb_offline_store/mongodb_native.py | 622 ++++++++++++++++++ .../test_mongodb_offline_retrieval_native.py | 609 +++++++++++++++++ 2 files changed, 1231 insertions(+) create mode 100644 sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py create mode 100644 sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval_native.py diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py new file mode 100644 index 0000000000..a6f0a8acfc --- /dev/null +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py @@ -0,0 +1,622 @@ +# Copyright 2026 The Feast Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Native MongoDB Offline Store Implementation. + +This module implements a MongoDB offline store using native MQL aggregation +pipelines. It uses a single-collection schema where all feature views share +one collection, discriminated by a ``feature_view`` field. + +Schema: + { + "_id": ObjectId(), + "entity_id": "", + "feature_view": "driver_stats", + "features": { + "rating": 4.91, + "trips_last_7d": 132 + }, + "event_timestamp": ISODate("2026-01-20T12:00:00Z"), + "created_at": ISODate("2026-01-20T12:00:05Z") + } + +Recommended Index: + db.feature_history.create_index([ + ("entity_id", ASCENDING), + ("feature_view", ASCENDING), + ("event_timestamp", DESCENDING), + ]) +""" + +import json +import warnings +from datetime import datetime, timezone +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union + +import pandas as pd +import pyarrow + +try: + from pymongo import MongoClient +except ImportError: + MongoClient = None # type: ignore[assignment,misc] + +from pydantic import StrictStr + +from feast.data_source import DataSource +from feast.errors import DataSourceNoNameException, FeastExtrasDependencyImportError +from feast.feature_view import FeatureView +from feast.infra.key_encoding_utils import serialize_entity_key +from feast.infra.offline_stores.offline_store import ( + OfflineStore, + RetrievalJob, + RetrievalMetadata, +) +from feast.infra.offline_stores.offline_utils import ( + infer_event_timestamp_from_entity_df, +) +from feast.infra.registry.base_registry import BaseRegistry +from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto +from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import Value as ValueProto +from feast.repo_config import FeastConfigBaseModel, RepoConfig +from feast.type_map import mongodb_to_feast_value_type +from feast.value_type import ValueType + + +class MongoDBOfflineStoreNativeConfig(FeastConfigBaseModel): + """Configuration for the Native MongoDB offline store. 
+ + Uses a single shared collection for all feature views, with documents + containing an ``entity_id``, ``feature_view`` discriminator, and nested + ``features`` subdocument. + """ + + type: StrictStr = "feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_native.MongoDBOfflineStoreNative" + """Offline store type selector""" + + connection_string: StrictStr = "mongodb://localhost:27017" + """MongoDB connection URI""" + + database: StrictStr = "feast" + """MongoDB database name""" + + collection: StrictStr = "feature_history" + """Single collection name for all feature views""" + + +class MongoDBSourceNative(DataSource): + """A MongoDB data source for the Native offline store. + + Unlike MongoDBSource (Ibis), this source does not specify a collection + per FeatureView. Instead, all FeatureViews share a single collection + (configured at the store level), and are discriminated by the + ``feature_view`` field in each document. + + The ``name`` parameter becomes the ``feature_view`` discriminator value + used to filter documents in queries. + """ + + def __init__( + self, + name: Optional[str] = None, + timestamp_field: str = "event_timestamp", + created_timestamp_column: str = "created_at", + field_mapping: Optional[Dict[str, str]] = None, + description: Optional[str] = "", + tags: Optional[Dict[str, str]] = None, + owner: Optional[str] = "", + ): + if name is None: + raise DataSourceNoNameException() + + super().__init__( + name=name, + timestamp_field=timestamp_field, + created_timestamp_column=created_timestamp_column, + field_mapping=field_mapping, + description=description, + tags=tags, + owner=owner, + ) + + def __hash__(self): + return super().__hash__() + + def __eq__(self, other): + if not isinstance(other, MongoDBSourceNative): + raise TypeError( + "Comparisons should only involve MongoDBSourceNative class objects." 
+ ) + return ( + super().__eq__(other) + and self.timestamp_field == other.timestamp_field + and self.created_timestamp_column == other.created_timestamp_column + and self.field_mapping == other.field_mapping + ) + + @property + def feature_view_name(self) -> str: + """The feature_view discriminator value (same as source name).""" + return self.name + + def source_type(self) -> DataSourceProto.SourceType.ValueType: + return DataSourceProto.CUSTOM_SOURCE + + @staticmethod + def from_proto(data_source: DataSourceProto) -> "MongoDBSourceNative": + assert data_source.HasField("custom_options") + return MongoDBSourceNative( + name=data_source.name, + timestamp_field=data_source.timestamp_field, + created_timestamp_column=data_source.created_timestamp_column, + field_mapping=dict(data_source.field_mapping), + description=data_source.description, + tags=dict(data_source.tags), + owner=data_source.owner, + ) + + def _to_proto_impl(self) -> DataSourceProto: + return DataSourceProto( + name=self.name, + type=DataSourceProto.CUSTOM_SOURCE, + data_source_class_type="feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_native.MongoDBSourceNative", + field_mapping=self.field_mapping, + custom_options=DataSourceProto.CustomSourceOptions( + configuration=json.dumps({"feature_view": self.name}).encode() + ), + description=self.description, + tags=self.tags, + owner=self.owner, + timestamp_field=self.timestamp_field, + created_timestamp_column=self.created_timestamp_column, + ) + + def validate(self, config: RepoConfig): + pass + + @staticmethod + def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]: + return mongodb_to_feast_value_type + + def get_table_query_string(self) -> str: + return f"feature_history[feature_view={self.name}]" + + def get_table_column_names_and_types( + self, config: RepoConfig + ) -> Iterable[Tuple[str, str]]: + """Sample documents to infer feature names and types. + + Queries documents matching this source's feature_view name and + inspects the ``features`` subdocument to determine schema. + """ + if MongoClient is None: + raise FeastExtrasDependencyImportError( + "mongodb", "pymongo is not installed." 
+ ) + connection_string = config.offline_store.connection_string + db_name = config.offline_store.database + collection_name = config.offline_store.collection + client: Any = MongoClient(connection_string) + try: + pipeline = [ + {"$match": {"feature_view": self.name}}, + {"$sample": {"size": 100}}, + ] + docs = list(client[db_name][collection_name].aggregate(pipeline)) + finally: + client.close() + + field_type_counts: Dict[str, Dict[str, int]] = {} + for doc in docs: + features = doc.get("features", {}) + for field, value in features.items(): + type_str = _infer_python_type_str(value) + if type_str is None: + continue + field_type_counts.setdefault(field, {}) + field_type_counts[field][type_str] = ( + field_type_counts[field].get(type_str, 0) + 1 + ) + + return [ + (field, max(counts, key=lambda t: counts[t])) + for field, counts in field_type_counts.items() + ] + + +def _infer_python_type_str(value: Any) -> Optional[str]: + """Infer a Feast-compatible type string from a Python value.""" + if value is None: + return None + if isinstance(value, bool): + return "bool" + if isinstance(value, int): + return "int" + if isinstance(value, float): + return "float" + if isinstance(value, str): + return "str" + if isinstance(value, bytes): + return "bytes" + if isinstance(value, datetime): + return "datetime" + if isinstance(value, list): + if not value: + return "list[str]" + elem_type = _infer_python_type_str(value[0]) + if elem_type: + return f"list[{elem_type}]" + return "list[str]" + return None + + +def _fetch_documents( + connection_string: str, + database: str, + collection: str, + pipeline: List[Dict], +) -> List[Dict]: + """Execute an aggregation pipeline and return documents.""" + if MongoClient is None: + raise FeastExtrasDependencyImportError("mongodb", "pymongo is not installed.") + client: Any = MongoClient(connection_string) + try: + return list(client[database][collection].aggregate(pipeline)) + finally: + client.close() + + +class MongoDBNativeRetrievalJob(RetrievalJob): + """Retrieval job for native MongoDB offline store queries.""" + + def __init__( + self, + query_fn: Callable[[], pyarrow.Table], + full_feature_names: bool, + on_demand_feature_views: Optional[List[Any]] = None, + metadata: Optional[RetrievalMetadata] = None, + ): + self._query_fn = query_fn + self._full_feature_names = full_feature_names + self._on_demand_feature_views = on_demand_feature_views or [] + self._metadata = metadata + + @property + def full_feature_names(self) -> bool: + return self._full_feature_names + + @property + def on_demand_feature_views(self) -> List[Any]: + return self._on_demand_feature_views + + def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: + return self._to_arrow_internal(timeout).to_pandas() + + def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: + return self._query_fn() + + @property + def metadata(self) -> Optional[RetrievalMetadata]: + return self._metadata + + def persist( + self, + storage: Any, + allow_overwrite: bool = False, + timeout: Optional[int] = None, + ) -> None: + # TODO: Implement persist for native store + raise NotImplementedError("persist() not yet implemented for native store") + + +def _serialize_entity_key_from_row( + row: pd.Series, join_keys: List[str], entity_key_serialization_version: int +) -> bytes: + """Serialize entity key from a DataFrame row.""" + entity_key = EntityKeyProto() + for key in sorted(join_keys): + entity_key.join_keys.append(key) + value = row[key] + val = ValueProto() + if 
isinstance(value, int): + val.int64_val = value + elif isinstance(value, str): + val.string_val = value + elif isinstance(value, float): + val.double_val = value + else: + val.string_val = str(value) + entity_key.entity_values.append(val) + return serialize_entity_key(entity_key, entity_key_serialization_version) + + +class MongoDBOfflineStoreNative(OfflineStore): + """Native MongoDB offline store using single-collection schema. + + All feature views share one collection (``feature_history``), with documents + containing: + - ``entity_id``: serialized entity key (bytes) + - ``feature_view``: discriminator field matching FeatureView name + - ``features``: subdocument with feature name/value pairs + - ``event_timestamp``: event time + - ``created_at``: ingestion time + """ + + @staticmethod + def pull_latest_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + timestamp_field: str, + created_timestamp_column: Optional[str], + start_date: datetime, + end_date: datetime, + ) -> RetrievalJob: + if not isinstance(data_source, MongoDBSourceNative): + raise ValueError( + f"MongoDBOfflineStoreNative expected MongoDBSourceNative, " + f"got {type(data_source).__name__!r}." + ) + warnings.warn( + "MongoDB offline store (native) is in preview. API may change without notice.", + RuntimeWarning, + ) + + connection_string = config.offline_store.connection_string + db_name = config.offline_store.database + collection = config.offline_store.collection + feature_view_name = data_source.feature_view_name + + start_utc = start_date.astimezone(tz=timezone.utc) + end_utc = end_date.astimezone(tz=timezone.utc) + + # Build aggregation pipeline + pipeline: List[Dict[str, Any]] = [ + { + "$match": { + "feature_view": feature_view_name, + "event_timestamp": {"$gte": start_utc, "$lte": end_utc}, + } + }, + {"$sort": {"entity_id": 1, "event_timestamp": -1}}, + { + "$group": { + "_id": "$entity_id", + "doc": {"$first": "$$ROOT"}, + } + }, + ] + + def _run() -> pyarrow.Table: + docs = _fetch_documents(connection_string, db_name, collection, pipeline) + if not docs: + return pyarrow.Table.from_pydict({}) + + # Flatten documents + rows = [] + for d in docs: + doc = d["doc"] + row = { + "entity_id": doc["entity_id"], + "event_timestamp": doc["event_timestamp"], + } + features = doc.get("features", {}) + for feat in feature_name_columns: + row[feat] = features.get(feat) + rows.append(row) + + df = pd.DataFrame(rows) + # Ensure timestamp is tz-aware + if not df.empty and df["event_timestamp"].dt.tz is None: + df["event_timestamp"] = pd.to_datetime(df["event_timestamp"], utc=True) + return pyarrow.Table.from_pandas(df, preserve_index=False) + + return MongoDBNativeRetrievalJob(query_fn=_run, full_feature_names=False) + + @staticmethod + def pull_all_from_table_or_query( + config: RepoConfig, + data_source: DataSource, + join_key_columns: List[str], + feature_name_columns: List[str], + timestamp_field: str, + created_timestamp_column: Optional[str] = None, + start_date: Optional[datetime] = None, + end_date: Optional[datetime] = None, + ) -> RetrievalJob: + if not isinstance(data_source, MongoDBSourceNative): + raise ValueError( + f"MongoDBOfflineStoreNative expected MongoDBSourceNative, " + f"got {type(data_source).__name__!r}." + ) + warnings.warn( + "MongoDB offline store (native) is in preview. 
API may change without notice.", + RuntimeWarning, + ) + + connection_string = config.offline_store.connection_string + db_name = config.offline_store.database + collection = config.offline_store.collection + feature_view_name = data_source.feature_view_name + + # Build match filter + match_filter: Dict[str, Any] = {"feature_view": feature_view_name} + if start_date or end_date: + ts_filter: Dict[str, Any] = {} + if start_date: + ts_filter["$gte"] = start_date.astimezone(tz=timezone.utc) + if end_date: + ts_filter["$lte"] = end_date.astimezone(tz=timezone.utc) + match_filter["event_timestamp"] = ts_filter + + pipeline = [{"$match": match_filter}] + + def _run() -> pyarrow.Table: + docs = _fetch_documents(connection_string, db_name, collection, pipeline) + if not docs: + return pyarrow.Table.from_pydict({}) + + rows = [] + for doc in docs: + row = { + "entity_id": doc["entity_id"], + "event_timestamp": doc["event_timestamp"], + } + features = doc.get("features", {}) + for feat in feature_name_columns: + row[feat] = features.get(feat) + rows.append(row) + + df = pd.DataFrame(rows) + if not df.empty and df["event_timestamp"].dt.tz is None: + df["event_timestamp"] = pd.to_datetime(df["event_timestamp"], utc=True) + return pyarrow.Table.from_pandas(df, preserve_index=False) + + return MongoDBNativeRetrievalJob(query_fn=_run, full_feature_names=False) + + @staticmethod + def get_historical_features( + config: RepoConfig, + feature_views: List[FeatureView], + feature_refs: List[str], + entity_df: Union[pd.DataFrame, str], + registry: BaseRegistry, + project: str, + full_feature_names: bool = False, + ) -> RetrievalJob: + if isinstance(entity_df, str): + raise ValueError( + "MongoDBOfflineStoreNative does not support SQL entity_df strings. " + "Pass a pandas DataFrame instead." + ) + warnings.warn( + "MongoDB offline store (native) is in preview. API may change without notice.", + RuntimeWarning, + ) + + connection_string = config.offline_store.connection_string + db_name = config.offline_store.database + collection = config.offline_store.collection + entity_key_version = config.entity_key_serialization_version + + entity_schema = dict(zip(entity_df.columns, entity_df.dtypes)) + event_timestamp_col = infer_event_timestamp_from_entity_df(entity_schema) + + # Map "feature_view:feature" refs → {fv_name: [feature, ...]} + fv_to_features: Dict[str, List[str]] = {} + for ref in feature_refs: + fv_name, feat_name = ref.split(":", 1) + fv_to_features.setdefault(fv_name, []).append(feat_name) + + fv_by_name = {fv.name: fv for fv in feature_views} + + def _run() -> pyarrow.Table: + result = entity_df.copy() + + # Ensure entity timestamp is tz-aware UTC + if result[event_timestamp_col].dt.tz is None: + result[event_timestamp_col] = pd.to_datetime( + result[event_timestamp_col], utc=True + ) + result = result.sort_values(event_timestamp_col) + + # Get join keys from entity_df columns (excluding event_timestamp) + entity_columns = [c for c in result.columns if c != event_timestamp_col] + + # Serialize entity keys for lookup + result["_entity_id"] = result.apply( + lambda row: _serialize_entity_key_from_row( + row, entity_columns, entity_key_version + ), + axis=1, + ) + + for fv_name, features in fv_to_features.items(): + fv = fv_by_name[fv_name] + source = fv.batch_source + if not isinstance(source, MongoDBSourceNative): + raise ValueError( + f"MongoDBOfflineStoreNative: feature view {fv_name!r} has " + f"non-MongoDBSourceNative source ({type(source).__name__!r})." 
+ ) + + # Fetch all documents for this feature view + pipeline = [{"$match": {"feature_view": fv_name}}] + docs = _fetch_documents( + connection_string, db_name, collection, pipeline + ) + + if not docs: + for f in features: + col = f"{fv_name}__{f}" if full_feature_names else f + result[col] = None + continue + + # Build feature DataFrame + feature_rows = [] + for doc in docs: + row = { + "_entity_id": doc["entity_id"], + "_fv_ts": doc["event_timestamp"], + } + feat_data = doc.get("features", {}) + for f in features: + row[f] = feat_data.get(f) + feature_rows.append(row) + + feature_df = pd.DataFrame(feature_rows) + if feature_df["_fv_ts"].dt.tz is None: + feature_df["_fv_ts"] = pd.to_datetime( + feature_df["_fv_ts"], utc=True + ) + feature_df = feature_df.sort_values("_fv_ts") + + # Rename features if full_feature_names + col_rename = { + f: (f"{fv_name}__{f}" if full_feature_names else f) + for f in features + } + feature_df = feature_df.rename(columns=col_rename) + out_features = list(col_rename.values()) + + # Point-in-time join using merge_asof + merged = pd.merge_asof( + result, + feature_df, + left_on=event_timestamp_col, + right_on="_fv_ts", + by="_entity_id", + direction="backward", + ) + + # Apply TTL: null out stale features + if fv.ttl: + cutoff = merged[event_timestamp_col] - fv.ttl + too_old = merged["_fv_ts"] < cutoff + for col in out_features: + merged.loc[too_old, col] = None + + result = merged.drop(columns=["_fv_ts"], errors="ignore") + + # Remove internal entity_id column + result = result.drop(columns=["_entity_id"], errors="ignore") + return pyarrow.Table.from_pandas(result, preserve_index=False) + + return MongoDBNativeRetrievalJob( + query_fn=_run, + full_feature_names=full_feature_names, + ) diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval_native.py b/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval_native.py new file mode 100644 index 0000000000..5c02299254 --- /dev/null +++ b/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval_native.py @@ -0,0 +1,609 @@ +""" +Unit tests for MongoDB Native offline store implementation. + +This tests the single-collection schema where all feature views share one +collection (``feature_history``), discriminated by ``feature_view`` field. + +Schema: + { + "entity_id": bytes, # serialized entity key + "feature_view": str, + "features": { "feat1": val, ... }, + "event_timestamp": datetime, + "created_at": datetime + } + +Docker-dependent tests are marked with ``@_requires_docker`` and are skipped +when Docker is unavailable. 
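+
+Entity keys are serialized with Feast's ``serialize_entity_key`` so offline
+documents match the online-store encoding; the ``_make_entity_id`` helper
+below builds them from plain dicts, e.g. ``_make_entity_id({"driver_id": 1})``.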
+""" + +from datetime import datetime, timedelta +from typing import Generator +from unittest.mock import MagicMock + +import pandas as pd +import pytest +import pytz + +pytest.importorskip("pymongo") + +from pymongo import MongoClient +from testcontainers.mongodb import MongoDbContainer + +from feast import Entity, FeatureView, Field +from feast.infra.key_encoding_utils import serialize_entity_key +from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_native import ( + MongoDBOfflineStoreNative, + MongoDBOfflineStoreNativeConfig, + MongoDBSourceNative, +) +from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import Value as ValueProto +from feast.repo_config import RepoConfig +from feast.types import Float64, Int64, String +from feast.value_type import ValueType + +# Check if Docker is available +docker_available = False +try: + import docker + + try: + client = docker.from_env() + client.ping() + docker_available = True + except Exception: + pass +except ImportError: + pass + +_requires_docker = pytest.mark.skipif( + not docker_available, + reason="Docker is not available or not running.", +) + +ENTITY_KEY_VERSION = 3 + + +def _make_entity_id(join_keys: dict) -> bytes: + """Create serialized entity key from join key dict.""" + entity_key = EntityKeyProto() + for key in sorted(join_keys.keys()): + entity_key.join_keys.append(key) + val = ValueProto() + value = join_keys[key] + if isinstance(value, int): + val.int64_val = value + elif isinstance(value, str): + val.string_val = value + else: + val.string_val = str(value) + entity_key.entity_values.append(val) + return serialize_entity_key(entity_key, ENTITY_KEY_VERSION) + + +@pytest.fixture(scope="module") +def mongodb_container() -> Generator[MongoDbContainer, None, None]: + """Start a MongoDB container for testing.""" + container = MongoDbContainer( + "mongo:latest", + username="test", + password="test", # pragma: allowlist secret + ).with_exposed_ports(27017) + container.start() + yield container + container.stop() + + +@pytest.fixture +def mongodb_connection_string(mongodb_container: MongoDbContainer) -> str: + """Get MongoDB connection string from the container.""" + exposed_port = mongodb_container.get_exposed_port(27017) + return f"mongodb://test:test@localhost:{exposed_port}" # pragma: allowlist secret + + +@pytest.fixture +def repo_config(mongodb_connection_string: str) -> RepoConfig: + """Create a RepoConfig with MongoDB Native offline store.""" + return RepoConfig( + project="test_project", + registry="memory://", + provider="local", + offline_store=MongoDBOfflineStoreNativeConfig( + connection_string=mongodb_connection_string, + database="feast_test", + collection="feature_history", + ), + online_store={"type": "sqlite"}, + entity_key_serialization_version=ENTITY_KEY_VERSION, + ) + + +@pytest.fixture +def sample_data(mongodb_connection_string: str) -> datetime: + """Insert sample data using the single-collection schema. + + Creates documents for 'driver_stats' feature view with entity_id, + feature_view discriminator, and nested features subdocument. 
+ """ + client: MongoClient = MongoClient(mongodb_connection_string) + db = client["feast_test"] + collection = db["feature_history"] + collection.drop() + + now = datetime.now(tz=pytz.UTC) + + # Create documents using the native schema + docs = [ + { + "entity_id": _make_entity_id({"driver_id": 1}), + "feature_view": "driver_stats", + "features": {"conv_rate": 0.5, "acc_rate": 0.9}, + "event_timestamp": now - timedelta(hours=2), + "created_at": now - timedelta(hours=2), + }, + { + "entity_id": _make_entity_id({"driver_id": 1}), + "feature_view": "driver_stats", + "features": {"conv_rate": 0.6, "acc_rate": 0.85}, + "event_timestamp": now - timedelta(hours=1), + "created_at": now - timedelta(hours=1), + }, + { + "entity_id": _make_entity_id({"driver_id": 1}), + "feature_view": "driver_stats", + "features": {"conv_rate": 0.7, "acc_rate": 0.8}, + "event_timestamp": now, + "created_at": now, + }, + { + "entity_id": _make_entity_id({"driver_id": 2}), + "feature_view": "driver_stats", + "features": {"conv_rate": 0.3, "acc_rate": 0.95}, + "event_timestamp": now - timedelta(hours=2), + "created_at": now - timedelta(hours=2), + }, + ] + collection.insert_many(docs) + client.close() + return now + + +@pytest.fixture +def driver_source() -> MongoDBSourceNative: + """Create a MongoDBSourceNative for driver stats.""" + return MongoDBSourceNative( + name="driver_stats", + timestamp_field="event_timestamp", + created_timestamp_column="created_at", + ) + + +@pytest.fixture +def driver_fv(driver_source: MongoDBSourceNative) -> FeatureView: + """Create a FeatureView for driver stats.""" + driver_entity = Entity( + name="driver_id", join_keys=["driver_id"], value_type=ValueType.INT64 + ) + return FeatureView( + name="driver_stats", + entities=[driver_entity], + schema=[ + Field(name="driver_id", dtype=Int64), + Field(name="conv_rate", dtype=Float64), + Field(name="acc_rate", dtype=Float64), + ], + source=driver_source, + ttl=timedelta(days=1), + ) + + +@_requires_docker +def test_pull_latest_from_table_or_query( + repo_config: RepoConfig, sample_data: datetime, driver_source: MongoDBSourceNative +) -> None: + """Test pulling latest features per entity from the single collection.""" + now = sample_data + job = MongoDBOfflineStoreNative.pull_latest_from_table_or_query( + config=repo_config, + data_source=driver_source, + join_key_columns=["driver_id"], + feature_name_columns=["conv_rate", "acc_rate"], + timestamp_field="event_timestamp", + created_timestamp_column="created_at", + start_date=now - timedelta(days=1), + end_date=now + timedelta(hours=1), + ) + + df = job.to_df() + + assert isinstance(df, pd.DataFrame) + assert len(df) == 2 # Two unique entity_ids + + # Sort by entity_id for predictable assertions + # Note: entity_id is bytes, so we check features directly + conv_rates = sorted(df["conv_rate"].tolist()) + assert conv_rates[0] == pytest.approx(0.3) # Driver 2's only value + assert conv_rates[1] == pytest.approx(0.7) # Driver 1's latest value + + +@_requires_docker +def test_get_historical_features_pit_join( + repo_config: RepoConfig, sample_data: datetime, driver_fv: FeatureView +) -> None: + """Test point-in-time join retrieves correct feature values.""" + now = sample_data + + # Entity dataframe with driver_id column (must match join keys) + entity_df = pd.DataFrame( + { + "driver_id": [1, 1, 2], + "event_timestamp": [ + now - timedelta(hours=1, minutes=30), # Should get conv_rate=0.5 + now - timedelta(minutes=30), # Should get conv_rate=0.6 + now - timedelta(hours=1), # Should get conv_rate=0.3 
+ ], + } + ) + + job = MongoDBOfflineStoreNative.get_historical_features( + config=repo_config, + feature_views=[driver_fv], + feature_refs=["driver_stats:conv_rate", "driver_stats:acc_rate"], + entity_df=entity_df, + registry=MagicMock(), + project=repo_config.project, + full_feature_names=False, + ) + + result_df = job.to_df() + assert isinstance(result_df, pd.DataFrame) + assert len(result_df) == 3 + + # Sort by driver_id and event_timestamp for predictable assertions + result_df = result_df.sort_values(["driver_id", "event_timestamp"]).reset_index( + drop=True + ) + + # Driver 1, first request (1.5 hours ago) → should get value from 2 hours ago + assert result_df.loc[0, "conv_rate"] == pytest.approx(0.5) + + # Driver 1, second request (30 min ago) → should get value from 1 hour ago + assert result_df.loc[1, "conv_rate"] == pytest.approx(0.6) + + # Driver 2, request (1 hour ago) → should get value from 2 hours ago + assert result_df.loc[2, "conv_rate"] == pytest.approx(0.3) + + +@_requires_docker +def test_pull_all_from_table_or_query( + repo_config: RepoConfig, sample_data: datetime, driver_source: MongoDBSourceNative +) -> None: + """Test pulling all features within a time range (no deduplication).""" + now = sample_data + job = MongoDBOfflineStoreNative.pull_all_from_table_or_query( + config=repo_config, + data_source=driver_source, + join_key_columns=["driver_id"], + feature_name_columns=["conv_rate", "acc_rate"], + timestamp_field="event_timestamp", + created_timestamp_column="created_at", + start_date=now - timedelta(hours=1, minutes=30), + end_date=now + timedelta(hours=1), + ) + + df = job.to_df() + assert isinstance(df, pd.DataFrame) + # Should get 2 rows: driver 1 (1hr ago, now) + # Excludes: driver 1 from 2 hours ago, driver 2 from 2 hours ago + assert len(df) == 2 + + +@_requires_docker +def test_ttl_excludes_stale_features( + repo_config: RepoConfig, mongodb_connection_string: str +) -> None: + """Test that TTL causes stale feature values to be returned as NULL.""" + client: MongoClient = MongoClient(mongodb_connection_string) + db = client["feast_test"] + collection = db["feature_history"] + + now = datetime.now(tz=pytz.UTC) + + # Insert docs with different ages + ttl_docs = [ + { + "entity_id": _make_entity_id({"driver_id": 1}), + "feature_view": "driver_stats_ttl", + "features": {"conv_rate": 0.9}, + "event_timestamp": now - timedelta(hours=1), + "created_at": now - timedelta(hours=1), + }, + { + "entity_id": _make_entity_id({"driver_id": 2}), + "feature_view": "driver_stats_ttl", + "features": {"conv_rate": 0.5}, + "event_timestamp": now - timedelta(days=2), # Stale + "created_at": now - timedelta(days=2), + }, + ] + collection.insert_many(ttl_docs) + client.close() + + ttl_source = MongoDBSourceNative( + name="driver_stats_ttl", + timestamp_field="event_timestamp", + ) + driver_entity = Entity( + name="driver_id", join_keys=["driver_id"], value_type=ValueType.INT64 + ) + ttl_fv = FeatureView( + name="driver_stats_ttl", + entities=[driver_entity], + schema=[ + Field(name="driver_id", dtype=Int64), + Field(name="conv_rate", dtype=Float64), + ], + source=ttl_source, + ttl=timedelta(days=1), + ) + + entity_df = pd.DataFrame( + { + "driver_id": [1, 2], + "event_timestamp": [now, now], + } + ) + + job = MongoDBOfflineStoreNative.get_historical_features( + config=repo_config, + feature_views=[ttl_fv], + feature_refs=["driver_stats_ttl:conv_rate"], + entity_df=entity_df, + registry=MagicMock(), + project=repo_config.project, + full_feature_names=False, + ) + + result_df = 
job.to_df().sort_values("driver_id").reset_index(drop=True) + + # Driver 1: fresh → has value + assert result_df.loc[0, "conv_rate"] == pytest.approx(0.9) + + # Driver 2: stale → NULL + assert pd.isna(result_df.loc[1, "conv_rate"]) + + +@_requires_docker +def test_multiple_feature_views( + repo_config: RepoConfig, mongodb_connection_string: str +) -> None: + """Test joining features from multiple feature views in the same collection.""" + client: MongoClient = MongoClient(mongodb_connection_string) + db = client["feast_test"] + collection = db["feature_history"] + + now = datetime.now(tz=pytz.UTC) + + # Insert documents for two different feature views + multi_docs = [ + # driver_stats_multi + { + "entity_id": _make_entity_id({"driver_id": 1}), + "feature_view": "driver_stats_multi", + "features": {"rating": 4.8}, + "event_timestamp": now - timedelta(hours=1), + "created_at": now - timedelta(hours=1), + }, + { + "entity_id": _make_entity_id({"driver_id": 2}), + "feature_view": "driver_stats_multi", + "features": {"rating": 4.5}, + "event_timestamp": now - timedelta(hours=1), + "created_at": now - timedelta(hours=1), + }, + # vehicle_stats_multi + { + "entity_id": _make_entity_id({"driver_id": 1}), + "feature_view": "vehicle_stats_multi", + "features": {"vehicle_age": 2, "mileage": 50000}, + "event_timestamp": now - timedelta(hours=1), + "created_at": now - timedelta(hours=1), + }, + { + "entity_id": _make_entity_id({"driver_id": 2}), + "feature_view": "vehicle_stats_multi", + "features": {"vehicle_age": 5, "mileage": 120000}, + "event_timestamp": now - timedelta(hours=1), + "created_at": now - timedelta(hours=1), + }, + ] + collection.insert_many(multi_docs) + client.close() + + # Create sources and feature views + driver_source = MongoDBSourceNative(name="driver_stats_multi") + vehicle_source = MongoDBSourceNative(name="vehicle_stats_multi") + + driver_entity = Entity( + name="driver_id", join_keys=["driver_id"], value_type=ValueType.INT64 + ) + + driver_fv = FeatureView( + name="driver_stats_multi", + entities=[driver_entity], + schema=[ + Field(name="driver_id", dtype=Int64), + Field(name="rating", dtype=Float64), + ], + source=driver_source, + ttl=timedelta(days=1), + ) + + vehicle_fv = FeatureView( + name="vehicle_stats_multi", + entities=[driver_entity], + schema=[ + Field(name="driver_id", dtype=Int64), + Field(name="vehicle_age", dtype=Int64), + Field(name="mileage", dtype=Int64), + ], + source=vehicle_source, + ttl=timedelta(days=1), + ) + + entity_df = pd.DataFrame( + { + "driver_id": [1, 2], + "event_timestamp": [now, now], + } + ) + + job = MongoDBOfflineStoreNative.get_historical_features( + config=repo_config, + feature_views=[driver_fv, vehicle_fv], + feature_refs=[ + "driver_stats_multi:rating", + "vehicle_stats_multi:vehicle_age", + "vehicle_stats_multi:mileage", + ], + entity_df=entity_df, + registry=MagicMock(), + project=repo_config.project, + full_feature_names=False, + ) + + result_df = job.to_df().sort_values("driver_id").reset_index(drop=True) + + assert len(result_df) == 2 + assert set(result_df.columns) >= {"driver_id", "rating", "vehicle_age", "mileage"} + + # Driver 1 + assert result_df.loc[0, "rating"] == pytest.approx(4.8) + assert result_df.loc[0, "vehicle_age"] == 2 + assert result_df.loc[0, "mileage"] == 50000 + + # Driver 2 + assert result_df.loc[1, "rating"] == pytest.approx(4.5) + assert result_df.loc[1, "vehicle_age"] == 5 + assert result_df.loc[1, "mileage"] == 120000 + + +@_requires_docker +def test_compound_join_keys( + repo_config: RepoConfig, 
mongodb_connection_string: str +) -> None: + """Test with compound/composite join keys (multiple entity columns).""" + client: MongoClient = MongoClient(mongodb_connection_string) + db = client["feast_test"] + collection = db["feature_history"] + + now = datetime.now(tz=pytz.UTC) + + # Insert documents with compound keys (user_id + device_id) + compound_docs = [ + { + "entity_id": _make_entity_id({"user_id": 1, "device_id": "mobile"}), + "feature_view": "user_device_features", + "features": {"app_opens": 50}, + "event_timestamp": now - timedelta(hours=2), + "created_at": now - timedelta(hours=2), + }, + { + "entity_id": _make_entity_id({"user_id": 1, "device_id": "mobile"}), + "feature_view": "user_device_features", + "features": {"app_opens": 55}, # Latest for this entity + "event_timestamp": now - timedelta(hours=1), + "created_at": now - timedelta(hours=1), + }, + { + "entity_id": _make_entity_id({"user_id": 1, "device_id": "desktop"}), + "feature_view": "user_device_features", + "features": {"app_opens": 10}, + "event_timestamp": now - timedelta(hours=1), + "created_at": now - timedelta(hours=1), + }, + { + "entity_id": _make_entity_id({"user_id": 2, "device_id": "tablet"}), + "feature_view": "user_device_features", + "features": {"app_opens": 25}, + "event_timestamp": now - timedelta(hours=1), + "created_at": now - timedelta(hours=1), + }, + ] + collection.insert_many(compound_docs) + client.close() + + source = MongoDBSourceNative(name="user_device_features") + + user_entity = Entity( + name="user_id", join_keys=["user_id"], value_type=ValueType.INT64 + ) + device_entity = Entity( + name="device_id", join_keys=["device_id"], value_type=ValueType.STRING + ) + + fv = FeatureView( + name="user_device_features", + entities=[user_entity, device_entity], + schema=[ + Field(name="user_id", dtype=Int64), + Field(name="device_id", dtype=String), + Field(name="app_opens", dtype=Int64), + ], + source=source, + ttl=timedelta(days=1), + ) + + # Test pull_latest: should get one row per unique (user_id, device_id) + job = MongoDBOfflineStoreNative.pull_latest_from_table_or_query( + config=repo_config, + data_source=source, + join_key_columns=["user_id", "device_id"], + feature_name_columns=["app_opens"], + timestamp_field="event_timestamp", + created_timestamp_column="created_at", + start_date=now - timedelta(days=1), + end_date=now + timedelta(hours=1), + ) + + df = job.to_df() + assert len(df) == 3 # 3 unique (user_id, device_id) combinations + + # Verify we got the latest value (55) for user 1, mobile + app_opens_values = sorted(df["app_opens"].tolist()) + assert 55 in app_opens_values # Latest for user 1, mobile + assert 10 in app_opens_values # user 1, desktop + assert 25 in app_opens_values # user 2, tablet + + # Test get_historical_features with compound keys + entity_df = pd.DataFrame( + { + "user_id": [1, 1, 2], + "device_id": ["mobile", "desktop", "tablet"], + "event_timestamp": [now, now, now], + } + ) + + job = MongoDBOfflineStoreNative.get_historical_features( + config=repo_config, + feature_views=[fv], + feature_refs=["user_device_features:app_opens"], + entity_df=entity_df, + registry=MagicMock(), + project=repo_config.project, + full_feature_names=False, + ) + + result_df = job.to_df() + assert len(result_df) == 3 + + # Sort for predictable assertions + result_df = result_df.sort_values(["user_id", "device_id"]).reset_index(drop=True) + + # user 1, desktop + assert result_df.loc[0, "app_opens"] == 10 + # user 1, mobile (latest value) + assert result_df.loc[1, "app_opens"] == 55 + # 
user 2, tablet + assert result_df.loc[2, "app_opens"] == 25 From e9de6f3017d58f6f36846f570cf4bf16c5718469 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Wed, 18 Mar 2026 11:30:42 -0400 Subject: [PATCH 08/30] Added DriverInfo to MongoDBClients Signed-off-by: Casey Clements --- .../contrib/mongodb_offline_store/__init__.py | 7 +++++++ .../contrib/mongodb_offline_store/mongodb.py | 17 ++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/__init__.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/__init__.py index 8b13789179..535583bc38 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/__init__.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/__init__.py @@ -1 +1,8 @@ +import feast.version +try: + from pymongo.driver_info import DriverInfo + + DRIVER_METADATA = DriverInfo(name="Feast", version=feast.version.get_version()) +except ImportError: + DRIVER_METADATA = None # type: ignore[assignment] diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py index ee37b11c41..51100ef827 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py @@ -33,6 +33,7 @@ SavedDatasetLocationAlreadyExists, ) from feast.feature_view import FeatureView +from feast.infra.offline_stores.contrib.mongodb_offline_store import DRIVER_METADATA from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_source import ( MongoDBSource, ) @@ -178,7 +179,7 @@ def reader(data_source: DataSource, repo_path: str) -> Table: ) connection_string = config.offline_store.connection_string db_name = data_source.database or config.offline_store.database - client: Any = MongoClient(connection_string) + client: Any = MongoClient(connection_string, driver=DRIVER_METADATA) try: docs = list(client[db_name][data_source.collection].find({}, {"_id": 0})) finally: @@ -230,7 +231,9 @@ def writer( connection_string = config.offline_store.connection_string db_name = data_source.database or config.offline_store.database location = f"{db_name}.{data_source.collection}" - client: Any = MongoClient(connection_string, tz_aware=True) + client: Any = MongoClient( + connection_string, driver=DRIVER_METADATA, tz_aware=True + ) try: coll = client[db_name][data_source.collection] if mode == "overwrite": @@ -277,7 +280,7 @@ def _fetch_collection_as_arrow( """ if MongoClient is None: raise FeastExtrasDependencyImportError("mongodb", "pymongo is not installed.") - client: Any = MongoClient(connection_string, tz_aware=True) + client: Any = MongoClient(connection_string, driver=DRIVER_METADATA, tz_aware=True) try: if pipeline is not None: docs = list(client[db_name][collection].aggregate(pipeline)) @@ -355,7 +358,9 @@ def persist( connection_string = self._config.offline_store.connection_string db_name = data_source.database or self._config.offline_store.database location = f"{db_name}.{data_source.collection}" - client: Any = MongoClient(connection_string, tz_aware=True) + client: Any = MongoClient( + connection_string, driver=DRIVER_METADATA, tz_aware=True + ) try: coll = client[db_name][data_source.collection] if not allow_overwrite and coll.estimated_document_count() > 0: @@ -400,7 +405,9 @@ def offline_write_batch( connection_string = 
config.offline_store.connection_string db_name = data_source.database or config.offline_store.database records = table.to_pylist() - client: Any = MongoClient(connection_string, tz_aware=True) + client: Any = MongoClient( + connection_string, driver=DRIVER_METADATA, tz_aware=True + ) try: coll = client[db_name][data_source.collection] if records: From 81d194c55c8790c0aa1535f7a7c013dd431e5565 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Wed, 18 Mar 2026 14:29:38 -0400 Subject: [PATCH 09/30] Optimized MQL. Applied FV-level TTL Signed-off-by: Casey Clements --- .../mongodb_offline_store/mongodb_native.py | 307 ++++++++++++------ 1 file changed, 203 insertions(+), 104 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py index a6f0a8acfc..0b0dfcc06f 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py @@ -41,6 +41,7 @@ """ import json +import uuid import warnings from datetime import datetime, timezone from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union @@ -59,6 +60,7 @@ from feast.errors import DataSourceNoNameException, FeastExtrasDependencyImportError from feast.feature_view import FeatureView from feast.infra.key_encoding_utils import serialize_entity_key +from feast.infra.offline_stores.contrib.mongodb_offline_store import DRIVER_METADATA from feast.infra.offline_stores.offline_store import ( OfflineStore, RetrievalJob, @@ -209,7 +211,7 @@ def get_table_column_names_and_types( connection_string = config.offline_store.connection_string db_name = config.offline_store.database collection_name = config.offline_store.collection - client: Any = MongoClient(connection_string) + client: Any = MongoClient(connection_string, driver=DRIVER_METADATA) try: pipeline = [ {"$match": {"feature_view": self.name}}, @@ -272,7 +274,7 @@ def _fetch_documents( """Execute an aggregation pipeline and return documents.""" if MongoClient is None: raise FeastExtrasDependencyImportError("mongodb", "pymongo is not installed.") - client: Any = MongoClient(connection_string) + client: Any = MongoClient(connection_string, driver=DRIVER_METADATA) try: return list(client[database][collection].aggregate(pipeline)) finally: @@ -343,6 +345,55 @@ def _serialize_entity_key_from_row( return serialize_entity_key(entity_key, entity_key_serialization_version) +def _ttl_to_ms(fv: FeatureView) -> Optional[int]: + """Convert FeatureView TTL to milliseconds.""" + if fv.ttl is None: + return None + return int(fv.ttl.total_seconds() * 1000) + + +def _build_ttl_gte_expr(feature_views: List[FeatureView]) -> Optional[Dict[str, Any]]: + """Build a $gte expression with per-FV TTL using $switch. + + Returns a MongoDB expression that evaluates to: + event_timestamp >= (entity_timestamp - ttl_for_this_feature_view) + + Each feature_view can have a different TTL, handled via $switch branches. + If no feature views have TTL, returns None (no filtering needed). 
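+
+    For example, a single feature view "driver_stats" with a 1-day TTL
+    (86400000 ms) yields an expression of this shape, where ``$$ts`` is the
+    entity timestamp bound by the enclosing $lookup's ``let``:
+
+        {"$gte": ["$event_timestamp",
+                  {"$switch": {"branches": [
+                      {"case": {"$eq": ["$feature_view", "driver_stats"]},
+                       "then": {"$subtract": ["$$ts", 86400000]}}],
+                      "default": {"$literal": 0}}}]}
+
+    The numeric 0 default is safe because BSON comparison order sorts all
+    numbers before dates, so ``event_timestamp >= 0`` never filters rows
+    for feature views without a TTL.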
+ """ + branches = [] + + for fv in feature_views: + ttl_ms = _ttl_to_ms(fv) + if ttl_ms is None: + # No TTL for this FV - skip (effectively infinite history) + continue + + branches.append( + { + "case": {"$eq": ["$feature_view", fv.name]}, + "then": {"$subtract": ["$$ts", ttl_ms]}, + } + ) + + # If no TTLs at all, no lower bound needed + if not branches: + return None + + return { + "$gte": [ + "$event_timestamp", + { + "$switch": { + "branches": branches, + # Default: no lower bound (for FVs without TTL) + "default": {"$literal": 0}, + } + }, + ] + } + + class MongoDBOfflineStoreNative(OfflineStore): """Native MongoDB offline store using single-collection schema. @@ -384,6 +435,17 @@ def pull_latest_from_table_or_query( start_utc = start_date.astimezone(tz=timezone.utc) end_utc = end_date.astimezone(tz=timezone.utc) + # Build projection to flatten features subdoc to top-level fields + project_stage: Dict[str, Any] = { + "_id": 0, + "entity_id": "$doc.entity_id", + "event_timestamp": "$doc.event_timestamp", + } + if created_timestamp_column: + project_stage["created_at"] = "$doc.created_at" + for feat in feature_name_columns: + project_stage[feat] = f"$doc.features.{feat}" + # Build aggregation pipeline pipeline: List[Dict[str, Any]] = [ { @@ -399,6 +461,7 @@ def pull_latest_from_table_or_query( "doc": {"$first": "$$ROOT"}, } }, + {"$project": project_stage}, ] def _run() -> pyarrow.Table: @@ -406,23 +469,12 @@ def _run() -> pyarrow.Table: if not docs: return pyarrow.Table.from_pydict({}) - # Flatten documents - rows = [] - for d in docs: - doc = d["doc"] - row = { - "entity_id": doc["entity_id"], - "event_timestamp": doc["event_timestamp"], - } - features = doc.get("features", {}) - for feat in feature_name_columns: - row[feat] = features.get(feat) - rows.append(row) - - df = pd.DataFrame(rows) - # Ensure timestamp is tz-aware - if not df.empty and df["event_timestamp"].dt.tz is None: - df["event_timestamp"] = pd.to_datetime(df["event_timestamp"], utc=True) + df = pd.DataFrame(docs) + if not df.empty and "event_timestamp" in df.columns: + if df["event_timestamp"].dt.tz is None: + df["event_timestamp"] = pd.to_datetime( + df["event_timestamp"], utc=True + ) return pyarrow.Table.from_pandas(df, preserve_index=False) return MongoDBNativeRetrievalJob(query_fn=_run, full_feature_names=False) @@ -453,7 +505,7 @@ def pull_all_from_table_or_query( collection = config.offline_store.collection feature_view_name = data_source.feature_view_name - # Build match filter + # Build match filter: feature_view + optional time range match_filter: Dict[str, Any] = {"feature_view": feature_view_name} if start_date or end_date: ts_filter: Dict[str, Any] = {} @@ -463,27 +515,35 @@ def pull_all_from_table_or_query( ts_filter["$lte"] = end_date.astimezone(tz=timezone.utc) match_filter["event_timestamp"] = ts_filter - pipeline = [{"$match": match_filter}] + # Build projection: flatten features subdoc to top-level fields + # This uses $getField to extract each feature from the features subdoc + project_stage: Dict[str, Any] = { + "_id": 0, + "entity_id": 1, + "event_timestamp": 1, + } + if created_timestamp_column: + project_stage["created_at"] = 1 + for feat in feature_name_columns: + project_stage[feat] = f"$features.{feat}" + + # Simple range scan pipeline - no sorting for efficiency + pipeline: List[Dict[str, Any]] = [ + {"$match": match_filter}, + {"$project": project_stage}, + ] def _run() -> pyarrow.Table: docs = _fetch_documents(connection_string, db_name, collection, pipeline) if not docs: return 
pyarrow.Table.from_pydict({}) - rows = [] - for doc in docs: - row = { - "entity_id": doc["entity_id"], - "event_timestamp": doc["event_timestamp"], - } - features = doc.get("features", {}) - for feat in feature_name_columns: - row[feat] = features.get(feat) - rows.append(row) - - df = pd.DataFrame(rows) - if not df.empty and df["event_timestamp"].dt.tz is None: - df["event_timestamp"] = pd.to_datetime(df["event_timestamp"], utc=True) + df = pd.DataFrame(docs) + if not df.empty and "event_timestamp" in df.columns: + if df["event_timestamp"].dt.tz is None: + df["event_timestamp"] = pd.to_datetime( + df["event_timestamp"], utc=True + ) return pyarrow.Table.from_pandas(df, preserve_index=False) return MongoDBNativeRetrievalJob(query_fn=_run, full_feature_names=False) @@ -510,7 +570,7 @@ def get_historical_features( connection_string = config.offline_store.connection_string db_name = config.offline_store.database - collection = config.offline_store.collection + feature_collection = config.offline_store.collection entity_key_version = config.entity_key_serialization_version entity_schema = dict(zip(entity_df.columns, entity_df.dtypes)) @@ -522,22 +582,28 @@ def get_historical_features( fv_name, feat_name = ref.split(":", 1) fv_to_features.setdefault(fv_name, []).append(feat_name) - fv_by_name = {fv.name: fv for fv in feature_views} + fv_names = list(fv_to_features.keys()) + + # Build per-FV TTL expression using $switch + ttl_expr = _build_ttl_gte_expr(feature_views) def _run() -> pyarrow.Table: - result = entity_df.copy() + if MongoClient is None: + raise FeastExtrasDependencyImportError( + "mongodb", "pymongo is not installed." + ) - # Ensure entity timestamp is tz-aware UTC + # Prepare entity_df: ensure timestamps are UTC and serialize entity keys + result = entity_df.copy() if result[event_timestamp_col].dt.tz is None: result[event_timestamp_col] = pd.to_datetime( result[event_timestamp_col], utc=True ) - result = result.sort_values(event_timestamp_col) - # Get join keys from entity_df columns (excluding event_timestamp) + # Get join keys (all columns except event_timestamp) entity_columns = [c for c in result.columns if c != event_timestamp_col] - # Serialize entity keys for lookup + # Serialize entity keys to bytes (same format as online store) result["_entity_id"] = result.apply( lambda row: _serialize_entity_key_from_row( row, entity_columns, entity_key_version @@ -545,76 +611,109 @@ def _run() -> pyarrow.Table: axis=1, ) - for fv_name, features in fv_to_features.items(): - fv = fv_by_name[fv_name] - source = fv.batch_source - if not isinstance(source, MongoDBSourceNative): - raise ValueError( - f"MongoDBOfflineStoreNative: feature view {fv_name!r} has " - f"non-MongoDBSourceNative source ({type(source).__name__!r})." 
- ) - - # Fetch all documents for this feature view - pipeline = [{"$match": {"feature_view": fv_name}}] - docs = _fetch_documents( - connection_string, db_name, collection, pipeline + # Build temp collection documents + temp_docs = [] + for _, row in result.iterrows(): + temp_docs.append( + { + "entity_id": row["_entity_id"], + "event_timestamp": row[event_timestamp_col], + "_row_idx": _, # Preserve original order + } ) - if not docs: - for f in features: - col = f"{fv_name}__{f}" if full_feature_names else f - result[col] = None - continue + # Create temp collection with unique name + temp_collection_name = f"entity_df_{uuid.uuid4().hex[:12]}" + + client: Any = MongoClient(connection_string, driver=DRIVER_METADATA) + try: + db = client[db_name] + temp_collection = db[temp_collection_name] + temp_collection.insert_many(temp_docs) + + # Build $lookup subpipeline with PIT join logic + # Match: entity_id, feature_view in list, event_timestamp <= entity.ts + match_conditions: List[Dict[str, Any]] = [ + {"$eq": ["$entity_id", "$$entity_id"]}, + {"$in": ["$feature_view", fv_names]}, + {"$lte": ["$event_timestamp", "$$ts"]}, + ] + # Add per-FV TTL filter using $switch + if ttl_expr is not None: + match_conditions.append(ttl_expr) + + lookup_pipeline: List[Dict[str, Any]] = [ + {"$match": {"$expr": {"$and": match_conditions}}}, + {"$sort": {"feature_view": 1, "event_timestamp": -1}}, + { + "$group": { + "_id": "$feature_view", + "doc": {"$first": "$$ROOT"}, + } + }, + ] + + # Main aggregation pipeline + pipeline: List[Dict[str, Any]] = [ + { + "$lookup": { + "from": feature_collection, + "let": { + "entity_id": "$entity_id", + "ts": "$event_timestamp", + }, + "pipeline": lookup_pipeline, + "as": "feature_rows", + } + }, + {"$sort": {"_row_idx": 1}}, # Preserve original order + ] + + docs = list(temp_collection.aggregate(pipeline)) + + finally: + # Cleanup temp collection + client[db_name][temp_collection_name].drop() + client.close() - # Build feature DataFrame - feature_rows = [] - for doc in docs: - row = { - "_entity_id": doc["entity_id"], - "_fv_ts": doc["event_timestamp"], - } - feat_data = doc.get("features", {}) - for f in features: - row[f] = feat_data.get(f) - feature_rows.append(row) - - feature_df = pd.DataFrame(feature_rows) - if feature_df["_fv_ts"].dt.tz is None: - feature_df["_fv_ts"] = pd.to_datetime( - feature_df["_fv_ts"], utc=True - ) - feature_df = feature_df.sort_values("_fv_ts") + if not docs: + return pyarrow.Table.from_pydict({}) - # Rename features if full_feature_names - col_rename = { - f: (f"{fv_name}__{f}" if full_feature_names else f) - for f in features + # Build result DataFrame + rows = [] + for doc in docs: + # Start with entity columns from original entity_df + row_idx = doc["_row_idx"] + row = result.iloc[row_idx][ + entity_columns + [event_timestamp_col] + ].to_dict() + + # Extract features from each feature_view's matched doc + feature_rows_by_fv = { + fr["_id"]: fr["doc"] for fr in doc.get("feature_rows", []) } - feature_df = feature_df.rename(columns=col_rename) - out_features = list(col_rename.values()) - - # Point-in-time join using merge_asof - merged = pd.merge_asof( - result, - feature_df, - left_on=event_timestamp_col, - right_on="_fv_ts", - by="_entity_id", - direction="backward", - ) - # Apply TTL: null out stale features - if fv.ttl: - cutoff = merged[event_timestamp_col] - fv.ttl - too_old = merged["_fv_ts"] < cutoff - for col in out_features: - merged.loc[too_old, col] = None + # Extract features from each feature_view's matched doc + # TTL is 
already applied server-side via $switch expression + for fv_name, features in fv_to_features.items(): + fv_doc = feature_rows_by_fv.get(fv_name) - result = merged.drop(columns=["_fv_ts"], errors="ignore") + for feat in features: + col_name = f"{fv_name}__{feat}" if full_feature_names else feat + if fv_doc is None: + row[col_name] = None + else: + row[col_name] = fv_doc.get("features", {}).get(feat) - # Remove internal entity_id column - result = result.drop(columns=["_entity_id"], errors="ignore") - return pyarrow.Table.from_pandas(result, preserve_index=False) + rows.append(row) + + result_df = pd.DataFrame(rows) + if not result_df.empty and event_timestamp_col in result_df.columns: + if result_df[event_timestamp_col].dt.tz is None: + result_df[event_timestamp_col] = pd.to_datetime( + result_df[event_timestamp_col], utc=True + ) + return pyarrow.Table.from_pandas(result_df, preserve_index=False) return MongoDBNativeRetrievalJob( query_fn=_run, From ad853855db03cbdecc0a39c0c36d2bc7e6e37a23 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Wed, 18 Mar 2026 17:10:40 -0400 Subject: [PATCH 10/30] filter TTL by relevant FVs only, cautiously reset df index; add created_at tie-breaker in sort Signed-off-by: Casey Clements --- .../contrib/mongodb_offline_store/mongodb_native.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py index 0b0dfcc06f..214d5657d3 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py @@ -454,7 +454,7 @@ def pull_latest_from_table_or_query( "event_timestamp": {"$gte": start_utc, "$lte": end_utc}, } }, - {"$sort": {"entity_id": 1, "event_timestamp": -1}}, + {"$sort": {"entity_id": 1, "event_timestamp": -1, "created_at": -1}}, { "$group": { "_id": "$entity_id", @@ -585,7 +585,8 @@ def get_historical_features( fv_names = list(fv_to_features.keys()) # Build per-FV TTL expression using $switch - ttl_expr = _build_ttl_gte_expr(feature_views) + relevant_fvs = [fv for fv in feature_views if fv.name in fv_to_features] + ttl_expr = _build_ttl_gte_expr(relevant_fvs) def _run() -> pyarrow.Table: if MongoClient is None: @@ -623,7 +624,7 @@ def _run() -> pyarrow.Table: ) # Create temp collection with unique name - temp_collection_name = f"entity_df_{uuid.uuid4().hex[:12]}" + temp_collection_name = f"tmp_entity_df_{uuid.uuid4().hex[:12]}" client: Any = MongoClient(connection_string, driver=DRIVER_METADATA) try: @@ -680,6 +681,7 @@ def _run() -> pyarrow.Table: return pyarrow.Table.from_pydict({}) # Build result DataFrame + result = result.reset_index(drop=True) rows = [] for doc in docs: # Start with entity columns from original entity_df From 4d02febe572a5e129549be8eb5e9d1445af304d1 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Wed, 18 Mar 2026 17:47:39 -0400 Subject: [PATCH 11/30] Updated docstrings Signed-off-by: Casey Clements --- .../mongodb_offline_store/mongodb_native.py | 82 +++++++++++++------ 1 file changed, 59 insertions(+), 23 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py index 214d5657d3..ba2d9e29a0 100644 --- 
a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py @@ -17,9 +17,19 @@ This module implements a MongoDB offline store using native MQL aggregation pipelines. It uses a single-collection schema where all feature views share -one collection, discriminated by a ``feature_view`` field. +one collection. It is event-based: each document represents an observation +of a FeatureView at a specific point in time. Each document may contain a +subset (0 or more) of the features defined in that FeatureView, all sharing +a single event_timestamp. -Schema: +Collection Index: + db.feature_history.create_index([ + ("feature_view", ASCENDING), + ("entity_id", ASCENDING), + ("event_timestamp", DESCENDING), + ]) + +Document Schema (example): { "_id": ObjectId(), "entity_id": "", @@ -32,12 +42,42 @@ "created_at": ISODate("2026-01-20T12:00:05Z") } -Recommended Index: - db.feature_history.create_index([ - ("entity_id", ASCENDING), - ("feature_view", ASCENDING), - ("event_timestamp", DESCENDING), - ]) +Feature Freshness Semantics: + This implementation operates at *document-level freshness*, not + per-feature freshness. During retrieval (e.g. point-in-time joins), + the system selects the most recent document for a given + (entity_id, feature_view) that satisfies time constraints, and then + extracts all requested features from that document. + + As a result, if a newer document contains only a subset of features, + missing features will be returned as NULL—even if older documents + contained values for those features. The system does not backfill + individual feature values from earlier events. + + This behavior matches common Feast offline store semantics, but may + differ from systems that compute "latest value per feature". + +Schema Evolution ("Feature Creep"): + Because features are stored in a flexible subdocument, different + documents for the same FeatureView may contain different sets of + feature fields over time. This supports: + - adding new features without backfilling historical data + - partial writes or sparse feature computation + + However, it also implies: + - newly added features will be NULL for older events + - partially populated documents may lead to NULL values even + when older data contained those features + + Users should ensure that feature computation pipelines write + complete feature sets when consistent availability is required. + +Notes: + - Entity keys are serialized to ensure consistency with Feast’s + online store and to avoid type ambiguity. + - Point-in-time correctness is enforced per FeatureView. + - TTL (time-to-live) constraints are applied per FeatureView during + historical retrieval. """ import json @@ -79,12 +119,7 @@ class MongoDBOfflineStoreNativeConfig(FeastConfigBaseModel): - """Configuration for the Native MongoDB offline store. - - Uses a single shared collection for all feature views, with documents - containing an ``entity_id``, ``feature_view`` discriminator, and nested - ``features`` subdocument. - """ + """Configuration for the Native MongoDB offline store.""" type: StrictStr = "feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_native.MongoDBOfflineStoreNative" """Offline store type selector""" @@ -100,15 +135,16 @@ class MongoDBOfflineStoreNativeConfig(FeastConfigBaseModel): class MongoDBSourceNative(DataSource): - """A MongoDB data source for the Native offline store. 
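A hypothetical example to make the document-level freshness semantics above concrete: two events exist for the same entity and FeatureView, and the newer event carries only a partial feature set. A point-in-time lookup reads all requested features from the newest qualifying document alone, so the missing feature comes back NULL rather than being backfilled from the older event:

    from datetime import datetime, timezone

    # Hypothetical documents for one entity in feature view "driver_stats":
    older = {
        "feature_view": "driver_stats",
        "features": {"conv_rate": 0.85, "acc_rate": 0.91},
        "event_timestamp": datetime(2026, 1, 20, 12, 0, tzinfo=timezone.utc),
    }
    newer = {  # partial write: conv_rate was not computed for this event
        "feature_view": "driver_stats",
        "features": {"acc_rate": 0.93},
        "event_timestamp": datetime(2026, 1, 20, 12, 5, tzinfo=timezone.utc),
    }

    # A PIT lookup at 12:10 selects the most recent qualifying document
    # (newer) and extracts all requested features from it alone:
    selected = newer
    print(selected["features"].get("conv_rate"))  # None, not 0.85
    print(selected["features"].get("acc_rate"))   # 0.93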
+ """A MongoDB data source for the native offline store. - Unlike MongoDBSource (Ibis), this source does not specify a collection - per FeatureView. Instead, all FeatureViews share a single collection - (configured at the store level), and are discriminated by the - ``feature_view`` field in each document. + Unlike many data source implementations, this source does not map each + FeatureView to its own table or collection. Instead, all FeatureViews + share a single MongoDB collection (configured at the store level). - The ``name`` parameter becomes the ``feature_view`` discriminator value - used to filter documents in queries. + Each document in that collection includes a ``feature_view`` field that + identifies which FeatureView it belongs to. The ``name`` of this data + source corresponds to that value and is used to filter documents during + queries. """ def __init__( @@ -400,7 +436,7 @@ class MongoDBOfflineStoreNative(OfflineStore): All feature views share one collection (``feature_history``), with documents containing: - ``entity_id``: serialized entity key (bytes) - - ``feature_view``: discriminator field matching FeatureView name + - ``feature_view``: field matching FeatureView name - ``features``: subdocument with feature name/value pairs - ``event_timestamp``: event time - ``created_at``: ingestion time @@ -623,7 +659,7 @@ def _run() -> pyarrow.Table: } ) - # Create temp collection with unique name + # Create temporary collection for query temp_collection_name = f"tmp_entity_df_{uuid.uuid4().hex[:12]}" client: Any = MongoClient(connection_string, driver=DRIVER_METADATA) From 8d86cdd54861c61eae71056f66c1e13330246b76 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Wed, 18 Mar 2026 18:21:39 -0400 Subject: [PATCH 12/30] Lazy index creation via _get_client_and_ensure_indexes Signed-off-by: Casey Clements --- .../mongodb_offline_store/mongodb_native.py | 103 ++++++++++++------ 1 file changed, 69 insertions(+), 34 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py index ba2d9e29a0..c9cbae587a 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py @@ -302,19 +302,13 @@ def _infer_python_type_str(value: Any) -> Optional[str]: def _fetch_documents( - connection_string: str, + client: Any, database: str, collection: str, pipeline: List[Dict], ) -> List[Dict]: """Execute an aggregation pipeline and return documents.""" - if MongoClient is None: - raise FeastExtrasDependencyImportError("mongodb", "pymongo is not installed.") - client: Any = MongoClient(connection_string, driver=DRIVER_METADATA) - try: - return list(client[database][collection].aggregate(pipeline)) - finally: - client.close() + return list(client[database][collection].aggregate(pipeline)) class MongoDBNativeRetrievalJob(RetrievalJob): @@ -442,6 +436,42 @@ class MongoDBOfflineStoreNative(OfflineStore): - ``created_at``: ingestion time """ + _index_initialized: bool = False + + @staticmethod + def _ensure_indexes(client: Any, db_name: str, collection_name: str) -> None: + """Create recommended indexes on the feature_history collection.""" + collection = client[db_name][collection_name] + collection.create_index( + [ + ("entity_id", 1), + ("feature_view", 1), + ("event_timestamp", -1), + ], + name="entity_fv_ts_idx", + ) + + @classmethod + def 
_get_client_and_ensure_indexes(cls, config: RepoConfig) -> Any: + """Get a MongoClient and ensure indexes exist (once per process).""" + if MongoClient is None: + raise FeastExtrasDependencyImportError( + "mongodb", "pymongo is not installed." + ) + client: Any = MongoClient( + config.offline_store.connection_string, driver=DRIVER_METADATA + ) + + if not cls._index_initialized: + cls._ensure_indexes( + client, + config.offline_store.database, + config.offline_store.collection, + ) + cls._index_initialized = True + + return client + @staticmethod def pull_latest_from_table_or_query( config: RepoConfig, @@ -463,7 +493,6 @@ def pull_latest_from_table_or_query( RuntimeWarning, ) - connection_string = config.offline_store.connection_string db_name = config.offline_store.database collection = config.offline_store.collection feature_view_name = data_source.feature_view_name @@ -501,17 +530,21 @@ def pull_latest_from_table_or_query( ] def _run() -> pyarrow.Table: - docs = _fetch_documents(connection_string, db_name, collection, pipeline) - if not docs: - return pyarrow.Table.from_pydict({}) - - df = pd.DataFrame(docs) - if not df.empty and "event_timestamp" in df.columns: - if df["event_timestamp"].dt.tz is None: - df["event_timestamp"] = pd.to_datetime( - df["event_timestamp"], utc=True - ) - return pyarrow.Table.from_pandas(df, preserve_index=False) + client = MongoDBOfflineStoreNative._get_client_and_ensure_indexes(config) + try: + docs = _fetch_documents(client, db_name, collection, pipeline) + if not docs: + return pyarrow.Table.from_pydict({}) + + df = pd.DataFrame(docs) + if not df.empty and "event_timestamp" in df.columns: + if df["event_timestamp"].dt.tz is None: + df["event_timestamp"] = pd.to_datetime( + df["event_timestamp"], utc=True + ) + return pyarrow.Table.from_pandas(df, preserve_index=False) + finally: + client.close() return MongoDBNativeRetrievalJob(query_fn=_run, full_feature_names=False) @@ -536,7 +569,6 @@ def pull_all_from_table_or_query( RuntimeWarning, ) - connection_string = config.offline_store.connection_string db_name = config.offline_store.database collection = config.offline_store.collection feature_view_name = data_source.feature_view_name @@ -570,17 +602,21 @@ def pull_all_from_table_or_query( ] def _run() -> pyarrow.Table: - docs = _fetch_documents(connection_string, db_name, collection, pipeline) - if not docs: - return pyarrow.Table.from_pydict({}) - - df = pd.DataFrame(docs) - if not df.empty and "event_timestamp" in df.columns: - if df["event_timestamp"].dt.tz is None: - df["event_timestamp"] = pd.to_datetime( - df["event_timestamp"], utc=True - ) - return pyarrow.Table.from_pandas(df, preserve_index=False) + client = MongoDBOfflineStoreNative._get_client_and_ensure_indexes(config) + try: + docs = _fetch_documents(client, db_name, collection, pipeline) + if not docs: + return pyarrow.Table.from_pydict({}) + + df = pd.DataFrame(docs) + if not df.empty and "event_timestamp" in df.columns: + if df["event_timestamp"].dt.tz is None: + df["event_timestamp"] = pd.to_datetime( + df["event_timestamp"], utc=True + ) + return pyarrow.Table.from_pandas(df, preserve_index=False) + finally: + client.close() return MongoDBNativeRetrievalJob(query_fn=_run, full_feature_names=False) @@ -604,7 +640,6 @@ def get_historical_features( RuntimeWarning, ) - connection_string = config.offline_store.connection_string db_name = config.offline_store.database feature_collection = config.offline_store.collection entity_key_version = config.entity_key_serialization_version @@ -662,7 
+697,7 @@ def _run() -> pyarrow.Table: # Create temporary collection for query temp_collection_name = f"tmp_entity_df_{uuid.uuid4().hex[:12]}" - client: Any = MongoClient(connection_string, driver=DRIVER_METADATA) + client = MongoDBOfflineStoreNative._get_client_and_ensure_indexes(config) try: db = client[db_name] temp_collection = db[temp_collection_name] From a1e3c9386b69dbd4158a4eee88fd06ade2c5f9a2 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Wed, 18 Mar 2026 19:13:48 -0400 Subject: [PATCH 13/30] Add performance benchmarks comparing Ibis vs Native MongoDB offline stores Signed-off-by: Casey Clements --- .../benchmark_mongodb_offline_stores.py | 836 ++++++++++++++++++ 1 file changed, 836 insertions(+) create mode 100644 sdk/python/tests/unit/infra/offline_stores/contrib/benchmark_mongodb_offline_stores.py diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/benchmark_mongodb_offline_stores.py b/sdk/python/tests/unit/infra/offline_stores/contrib/benchmark_mongodb_offline_stores.py new file mode 100644 index 0000000000..177023dd6f --- /dev/null +++ b/sdk/python/tests/unit/infra/offline_stores/contrib/benchmark_mongodb_offline_stores.py @@ -0,0 +1,836 @@ +""" +Performance benchmarks comparing Ibis vs Native MongoDB offline store implementations. + +These tests measure performance across different scaling dimensions: +1. Row count scaling (entity_df size) +2. Feature width scaling (features per FeatureView) +3. Entity distribution (unique vs skewed/repeated entity_ids) + +Metrics captured: +- Runtime (wall clock) +- Memory (peak Python memory via tracemalloc) +- MongoDB server metrics (opcounters, execution stats) + +Run with: pytest benchmark_mongodb_offline_stores.py -v -s +Skip slow tests: pytest benchmark_mongodb_offline_stores.py -v -s -m "not slow" +""" + +import time +import tracemalloc +from dataclasses import dataclass, field +from datetime import datetime, timedelta +from typing import Any, Dict, Generator, Optional + +import pandas as pd +import pytest +import pytz + +pytest.importorskip("pymongo") + +from unittest.mock import MagicMock + +from pymongo import MongoClient +from testcontainers.mongodb import MongoDbContainer + +from feast import Entity, FeatureView, Field +from feast.infra.key_encoding_utils import serialize_entity_key +from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb import ( + MongoDBOfflineStoreIbis, + MongoDBOfflineStoreIbisConfig, +) +from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_native import ( + MongoDBOfflineStoreNative, + MongoDBOfflineStoreNativeConfig, + MongoDBSourceNative, +) +from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_source import ( + MongoDBSource, +) +from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import Value as ValueProto +from feast.repo_config import RepoConfig +from feast.types import Float64, Int64 +from feast.value_type import ValueType + +# Check if Docker is available +docker_available = False +try: + import docker + + try: + client = docker.from_env() + client.ping() + docker_available = True + except Exception: + pass +except ImportError: + pass + +_requires_docker = pytest.mark.skipif( + not docker_available, + reason="Docker is not available or not running.", +) + +ENTITY_KEY_VERSION = 3 + + +@dataclass +class BenchmarkResult: + """Container for benchmark results.""" + + implementation: str + test_name: str + dimension: str + value: int + duration_seconds: float + 
rows_per_second: float + peak_memory_mb: float = 0.0 + mongo_docs_examined: int = 0 + mongo_keys_examined: int = 0 + mongo_execution_time_ms: int = 0 + + +@dataclass +class MongoMetrics: + """MongoDB server metrics captured before/after a query.""" + + opcounters: Dict[str, int] = field(default_factory=dict) + docs_examined: int = 0 + keys_examined: int = 0 + + @staticmethod + def capture(client: Any) -> "MongoMetrics": + """Capture current MongoDB server metrics.""" + status = client.admin.command("serverStatus") + return MongoMetrics( + opcounters=dict(status.get("opcounters", {})), + ) + + def delta(self, after: "MongoMetrics") -> Dict[str, int]: + """Calculate delta between two metric snapshots.""" + return { + k: after.opcounters.get(k, 0) - self.opcounters.get(k, 0) + for k in after.opcounters + } + + +def _make_entity_id(driver_id: int) -> bytes: + """Create serialized entity key.""" + entity_key = EntityKeyProto() + entity_key.join_keys.append("driver_id") + val = ValueProto() + val.int64_val = driver_id + entity_key.entity_values.append(val) + return serialize_entity_key(entity_key, ENTITY_KEY_VERSION) + + +@pytest.fixture(scope="module") +def mongodb_container() -> Generator[MongoDbContainer, None, None]: + """Start a MongoDB container for benchmarks.""" + container = MongoDbContainer( + "mongo:latest", + username="test", + password="test", # pragma: allowlist secret + ).with_exposed_ports(27017) + container.start() + yield container + container.stop() + + +@pytest.fixture +def mongodb_connection_string(mongodb_container: MongoDbContainer) -> str: + """Get MongoDB connection string.""" + exposed_port = mongodb_container.get_exposed_port(27017) + return f"mongodb://test:test@localhost:{exposed_port}" # pragma: allowlist secret + + +@pytest.fixture +def ibis_config(mongodb_connection_string: str) -> RepoConfig: + """RepoConfig for Ibis implementation.""" + return RepoConfig( + project="benchmark", + registry="memory://", + provider="local", + offline_store=MongoDBOfflineStoreIbisConfig( + connection_string=mongodb_connection_string, + database="benchmark_db", + ), + online_store={"type": "sqlite"}, + entity_key_serialization_version=ENTITY_KEY_VERSION, + ) + + +@pytest.fixture +def native_config(mongodb_connection_string: str) -> RepoConfig: + """RepoConfig for Native implementation.""" + return RepoConfig( + project="benchmark", + registry="memory://", + provider="local", + offline_store=MongoDBOfflineStoreNativeConfig( + connection_string=mongodb_connection_string, + database="benchmark_db", + collection="feature_history", + ), + online_store={"type": "sqlite"}, + entity_key_serialization_version=ENTITY_KEY_VERSION, + ) + + +def _generate_ibis_data( + client: MongoClient, + db_name: str, + collection_name: str, + num_entities: int, + num_features: int, + rows_per_entity: int = 5, +) -> datetime: + """Generate test data for Ibis (one collection per FV, flat schema).""" + collection = client[db_name][collection_name] + collection.drop() + + now = datetime.now(tz=pytz.UTC) + docs = [] + + for entity_id in range(num_entities): + for row in range(rows_per_entity): + doc = { + "driver_id": entity_id, + "event_timestamp": now - timedelta(hours=row), + } + for f in range(num_features): + doc[f"feature_{f}"] = float(entity_id * 100 + f + row * 0.1) + docs.append(doc) + + collection.insert_many(docs) + return now + + +def _generate_native_data( + client: MongoClient, + db_name: str, + collection_name: str, + feature_view_name: str, + num_entities: int, + num_features: int, + 
rows_per_entity: int = 5, +) -> datetime: + """Generate test data for Native (single collection, nested features).""" + collection = client[db_name][collection_name] + # Don't drop - may have multiple FVs in same collection + + now = datetime.now(tz=pytz.UTC) + docs = [] + + for entity_id in range(num_entities): + for row in range(rows_per_entity): + features = {} + for f in range(num_features): + features[f"feature_{f}"] = float(entity_id * 100 + f + row * 0.1) + + doc = { + "entity_id": _make_entity_id(entity_id), + "feature_view": feature_view_name, + "features": features, + "event_timestamp": now - timedelta(hours=row), + "created_at": now - timedelta(hours=row), + } + docs.append(doc) + + collection.insert_many(docs) + return now + + +def _create_ibis_fv(num_features: int) -> tuple: + """Create Ibis source and FeatureView.""" + source = MongoDBSource( + name="driver_benchmark", + database="benchmark_db", + collection="driver_benchmark", + timestamp_field="event_timestamp", + ) + entity = Entity( + name="driver_id", join_keys=["driver_id"], value_type=ValueType.INT64 + ) + + schema = [Field(name="driver_id", dtype=Int64)] + for f in range(num_features): + schema.append(Field(name=f"feature_{f}", dtype=Float64)) + + fv = FeatureView( + name="driver_benchmark", + entities=[entity], + schema=schema, + source=source, + ttl=timedelta(days=1), + ) + return source, fv + + +def _create_native_fv(num_features: int) -> tuple: + """Create Native source and FeatureView.""" + source = MongoDBSourceNative( + name="driver_benchmark", + timestamp_field="event_timestamp", + ) + entity = Entity( + name="driver_id", join_keys=["driver_id"], value_type=ValueType.INT64 + ) + + schema = [Field(name="driver_id", dtype=Int64)] + for f in range(num_features): + schema.append(Field(name=f"feature_{f}", dtype=Float64)) + + fv = FeatureView( + name="driver_benchmark", + entities=[entity], + schema=schema, + source=source, + ttl=timedelta(days=1), + ) + return source, fv + + +def _run_benchmark(func, name: str) -> float: + """Run a function and return elapsed time.""" + start = time.perf_counter() + func() # Execute the function + elapsed = time.perf_counter() - start + return elapsed + + +@dataclass +class FullBenchmarkResult: + """Full benchmark results with all metrics.""" + + elapsed_seconds: float + peak_memory_mb: float + mongo_opcounters_delta: Dict[str, int] + + +def _run_benchmark_full( + func, + mongo_client: Optional[Any] = None, +) -> FullBenchmarkResult: + """Run a benchmark capturing runtime, memory, and MongoDB metrics.""" + # Capture MongoDB metrics before + mongo_before = None + if mongo_client: + mongo_before = MongoMetrics.capture(mongo_client) + + # Start memory tracking + tracemalloc.start() + + # Run the benchmark + start = time.perf_counter() + func() + elapsed = time.perf_counter() - start + + # Capture peak memory + _, peak_memory = tracemalloc.get_traced_memory() + tracemalloc.stop() + peak_memory_mb = peak_memory / (1024 * 1024) + + # Capture MongoDB metrics after + mongo_delta = {} + if mongo_client and mongo_before: + mongo_after = MongoMetrics.capture(mongo_client) + mongo_delta = mongo_before.delta(mongo_after) + + return FullBenchmarkResult( + elapsed_seconds=elapsed, + peak_memory_mb=peak_memory_mb, + mongo_opcounters_delta=mongo_delta, + ) + + +def _print_benchmark_result( + impl: str, + dimension_name: str, + dimension_value: int, + result: FullBenchmarkResult, + num_rows: Optional[int] = None, +) -> None: + """Pretty print benchmark results.""" + print(f"\n[{impl}] 
{dimension_name}: {dimension_value:,}") + print(f" Time: {result.elapsed_seconds:.3f}s") + print(f" Memory: {result.peak_memory_mb:.1f} MB") + if num_rows: + rate = num_rows / result.elapsed_seconds if result.elapsed_seconds > 0 else 0 + print(f" Rate: {rate:,.0f} rows/s") + if result.mongo_opcounters_delta: + print(f" Mongo ops: {result.mongo_opcounters_delta}") + + +# ============================================================================= +# Test 1: Scale Rows (entity_df size) +# ============================================================================= + +ROW_COUNTS = [ + 1000, + 5000, + 10000, +] # Reduced for CI; use [10000, 50000, 100000, 500000] for full benchmark + + +@_requires_docker +@pytest.mark.parametrize("num_rows", ROW_COUNTS) +def test_scale_rows_ibis( + mongodb_connection_string: str, ibis_config: RepoConfig, num_rows: int +) -> None: + """Benchmark Ibis implementation with varying entity_df sizes. + + Measures: runtime, peak memory, MongoDB opcounters. + """ + num_features = 10 + num_entities = num_rows # One row per entity for simplicity + + client = MongoClient(mongodb_connection_string) + try: + now = _generate_ibis_data( + client, + "benchmark_db", + "driver_benchmark", + num_entities=num_entities, + num_features=num_features, + rows_per_entity=3, + ) + + _, fv = _create_ibis_fv(num_features) + + entity_df = pd.DataFrame( + { + "driver_id": list(range(num_entities)), + "event_timestamp": [now] * num_entities, + } + ) + + feature_refs = [f"driver_benchmark:feature_{i}" for i in range(num_features)] + + def run_query(): + job = MongoDBOfflineStoreIbis.get_historical_features( + config=ibis_config, + feature_views=[fv], + feature_refs=feature_refs, + entity_df=entity_df, + registry=MagicMock(), + project="benchmark", + full_feature_names=False, + ) + return job.to_df() + + result = _run_benchmark_full(run_query, mongo_client=client) + _print_benchmark_result("IBIS", "Rows", num_rows, result, num_rows=num_rows) + + finally: + client.close() + + +@_requires_docker +@pytest.mark.parametrize("num_rows", ROW_COUNTS) +def test_scale_rows_native( + mongodb_connection_string: str, native_config: RepoConfig, num_rows: int +) -> None: + """Benchmark Native implementation with varying entity_df sizes. + + Measures: runtime, peak memory, MongoDB opcounters. 
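A condensed, simplified sketch of the measurement pattern these tests share (wall-clock timing via time.perf_counter plus peak Python memory via tracemalloc, as in the _run_benchmark_full helper above; the workload lambda is hypothetical):

    import time
    import tracemalloc

    def measure(func):
        """Capture wall-clock time and peak Python memory for one call."""
        tracemalloc.start()
        start = time.perf_counter()
        func()
        elapsed = time.perf_counter() - start
        _, peak = tracemalloc.get_traced_memory()
        tracemalloc.stop()
        return elapsed, peak / (1024 * 1024)  # (seconds, MB)

    elapsed_s, peak_mb = measure(lambda: sum(i * i for i in range(100_000)))
    print(f"{elapsed_s:.3f}s, {peak_mb:.1f} MB")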
+ """ + num_features = 10 + num_entities = num_rows + + client = MongoClient(mongodb_connection_string) + try: + client["benchmark_db"]["feature_history"].drop() + now = _generate_native_data( + client, + "benchmark_db", + "feature_history", + "driver_benchmark", + num_entities=num_entities, + num_features=num_features, + rows_per_entity=3, + ) + + _, fv = _create_native_fv(num_features) + + entity_df = pd.DataFrame( + { + "driver_id": list(range(num_entities)), + "event_timestamp": [now] * num_entities, + } + ) + + feature_refs = [f"driver_benchmark:feature_{i}" for i in range(num_features)] + + def run_query(): + job = MongoDBOfflineStoreNative.get_historical_features( + config=native_config, + feature_views=[fv], + feature_refs=feature_refs, + entity_df=entity_df, + registry=MagicMock(), + project="benchmark", + full_feature_names=False, + ) + return job.to_df() + + result = _run_benchmark_full(run_query, mongo_client=client) + _print_benchmark_result("NATIVE", "Rows", num_rows, result, num_rows=num_rows) + + finally: + client.close() + + +# ============================================================================= +# Test 2: Wide Feature Views (features per FV) +# ============================================================================= + +FEATURE_COUNTS = [10, 50, 100] # Use [50, 100, 150, 200] for full benchmark + + +@_requires_docker +@pytest.mark.parametrize("num_features", FEATURE_COUNTS) +def test_wide_features_ibis( + mongodb_connection_string: str, ibis_config: RepoConfig, num_features: int +) -> None: + """Benchmark Ibis with varying feature width.""" + num_entities = 1000 + + client = MongoClient(mongodb_connection_string) + try: + now = _generate_ibis_data( + client, + "benchmark_db", + "driver_benchmark", + num_entities=num_entities, + num_features=num_features, + rows_per_entity=3, + ) + + _, fv = _create_ibis_fv(num_features) + + entity_df = pd.DataFrame( + { + "driver_id": list(range(num_entities)), + "event_timestamp": [now] * num_entities, + } + ) + + feature_refs = [f"driver_benchmark:feature_{i}" for i in range(num_features)] + + def run_query(): + job = MongoDBOfflineStoreIbis.get_historical_features( + config=ibis_config, + feature_views=[fv], + feature_refs=feature_refs, + entity_df=entity_df, + registry=MagicMock(), + project="benchmark", + full_feature_names=False, + ) + return job.to_df() + + result = _run_benchmark_full(run_query, mongo_client=client) + _print_benchmark_result( + "IBIS", "Features", num_features, result, num_rows=num_entities + ) + + finally: + client.close() + + +@_requires_docker +@pytest.mark.parametrize("num_features", FEATURE_COUNTS) +def test_wide_features_native( + mongodb_connection_string: str, native_config: RepoConfig, num_features: int +) -> None: + """Benchmark Native with varying feature width.""" + num_entities = 1000 + + client = MongoClient(mongodb_connection_string) + try: + client["benchmark_db"]["feature_history"].drop() + now = _generate_native_data( + client, + "benchmark_db", + "feature_history", + "driver_benchmark", + num_entities=num_entities, + num_features=num_features, + rows_per_entity=3, + ) + + _, fv = _create_native_fv(num_features) + + entity_df = pd.DataFrame( + { + "driver_id": list(range(num_entities)), + "event_timestamp": [now] * num_entities, + } + ) + + feature_refs = [f"driver_benchmark:feature_{i}" for i in range(num_features)] + + def run_query(): + job = MongoDBOfflineStoreNative.get_historical_features( + config=native_config, + feature_views=[fv], + feature_refs=feature_refs, + 
entity_df=entity_df, + registry=MagicMock(), + project="benchmark", + full_feature_names=False, + ) + return job.to_df() + + result = _run_benchmark_full(run_query, mongo_client=client) + _print_benchmark_result( + "NATIVE", "Features", num_features, result, num_rows=num_entities + ) + + finally: + client.close() + + +# ============================================================================= +# Test 3: Skewed Entity Distribution +# ============================================================================= + + +@_requires_docker +@pytest.mark.parametrize("unique_ratio", [1.0, 0.5, 0.1]) # 100%, 50%, 10% unique +def test_entity_skew_ibis( + mongodb_connection_string: str, ibis_config: RepoConfig, unique_ratio: float +) -> None: + """Benchmark Ibis with varying entity uniqueness in entity_df.""" + import numpy as np + + total_rows = 5000 + num_features = 10 + num_unique_entities = int(total_rows * unique_ratio) + num_unique_entities = max(num_unique_entities, 1) + + client = MongoClient(mongodb_connection_string) + try: + now = _generate_ibis_data( + client, + "benchmark_db", + "driver_benchmark", + num_entities=num_unique_entities, + num_features=num_features, + rows_per_entity=5, + ) + + _, fv = _create_ibis_fv(num_features) + + # Create entity_df with repeated entity_ids + entity_ids = np.random.choice( + num_unique_entities, size=total_rows, replace=True + ) + entity_df = pd.DataFrame( + { + "driver_id": entity_ids, + "event_timestamp": [ + now - timedelta(minutes=i % 60) for i in range(total_rows) + ], + } + ) + + feature_refs = [f"driver_benchmark:feature_{i}" for i in range(num_features)] + + def run_query(): + job = MongoDBOfflineStoreIbis.get_historical_features( + config=ibis_config, + feature_views=[fv], + feature_refs=feature_refs, + entity_df=entity_df, + registry=MagicMock(), + project="benchmark", + full_feature_names=False, + ) + return job.to_df() + + result = _run_benchmark_full(run_query, mongo_client=client) + print( + f"\n[IBIS] Unique ratio: {unique_ratio:.0%} ({num_unique_entities:,} unique / {total_rows:,} rows)" + ) + print(f" Time: {result.elapsed_seconds:.3f}s") + print(f" Memory: {result.peak_memory_mb:.1f} MB") + print(f" Mongo ops: {result.mongo_opcounters_delta}") + + finally: + client.close() + + +@_requires_docker +@pytest.mark.parametrize("unique_ratio", [1.0, 0.5, 0.1]) +def test_entity_skew_native( + mongodb_connection_string: str, native_config: RepoConfig, unique_ratio: float +) -> None: + """Benchmark Native with varying entity uniqueness in entity_df.""" + import numpy as np + + total_rows = 5000 + num_features = 10 + num_unique_entities = int(total_rows * unique_ratio) + num_unique_entities = max(num_unique_entities, 1) + + client = MongoClient(mongodb_connection_string) + try: + client["benchmark_db"]["feature_history"].drop() + now = _generate_native_data( + client, + "benchmark_db", + "feature_history", + "driver_benchmark", + num_entities=num_unique_entities, + num_features=num_features, + rows_per_entity=5, + ) + + _, fv = _create_native_fv(num_features) + + entity_ids = np.random.choice( + num_unique_entities, size=total_rows, replace=True + ) + entity_df = pd.DataFrame( + { + "driver_id": entity_ids, + "event_timestamp": [ + now - timedelta(minutes=i % 60) for i in range(total_rows) + ], + } + ) + + feature_refs = [f"driver_benchmark:feature_{i}" for i in range(num_features)] + + def run_query(): + job = MongoDBOfflineStoreNative.get_historical_features( + config=native_config, + feature_views=[fv], + feature_refs=feature_refs, + 
entity_df=entity_df, + registry=MagicMock(), + project="benchmark", + full_feature_names=False, + ) + return job.to_df() + + result = _run_benchmark_full(run_query, mongo_client=client) + print( + f"\n[NATIVE] Unique ratio: {unique_ratio:.0%} ({num_unique_entities:,} unique / {total_rows:,} rows)" + ) + print(f" Time: {result.elapsed_seconds:.3f}s") + print(f" Memory: {result.peak_memory_mb:.1f} MB") + print(f" Mongo ops: {result.mongo_opcounters_delta}") + + finally: + client.close() + + +# ============================================================================= +# Summary comparison test +# ============================================================================= + + +@_requires_docker +def test_summary_comparison( + mongodb_connection_string: str, ibis_config: RepoConfig, native_config: RepoConfig +) -> None: + """Run a standard comparison and print summary with full metrics.""" + num_entities = 2000 + num_features = 20 + + client = MongoClient(mongodb_connection_string) + try: + # Setup Ibis data + now = _generate_ibis_data( + client, + "benchmark_db", + "driver_benchmark", + num_entities=num_entities, + num_features=num_features, + rows_per_entity=5, + ) + + # Setup Native data + client["benchmark_db"]["feature_history"].drop() + _generate_native_data( + client, + "benchmark_db", + "feature_history", + "driver_benchmark", + num_entities=num_entities, + num_features=num_features, + rows_per_entity=5, + ) + + entity_df = pd.DataFrame( + { + "driver_id": list(range(num_entities)), + "event_timestamp": [now] * num_entities, + } + ) + + feature_refs = [f"driver_benchmark:feature_{i}" for i in range(num_features)] + + # Ibis benchmark + _, ibis_fv = _create_ibis_fv(num_features) + + def run_ibis(): + job = MongoDBOfflineStoreIbis.get_historical_features( + config=ibis_config, + feature_views=[ibis_fv], + feature_refs=feature_refs, + entity_df=entity_df, + registry=MagicMock(), + project="benchmark", + full_feature_names=False, + ) + return job.to_df() + + ibis_result = _run_benchmark_full(run_ibis, mongo_client=client) + + # Native benchmark + _, native_fv = _create_native_fv(num_features) + + def run_native(): + job = MongoDBOfflineStoreNative.get_historical_features( + config=native_config, + feature_views=[native_fv], + feature_refs=feature_refs, + entity_df=entity_df, + registry=MagicMock(), + project="benchmark", + full_feature_names=False, + ) + return job.to_df() + + native_result = _run_benchmark_full(run_native, mongo_client=client) + + # Print summary + print("\n" + "=" * 70) + print("SUMMARY COMPARISON") + print("=" * 70) + print(f"Entities: {num_entities:,} | Features: {num_features}") + print("-" * 70) + print(f"{'Metric':<20} {'Ibis':>20} {'Native':>20}") + print("-" * 70) + print( + f"{'Time (s)':<20} {ibis_result.elapsed_seconds:>20.3f} {native_result.elapsed_seconds:>20.3f}" + ) + print( + f"{'Memory (MB)':<20} {ibis_result.peak_memory_mb:>20.1f} {native_result.peak_memory_mb:>20.1f}" + ) + print( + f"{'Rows/sec':<20} {num_entities / ibis_result.elapsed_seconds:>20,.0f} {num_entities / native_result.elapsed_seconds:>20,.0f}" + ) + print("-" * 70) + + if native_result.elapsed_seconds > 0: + ratio = native_result.elapsed_seconds / ibis_result.elapsed_seconds + print(f"Ibis is {ratio:.1f}x faster than Native") + print("=" * 70) + + finally: + client.close() From b8fcba5d5735fd871bdfe4bd2caa5c8257147bb8 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Wed, 18 Mar 2026 19:28:00 -0400 Subject: [PATCH 14/30] Refactor Native get_historical_features: replace with 
 fetch+pandas join

- Eliminate $lookup-based PIT join which scaled poorly (O(n×m))
- Use single query to fetch all matching feature data
- Batch entity_ids into chunks of 1000 for large queries
- Flatten features subdoc with pd.json_normalize
- Apply pd.merge_asof for efficient PIT join per FeatureView
- Handle TTL filtering in pandas instead of MQL
- Remove unused _ttl_to_ms and _build_ttl_gte_expr helpers

Performance improvement:
- Before: 10k rows in ~188s (53 rows/s)
- After: 10k rows in ~7.4s (1,354 rows/s)
- Now competitive with Ibis implementation

Signed-off-by: Casey Clements
---
 .../mongodb_offline_store/mongodb_native.py   | 273 ++++++++----------
 1 file changed, 124 insertions(+), 149 deletions(-)

diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py
index c9cbae587a..6e1f610d37 100644
--- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py
+++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py
@@ -81,7 +81,6 @@
 """
 
 import json
-import uuid
 import warnings
 from datetime import datetime, timezone
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
@@ -375,55 +374,6 @@ def _serialize_entity_key_from_row(
     return serialize_entity_key(entity_key, entity_key_serialization_version)
 
 
-def _ttl_to_ms(fv: FeatureView) -> Optional[int]:
-    """Convert FeatureView TTL to milliseconds."""
-    if fv.ttl is None:
-        return None
-    return int(fv.ttl.total_seconds() * 1000)
-
-
-def _build_ttl_gte_expr(feature_views: List[FeatureView]) -> Optional[Dict[str, Any]]:
-    """Build a $gte expression with per-FV TTL using $switch.
-
-    Returns a MongoDB expression that evaluates to:
-        event_timestamp >= (entity_timestamp - ttl_for_this_feature_view)
-
-    Each feature_view can have a different TTL, handled via $switch branches.
-    If no feature views have TTL, returns None (no filtering needed).
-    """
-    branches = []
-
-    for fv in feature_views:
-        ttl_ms = _ttl_to_ms(fv)
-        if ttl_ms is None:
-            # No TTL for this FV - skip (effectively infinite history)
-            continue
-
-        branches.append(
-            {
-                "case": {"$eq": ["$feature_view", fv.name]},
-                "then": {"$subtract": ["$$ts", ttl_ms]},
-            }
-        )
-
-    # If no TTLs at all, no lower bound needed
-    if not branches:
-        return None
-
-    return {
-        "$gte": [
-            "$event_timestamp",
-            {
-                "$switch": {
-                    "branches": branches,
-                    # Default: no lower bound (for FVs without TTL)
-                    "default": {"$literal": 0},
-                }
-            },
-        ]
-    }
-
-
 class MongoDBOfflineStoreNative(OfflineStore):
     """Native MongoDB offline store using single-collection schema.
 
@@ -630,6 +580,13 @@ def get_historical_features(
         project: str,
         full_feature_names: bool = False,
     ) -> RetrievalJob:
+        """Fetch historical features using a "fetch + pandas join" strategy.
+
+        Instead of using $lookup (which scales poorly), this:
+        1. Extracts unique entity_ids and computes timestamp bounds
+        2. Fetches all matching feature data in one query
+        3. Uses pd.merge_asof for efficient point-in-time joins in Python
+        """
         if isinstance(entity_df, str):
             raise ValueError(
                 "MongoDBOfflineStoreNative does not support SQL entity_df strings. 
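A toy, self-contained illustration of the pd.merge_asof strategy named in this commit message; the column names mirror the implementation's internal _entity_id and _fv_ts, and the data is hypothetical:

    import pandas as pd

    entity_rows = pd.DataFrame(
        {
            "_entity_id": [b"k1", b"k1"],
            "event_timestamp": pd.to_datetime(
                ["2026-01-02", "2026-01-05"], utc=True
            ),
        }
    ).sort_values("event_timestamp")

    feature_rows = pd.DataFrame(
        {
            "_entity_id": [b"k1"],
            "_fv_ts": pd.to_datetime(["2026-01-03"], utc=True),
            "conv_rate": [0.9],
        }
    ).sort_values("_fv_ts")

    # Backward as-of join: each entity row takes the most recent feature
    # row at or before its timestamp, matched per entity key.
    joined = pd.merge_asof(
        entity_rows,
        feature_rows,
        left_on="event_timestamp",
        right_on="_fv_ts",
        by="_entity_id",
        direction="backward",
    )
    # Row at 2026-01-02 gets NaN (no feature row at or before it);
    # row at 2026-01-05 picks up the 2026-01-03 value 0.9.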
" @@ -654,18 +611,10 @@ def get_historical_features( fv_to_features.setdefault(fv_name, []).append(feat_name) fv_names = list(fv_to_features.keys()) - - # Build per-FV TTL expression using $switch - relevant_fvs = [fv for fv in feature_views if fv.name in fv_to_features] - ttl_expr = _build_ttl_gte_expr(relevant_fvs) + fv_by_name = {fv.name: fv for fv in feature_views} def _run() -> pyarrow.Table: - if MongoClient is None: - raise FeastExtrasDependencyImportError( - "mongodb", "pymongo is not installed." - ) - - # Prepare entity_df: ensure timestamps are UTC and serialize entity keys + # Prepare entity_df: ensure timestamps are UTC result = entity_df.copy() if result[event_timestamp_col].dt.tz is None: result[event_timestamp_col] = pd.to_datetime( @@ -683,110 +632,136 @@ def _run() -> pyarrow.Table: axis=1, ) - # Build temp collection documents - temp_docs = [] - for _, row in result.iterrows(): - temp_docs.append( - { - "entity_id": row["_entity_id"], - "event_timestamp": row[event_timestamp_col], - "_row_idx": _, # Preserve original order - } - ) + # Extract unique entity_ids and timestamp bounds + unique_entity_ids = result["_entity_id"].unique().tolist() + max_ts = result[event_timestamp_col].max() - # Create temporary collection for query - temp_collection_name = f"tmp_entity_df_{uuid.uuid4().hex[:12]}" + # Batch entity_ids into chunks to avoid huge $in queries + BATCH_SIZE = 1000 + all_feature_docs: List[Dict] = [] client = MongoDBOfflineStoreNative._get_client_and_ensure_indexes(config) try: - db = client[db_name] - temp_collection = db[temp_collection_name] - temp_collection.insert_many(temp_docs) - - # Build $lookup subpipeline with PIT join logic - # Match: entity_id, feature_view in list, event_timestamp <= entity.ts - match_conditions: List[Dict[str, Any]] = [ - {"$eq": ["$entity_id", "$$entity_id"]}, - {"$in": ["$feature_view", fv_names]}, - {"$lte": ["$event_timestamp", "$$ts"]}, - ] - # Add per-FV TTL filter using $switch - if ttl_expr is not None: - match_conditions.append(ttl_expr) - - lookup_pipeline: List[Dict[str, Any]] = [ - {"$match": {"$expr": {"$and": match_conditions}}}, - {"$sort": {"feature_view": 1, "event_timestamp": -1}}, - { - "$group": { - "_id": "$feature_view", - "doc": {"$first": "$$ROOT"}, - } - }, - ] + coll = client[db_name][feature_collection] - # Main aggregation pipeline - pipeline: List[Dict[str, Any]] = [ - { - "$lookup": { - "from": feature_collection, - "let": { - "entity_id": "$entity_id", - "ts": "$event_timestamp", - }, - "pipeline": lookup_pipeline, - "as": "feature_rows", - } - }, - {"$sort": {"_row_idx": 1}}, # Preserve original order - ] + for i in range(0, len(unique_entity_ids), BATCH_SIZE): + batch_ids = unique_entity_ids[i : i + BATCH_SIZE] - docs = list(temp_collection.aggregate(pipeline)) + # Single query: fetch all matching feature data + query = { + "entity_id": {"$in": batch_ids}, + "feature_view": {"$in": fv_names}, + "event_timestamp": {"$lte": max_ts}, + } + docs = list(coll.find(query, {"_id": 0})) + all_feature_docs.extend(docs) finally: - # Cleanup temp collection - client[db_name][temp_collection_name].drop() client.close() - if not docs: - return pyarrow.Table.from_pydict({}) - - # Build result DataFrame - result = result.reset_index(drop=True) - rows = [] - for doc in docs: - # Start with entity columns from original entity_df - row_idx = doc["_row_idx"] - row = result.iloc[row_idx][ - entity_columns + [event_timestamp_col] - ].to_dict() - - # Extract features from each feature_view's matched doc - feature_rows_by_fv = { 
- fr["_id"]: fr["doc"] for fr in doc.get("feature_rows", []) - } - - # Extract features from each feature_view's matched doc - # TTL is already applied server-side via $switch expression + # Handle empty result + if not all_feature_docs: + # Return entity_df with NULL feature columns for fv_name, features in fv_to_features.items(): - fv_doc = feature_rows_by_fv.get(fv_name) + for feat in features: + col_name = f"{fv_name}__{feat}" if full_feature_names else feat + result[col_name] = None + result = result.drop(columns=["_entity_id"]) + return pyarrow.Table.from_pandas(result, preserve_index=False) + + # Convert to DataFrame and flatten features subdoc + feature_df = pd.DataFrame(all_feature_docs) + # Rename entity_id to _entity_id to match result DataFrame + feature_df = feature_df.rename(columns={"entity_id": "_entity_id"}) + + # Flatten nested 'features' dict into top-level columns + if "features" in feature_df.columns: + features_expanded = pd.json_normalize(feature_df["features"]) + feature_df = pd.concat( + [feature_df.drop(columns=["features"]), features_expanded], axis=1 + ) + + # Ensure timestamps are tz-aware + if feature_df["event_timestamp"].dt.tz is None: + feature_df["event_timestamp"] = pd.to_datetime( + feature_df["event_timestamp"], utc=True + ) + + # Split by feature_view and perform PIT join for each + result = result.sort_values(event_timestamp_col).reset_index(drop=True) + + for fv_name, features in fv_to_features.items(): + fv = fv_by_name.get(fv_name) + + # Filter to this feature_view's data + fv_df = feature_df[feature_df["feature_view"] == fv_name].copy() + + if fv_df.empty: + # No data for this FV - fill with NULLs for feat in features: col_name = f"{fv_name}__{feat}" if full_feature_names else feat - if fv_doc is None: - row[col_name] = None - else: - row[col_name] = fv_doc.get("features", {}).get(feat) - - rows.append(row) - - result_df = pd.DataFrame(rows) - if not result_df.empty and event_timestamp_col in result_df.columns: - if result_df[event_timestamp_col].dt.tz is None: - result_df[event_timestamp_col] = pd.to_datetime( - result_df[event_timestamp_col], utc=True + result[col_name] = None + continue + + # Sort by timestamp for merge_asof + fv_df = fv_df.sort_values("event_timestamp").reset_index(drop=True) + + # Select columns for merge + merge_cols = ["_entity_id", "event_timestamp"] + [ + f for f in features if f in fv_df.columns + ] + fv_df_subset = fv_df[ + [c for c in merge_cols if c in fv_df.columns] + ].copy() + + # Rename to avoid conflicts + fv_df_subset = fv_df_subset.rename( + columns={"event_timestamp": "_fv_ts"} + ) + + # Point-in-time join using merge_asof + result = pd.merge_asof( + result, + fv_df_subset, + left_on=event_timestamp_col, + right_on="_fv_ts", + by="_entity_id", + direction="backward", + ) + + # Apply TTL: null out stale features + if fv and fv.ttl: + cutoff = result[event_timestamp_col] - fv.ttl + stale_mask = result["_fv_ts"] < cutoff + for feat in features: + if feat in result.columns: + result.loc[stale_mask, feat] = None + + # Rename features if full_feature_names + for feat in features: + if feat in result.columns and full_feature_names: + result = result.rename(columns={feat: f"{fv_name}__{feat}"}) + elif feat not in result.columns: + # Feature wasn't in the data - add NULL column + col_name = f"{fv_name}__{feat}" if full_feature_names else feat + result[col_name] = None + + # Drop temporary column + result = result.drop(columns=["_fv_ts"], errors="ignore") + + # Remove internal entity_id column and restore original order 
+ result = result.drop(columns=["_entity_id"], errors="ignore") + result = result.sort_index().reset_index(drop=True) + + # Ensure timestamp column is still tz-aware + if not result.empty and event_timestamp_col in result.columns: + if result[event_timestamp_col].dt.tz is None: + result[event_timestamp_col] = pd.to_datetime( + result[event_timestamp_col], utc=True ) - return pyarrow.Table.from_pandas(result_df, preserve_index=False) + + return pyarrow.Table.from_pandas(result, preserve_index=False) return MongoDBNativeRetrievalJob( query_fn=_run, From 5d516a841ee84571b54c2e70edcbd23d845f71fd Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Thu, 19 Mar 2026 07:39:30 -0400 Subject: [PATCH 15/30] Refactor get_historical_features with chunked processing for large entity_df - Add CHUNK_SIZE (5000) for entity_df processing to bound memory usage - Extract _run_single helper function for processing each chunk - Add _chunk_dataframe generator for yielding DataFrame slices - Preserve original row ordering via _row_idx column - Exclude internal columns (prefixed with _) from entity key serialization - Concat chunk results and restore ordering at the end This allows processing arbitrarily large entity_df while keeping memory bounded by processing in 5000-row chunks. Signed-off-by: Casey Clements --- .../mongodb_offline_store/mongodb_native.py | 110 +++++++++++------- 1 file changed, 70 insertions(+), 40 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py index 6e1f610d37..8c7822bca4 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py @@ -83,7 +83,17 @@ import json import warnings from datetime import datetime, timezone -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union +from typing import ( + Any, + Callable, + Dict, + Generator, + Iterable, + List, + Optional, + Tuple, + Union, +) import pandas as pd import pyarrow @@ -584,8 +594,10 @@ def get_historical_features( Instead of using $lookup (which scales poorly), this: 1. Extracts unique entity_ids and computes timestamp bounds - 2. Fetches all matching feature data in one query + 2. Fetches all matching feature data in batched queries 3. Uses pd.merge_asof for efficient point-in-time joins in Python + + For large entity_df, processing is chunked to bound memory usage. 
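A standalone sketch of the chunked-processing pattern this commit introduces, mirroring _chunk_dataframe plus the _row_idx ordering trick; the frame and transform here are hypothetical:

    import pandas as pd

    def chunk_dataframe(df, size):
        """Yield successive row slices of df."""
        for i in range(0, len(df), size):
            yield df.iloc[i : i + size]

    big = pd.DataFrame({"x": range(12)})
    big["_row_idx"] = range(len(big))  # remember original row order

    # Process each bounded chunk independently, then reassemble and
    # restore the caller's original ordering.
    parts = [chunk.assign(y=chunk["x"] * 2) for chunk in chunk_dataframe(big, 5)]
    out = (
        pd.concat(parts, ignore_index=True)
        .sort_values("_row_idx")
        .reset_index(drop=True)
        .drop(columns=["_row_idx"])
    )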
""" if isinstance(entity_df, str): raise ValueError( @@ -613,16 +625,33 @@ def get_historical_features( fv_names = list(fv_to_features.keys()) fv_by_name = {fv.name: fv for fv in feature_views} - def _run() -> pyarrow.Table: + # Chunk size for entity_df processing (bounds memory usage) + CHUNK_SIZE = 5000 + # Batch size for MongoDB $in queries + MONGO_BATCH_SIZE = 1000 + + def _chunk_dataframe( + df: pd.DataFrame, size: int + ) -> Generator[pd.DataFrame, None, None]: + """Yield successive chunks of a DataFrame.""" + for i in range(0, len(df), size): + yield df.iloc[i : i + size] + + def _run_single(entity_subset_df: pd.DataFrame) -> pd.DataFrame: + """Process a single chunk of entity_df and return joined features.""" # Prepare entity_df: ensure timestamps are UTC - result = entity_df.copy() + result = entity_subset_df.copy() if result[event_timestamp_col].dt.tz is None: result[event_timestamp_col] = pd.to_datetime( result[event_timestamp_col], utc=True ) - # Get join keys (all columns except event_timestamp) - entity_columns = [c for c in result.columns if c != event_timestamp_col] + # Get join keys (all columns except event_timestamp and internal columns) + entity_columns = [ + c + for c in result.columns + if c != event_timestamp_col and not c.startswith("_") + ] # Serialize entity keys to bytes (same format as online store) result["_entity_id"] = result.apply( @@ -632,22 +661,20 @@ def _run() -> pyarrow.Table: axis=1, ) - # Extract unique entity_ids and timestamp bounds + # Extract unique entity_ids and timestamp bounds for this chunk unique_entity_ids = result["_entity_id"].unique().tolist() max_ts = result[event_timestamp_col].max() - # Batch entity_ids into chunks to avoid huge $in queries - BATCH_SIZE = 1000 + # Fetch feature data in batches all_feature_docs: List[Dict] = [] client = MongoDBOfflineStoreNative._get_client_and_ensure_indexes(config) try: coll = client[db_name][feature_collection] - for i in range(0, len(unique_entity_ids), BATCH_SIZE): - batch_ids = unique_entity_ids[i : i + BATCH_SIZE] + for i in range(0, len(unique_entity_ids), MONGO_BATCH_SIZE): + batch_ids = unique_entity_ids[i : i + MONGO_BATCH_SIZE] - # Single query: fetch all matching feature data query = { "entity_id": {"$in": batch_ids}, "feature_view": {"$in": fv_names}, @@ -661,66 +688,53 @@ def _run() -> pyarrow.Table: # Handle empty result if not all_feature_docs: - # Return entity_df with NULL feature columns for fv_name, features in fv_to_features.items(): for feat in features: col_name = f"{fv_name}__{feat}" if full_feature_names else feat result[col_name] = None - result = result.drop(columns=["_entity_id"]) - return pyarrow.Table.from_pandas(result, preserve_index=False) + return result.drop(columns=["_entity_id"]) # Convert to DataFrame and flatten features subdoc feature_df = pd.DataFrame(all_feature_docs) - - # Rename entity_id to _entity_id to match result DataFrame feature_df = feature_df.rename(columns={"entity_id": "_entity_id"}) - # Flatten nested 'features' dict into top-level columns if "features" in feature_df.columns: features_expanded = pd.json_normalize(feature_df["features"]) feature_df = pd.concat( [feature_df.drop(columns=["features"]), features_expanded], axis=1 ) - # Ensure timestamps are tz-aware if feature_df["event_timestamp"].dt.tz is None: feature_df["event_timestamp"] = pd.to_datetime( feature_df["event_timestamp"], utc=True ) - # Split by feature_view and perform PIT join for each + # Sort result for merge_asof result = 
result.sort_values(event_timestamp_col).reset_index(drop=True) + # Perform PIT join for each feature view for fv_name, features in fv_to_features.items(): fv = fv_by_name.get(fv_name) - - # Filter to this feature_view's data fv_df = feature_df[feature_df["feature_view"] == fv_name].copy() if fv_df.empty: - # No data for this FV - fill with NULLs for feat in features: col_name = f"{fv_name}__{feat}" if full_feature_names else feat result[col_name] = None continue - # Sort by timestamp for merge_asof fv_df = fv_df.sort_values("event_timestamp").reset_index(drop=True) - # Select columns for merge merge_cols = ["_entity_id", "event_timestamp"] + [ f for f in features if f in fv_df.columns ] fv_df_subset = fv_df[ [c for c in merge_cols if c in fv_df.columns] ].copy() - - # Rename to avoid conflicts fv_df_subset = fv_df_subset.rename( columns={"event_timestamp": "_fv_ts"} ) - # Point-in-time join using merge_asof result = pd.merge_asof( result, fv_df_subset, @@ -730,7 +744,7 @@ def _run() -> pyarrow.Table: direction="backward", ) - # Apply TTL: null out stale features + # Apply TTL if fv and fv.ttl: cutoff = result[event_timestamp_col] - fv.ttl stale_mask = result["_fv_ts"] < cutoff @@ -743,25 +757,41 @@ def _run() -> pyarrow.Table: if feat in result.columns and full_feature_names: result = result.rename(columns={feat: f"{fv_name}__{feat}"}) elif feat not in result.columns: - # Feature wasn't in the data - add NULL column col_name = f"{fv_name}__{feat}" if full_feature_names else feat result[col_name] = None - # Drop temporary column result = result.drop(columns=["_fv_ts"], errors="ignore") - # Remove internal entity_id column and restore original order - result = result.drop(columns=["_entity_id"], errors="ignore") - result = result.sort_index().reset_index(drop=True) + return result.drop(columns=["_entity_id"], errors="ignore") - # Ensure timestamp column is still tz-aware - if not result.empty and event_timestamp_col in result.columns: - if result[event_timestamp_col].dt.tz is None: - result[event_timestamp_col] = pd.to_datetime( - result[event_timestamp_col], utc=True + def _run() -> pyarrow.Table: + # Add row index to preserve original ordering + working_df = entity_df.copy() + working_df["_row_idx"] = range(len(working_df)) + + if len(working_df) <= CHUNK_SIZE: + # Small workload: process in single pass + result_df = _run_single(working_df) + else: + # Large workload: process in chunks + chunk_results = [] + for chunk in _chunk_dataframe(working_df, CHUNK_SIZE): + chunk_results.append(_run_single(chunk)) + + result_df = pd.concat(chunk_results, ignore_index=True) + + # Restore original ordering and remove index column + result_df = result_df.sort_values("_row_idx").reset_index(drop=True) + result_df = result_df.drop(columns=["_row_idx"], errors="ignore") + + # Ensure timestamp column is tz-aware + if not result_df.empty and event_timestamp_col in result_df.columns: + if result_df[event_timestamp_col].dt.tz is None: + result_df[event_timestamp_col] = pd.to_datetime( + result_df[event_timestamp_col], utc=True ) - return pyarrow.Table.from_pandas(result, preserve_index=False) + return pyarrow.Table.from_pandas(result_df, preserve_index=False) return MongoDBNativeRetrievalJob( query_fn=_run, From c7281fb06705d7bd74343dcf67811bb4f75abf56 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Thu, 19 Mar 2026 09:58:50 -0400 Subject: [PATCH 16/30] Optimize Native get_historical_features: reuse client, increase batch sizes Performance optimizations: - Reuse MongoClient across chunks (was creating new 
client per chunk) - Increase CHUNK_SIZE from 5,000 to 50,000 rows - Increase MONGO_BATCH_SIZE from 1,000 to 10,000 entity_ids - Pass collection to _run_single instead of creating client each time - Make index creation idempotent (check for existing index) Results (100k rows): - Before: 21.7s - After: 5.2s (4.2x faster) Results (1M rows): - Before: 1664s (28 min) - After: 212s (3.5 min) (7.8x faster) Signed-off-by: Casey Clements --- .../mongodb_offline_store/mongodb_native.py | 84 +++++++++++-------- 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py index 8c7822bca4..aa0c88f033 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py @@ -400,15 +400,24 @@ class MongoDBOfflineStoreNative(OfflineStore): @staticmethod def _ensure_indexes(client: Any, db_name: str, collection_name: str) -> None: - """Create recommended indexes on the feature_history collection.""" + """Create recommended indexes on the feature_history collection. + + Uses create_index with background=True. If index already exists + (with same or different name), this is a no-op. + """ collection = client[db_name][collection_name] + # Check if an equivalent index already exists + existing_indexes = collection.index_information() + target_key = [("entity_id", 1), ("feature_view", 1), ("event_timestamp", -1)] + + for idx_info in existing_indexes.values(): + if idx_info.get("key") == target_key: + return # Index already exists + collection.create_index( - [ - ("entity_id", 1), - ("feature_view", 1), - ("event_timestamp", -1), - ], + target_key, name="entity_fv_ts_idx", + background=True, ) @classmethod @@ -626,9 +635,9 @@ def get_historical_features( fv_by_name = {fv.name: fv for fv in feature_views} # Chunk size for entity_df processing (bounds memory usage) - CHUNK_SIZE = 5000 + CHUNK_SIZE = 50_000 # Batch size for MongoDB $in queries - MONGO_BATCH_SIZE = 1000 + MONGO_BATCH_SIZE = 10_000 def _chunk_dataframe( df: pd.DataFrame, size: int @@ -637,8 +646,13 @@ def _chunk_dataframe( for i in range(0, len(df), size): yield df.iloc[i : i + size] - def _run_single(entity_subset_df: pd.DataFrame) -> pd.DataFrame: - """Process a single chunk of entity_df and return joined features.""" + def _run_single(entity_subset_df: pd.DataFrame, coll: Any) -> pd.DataFrame: + """Process a single chunk of entity_df and return joined features. 
+ + Args: + entity_subset_df: Chunk of entity DataFrame to process + coll: MongoDB collection object (reused across chunks) + """ # Prepare entity_df: ensure timestamps are UTC result = entity_subset_df.copy() if result[event_timestamp_col].dt.tz is None: @@ -668,23 +682,16 @@ def _run_single(entity_subset_df: pd.DataFrame) -> pd.DataFrame: # Fetch feature data in batches all_feature_docs: List[Dict] = [] - client = MongoDBOfflineStoreNative._get_client_and_ensure_indexes(config) - try: - coll = client[db_name][feature_collection] - - for i in range(0, len(unique_entity_ids), MONGO_BATCH_SIZE): - batch_ids = unique_entity_ids[i : i + MONGO_BATCH_SIZE] - - query = { - "entity_id": {"$in": batch_ids}, - "feature_view": {"$in": fv_names}, - "event_timestamp": {"$lte": max_ts}, - } - docs = list(coll.find(query, {"_id": 0})) - all_feature_docs.extend(docs) + for i in range(0, len(unique_entity_ids), MONGO_BATCH_SIZE): + batch_ids = unique_entity_ids[i : i + MONGO_BATCH_SIZE] - finally: - client.close() + query = { + "entity_id": {"$in": batch_ids}, + "feature_view": {"$in": fv_names}, + "event_timestamp": {"$lte": max_ts}, + } + docs = list(coll.find(query, {"_id": 0})) + all_feature_docs.extend(docs) # Handle empty result if not all_feature_docs: @@ -769,16 +776,23 @@ def _run() -> pyarrow.Table: working_df = entity_df.copy() working_df["_row_idx"] = range(len(working_df)) - if len(working_df) <= CHUNK_SIZE: - # Small workload: process in single pass - result_df = _run_single(working_df) - else: - # Large workload: process in chunks - chunk_results = [] - for chunk in _chunk_dataframe(working_df, CHUNK_SIZE): - chunk_results.append(_run_single(chunk)) + # Create client once for all chunks + client = MongoDBOfflineStoreNative._get_client_and_ensure_indexes(config) + try: + coll = client[db_name][feature_collection] + + if len(working_df) <= CHUNK_SIZE: + # Small workload: process in single pass + result_df = _run_single(working_df, coll) + else: + # Large workload: process in chunks + chunk_results = [] + for chunk in _chunk_dataframe(working_df, CHUNK_SIZE): + chunk_results.append(_run_single(chunk, coll)) - result_df = pd.concat(chunk_results, ignore_index=True) + result_df = pd.concat(chunk_results, ignore_index=True) + finally: + client.close() # Restore original ordering and remove index column result_df = result_df.sort_values("_row_idx").reset_index(drop=True) From 18bb99946cf93730cac6326137f9e425096e049b Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Thu, 19 Mar 2026 13:46:30 -0400 Subject: [PATCH 17/30] Remove duplicate MongoDBOfflineStoreNative from mongodb.py The Native implementation now lives exclusively in mongodb_native.py with the single-collection schema. This removes the confusing duplicate that used the Ibis collection-per-FV schema. Signed-off-by: Casey Clements --- .../contrib/mongodb_offline_store/mongodb.py | 416 +----------------- 1 file changed, 2 insertions(+), 414 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py index 51100ef827..10ffd6c533 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py @@ -13,12 +13,11 @@ # limitations under the License. 
import warnings -from datetime import datetime, timezone -from typing import Any, Callable, Dict, List, Optional, Union +from datetime import datetime +from typing import Any, Callable, List, Optional, Union import ibis import pandas as pd -import pyarrow from ibis.expr.types import Table from pydantic import StrictStr @@ -45,14 +44,9 @@ from feast.infra.offline_stores.offline_store import ( OfflineStore, RetrievalJob, - RetrievalMetadata, -) -from feast.infra.offline_stores.offline_utils import ( - infer_event_timestamp_from_entity_df, ) from feast.infra.registry.base_registry import BaseRegistry from feast.repo_config import FeastConfigBaseModel, RepoConfig -from feast.saved_dataset import SavedDatasetStorage class MongoDBOfflineStoreIbisConfig(FeastConfigBaseModel): @@ -247,409 +241,3 @@ def writer( client.close() return writer - - -# --------------------------------------------------------------------------- -# Native MQL implementation -# --------------------------------------------------------------------------- - - -class MongoDBOfflineStoreNativeConfig(FeastConfigBaseModel): - """Configuration for the MongoDB native-MQL offline store.""" - - type: StrictStr = "feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb.MongoDBOfflineStoreNative" - """Offline store type selector""" - - connection_string: StrictStr = "mongodb://localhost:27017" - """MongoDB connection URI""" - - database: StrictStr = "feast" - """Default MongoDB database name""" - - -def _fetch_collection_as_arrow( - connection_string: str, - db_name: str, - collection: str, - pipeline: Optional[List[Dict]] = None, -) -> pyarrow.Table: - """Run an aggregation pipeline (or full scan) via PyMongo and return a pyarrow Table. - - If *pipeline* is None the entire collection is scanned (``_id`` excluded). - The ``_id`` field is stripped from every result document before conversion. - """ - if MongoClient is None: - raise FeastExtrasDependencyImportError("mongodb", "pymongo is not installed.") - client: Any = MongoClient(connection_string, driver=DRIVER_METADATA, tz_aware=True) - try: - if pipeline is not None: - docs = list(client[db_name][collection].aggregate(pipeline)) - else: - docs = list(client[db_name][collection].find({}, {"_id": 0})) - finally: - client.close() - - if not docs: - return pyarrow.table({}) - - for doc in docs: - doc.pop("_id", None) - - return pyarrow.Table.from_pylist(docs) - - -class MongoDBNativeRetrievalJob(RetrievalJob): - """A RetrievalJob whose results come from a lazy PyMongo query callable. - - The callable is only executed when the caller materialises the job (e.g. - ``to_df()``, ``to_arrow()``, ``persist()``). 
- """ - - def __init__( - self, - query_fn: Callable[[], pyarrow.Table], - full_feature_names: bool, - on_demand_feature_views: List, - metadata: Optional[RetrievalMetadata], - config: RepoConfig, - ) -> None: - super().__init__() - self._query_fn = query_fn - self._full_feature_names = full_feature_names - self._on_demand_feature_views = on_demand_feature_views or [] - self._metadata = metadata - self._config = config - - def _to_arrow_internal(self, timeout: Optional[int] = None) -> pyarrow.Table: - return self._query_fn() - - def _to_df_internal(self, timeout: Optional[int] = None) -> pd.DataFrame: - return self._to_arrow_internal().to_pandas() - - @property - def full_feature_names(self) -> bool: - return self._full_feature_names - - @property - def on_demand_feature_views(self) -> List: - return self._on_demand_feature_views - - @property - def metadata(self) -> Optional[RetrievalMetadata]: - return self._metadata - - def persist( - self, - storage: SavedDatasetStorage, - allow_overwrite: bool = False, - timeout: Optional[int] = None, - ) -> None: - if MongoClient is None: - raise FeastExtrasDependencyImportError( - "mongodb", "pymongo is not installed." - ) - data_source = storage.to_data_source() - if not isinstance(data_source, MongoDBSource): - raise ValueError( - f"MongoDBNativeRetrievalJob.persist expected a MongoDBSource storage, " - f"got {type(data_source).__name__!r}." - ) - table = self._to_arrow_internal() - connection_string = self._config.offline_store.connection_string - db_name = data_source.database or self._config.offline_store.database - location = f"{db_name}.{data_source.collection}" - client: Any = MongoClient( - connection_string, driver=DRIVER_METADATA, tz_aware=True - ) - try: - coll = client[db_name][data_source.collection] - if not allow_overwrite and coll.estimated_document_count() > 0: - raise SavedDatasetLocationAlreadyExists(location=location) - coll.drop() - records = table.to_pylist() - if records: - coll.insert_many(records) - finally: - client.close() - - -class MongoDBOfflineStoreNative(OfflineStore): - """Offline store backed by MongoDB using native MQL aggregation pipelines. - - Compared with :class:`MongoDBOfflineStoreIbis`, this implementation avoids - the Ibis dependency entirely. The three main workflows map to: - - * ``offline_write_batch`` – Arrow → ``insert_many`` - * ``pull_latest_from_table_or_query`` – ``$match`` → ``$sort`` → ``$group`` - * ``pull_all_from_table_or_query`` – ``$match`` → ``$project`` - * ``get_historical_features`` – per-collection fetch + ``merge_asof`` - """ - - @staticmethod - def offline_write_batch( - config: RepoConfig, - feature_view: FeatureView, - table: pyarrow.Table, - progress: Optional[Callable[[int], Any]], - ) -> None: - if MongoClient is None: - raise FeastExtrasDependencyImportError( - "mongodb", "pymongo is not installed." - ) - data_source = feature_view.batch_source - if not isinstance(data_source, MongoDBSource): - raise ValueError( - f"MongoDBOfflineStoreNative.offline_write_batch expected a MongoDBSource, " - f"got {type(data_source).__name__!r}." 
- ) - connection_string = config.offline_store.connection_string - db_name = data_source.database or config.offline_store.database - records = table.to_pylist() - client: Any = MongoClient( - connection_string, driver=DRIVER_METADATA, tz_aware=True - ) - try: - coll = client[db_name][data_source.collection] - if records: - coll.insert_many(records) - if progress: - progress(len(records)) - finally: - client.close() - - @staticmethod - def pull_latest_from_table_or_query( - config: RepoConfig, - data_source: DataSource, - join_key_columns: List[str], - feature_name_columns: List[str], - timestamp_field: str, - created_timestamp_column: Optional[str], - start_date: datetime, - end_date: datetime, - ) -> RetrievalJob: - if not isinstance(data_source, MongoDBSource): - raise ValueError( - f"MongoDBOfflineStoreNative expected a MongoDBSource, " - f"got {type(data_source).__name__!r}." - ) - warnings.warn( - "MongoDB offline store (native) is in preview. API may change without notice.", - RuntimeWarning, - ) - start_utc = start_date.astimezone(tz=timezone.utc) - end_utc = end_date.astimezone(tz=timezone.utc) - connection_string = config.offline_store.connection_string - db_name = data_source.database or config.offline_store.database - collection = data_source.collection - - # Sort by timestamp descending so $first in $group gets the latest document - sort_spec: Dict = {timestamp_field: -1} - if created_timestamp_column: - sort_spec[created_timestamp_column] = -1 - - # Group by entity/join keys. _id becomes a subdocument like {driver_id: 1}. - # $first grabs values from the first document in each group (the latest, - # due to prior $sort). - group_id = {k: f"${k}" for k in join_key_columns} - group_stage: Dict = { - "_id": group_id, - **{f: {"$first": f"${f}"} for f in feature_name_columns}, - timestamp_field: {"$first": f"${timestamp_field}"}, - } - if created_timestamp_column: - group_stage[created_timestamp_column] = { - "$first": f"${created_timestamp_column}" - } - - # Project to flatten the output: extract join keys from _id subdocument, - # include feature columns directly. Excludes the _id field from output. - project_stage: Dict = { - "_id": 0, - **{k: f"$_id.{k}" for k in join_key_columns}, - **{f: 1 for f in feature_name_columns}, - timestamp_field: 1, - } - if created_timestamp_column: - project_stage[created_timestamp_column] = 1 - - pipeline = [ - {"$match": {timestamp_field: {"$gte": start_utc, "$lte": end_utc}}}, - {"$sort": sort_spec}, - {"$group": group_stage}, - {"$project": project_stage}, - ] - - def _run() -> pyarrow.Table: - return _fetch_collection_as_arrow( - connection_string, db_name, collection, pipeline - ) - - return MongoDBNativeRetrievalJob( - query_fn=_run, - full_feature_names=False, - on_demand_feature_views=[], - metadata=None, - config=config, - ) - - @staticmethod - def pull_all_from_table_or_query( - config: RepoConfig, - data_source: DataSource, - join_key_columns: List[str], - feature_name_columns: List[str], - timestamp_field: str, - created_timestamp_column: Optional[str] = None, - start_date: Optional[datetime] = None, - end_date: Optional[datetime] = None, - ) -> RetrievalJob: - if not isinstance(data_source, MongoDBSource): - raise ValueError( - f"MongoDBOfflineStoreNative expected a MongoDBSource, " - f"got {type(data_source).__name__!r}." - ) - warnings.warn( - "MongoDB offline store (native) is in preview. 
API may change without notice.", - RuntimeWarning, - ) - connection_string = config.offline_store.connection_string - db_name = data_source.database or config.offline_store.database - collection = data_source.collection - - fields = join_key_columns + feature_name_columns + [timestamp_field] - if created_timestamp_column: - fields.append(created_timestamp_column) - - match_filter: Dict = {} - if start_date or end_date: - ts_filter: Dict = {} - if start_date: - ts_filter["$gte"] = start_date.astimezone(tz=timezone.utc) - if end_date: - ts_filter["$lte"] = end_date.astimezone(tz=timezone.utc) - match_filter[timestamp_field] = ts_filter - - pipeline = [ - {"$match": match_filter}, - {"$project": {"_id": 0, **{f: 1 for f in fields}}}, - ] - - def _run() -> pyarrow.Table: - return _fetch_collection_as_arrow( - connection_string, db_name, collection, pipeline - ) - - return MongoDBNativeRetrievalJob( - query_fn=_run, - full_feature_names=False, - on_demand_feature_views=[], - metadata=None, - config=config, - ) - - @staticmethod - def get_historical_features( - config: RepoConfig, - feature_views: List[FeatureView], - feature_refs: List[str], - entity_df: Union[pd.DataFrame, str], - registry: BaseRegistry, - project: str, - full_feature_names: bool = False, - ) -> RetrievalJob: - if isinstance(entity_df, str): - raise ValueError( - "MongoDBOfflineStoreNative does not support SQL entity_df strings. " - "Pass a pandas DataFrame instead." - ) - warnings.warn( - "MongoDB offline store (native) is in preview. API may change without notice.", - RuntimeWarning, - ) - connection_string = config.offline_store.connection_string - default_db = config.offline_store.database - - entity_schema = dict(zip(entity_df.columns, entity_df.dtypes)) - event_timestamp_col = infer_event_timestamp_from_entity_df(entity_schema) - - # Map "feature_view:feature" refs → {fv_name: [feature, ...]} - fv_to_features: Dict[str, List[str]] = {} - for ref in feature_refs: - fv_name, feat_name = ref.split(":", 1) - fv_to_features.setdefault(fv_name, []).append(feat_name) - - fv_by_name = {fv.name: fv for fv in feature_views} - - def _run() -> pyarrow.Table: - result = entity_df.copy() - # Ensure the entity timestamp is tz-aware UTC for merge_asof - if result[event_timestamp_col].dt.tz is None: - result[event_timestamp_col] = pd.to_datetime( - result[event_timestamp_col], utc=True - ) - result = result.sort_values(event_timestamp_col) - - for fv_name, features in fv_to_features.items(): - fv = fv_by_name[fv_name] - source = fv.batch_source - if not isinstance(source, MongoDBSource): - raise ValueError( - f"MongoDBOfflineStoreNative: feature view {fv_name!r} has " - f"a non-MongoDBSource batch source ({type(source).__name__!r})." 
- ) - db_name = source.database or default_db - ts_field = source.timestamp_field - join_keys = [e.name for e in fv.entity_columns] - - arrow_table = _fetch_collection_as_arrow( - connection_string, db_name, source.collection - ) - if arrow_table.num_rows == 0: - for f in features: - col = f"{fv_name}__{f}" if full_feature_names else f - result[col] = None - continue - - feature_df = arrow_table.to_pandas() - # Ensure tz-aware UTC - if feature_df[ts_field].dt.tz is None: - feature_df[ts_field] = pd.to_datetime( - feature_df[ts_field], utc=True - ) - feature_df = feature_df.sort_values(ts_field) - - col_rename = { - f: (f"{fv_name}__{f}" if full_feature_names else f) - for f in features - } - cols_to_select = join_keys + features + [ts_field] - feature_df = feature_df[cols_to_select].rename(columns=col_rename) - out_features = list(col_rename.values()) - - merged = pd.merge_asof( - result, - feature_df, - left_on=event_timestamp_col, - right_on=ts_field, - by=join_keys, - direction="backward", - ) - # Apply TTL: null out features whose timestamp is too far in the past - if fv.ttl: - cutoff = merged[event_timestamp_col] - fv.ttl - too_old = merged[ts_field] < cutoff - for col in out_features: - merged.loc[too_old, col] = None - - result = merged.drop(columns=[ts_field], errors="ignore") - - return pyarrow.Table.from_pandas(result, preserve_index=False) - - return MongoDBNativeRetrievalJob( - query_fn=_run, - full_feature_names=full_feature_names, - on_demand_feature_views=[], - metadata=None, - config=config, - ) From 38d40f58ec74c6e04f839f03cbfdc36b542a9d65 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Thu, 19 Mar 2026 14:12:59 -0400 Subject: [PATCH 18/30] Consolidate mongodb_source.py into mongodb.py - Move MongoDBSource, MongoDBOptions, SavedDatasetMongoDBStorage into mongodb.py - Move _infer_python_type_str helper into mongodb.py - Update imports in tests and benchmarks - Remove mongodb_source.py This consolidates the collection-per-FV implementation into a single file, making the codebase easier to navigate. Signed-off-by: Casey Clements --- design-notes/CASEY_SESSION_NOTES.md | 109 +++++++ design-notes/design-hybrid-with-batches.md | 239 +++++++++++++++ design-notes/native_implementation_notes.md | 191 ++++++++++++ design-notes/offline_store_design.md | 98 ++++++ ...ompt-mdb-fetch-pandas-join-with-batches.md | 108 +++++++ .../contrib/mongodb_offline_store/mongodb.py | 278 ++++++++++++++++- .../mongodb_offline_store/mongodb_source.py | 283 ------------------ .../benchmark_mongodb_offline_stores.py | 4 +- .../contrib/test_mongodb_offline_retrieval.py | 2 - 9 files changed, 1020 insertions(+), 292 deletions(-) create mode 100644 design-notes/CASEY_SESSION_NOTES.md create mode 100644 design-notes/design-hybrid-with-batches.md create mode 100644 design-notes/native_implementation_notes.md create mode 100644 design-notes/offline_store_design.md create mode 100644 design-notes/prompt-mdb-fetch-pandas-join-with-batches.md delete mode 100644 sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_source.py diff --git a/design-notes/CASEY_SESSION_NOTES.md b/design-notes/CASEY_SESSION_NOTES.md new file mode 100644 index 0000000000..7a0b6f158f --- /dev/null +++ b/design-notes/CASEY_SESSION_NOTES.md @@ -0,0 +1,109 @@ +# MongoDB Feast Integration — Session Notes +_Last updated: 2026-03-16. 
Resume here after OS upgrade._ + +--- + +## Status at a Glance + +| Component | Branch | Status | +|---|---|---| +| **Online Store** | `INTPYTHON-297-MongoDB-Feast-Integration` | ✅ **Merged to upstream/master** | +| **Offline Store** | `FEAST-OfflineStore-INTPYTHON-297` | 🔧 In progress — next focus | + +--- + +## Online Store — COMPLETE ✅ + +### What was done +- Implemented `MongoDBOnlineStore` with full sync + async API +- Refactored write path: extracted `_build_write_ops` static method to eliminate code + duplication between `online_write_batch` and `online_write_batch_async` +- Added Feast driver metadata to MongoDB client instantiations +- Registered MongoDB in the feast-operator (kubebuilder enums, `ValidOnlineStoreDBStorePersistenceTypes`, operator YAMLs) +- Updated online store status from `alpha` → `preview` in docs +- All 5 unit tests pass (including Docker-based testcontainers integration test) + +### Key files +- `sdk/python/feast/infra/online_stores/mongodb_online_store/mongodb.py` — main implementation +- `sdk/python/tests/unit/online_store/test_mongodb_online_retrieval.py` — test suite +- `sdk/python/tests/universal/feature_repos/universal/online_store/mongodb.py` — universal test repo config + +### Git history cleanup (this session) +The PR had two merge commits (`632e103a6`, `26ce79b37`) that blocked squash-and-merge. +Resolution: +1. `git fetch --all` +2. Created clean branch `FEAST-OnlineStore-INTPYTHON-297` from `upstream/master` +3. Cherry-picked all 47 commits (oldest → newest), skipping the two merge commits +4. Resolved conflicts: directory rename (`tests/integration/` → `tests/universal/`), + `pixi.lock` auto-resolved, `detect-secrets` false positives got `# pragma: allowlist secret` +5. Force-pushed to `INTPYTHON-297-MongoDB-Feast-Integration` — maintainer squash-merged ✅ + +### Versioning +Version is derived dynamically via `setuptools_scm` from git tags (no hardcoded version). +Latest tag at time of merge: **`v0.60.0`**. Feature ships in the next release after that. +Update JIRA with the next release tag once the maintainers cut it. + +--- + +## Offline Store — IN PROGRESS 🔧 + +### Branch +``` +FEAST-OfflineStore-INTPYTHON-297 +``` + +### Commits on branch (not yet in upstream/master) +``` +cd3eef677 Started work on full Mongo/MQL implementation. Kept MongoDBOfflineStoreIbis and MongoDBOfflineStoreNative +71469f69a feat: restore test-python-universal-mongodb-online Makefile target +904505244 fix: pass onerror to pkgutil.walk_packages +946d84e4c fix: broaden import exception handling in doctest runner +55de0e9b5 fix: catch FeastExtrasDependencyImportError in doctest runner +157a71d77 refactor: improve MongoDB offline store code quality +67632af2f feat: Add MongoDB offline store (ibis-based PIT join, v1 alpha) +``` + +### Key files +- `sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py` + - Contains **two prototype implementations**: + - `MongoDBOfflineStoreIbis` — uses Ibis for point-in-time joins (delegates to `get_historical_features_ibis`) + - `MongoDBOfflineStoreNative` — native MQL implementation (started in `cd3eef677`, in progress) +- `sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_source.py` — `MongoDBSource` data source + +### Architecture: Ibis vs Native +- **Ibis approach**: delegates PIT join to `feast.infra.offline_stores.ibis` helpers. + Pro: less code, consistency with other ibis-backed stores. + Con: requires ibis-mongodb connector; PIT correctness depends on ibis translation. 
+- **Native approach**: implements PIT join directly in MQL (MongoDB aggregation pipeline). + Pro: no extra dependency, full control. + Con: more complex; MQL aggregation pipelines can be verbose. +- Decision pending benchmarking / correctness validation between the two. + +### Next steps for offline store +1. Finish `MongoDBOfflineStoreNative` MQL implementation (started in latest commit) +2. Validate PIT correctness for both implementations against the Feast universal test suite +3. Run: `make test-python-universal-mongodb-offline` (target may need creating — see `71469f69a`) +4. Choose Ibis vs Native based on results; remove the other +5. Add to operator (same pattern as online store: kubebuilder enums, install.yaml) +6. Open PR — follow same DCO + linear history discipline as online store + +--- + +## Environment Notes + +- **Python env**: always use `uv run pytest ...` (uses `.venv` in repo root, Python 3.11) +- **Do NOT use**: system Python (`/Library/Frameworks/Python.framework/...`) or conda envs +- **Docker**: must be running for the testcontainers integration test +- **Stale container**: `72d14b345b6a` (mongo:latest, port 57120) — leftover from testing, safe to stop +- **DCO**: all commits must be signed: `git commit -s` +- **No push/merge without explicit user approval** + +--- + +## Git Workflow Reminder +To keep history clean (lesson from online store PR): +- Always branch from `upstream/master` (after `git fetch --all`) +- Never merge upstream into a feature branch — rebase or cherry-pick instead +- Before opening a PR, verify with: `git log --merges ^upstream/master --oneline` + (must return empty) + diff --git a/design-notes/design-hybrid-with-batches.md b/design-notes/design-hybrid-with-batches.md new file mode 100644 index 0000000000..080986579e --- /dev/null +++ b/design-notes/design-hybrid-with-batches.md @@ -0,0 +1,239 @@ +Native MongoDB Offline Store (Hybrid Design) + +Design Document + +Overview + +This document describes the design of the Native MongoDB Offline Store for Feast using a hybrid execution model. The system combines MongoDB’s strengths in indexed data retrieval with Python’s strengths in relational and temporal joins. + +The implementation uses a single-collection schema in MongoDB to store feature data across all FeatureViews and performs point-in-time (PIT) joins using a “fetch + pandas join” strategy. This replaces an earlier fully in-database $lookup approach that proved unscalable for large workloads. + +The result is a design that is performant, scalable, and aligned with Feast’s semantics. + +⸻ + +Data Model + +All FeatureViews share a single MongoDB collection (feature_history). Each document represents an observation of a FeatureView for a given entity at a specific timestamp. + +Each document contains: + • A serialized entity identifier (entity_id) + • A FeatureView identifier (feature_view) + • A subdocument of feature values (features) + • An event timestamp (event_timestamp) + • An ingestion timestamp (created_at) + +This schema supports: + • Sparse feature storage (not all features present in every document) + • Flexible schema evolution over time + • Efficient indexing across FeatureViews + +A compound index is maintained on: + • (entity_id, feature_view, event_timestamp DESC) + +This index supports efficient filtering by entity, FeatureView, and time range. + +⸻ + +Execution Model + +High-Level Strategy + +The system implements historical feature retrieval in three stages: + 1. 
Preprocessing (Python)
+ • Normalize timestamps to UTC
+ • Serialize entity keys into entity_id
+ • Partition the input entity_df into manageable chunks
+ 2. Data Fetching (MongoDB)
+ • Query MongoDB using $in on entity IDs
+ • Filter by FeatureView and time bounds
+ • Retrieve matching feature documents in batches
+ 3. Point-in-Time Join (Python)
+ • Convert MongoDB results into pandas DataFrames
+ • Perform per-FeatureView joins using merge_asof
+ • Apply TTL constraints and feature selection
+
+This design avoids per-row database joins and instead performs a small number of efficient indexed scans.
+
+⸻
+
+Chunking and Batching
+
+To ensure scalability, the system separates concerns between:
+ • Chunk size (entity_df)
+Controls memory usage in Python
+Default: ~5,000 rows
+ • Batch size (MongoDB queries)
+Controls query size and index efficiency
+Default: ~1,000 entity IDs per query
+
+Each chunk of entity_df is processed independently:
+ • Entity IDs are extracted and deduplicated
+ • Feature data is fetched in batches
+ • Results are joined and accumulated
+
+This ensures:
+ • Bounded memory usage
+ • Predictable query performance
+ • Compatibility with large workloads
+
+⸻
+
+Point-in-Time Join Semantics
+
+For each FeatureView:
+ • Feature data is sorted by event_timestamp (the merge_asof on key)
+ • The entity dataframe is likewise sorted by its event timestamp
+ • A backward merge_asof is performed, with entity_id as the by key
+
+This ensures:
+ • Only feature values with timestamps ≤ entity timestamp are used
+ • The most recent valid feature value is selected
+
+TTL constraints are applied after the join:
+ • If the matched feature timestamp is older than the allowed TTL window, the value is set to NULL
+
+⸻
+
+Key Improvements in Current Design
+
+1. Projection (Reduced Data Transfer)
+
+The system now explicitly limits fields retrieved from MongoDB to only those required:
+ • entity_id
+ • feature_view
+ • event_timestamp
+ • Requested feature fields within features
+
+This reduces:
+ • Network overhead
+ • BSON decoding cost
+ • Memory usage in pandas
+
+This is especially important for wide FeatureViews or large documents.
+
+⸻
+
+2. Bounded Time Filtering
+
+Queries now include both:
+ • An upper bound (<= max_ts)
+ • A lower bound (>= min_ts)
+
+This significantly reduces the amount of historical data scanned when:
+ • The entity dataframe spans a narrow time window
+ • The feature store contains deep history
+
+This optimization improves:
+ • Query latency
+ • Index selectivity
+ • Memory footprint of retrieved data
+
+Future enhancements may incorporate TTL-aware lower bounds.
+
+⸻
+
+3. Correct Sorting for Temporal Joins
+
+The system ensures proper sorting before merge_asof:
+ • Both dataframes are globally sorted by their timestamp column (pandas merge_asof requires the on key to be monotonically increasing; per-entity alignment is handled by the by key, which needs no pre-sorting)
+
+This is critical for correctness when:
+ • Multiple entities are processed in a single batch
+ • Data is interleaved across entities
+
+Without this, merge_asof either rejects the unsorted input or the join silently produces incorrect results (see the sketch below).
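+
+A minimal, self-contained sketch of this join, using illustrative string entity IDs and a single hypothetical conv_rate column (real entity_ids are serialized bytes, and the real implementation operates on documents fetched from feature_history):
+
+```python
+import pandas as pd
+
+# Entity rows: one observation per (entity, timestamp) pair.
+entity_df = pd.DataFrame(
+    {
+        "_entity_id": ["e1", "e2"],
+        "event_timestamp": pd.to_datetime(
+            ["2026-03-01 12:00", "2026-03-01 12:00"], utc=True
+        ),
+    }
+)
+
+# Feature history for one FeatureView, already flattened.
+feature_df = pd.DataFrame(
+    {
+        "_entity_id": ["e1", "e1", "e2"],
+        "_fv_ts": pd.to_datetime(
+            ["2026-03-01 09:00", "2026-03-01 11:00", "2026-02-01 10:00"], utc=True
+        ),
+        "conv_rate": [0.1, 0.2, 0.3],
+    }
+)
+
+# Both frames must be globally sorted by the time key used as the on key.
+entity_df = entity_df.sort_values("event_timestamp")
+feature_df = feature_df.sort_values("_fv_ts")
+
+joined = pd.merge_asof(
+    entity_df,
+    feature_df,
+    left_on="event_timestamp",
+    right_on="_fv_ts",
+    by="_entity_id",
+    direction="backward",  # latest feature row at or before the entity timestamp
+)
+
+# TTL applied after the join: null out matches older than the window.
+ttl = pd.Timedelta(hours=24)
+stale = joined["_fv_ts"] < joined["event_timestamp"] - ttl
+joined.loc[stale, "conv_rate"] = None  # e2's month-old value is dropped
+print(joined.drop(columns=["_fv_ts"]))
+```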
+ +⸻ + +Tradeoffs + +Advantages + • Scalability: Avoids O(n × m) behavior of correlated joins + • Flexibility: Supports sparse and evolving schemas + • Performance: Leverages MongoDB indexes efficiently + • Simplicity: Uses well-understood pandas join semantics + +Limitations + • Memory-bound joins: Requires chunking for large workloads + • Multiple passes: Each FeatureView requires a separate join + • No server-side joins: MongoDB is used only for filtering, not relational logic + +⸻ + +Comparison to Alternative Designs + +Full MongoDB Join ($lookup) + +Rejected due to: + • Poor scaling with large entity sets + • Repeated execution of correlated subqueries + • High latency (orders of magnitude slower) + +⸻ + +Ibis-Based Design + • Uses one collection per FeatureView + • Loads data into memory and performs joins in Python + +Comparison: + • Similar performance after hybrid redesign + • Simpler query model + • Less flexible schema + +The Native design trades simplicity for: + • Unified storage + • Better alignment with document-based ingestion + • More flexible feature evolution + +⸻ + +Operational Considerations + +Index Management + +Indexes are created lazily at runtime: + • Ensures correctness without manual setup + • Avoids placing responsibility on users + +Future improvements may include: + • Optional strict index validation + • Configuration-driven index management + +⸻ + +MongoDB Client Usage + +Each chunk currently uses a separate MongoDB client instance. + +This is acceptable for moderate workloads but may be optimized in the future by: + • Reusing a shared client per retrieval job + • Leveraging connection pooling more explicitly + +⸻ + +Future Work + +Several enhancements are possible: + 1. Streaming Joins + • Avoid materializing all feature data in memory + • Process data incrementally + 2. Adaptive Chunking + • Dynamically adjust chunk size based on memory pressure + 3. TTL Pushdown + • Incorporate TTL constraints into MongoDB queries + 4. Parallel Execution + • Process chunks concurrently for large workloads + +⸻ + +Conclusion + +The hybrid MongoDB + pandas design represents a significant improvement over the initial fully in-database approach. It aligns system responsibilities with the strengths of each component: + • MongoDB handles indexed filtering and retrieval + • Python handles temporal join logic + +With the addition of projection, bounded time filtering, and correct sorting, the system is now both performant and correct for large-scale historical feature retrieval. + +This design provides a strong foundation for further optimization and production use. + diff --git a/design-notes/native_implementation_notes.md b/design-notes/native_implementation_notes.md new file mode 100644 index 0000000000..891751e56c --- /dev/null +++ b/design-notes/native_implementation_notes.md @@ -0,0 +1,191 @@ +# Native MongoDB Offline Store Implementation Review + +## Overview + +This document reviews the native MongoDB offline store implementation (`mongodb_native.py`) in the context of Feast idioms, the MongoDB online store implementation, and best practices. 
+ +--- + +## Schema Alignment: Online ↔ Offline + +### Online Store Schema (mongodb_online_store/mongodb.py) +```javascript +{ + "_id": bytes, // serialized entity key + "features": { + "": { + "": value + } + }, + "event_timestamps": { "": datetime }, + "created_timestamp": datetime +} +``` + +### Offline Store Schema (Native) +```javascript +{ + "_id": ObjectId(), + "entity_id": bytes, // serialized entity key (same format as online _id) + "feature_view": "driver_stats", // discriminator + "features": { "": value }, + "event_timestamp": datetime, + "created_at": datetime +} +``` + +### ✅ Alignment Strengths +1. **Entity key serialization**: Both use `serialize_entity_key()` from `key_encoding_utils.py` +2. **Nested features**: Both use `features: { ... }` subdocument pattern +3. **Timestamps**: Both track event and created timestamps + +### ⚠️ Alignment Concerns +1. **`_id` usage**: Online uses `_id` = entity_id; Offline uses `_id` = ObjectId() with separate `entity_id` field + - **Recommendation**: Consider using `_id` = `{entity_id, feature_view, event_timestamp}` compound key for offline, eliminating ObjectId overhead + +2. **Feature nesting depth**: Online nests by feature_view then feature; Offline nests only by feature (feature_view is top-level) + - This is intentional (offline is one doc per event; online is one doc per entity with all FVs) + +--- + +## Feast Idioms Compliance + +### ✅ Correctly Followed +1. **RetrievalJob pattern**: Returns `MongoDBNativeRetrievalJob` wrapping a `query_fn` closure +2. **Arrow output**: `_to_arrow_internal()` returns `pyarrow.Table` (hard requirement) +3. **Warnings for preview**: Uses `warnings.warn()` with `RuntimeWarning` +4. **Config inheritance**: `MongoDBOfflineStoreNativeConfig` extends `FeastConfigBaseModel` +5. **DataSource pattern**: `MongoDBSourceNative` extends `DataSource` with `from_proto`/`_to_proto_impl` + +### ⚠️ Missing or Incomplete +1. **`offline_write_batch`**: Not implemented (raises `NotImplementedError` in persist) + - Required for push sources and `feast materialize` reverse path + - Should accept `pyarrow.Table` and insert into `feature_history` collection + +2. **`write_logged_features`**: Not implemented + - Lower priority but needed for feature logging + +3. **`persist()` on RetrievalJob**: Not implemented + - Should write results to a new collection for saved datasets + +--- + +## MQL Pipeline Quality + +### ✅ Well Implemented +1. **`pull_all_from_table_or_query`**: Clean range scan with `$project` flattening features server-side +2. **`pull_latest_from_table_or_query`**: Proper `$sort` → `$group` → `$project` pattern +3. **`get_historical_features`**: Uses `$lookup` with correlated subpipeline for server-side PIT join +4. **Per-FV TTL via `$switch`**: Elegant solution for different TTLs per feature view + +### ⚠️ Potential Improvements +1. **Index usage in `$lookup`**: The `$expr` in `$match` may not use indexes efficiently + - MongoDB 5.0+ has better support for `$expr` index usage + - Consider adding `hint` option if performance is critical + +2. **Temp collection cleanup**: Currently uses `try/finally` but could benefit from context manager pattern + +3. **Connection pooling**: Each method creates a new `MongoClient`. 
The online store caches `_client` and `_collection` + - **Recommendation**: Add `_client` caching to the offline store class or use connection pooling + +--- + +## Comparison with Online Store Patterns + +| Aspect | Online Store | Offline Store (Native) | +|--------|--------------|------------------------| +| Client caching | `_client`, `_collection` instance vars | New client per operation | +| Async support | Yes (`AsyncMongoClient`) | No | +| Batch operations | `bulk_write` with `UpdateOne` | `insert_many` | +| Error handling | Raises `RuntimeError` for config mismatch | Raises `ValueError` | +| DriverInfo | ✅ Yes | ✅ Yes | + +### Recommendations +1. **Add client caching** to avoid connection overhead per query +2. **Consider async support** for large entity_df scenarios +3. **Standardize error types** (use `RuntimeError` or `FeastError` subclasses) + +--- + +## Missing Features for Production Readiness + +### High Priority +1. **`offline_write_batch`**: Insert Arrow table into feature_history + ```python + @staticmethod + def offline_write_batch( + config: RepoConfig, + feature_view: FeatureView, + table: pyarrow.Table, + progress: Optional[Callable[[int], Any]], + ): + # Convert Arrow → docs with schema: + # { entity_id, feature_view, features: {...}, event_timestamp, created_at } + # Then insert_many() + ``` + +2. **Index creation helper**: Document or auto-create the compound index + ```javascript + db.feature_history.createIndex({ + entity_id: 1, + feature_view: 1, + event_timestamp: -1 + }) + ``` + +3. **Connection pooling / client reuse** + +### Medium Priority +4. **`persist()` for saved datasets**: Write retrieval results to a collection +5. **`write_logged_features`**: For feature logging support +6. **Async operations**: Mirror online store's async pattern + +### Lower Priority +7. **Streaming cursor support**: For very large result sets +8. **Explain plan logging**: Debug mode to show MQL execution plan + +--- + +## Code Quality Observations + +### ✅ Good +- Clear docstrings explaining schema and index requirements +- Type hints throughout +- Helper functions extracted (`_ttl_to_ms`, `_build_ttl_gte_expr`, `_serialize_entity_key_from_row`) +- Proper cleanup of temp collections in `finally` block + +### ⚠️ Could Improve +- Some duplication in timestamp timezone handling (could extract helper) +- Magic strings like `"event_timestamp"`, `"created_at"` could be constants +- The `_run()` closures are large — consider extracting to separate methods + +--- + +## Test Coverage Assessment + +Current tests cover: +- ✅ `pull_latest_from_table_or_query` +- ✅ `pull_all_from_table_or_query` +- ✅ `get_historical_features` (PIT join) +- ✅ TTL filtering +- ✅ Multiple feature views +- ✅ Compound join keys + +Missing tests: +- ❌ `offline_write_batch` (not implemented) +- ❌ Empty result handling edge cases +- ❌ Very large entity_df (performance/memory) +- ❌ Concurrent access to temp collections +- ❌ Index usage verification (explain plans) + +--- + +## Summary + +The native implementation is a solid foundation with proper use of MQL aggregation pipelines. Key next steps: + +1. **Implement `offline_write_batch`** — Required for push sources +2. **Add client caching** — Match online store pattern +3. **Document/automate index creation** — Critical for performance +4. 
**Consider `_id` schema optimization** — Use compound `_id` instead of ObjectId + entity_id + diff --git a/design-notes/offline_store_design.md b/design-notes/offline_store_design.md new file mode 100644 index 0000000000..fbe7120a3c --- /dev/null +++ b/design-notes/offline_store_design.md @@ -0,0 +1,98 @@ +# Corrected MongoDB OfflineStore Design + +## What the interface actually requires + +`RetrievalJob._to_arrow_internal` must return a `pyarrow.Table`. This is non-negotiable +because the compute engines call `retrieval_job.to_arrow()` directly: + +```python +# sdk/python/feast/infra/compute_engines/local/nodes.py +retrieval_job = create_offline_store_retrieval_job(...) +arrow_table = retrieval_job.to_arrow() # ← hard requirement +``` + +The compute engine then converts Arrow → proto tuples itself before calling +`OnlineStore.online_write_batch(data: List[Tuple[EntityKeyProto, ...]])`. +The offline store never sees the proto tuple format. + +`OfflineStore.offline_write_batch` (the push-source write path) takes a `pyarrow.Table` +— so Arrow is also the *input* format for writes. + +## The right approach — native aggregation, then Arrow + +The Couchbase offline store is the correct reference. It: +1. Expresses computation natively in the database (SQL++ window functions). +2. Iterates the cursor in Python. +3. Converts directly: `pa.Table.from_pylist(processed_rows)` — **no pandas intermediate**. + +MongoDB should follow the same pattern using its aggregation pipeline. + +## pull_latest_from_table_or_query + +The `$group` + `$sort` aggregation is the natural MongoDB equivalent of +`ROW_NUMBER() OVER(PARTITION BY entity ORDER BY timestamp DESC) = 1`: + +```python +pipeline = [ + {"$match": { + timestamp_field: {"$gte": start_date, "$lte": end_date} + }}, + {"$sort": {timestamp_field: -1, created_timestamp_column: -1}}, + {"$group": { + "_id": {k: f"${k}" for k in join_key_columns}, + **{f: {"$first": f"${f}"} for f in feature_name_columns}, + timestamp_field: {"$first": f"${timestamp_field}"}, + }}, +] +# cursor → pa.Table.from_pylist([doc for doc in collection.aggregate(pipeline)]) +``` + +No pandas. No Feast join utilities. The database does the work. + +## get_historical_features + +This is harder. The point-in-time join requires: for each (entity, entity_timestamp) row, +find the feature row with the latest `event_timestamp <= entity_timestamp`. + +MongoDB has no SQL window functions, but the aggregation pipeline can express this: + +``` +For each feature view: + $match: entity_ids in entity_df AND event_timestamp <= max(entity_timestamps) + $sort: entity_id, event_timestamp DESC + $lookup or unwind against entity_df rows + $match: event_timestamp <= entity_row.entity_timestamp (and TTL if set) + $group by (entity_id, entity_row_id): $first of features +``` + +This is complex but keeps computation in MongoDB and avoids loading the full history +into Python memory. The result cursor is then converted via `pa.Table.from_pylist()`. + +For an initial implementation it is acceptable to pull the filtered documents into +memory and do the join in Python (like the Dask store) — but this should be noted +as a known limitation, not the target design. + +## offline_write_batch + +Receives a `pyarrow.Table` from Feast (push-source path). Convert with +`table.to_pylist()` and `insert_many()` into the collection. 
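+
+A minimal sketch of that write path, assuming the `MongoDBSource` attributes (`database`, `collection`) and offline-store config fields (`connection_string`, `database`) used elsewhere in these notes; type validation and error handling are elided:
+
+```python
+from typing import Any, Callable, Optional
+
+import pyarrow
+from pymongo import MongoClient
+
+
+def offline_write_batch(
+    config,
+    feature_view,
+    table: pyarrow.Table,
+    progress: Optional[Callable[[int], Any]] = None,
+) -> None:
+    source = feature_view.batch_source  # expected to be a MongoDBSource
+    db_name = source.database or config.offline_store.database
+    client = MongoClient(config.offline_store.connection_string)
+    try:
+        records = table.to_pylist()  # Arrow rows -> plain dicts, one per document
+        if records:
+            client[db_name][source.collection].insert_many(records)
+        if progress:
+            progress(len(records))
+    finally:
+        client.close()
+```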
+
+## What changes from the previous design
+
+| Previous (incorrect)                         | Corrected                                   |
+|---------------------------------------------|---------------------------------------------|
+| Pull docs into pandas, use offline_utils    | Use MongoDB aggregation pipeline            |
+| pandas is the intermediate format           | MongoDB cursor → `pa.Table.from_pylist()`   |
+| Arrow is an afterthought                    | Arrow is the required output of the job     |
+| Claimed online_write_batch takes Arrow      | It takes proto tuples; compute engine converts |
+
+## Implementation order (unchanged)
+
+1. `MongoDBSource` — DataSource subclass (connection_string, database, collection, timestamp_field).
+2. `MongoDBOfflineStoreConfig` — pydantic config.
+3. `MongoDBRetrievalJob` — wraps aggregation pipeline, implements `_to_arrow_internal`.
+4. `offline_write_batch` — `pyarrow.Table` → `insert_many`.
+5. `pull_latest_from_table_or_query` — `$sort` + `$group` aggregation.
+6. `pull_all_from_table_or_query` — `$match` time-range scan.
+7. `get_historical_features` — aggregation pipeline PIT join (or in-memory fallback).
+
diff --git a/design-notes/prompt-mdb-fetch-pandas-join-with-batches.md b/design-notes/prompt-mdb-fetch-pandas-join-with-batches.md
new file mode 100644
index 0000000000..9bd8fb437c
--- /dev/null
+++ b/design-notes/prompt-mdb-fetch-pandas-join-with-batches.md
@@ -0,0 +1,108 @@
+Enhance MongoDBOfflineStoreNative.get_historical_features to support chunked execution for large entity_df, while preserving the existing fetch + pandas PIT join logic.
+
+Goals
+ • Prevent memory blowups for large entity_df
+ • Reuse the current implementation as much as possible
+ • Keep the code clean and idiomatic to Feast
+
+⸻
+
+Requirements
+
+1. Add chunking based on entity_df size
+ • Introduce a constant:
+``` python
+CHUNK_SIZE = 5000 # make this configurable
+```
+ • If len(entity_df) <= CHUNK_SIZE:
+ • Run the existing _run() logic unchanged
+ • Else:
+ • Split entity_df into chunks of size CHUNK_SIZE
+
+⸻
+
+2. Extract existing logic into reusable function
+Refactor the current _run() implementation into a helper:
+``` python
+def _run_single(entity_subset_df: pd.DataFrame) -> pd.DataFrame:
+    ...
+```
+This function should:
+ • Perform:
+ • entity_id serialization
+ • MongoDB fetch ($in query)
+ • pandas normalization
+ • per-feature-view merge_asof
+ • Return a pandas DataFrame (not Arrow)
+3. Implement chunked execution
+In _run():
+``` python
+if len(entity_df) <= CHUNK_SIZE:
+    df = _run_single(entity_df)
+else:
+    dfs = []
+    for chunk in chunk_dataframe(entity_df, CHUNK_SIZE):
+        dfs.append(_run_single(chunk))
+    df = pd.concat(dfs, ignore_index=True)
+```
+4. Implement chunk helper
+Add:
+```
+def chunk_dataframe(df: pd.DataFrame, size: int):
+    for i in range(0, len(df), size):
+        yield df.iloc[i:i+size]
+```
+5. Preserve ordering
+ • Ensure final DataFrame preserves original row order
+ • Use a _row_idx column if necessary
+6. Handle edge cases
+Ensure the chunked version correctly handles:
+ • Empty MongoDB results
+ • Missing feature_views
+ • Missing features inside documents
+ • TTL filtering (already implemented in pandas)
+
+⸻
+
+7. 
Return Arrow table +Final _run() must still return: +``` +pyarrow.Table.from_pandas(df, preserve_index=False) +``` +Constraints + • Do NOT reintroduce $lookup + • Do NOT use temp collections + • Do NOT duplicate large blocks of logic + • Keep code readable and maintainable + +⸻ + +Optional (nice-to-have) + • Add logging or debug print: + • number of chunks processed + • rows per chunk + +⸻ + +Outcome + • Small workloads behave exactly as before + • Large workloads are processed safely in chunks + • Performance remains close to Ibis for moderate sizes + • Memory usage is bounded + +⸻ + +🧠 Why this design is the right one + +This keeps your system: + +✅ Fast + • still uses vectorized joins + +✅ Scalable + • bounded memory + +✅ Clean + • no duplication + • no branching chaos + diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py index 10ffd6c533..241e69cbb4 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json import warnings from datetime import datetime -from typing import Any, Callable, List, Optional, Union +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast import ibis import pandas as pd @@ -28,14 +29,12 @@ from feast.data_source import DataSource from feast.errors import ( + DataSourceNoNameException, FeastExtrasDependencyImportError, SavedDatasetLocationAlreadyExists, ) from feast.feature_view import FeatureView from feast.infra.offline_stores.contrib.mongodb_offline_store import DRIVER_METADATA -from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_source import ( - MongoDBSource, -) from feast.infra.offline_stores.ibis import ( get_historical_features_ibis, pull_all_from_table_or_query_ibis, @@ -46,7 +45,278 @@ RetrievalJob, ) from feast.infra.registry.base_registry import BaseRegistry +from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto +from feast.protos.feast.core.SavedDataset_pb2 import ( + SavedDatasetStorage as SavedDatasetStorageProto, +) from feast.repo_config import FeastConfigBaseModel, RepoConfig +from feast.saved_dataset import SavedDatasetStorage +from feast.type_map import mongodb_to_feast_value_type +from feast.value_type import ValueType + +# --------------------------------------------------------------------------- +# Helper functions +# --------------------------------------------------------------------------- + + +def _infer_python_type_str(value: Any) -> Optional[str]: + """Infer a Feast-compatible type string from a Python value returned by pymongo.""" + if value is None: + return None + if isinstance(value, bool): + return "bool" + if isinstance(value, int): + return "int" + if isinstance(value, float): + return "float" + if isinstance(value, str): + return "str" + if isinstance(value, bytes): + return "bytes" + if isinstance(value, datetime): + return "datetime" + if isinstance(value, list): + if not value: + return "list[str]" + elem_type = _infer_python_type_str(value[0]) + if elem_type: + return f"list[{elem_type}]" + return "list[str]" + return None + + +# --------------------------------------------------------------------------- +# MongoDBSource and related classes (collection-per-FeatureView schema) +# 
--------------------------------------------------------------------------- + + +class MongoDBOptions: + """Options for a MongoDB data source (database + collection).""" + + def __init__(self, database: str, collection: str): + self._database = database + self._collection = collection + + def to_proto(self) -> DataSourceProto.CustomSourceOptions: + """Serialize database and collection names as JSON into a CustomSourceOptions proto.""" + return DataSourceProto.CustomSourceOptions( + configuration=json.dumps( + {"database": self._database, "collection": self._collection} + ).encode() + ) + + @classmethod + def from_proto( + cls, options_proto: DataSourceProto.CustomSourceOptions + ) -> "MongoDBOptions": + """Deserialize a CustomSourceOptions proto back into a MongoDBOptions instance.""" + config = json.loads(options_proto.configuration.decode("utf8")) + return cls(database=config["database"], collection=config["collection"]) + + +class MongoDBSource(DataSource): + """A MongoDB collection used as a Feast offline data source. + + ``name`` is the logical Feast name for this source. If omitted, it defaults + to the value of ``collection``. At least one of ``name`` or ``collection`` + must be supplied. + + ``database`` is the MongoDB database that contains the collection. When + omitted it falls back to ``MongoDBOfflineStoreConfig.database`` at query + time, so a single store-level default can be shared across many sources. + + ``schema_sample_size`` controls how many documents are randomly sampled + when Feast infers the collection schema (used by ``feast apply`` and + ``get_table_column_names_and_types``). Increase it for collections with + highly variable document shapes; decrease it to speed up ``feast apply`` + at the cost of schema coverage. + """ + + def source_type(self) -> DataSourceProto.SourceType.ValueType: + return DataSourceProto.CUSTOM_SOURCE + + def __init__( + self, + name: Optional[str] = None, + database: Optional[str] = None, + collection: Optional[str] = None, + timestamp_field: Optional[str] = "", + created_timestamp_column: Optional[str] = "", + field_mapping: Optional[Dict[str, str]] = None, + description: Optional[str] = "", + tags: Optional[Dict[str, str]] = None, + owner: Optional[str] = "", + schema_sample_size: int = 100, + ): + if name is None and collection is None: + raise DataSourceNoNameException() + # At least one of name / collection is non-None; cast to satisfy the type checker. + name = cast(str, name or collection) + + self._mongodb_options = MongoDBOptions( + database=database or "", + collection=collection or name, + ) + self._schema_sample_size = schema_sample_size + + super().__init__( + name=name, + timestamp_field=timestamp_field, + created_timestamp_column=created_timestamp_column, + field_mapping=field_mapping, + description=description, + tags=tags, + owner=owner, + ) + + def __hash__(self): + return super().__hash__() + + def __eq__(self, other): + if not isinstance(other, MongoDBSource): + raise TypeError( + "Comparisons should only involve MongoDBSource class objects." 
+ ) + return ( + super().__eq__(other) + and self._mongodb_options._database == other._mongodb_options._database + and self._mongodb_options._collection == other._mongodb_options._collection + and self.timestamp_field == other.timestamp_field + and self.created_timestamp_column == other.created_timestamp_column + and self.field_mapping == other.field_mapping + ) + + @property + def database(self) -> str: + return self._mongodb_options._database + + @property + def collection(self) -> str: + return self._mongodb_options._collection + + @staticmethod + def from_proto(data_source: DataSourceProto) -> "MongoDBSource": + assert data_source.HasField("custom_options") + options = json.loads(data_source.custom_options.configuration) + return MongoDBSource( + name=data_source.name, + database=options["database"], + collection=options["collection"], + field_mapping=dict(data_source.field_mapping), + timestamp_field=data_source.timestamp_field, + created_timestamp_column=data_source.created_timestamp_column, + description=data_source.description, + tags=dict(data_source.tags), + owner=data_source.owner, + ) + + def _to_proto_impl(self) -> DataSourceProto: + data_source_proto = DataSourceProto( + name=self.name, + type=DataSourceProto.CUSTOM_SOURCE, + data_source_class_type="feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb.MongoDBSource", + field_mapping=self.field_mapping, + custom_options=self._mongodb_options.to_proto(), + description=self.description, + tags=self.tags, + owner=self.owner, + ) + data_source_proto.timestamp_field = self.timestamp_field + data_source_proto.created_timestamp_column = self.created_timestamp_column + return data_source_proto + + def validate(self, config: RepoConfig): + # No upfront schema validation is required for MongoDB; the connection + # is exercised lazily when features are actually retrieved. + pass + + @staticmethod + def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]: + return mongodb_to_feast_value_type + + def get_table_query_string(self) -> str: + return f"{self._mongodb_options._database}.{self._mongodb_options._collection}" + + def get_table_column_names_and_types( + self, config: RepoConfig + ) -> Iterable[Tuple[str, str]]: + """Sample documents from the collection to infer field names and their Feast type strings. + + Uses ``$sample`` to fetch up to ``schema_sample_size`` documents, then + picks the most-frequent Python type observed per field. The ``_id`` + field is always excluded. + """ + if MongoClient is None: + raise FeastExtrasDependencyImportError( + "mongodb", "pymongo is not installed." 
+ ) + connection_string = config.offline_store.connection_string + db_name = self.database or config.offline_store.database + client: Any = MongoClient(connection_string, tz_aware=True) + try: + docs = list( + client[db_name][self.collection].aggregate( + [{"$sample": {"size": self._schema_sample_size}}] + ) + ) + finally: + client.close() + + field_type_counts: Dict[str, Dict[str, int]] = {} + for doc in docs: + for field, value in doc.items(): + if field == "_id": + continue + type_str = _infer_python_type_str(value) + if type_str is None: + continue + field_type_counts.setdefault(field, {}) + field_type_counts[field][type_str] = ( + field_type_counts[field].get(type_str, 0) + 1 + ) + + return [ + (field, max(counts, key=lambda t: counts[t])) + for field, counts in field_type_counts.items() + ] + + +class SavedDatasetMongoDBStorage(SavedDatasetStorage): + """Persists a Feast SavedDataset into a MongoDB collection.""" + + _proto_attr_name = "custom_storage" + + mongodb_options: MongoDBOptions + + def __init__(self, database: str, collection: str): + self.mongodb_options = MongoDBOptions( + database=database, + collection=collection, + ) + + @staticmethod + def from_proto( + storage_proto: SavedDatasetStorageProto, + ) -> "SavedDatasetMongoDBStorage": + options = json.loads(storage_proto.custom_storage.configuration) + return SavedDatasetMongoDBStorage( + database=options["database"], + collection=options["collection"], + ) + + def to_proto(self) -> SavedDatasetStorageProto: + return SavedDatasetStorageProto(custom_storage=self.mongodb_options.to_proto()) + + def to_data_source(self) -> DataSource: + return MongoDBSource( + database=self.mongodb_options._database, + collection=self.mongodb_options._collection, + ) + + +# --------------------------------------------------------------------------- +# Offline store configuration and implementation +# --------------------------------------------------------------------------- class MongoDBOfflineStoreIbisConfig(FeastConfigBaseModel): diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_source.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_source.py deleted file mode 100644 index ee55fe24e6..0000000000 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_source.py +++ /dev/null @@ -1,283 +0,0 @@ -# Copyright 2026 The Feast Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
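The `$sample`-based schema inference above is easier to follow in isolation. A minimal, self-contained sketch of the same sampling-and-vote idea (database, collection, and field names are illustrative; the real helper maps values to Feast type strings via `_infer_python_type_str` rather than raw Python type names):

```python
from collections import Counter
from typing import Dict

from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017", tz_aware=True)
try:
    # $sample draws up to N random documents; a small N keeps `feast apply` fast.
    docs = list(
        client["feast"]["driver_stats"].aggregate([{"$sample": {"size": 100}}])
    )
finally:
    client.close()

# Count the Python types observed per field; the most frequent type wins.
type_counts: Dict[str, Counter] = {}
for doc in docs:
    for field, value in doc.items():
        if field == "_id":  # _id is always excluded from the inferred schema
            continue
        type_counts.setdefault(field, Counter())[type(value).__name__] += 1

schema = {field: counts.most_common(1)[0][0] for field, counts in type_counts.items()}
# e.g. {"driver_id": "int", "conv_rate": "float", "event_timestamp": "datetime"}
```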
- -import json -from datetime import datetime -from typing import Any, Callable, Dict, Iterable, Optional, Tuple, cast - -try: - from pymongo import MongoClient -except ImportError: - MongoClient = None # type: ignore[assignment,misc] - -from feast.data_source import DataSource -from feast.errors import DataSourceNoNameException, FeastExtrasDependencyImportError -from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto -from feast.protos.feast.core.SavedDataset_pb2 import ( - SavedDatasetStorage as SavedDatasetStorageProto, -) -from feast.repo_config import RepoConfig -from feast.saved_dataset import SavedDatasetStorage -from feast.type_map import mongodb_to_feast_value_type -from feast.value_type import ValueType - - -def _infer_python_type_str(value: Any) -> Optional[str]: - """Infer a Feast-compatible type string from a Python value returned by pymongo.""" - if value is None: - return None - if isinstance(value, bool): - return "bool" - if isinstance(value, int): - return "int" - if isinstance(value, float): - return "float" - if isinstance(value, str): - return "str" - if isinstance(value, bytes): - return "bytes" - if isinstance(value, datetime): - return "datetime" - if isinstance(value, list): - if not value: - return "list[str]" - elem_type = _infer_python_type_str(value[0]) - if elem_type: - return f"list[{elem_type}]" - return "list[str]" - return None - - -class MongoDBOptions: - """Options for a MongoDB data source (database + collection).""" - - def __init__(self, database: str, collection: str): - self._database = database - self._collection = collection - - def to_proto(self) -> DataSourceProto.CustomSourceOptions: - """Serialize database and collection names as JSON into a CustomSourceOptions proto.""" - return DataSourceProto.CustomSourceOptions( - configuration=json.dumps( - {"database": self._database, "collection": self._collection} - ).encode() - ) - - @classmethod - def from_proto( - cls, options_proto: DataSourceProto.CustomSourceOptions - ) -> "MongoDBOptions": - """Deserialize a CustomSourceOptions proto back into a MongoDBOptions instance.""" - config = json.loads(options_proto.configuration.decode("utf8")) - return cls(database=config["database"], collection=config["collection"]) - - -class MongoDBSource(DataSource): - """A MongoDB collection used as a Feast offline data source. - - ``name`` is the logical Feast name for this source. If omitted, it defaults - to the value of ``collection``. At least one of ``name`` or ``collection`` - must be supplied. - - ``database`` is the MongoDB database that contains the collection. When - omitted it falls back to ``MongoDBOfflineStoreConfig.database`` at query - time, so a single store-level default can be shared across many sources. - - ``schema_sample_size`` controls how many documents are randomly sampled - when Feast infers the collection schema (used by ``feast apply`` and - ``get_table_column_names_and_types``). Increase it for collections with - highly variable document shapes; decrease it to speed up ``feast apply`` - at the cost of schema coverage. 
- """ - - def source_type(self) -> DataSourceProto.SourceType.ValueType: - return DataSourceProto.CUSTOM_SOURCE - - def __init__( - self, - name: Optional[str] = None, - database: Optional[str] = None, - collection: Optional[str] = None, - timestamp_field: Optional[str] = "", - created_timestamp_column: Optional[str] = "", - field_mapping: Optional[Dict[str, str]] = None, - description: Optional[str] = "", - tags: Optional[Dict[str, str]] = None, - owner: Optional[str] = "", - schema_sample_size: int = 100, - ): - if name is None and collection is None: - raise DataSourceNoNameException() - # At least one of name / collection is non-None; cast to satisfy the type checker. - name = cast(str, name or collection) - - self._mongodb_options = MongoDBOptions( - database=database or "", - collection=collection or name, - ) - self._schema_sample_size = schema_sample_size - - super().__init__( - name=name, - timestamp_field=timestamp_field, - created_timestamp_column=created_timestamp_column, - field_mapping=field_mapping, - description=description, - tags=tags, - owner=owner, - ) - - def __hash__(self): - return super().__hash__() - - def __eq__(self, other): - if not isinstance(other, MongoDBSource): - raise TypeError( - "Comparisons should only involve MongoDBSource class objects." - ) - return ( - super().__eq__(other) - and self._mongodb_options._database == other._mongodb_options._database - and self._mongodb_options._collection == other._mongodb_options._collection - and self.timestamp_field == other.timestamp_field - and self.created_timestamp_column == other.created_timestamp_column - and self.field_mapping == other.field_mapping - ) - - @property - def database(self) -> str: - return self._mongodb_options._database - - @property - def collection(self) -> str: - return self._mongodb_options._collection - - @staticmethod - def from_proto(data_source: DataSourceProto) -> "MongoDBSource": - assert data_source.HasField("custom_options") - options = json.loads(data_source.custom_options.configuration) - return MongoDBSource( - name=data_source.name, - database=options["database"], - collection=options["collection"], - field_mapping=dict(data_source.field_mapping), - timestamp_field=data_source.timestamp_field, - created_timestamp_column=data_source.created_timestamp_column, - description=data_source.description, - tags=dict(data_source.tags), - owner=data_source.owner, - ) - - def _to_proto_impl(self) -> DataSourceProto: - data_source_proto = DataSourceProto( - name=self.name, - type=DataSourceProto.CUSTOM_SOURCE, - data_source_class_type="feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_source.MongoDBSource", - field_mapping=self.field_mapping, - custom_options=self._mongodb_options.to_proto(), - description=self.description, - tags=self.tags, - owner=self.owner, - ) - data_source_proto.timestamp_field = self.timestamp_field - data_source_proto.created_timestamp_column = self.created_timestamp_column - return data_source_proto - - def validate(self, config: RepoConfig): - # No upfront schema validation is required for MongoDB; the connection - # is exercised lazily when features are actually retrieved. 
- pass - - @staticmethod - def source_datatype_to_feast_value_type() -> Callable[[str], ValueType]: - return mongodb_to_feast_value_type - - def get_table_query_string(self) -> str: - return f"{self._mongodb_options._database}.{self._mongodb_options._collection}" - - def get_table_column_names_and_types( - self, config: RepoConfig - ) -> Iterable[Tuple[str, str]]: - """Sample documents from the collection to infer field names and their Feast type strings. - - Uses ``$sample`` to fetch up to ``schema_sample_size`` documents, then - picks the most-frequent Python type observed per field. The ``_id`` - field is always excluded. - """ - if MongoClient is None: - raise FeastExtrasDependencyImportError( - "mongodb", "pymongo is not installed." - ) - connection_string = config.offline_store.connection_string - db_name = self.database or config.offline_store.database - client: Any = MongoClient(connection_string, tz_aware=True) - try: - docs = list( - client[db_name][self.collection].aggregate( - [{"$sample": {"size": self._schema_sample_size}}] - ) - ) - finally: - client.close() - - field_type_counts: Dict[str, Dict[str, int]] = {} - for doc in docs: - for field, value in doc.items(): - if field == "_id": - continue - type_str = _infer_python_type_str(value) - if type_str is None: - continue - field_type_counts.setdefault(field, {}) - field_type_counts[field][type_str] = ( - field_type_counts[field].get(type_str, 0) + 1 - ) - - return [ - (field, max(counts, key=lambda t: counts[t])) - for field, counts in field_type_counts.items() - ] - - -class SavedDatasetMongoDBStorage(SavedDatasetStorage): - """Persists a Feast SavedDataset into a MongoDB collection.""" - - _proto_attr_name = "custom_storage" - - mongodb_options: MongoDBOptions - - def __init__(self, database: str, collection: str): - self.mongodb_options = MongoDBOptions( - database=database, - collection=collection, - ) - - @staticmethod - def from_proto( - storage_proto: SavedDatasetStorageProto, - ) -> "SavedDatasetMongoDBStorage": - options = json.loads(storage_proto.custom_storage.configuration) - return SavedDatasetMongoDBStorage( - database=options["database"], - collection=options["collection"], - ) - - def to_proto(self) -> SavedDatasetStorageProto: - return SavedDatasetStorageProto(custom_storage=self.mongodb_options.to_proto()) - - def to_data_source(self) -> DataSource: - return MongoDBSource( - database=self.mongodb_options._database, - collection=self.mongodb_options._collection, - ) diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/benchmark_mongodb_offline_stores.py b/sdk/python/tests/unit/infra/offline_stores/contrib/benchmark_mongodb_offline_stores.py index 177023dd6f..27c6a6a35a 100644 --- a/sdk/python/tests/unit/infra/offline_stores/contrib/benchmark_mongodb_offline_stores.py +++ b/sdk/python/tests/unit/infra/offline_stores/contrib/benchmark_mongodb_offline_stores.py @@ -37,15 +37,13 @@ from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb import ( MongoDBOfflineStoreIbis, MongoDBOfflineStoreIbisConfig, + MongoDBSource, ) from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_native import ( MongoDBOfflineStoreNative, MongoDBOfflineStoreNativeConfig, MongoDBSourceNative, ) -from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_source import ( - MongoDBSource, -) from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.repo_config import RepoConfig diff --git 
a/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py b/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py index 225d18d3e9..3acd93c288 100644 --- a/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py +++ b/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py @@ -22,8 +22,6 @@ from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb import ( MongoDBOfflineStoreIbis, MongoDBOfflineStoreIbisConfig, -) -from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_source import ( MongoDBSource, ) from feast.repo_config import RepoConfig From 9bd0c1a36a9411df07bda47c799495cc0b471f64 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Thu, 19 Mar 2026 14:27:34 -0400 Subject: [PATCH 19/30] Rename mongodb_offline_store to mongodb, use One/Many naming convention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename module: mongodb_offline_store/ → mongodb/ - Rename files: mongodb.py → mongodb_many.py, mongodb_native.py → mongodb_one.py Class renames: - MongoDBSource → MongoDBSourceMany - MongoDBOptions → MongoDBOptionsMany - SavedDatasetMongoDBStorage → SavedDatasetMongoDBStorageMany - MongoDBOfflineStoreIbis → MongoDBOfflineStoreMany - MongoDBOfflineStoreIbisConfig → MongoDBOfflineStoreManyConfig - MongoDBSourceNative → MongoDBSourceOne - MongoDBOfflineStoreNative → MongoDBOfflineStoreOne - MongoDBOfflineStoreNativeConfig → MongoDBOfflineStoreOneConfig - MongoDBNativeRetrievalJob → MongoDBOneRetrievalJob The One/Many naming reflects the core architectural difference: - One: Single shared collection for all FeatureViews - Many: One collection per FeatureView Signed-off-by: Casey Clements --- .../__init__.py | 0 .../mongodb.py => mongodb/mongodb_many.py} | 72 ++++++++++--------- .../mongodb_one.py} | 56 ++++++++------- .../benchmark_mongodb_offline_stores.py | 40 +++++------ .../contrib/test_mongodb_offline_retrieval.py | 46 ++++++------ .../test_mongodb_offline_retrieval_native.py | 44 ++++++------ 6 files changed, 132 insertions(+), 126 deletions(-) rename sdk/python/feast/infra/offline_stores/contrib/{mongodb_offline_store => mongodb}/__init__.py (100%) rename sdk/python/feast/infra/offline_stores/contrib/{mongodb_offline_store/mongodb.py => mongodb/mongodb_many.py} (89%) rename sdk/python/feast/infra/offline_stores/contrib/{mongodb_offline_store/mongodb_native.py => mongodb/mongodb_one.py} (94%) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/__init__.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb/__init__.py similarity index 100% rename from sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/__init__.py rename to sdk/python/feast/infra/offline_stores/contrib/mongodb/__init__.py diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb/mongodb_many.py similarity index 89% rename from sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py rename to sdk/python/feast/infra/offline_stores/contrib/mongodb/mongodb_many.py index 241e69cbb4..7dac38af02 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb/mongodb_many.py @@ -34,7 +34,7 @@ SavedDatasetLocationAlreadyExists, ) from feast.feature_view import FeatureView -from 
feast.infra.offline_stores.contrib.mongodb_offline_store import DRIVER_METADATA +from feast.infra.offline_stores.contrib.mongodb import DRIVER_METADATA from feast.infra.offline_stores.ibis import ( get_historical_features_ibis, pull_all_from_table_or_query_ibis, @@ -86,11 +86,11 @@ def _infer_python_type_str(value: Any) -> Optional[str]: # --------------------------------------------------------------------------- -# MongoDBSource and related classes (collection-per-FeatureView schema) +# MongoDBSourceMany and related classes (one collection per FeatureView) # --------------------------------------------------------------------------- -class MongoDBOptions: +class MongoDBOptionsMany: """Options for a MongoDB data source (database + collection).""" def __init__(self, database: str, collection: str): @@ -108,21 +108,21 @@ def to_proto(self) -> DataSourceProto.CustomSourceOptions: @classmethod def from_proto( cls, options_proto: DataSourceProto.CustomSourceOptions - ) -> "MongoDBOptions": - """Deserialize a CustomSourceOptions proto back into a MongoDBOptions instance.""" + ) -> "MongoDBOptionsMany": + """Deserialize a CustomSourceOptions proto back into a MongoDBOptionsMany instance.""" config = json.loads(options_proto.configuration.decode("utf8")) return cls(database=config["database"], collection=config["collection"]) -class MongoDBSource(DataSource): - """A MongoDB collection used as a Feast offline data source. +class MongoDBSourceMany(DataSource): + """A MongoDB collection used as a Feast offline data source (one collection per FeatureView). ``name`` is the logical Feast name for this source. If omitted, it defaults to the value of ``collection``. At least one of ``name`` or ``collection`` must be supplied. ``database`` is the MongoDB database that contains the collection. When - omitted it falls back to ``MongoDBOfflineStoreConfig.database`` at query + omitted it falls back to ``MongoDBOfflineStoreManyConfig.database`` at query time, so a single store-level default can be shared across many sources. ``schema_sample_size`` controls how many documents are randomly sampled @@ -153,7 +153,7 @@ def __init__( # At least one of name / collection is non-None; cast to satisfy the type checker. name = cast(str, name or collection) - self._mongodb_options = MongoDBOptions( + self._mongodb_options = MongoDBOptionsMany( database=database or "", collection=collection or name, ) @@ -173,9 +173,9 @@ def __hash__(self): return super().__hash__() def __eq__(self, other): - if not isinstance(other, MongoDBSource): + if not isinstance(other, MongoDBSourceMany): raise TypeError( - "Comparisons should only involve MongoDBSource class objects." + "Comparisons should only involve MongoDBSourceMany class objects." 
) return ( super().__eq__(other) @@ -195,10 +195,10 @@ def collection(self) -> str: return self._mongodb_options._collection @staticmethod - def from_proto(data_source: DataSourceProto) -> "MongoDBSource": + def from_proto(data_source: DataSourceProto) -> "MongoDBSourceMany": assert data_source.HasField("custom_options") options = json.loads(data_source.custom_options.configuration) - return MongoDBSource( + return MongoDBSourceMany( name=data_source.name, database=options["database"], collection=options["collection"], @@ -214,7 +214,7 @@ def _to_proto_impl(self) -> DataSourceProto: data_source_proto = DataSourceProto( name=self.name, type=DataSourceProto.CUSTOM_SOURCE, - data_source_class_type="feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb.MongoDBSource", + data_source_class_type="feast.infra.offline_stores.contrib.mongodb.mongodb_many.MongoDBSourceMany", field_mapping=self.field_mapping, custom_options=self._mongodb_options.to_proto(), description=self.description, @@ -281,15 +281,15 @@ def get_table_column_names_and_types( ] -class SavedDatasetMongoDBStorage(SavedDatasetStorage): - """Persists a Feast SavedDataset into a MongoDB collection.""" +class SavedDatasetMongoDBStorageMany(SavedDatasetStorage): + """Persists a Feast SavedDataset into a MongoDB collection (many-collection schema).""" _proto_attr_name = "custom_storage" - mongodb_options: MongoDBOptions + mongodb_options: MongoDBOptionsMany def __init__(self, database: str, collection: str): - self.mongodb_options = MongoDBOptions( + self.mongodb_options = MongoDBOptionsMany( database=database, collection=collection, ) @@ -297,9 +297,9 @@ def __init__(self, database: str, collection: str): @staticmethod def from_proto( storage_proto: SavedDatasetStorageProto, - ) -> "SavedDatasetMongoDBStorage": + ) -> "SavedDatasetMongoDBStorageMany": options = json.loads(storage_proto.custom_storage.configuration) - return SavedDatasetMongoDBStorage( + return SavedDatasetMongoDBStorageMany( database=options["database"], collection=options["collection"], ) @@ -308,7 +308,7 @@ def to_proto(self) -> SavedDatasetStorageProto: return SavedDatasetStorageProto(custom_storage=self.mongodb_options.to_proto()) def to_data_source(self) -> DataSource: - return MongoDBSource( + return MongoDBSourceMany( database=self.mongodb_options._database, collection=self.mongodb_options._collection, ) @@ -319,10 +319,10 @@ def to_data_source(self) -> DataSource: # --------------------------------------------------------------------------- -class MongoDBOfflineStoreIbisConfig(FeastConfigBaseModel): - """Configuration for the MongoDB Ibis-backed offline store.""" +class MongoDBOfflineStoreManyConfig(FeastConfigBaseModel): + """Configuration for the MongoDB offline store (one collection per FeatureView).""" - type: StrictStr = "feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb.MongoDBOfflineStoreIbis" + type: StrictStr = "feast.infra.offline_stores.contrib.mongodb.mongodb_many.MongoDBOfflineStoreMany" """Offline store type selector""" connection_string: StrictStr = "mongodb://localhost:27017" @@ -332,8 +332,12 @@ class MongoDBOfflineStoreIbisConfig(FeastConfigBaseModel): """Default MongoDB database name""" -class MongoDBOfflineStoreIbis(OfflineStore): - """Offline store backed by MongoDB, using Ibis for point-in-time joins.""" +class MongoDBOfflineStoreMany(OfflineStore): + """Offline store backed by MongoDB (one collection per FeatureView). + + Uses Ibis memtables for point-in-time joins. 
Each FeatureView's data is stored + in a separate MongoDB collection, with the collection name matching the source name. + """ @staticmethod def pull_latest_from_table_or_query( @@ -346,9 +350,9 @@ def pull_latest_from_table_or_query( start_date: datetime, end_date: datetime, ) -> RetrievalJob: - if not isinstance(data_source, MongoDBSource): + if not isinstance(data_source, MongoDBSourceMany): raise ValueError( - f"MongoDBOfflineStore expected a MongoDBSource, " + f"MongoDBOfflineStoreMany expected a MongoDBSourceMany, " f"got {type(data_source).__name__!r}." ) warnings.warn( @@ -405,9 +409,9 @@ def pull_all_from_table_or_query( start_date: Optional[datetime] = None, end_date: Optional[datetime] = None, ) -> RetrievalJob: - if not isinstance(data_source, MongoDBSource): + if not isinstance(data_source, MongoDBSourceMany): raise ValueError( - f"MongoDBOfflineStore expected a MongoDBSource, " + f"MongoDBOfflineStoreMany expected a MongoDBSourceMany, " f"got {type(data_source).__name__!r}." ) warnings.warn( @@ -436,9 +440,9 @@ def reader(data_source: DataSource, repo_path: str) -> Table: raise FeastExtrasDependencyImportError( "mongodb", "pymongo is not installed." ) - if not isinstance(data_source, MongoDBSource): + if not isinstance(data_source, MongoDBSourceMany): raise ValueError( - f"MongoDBOfflineStore reader expected a MongoDBSource, " + f"MongoDBOfflineStoreMany reader expected a MongoDBSourceMany, " f"got {type(data_source).__name__!r}." ) connection_string = config.offline_store.connection_string @@ -487,9 +491,9 @@ def writer( raise FeastExtrasDependencyImportError( "mongodb", "pymongo is not installed." ) - if not isinstance(data_source, MongoDBSource): + if not isinstance(data_source, MongoDBSourceMany): raise ValueError( - f"MongoDBOfflineStore writer expected a MongoDBSource, " + f"MongoDBOfflineStoreMany writer expected a MongoDBSourceMany, " f"got {type(data_source).__name__!r}." 
) connection_string = config.offline_store.connection_string diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb/mongodb_one.py similarity index 94% rename from sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py rename to sdk/python/feast/infra/offline_stores/contrib/mongodb/mongodb_one.py index aa0c88f033..293b785c86 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_native.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb/mongodb_one.py @@ -109,7 +109,7 @@ from feast.errors import DataSourceNoNameException, FeastExtrasDependencyImportError from feast.feature_view import FeatureView from feast.infra.key_encoding_utils import serialize_entity_key -from feast.infra.offline_stores.contrib.mongodb_offline_store import DRIVER_METADATA +from feast.infra.offline_stores.contrib.mongodb import DRIVER_METADATA from feast.infra.offline_stores.offline_store import ( OfflineStore, RetrievalJob, @@ -127,10 +127,12 @@ from feast.value_type import ValueType -class MongoDBOfflineStoreNativeConfig(FeastConfigBaseModel): - """Configuration for the Native MongoDB offline store.""" +class MongoDBOfflineStoreOneConfig(FeastConfigBaseModel): + """Configuration for the MongoDB offline store (single shared collection).""" - type: StrictStr = "feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_native.MongoDBOfflineStoreNative" + type: StrictStr = ( + "feast.infra.offline_stores.contrib.mongodb.mongodb_one.MongoDBOfflineStoreOne" + ) """Offline store type selector""" connection_string: StrictStr = "mongodb://localhost:27017" @@ -143,12 +145,12 @@ class MongoDBOfflineStoreNativeConfig(FeastConfigBaseModel): """Single collection name for all feature views""" -class MongoDBSourceNative(DataSource): - """A MongoDB data source for the native offline store. +class MongoDBSourceOne(DataSource): + """A MongoDB data source for the single-collection offline store. - Unlike many data source implementations, this source does not map each - FeatureView to its own table or collection. Instead, all FeatureViews - share a single MongoDB collection (configured at the store level). + Unlike MongoDBSourceMany, this source does not map each FeatureView to + its own collection. Instead, all FeatureViews share a single MongoDB + collection (configured at the store level). Each document in that collection includes a ``feature_view`` field that identifies which FeatureView it belongs to. The ``name`` of this data @@ -183,9 +185,9 @@ def __hash__(self): return super().__hash__() def __eq__(self, other): - if not isinstance(other, MongoDBSourceNative): + if not isinstance(other, MongoDBSourceOne): raise TypeError( - "Comparisons should only involve MongoDBSourceNative class objects." + "Comparisons should only involve MongoDBSourceOne class objects." 
             )
         return (
             super().__eq__(other)
@@ -203,9 +205,9 @@ def source_type(self) -> DataSourceProto.SourceType.ValueType:
         return DataSourceProto.CUSTOM_SOURCE
 
     @staticmethod
-    def from_proto(data_source: DataSourceProto) -> "MongoDBSourceNative":
+    def from_proto(data_source: DataSourceProto) -> "MongoDBSourceOne":
         assert data_source.HasField("custom_options")
-        return MongoDBSourceNative(
+        return MongoDBSourceOne(
             name=data_source.name,
             timestamp_field=data_source.timestamp_field,
             created_timestamp_column=data_source.created_timestamp_column,
@@ -219,7 +221,7 @@ def _to_proto_impl(self) -> DataSourceProto:
         return DataSourceProto(
             name=self.name,
             type=DataSourceProto.CUSTOM_SOURCE,
-            data_source_class_type="feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_native.MongoDBSourceNative",
+            data_source_class_type="feast.infra.offline_stores.contrib.mongodb.mongodb_one.MongoDBSourceOne",
             field_mapping=self.field_mapping,
             custom_options=DataSourceProto.CustomSourceOptions(
                 configuration=json.dumps({"feature_view": self.name}).encode()
@@ -320,7 +322,7 @@ def _fetch_documents(
     return list(client[database][collection].aggregate(pipeline))
 
 
-class MongoDBNativeRetrievalJob(RetrievalJob):
+class MongoDBOneRetrievalJob(RetrievalJob):
     """Retrieval job for native MongoDB offline store queries."""
 
     def __init__(
@@ -384,7 +386,7 @@ def _serialize_entity_key_from_row(
     return serialize_entity_key(entity_key, entity_key_serialization_version)
 
 
-class MongoDBOfflineStoreNative(OfflineStore):
+class MongoDBOfflineStoreOne(OfflineStore):
     """Native MongoDB offline store using single-collection schema.
 
     All feature views share one collection (``feature_history``), with documents
@@ -452,9 +454,9 @@ def pull_latest_from_table_or_query(
         start_date: datetime,
         end_date: datetime,
     ) -> RetrievalJob:
-        if not isinstance(data_source, MongoDBSourceNative):
+        if not isinstance(data_source, MongoDBSourceOne):
             raise ValueError(
-                f"MongoDBOfflineStoreNative expected MongoDBSourceNative, "
+                f"MongoDBOfflineStoreOne expected MongoDBSourceOne, "
                 f"got {type(data_source).__name__!r}."
             )
         warnings.warn(
@@ -499,7 +501,7 @@ def pull_latest_from_table_or_query(
         ]
 
         def _run() -> pyarrow.Table:
-            client = MongoDBOfflineStoreNative._get_client_and_ensure_indexes(config)
+            client = MongoDBOfflineStoreOne._get_client_and_ensure_indexes(config)
             try:
                 docs = _fetch_documents(client, db_name, collection, pipeline)
                 if not docs:
@@ -515,7 +517,7 @@ def _run() -> pyarrow.Table:
             finally:
                 client.close()
 
-        return MongoDBNativeRetrievalJob(query_fn=_run, full_feature_names=False)
+        return MongoDBOneRetrievalJob(query_fn=_run, full_feature_names=False)
 
     @staticmethod
     def pull_all_from_table_or_query(
@@ -528,9 +530,9 @@ def pull_all_from_table_or_query(
         start_date: Optional[datetime] = None,
         end_date: Optional[datetime] = None,
     ) -> RetrievalJob:
-        if not isinstance(data_source, MongoDBSourceNative):
+        if not isinstance(data_source, MongoDBSourceOne):
             raise ValueError(
-                f"MongoDBOfflineStoreNative expected MongoDBSourceNative, "
+                f"MongoDBOfflineStoreOne expected MongoDBSourceOne, "
                 f"got {type(data_source).__name__!r}."
) warnings.warn( @@ -571,7 +573,7 @@ def pull_all_from_table_or_query( ] def _run() -> pyarrow.Table: - client = MongoDBOfflineStoreNative._get_client_and_ensure_indexes(config) + client = MongoDBOfflineStoreOne._get_client_and_ensure_indexes(config) try: docs = _fetch_documents(client, db_name, collection, pipeline) if not docs: @@ -587,7 +589,7 @@ def _run() -> pyarrow.Table: finally: client.close() - return MongoDBNativeRetrievalJob(query_fn=_run, full_feature_names=False) + return MongoDBOneRetrievalJob(query_fn=_run, full_feature_names=False) @staticmethod def get_historical_features( @@ -610,7 +612,7 @@ def get_historical_features( """ if isinstance(entity_df, str): raise ValueError( - "MongoDBOfflineStoreNative does not support SQL entity_df strings. " + "MongoDBOfflineStoreOne does not support SQL entity_df strings. " "Pass a pandas DataFrame instead." ) warnings.warn( @@ -777,7 +779,7 @@ def _run() -> pyarrow.Table: working_df["_row_idx"] = range(len(working_df)) # Create client once for all chunks - client = MongoDBOfflineStoreNative._get_client_and_ensure_indexes(config) + client = MongoDBOfflineStoreOne._get_client_and_ensure_indexes(config) try: coll = client[db_name][feature_collection] @@ -807,7 +809,7 @@ def _run() -> pyarrow.Table: return pyarrow.Table.from_pandas(result_df, preserve_index=False) - return MongoDBNativeRetrievalJob( + return MongoDBOneRetrievalJob( query_fn=_run, full_feature_names=full_feature_names, ) diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/benchmark_mongodb_offline_stores.py b/sdk/python/tests/unit/infra/offline_stores/contrib/benchmark_mongodb_offline_stores.py index 27c6a6a35a..3b663b150c 100644 --- a/sdk/python/tests/unit/infra/offline_stores/contrib/benchmark_mongodb_offline_stores.py +++ b/sdk/python/tests/unit/infra/offline_stores/contrib/benchmark_mongodb_offline_stores.py @@ -34,15 +34,15 @@ from feast import Entity, FeatureView, Field from feast.infra.key_encoding_utils import serialize_entity_key -from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb import ( - MongoDBOfflineStoreIbis, - MongoDBOfflineStoreIbisConfig, - MongoDBSource, +from feast.infra.offline_stores.contrib.mongodb.mongodb_many import ( + MongoDBOfflineStoreMany, + MongoDBOfflineStoreManyConfig, + MongoDBSourceMany, ) -from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_native import ( - MongoDBOfflineStoreNative, - MongoDBOfflineStoreNativeConfig, - MongoDBSourceNative, +from feast.infra.offline_stores.contrib.mongodb.mongodb_one import ( + MongoDBOfflineStoreOne, + MongoDBOfflineStoreOneConfig, + MongoDBSourceOne, ) from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto @@ -149,7 +149,7 @@ def ibis_config(mongodb_connection_string: str) -> RepoConfig: project="benchmark", registry="memory://", provider="local", - offline_store=MongoDBOfflineStoreIbisConfig( + offline_store=MongoDBOfflineStoreManyConfig( connection_string=mongodb_connection_string, database="benchmark_db", ), @@ -165,7 +165,7 @@ def native_config(mongodb_connection_string: str) -> RepoConfig: project="benchmark", registry="memory://", provider="local", - offline_store=MongoDBOfflineStoreNativeConfig( + offline_store=MongoDBOfflineStoreOneConfig( connection_string=mongodb_connection_string, database="benchmark_db", collection="feature_history", @@ -241,7 +241,7 @@ def _generate_native_data( def _create_ibis_fv(num_features: int) -> tuple: """Create Ibis source and 
FeatureView.""" - source = MongoDBSource( + source = MongoDBSourceMany( name="driver_benchmark", database="benchmark_db", collection="driver_benchmark", @@ -267,7 +267,7 @@ def _create_ibis_fv(num_features: int) -> tuple: def _create_native_fv(num_features: int) -> tuple: """Create Native source and FeatureView.""" - source = MongoDBSourceNative( + source = MongoDBSourceOne( name="driver_benchmark", timestamp_field="event_timestamp", ) @@ -406,7 +406,7 @@ def test_scale_rows_ibis( feature_refs = [f"driver_benchmark:feature_{i}" for i in range(num_features)] def run_query(): - job = MongoDBOfflineStoreIbis.get_historical_features( + job = MongoDBOfflineStoreMany.get_historical_features( config=ibis_config, feature_views=[fv], feature_refs=feature_refs, @@ -461,7 +461,7 @@ def test_scale_rows_native( feature_refs = [f"driver_benchmark:feature_{i}" for i in range(num_features)] def run_query(): - job = MongoDBOfflineStoreNative.get_historical_features( + job = MongoDBOfflineStoreOne.get_historical_features( config=native_config, feature_views=[fv], feature_refs=feature_refs, @@ -517,7 +517,7 @@ def test_wide_features_ibis( feature_refs = [f"driver_benchmark:feature_{i}" for i in range(num_features)] def run_query(): - job = MongoDBOfflineStoreIbis.get_historical_features( + job = MongoDBOfflineStoreMany.get_historical_features( config=ibis_config, feature_views=[fv], feature_refs=feature_refs, @@ -570,7 +570,7 @@ def test_wide_features_native( feature_refs = [f"driver_benchmark:feature_{i}" for i in range(num_features)] def run_query(): - job = MongoDBOfflineStoreNative.get_historical_features( + job = MongoDBOfflineStoreOne.get_historical_features( config=native_config, feature_views=[fv], feature_refs=feature_refs, @@ -637,7 +637,7 @@ def test_entity_skew_ibis( feature_refs = [f"driver_benchmark:feature_{i}" for i in range(num_features)] def run_query(): - job = MongoDBOfflineStoreIbis.get_historical_features( + job = MongoDBOfflineStoreMany.get_historical_features( config=ibis_config, feature_views=[fv], feature_refs=feature_refs, @@ -703,7 +703,7 @@ def test_entity_skew_native( feature_refs = [f"driver_benchmark:feature_{i}" for i in range(num_features)] def run_query(): - job = MongoDBOfflineStoreNative.get_historical_features( + job = MongoDBOfflineStoreOne.get_historical_features( config=native_config, feature_views=[fv], feature_refs=feature_refs, @@ -776,7 +776,7 @@ def test_summary_comparison( _, ibis_fv = _create_ibis_fv(num_features) def run_ibis(): - job = MongoDBOfflineStoreIbis.get_historical_features( + job = MongoDBOfflineStoreMany.get_historical_features( config=ibis_config, feature_views=[ibis_fv], feature_refs=feature_refs, @@ -793,7 +793,7 @@ def run_ibis(): _, native_fv = _create_native_fv(num_features) def run_native(): - job = MongoDBOfflineStoreNative.get_historical_features( + job = MongoDBOfflineStoreOne.get_historical_features( config=native_config, feature_views=[native_fv], feature_refs=feature_refs, diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py b/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py index 3acd93c288..1c9882900d 100644 --- a/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py +++ b/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py @@ -19,10 +19,10 @@ from testcontainers.mongodb import MongoDbContainer from feast import Entity, FeatureView, Field -from 
feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb import ( - MongoDBOfflineStoreIbis, - MongoDBOfflineStoreIbisConfig, - MongoDBSource, +from feast.infra.offline_stores.contrib.mongodb.mongodb_many import ( + MongoDBOfflineStoreMany, + MongoDBOfflineStoreManyConfig, + MongoDBSourceMany, ) from feast.repo_config import RepoConfig from feast.types import Float64, Int64, String @@ -75,7 +75,7 @@ def repo_config(mongodb_connection_string: str) -> RepoConfig: project="test_project", registry="memory://", provider="local", - offline_store=MongoDBOfflineStoreIbisConfig( + offline_store=MongoDBOfflineStoreManyConfig( connection_string=mongodb_connection_string, database="feast_test", ), @@ -129,9 +129,9 @@ def sample_data(mongodb_connection_string: str) -> datetime: @pytest.fixture -def driver_source() -> MongoDBSource: - """Create a MongoDBSource for driver stats.""" - return MongoDBSource( +def driver_source() -> MongoDBSourceMany: + """Create a MongoDBSourceMany for driver stats.""" + return MongoDBSourceMany( name="driver_stats", database="feast_test", collection="driver_stats", @@ -140,7 +140,7 @@ def driver_source() -> MongoDBSource: @pytest.fixture -def driver_fv(driver_source: MongoDBSource) -> FeatureView: +def driver_fv(driver_source: MongoDBSourceMany) -> FeatureView: """Create a FeatureView for driver stats. The ttl (time-to-live) parameter defines how far back in time Feast will look @@ -170,7 +170,7 @@ def driver_fv(driver_source: MongoDBSource) -> FeatureView: @_requires_docker def test_pull_latest_from_table_or_query( - repo_config: RepoConfig, sample_data: datetime, driver_source: MongoDBSource + repo_config: RepoConfig, sample_data: datetime, driver_source: MongoDBSourceMany ) -> None: """Test pulling latest features per entity from MongoDB. @@ -180,7 +180,7 @@ def test_pull_latest_from_table_or_query( is from 2 hours ago. """ now = sample_data - job = MongoDBOfflineStoreIbis.pull_latest_from_table_or_query( + job = MongoDBOfflineStoreMany.pull_latest_from_table_or_query( config=repo_config, data_source=driver_source, join_key_columns=["driver_id"], @@ -256,7 +256,7 @@ def test_get_historical_features_pit_join( } ) - job = MongoDBOfflineStoreIbis.get_historical_features( + job = MongoDBOfflineStoreMany.get_historical_features( config=repo_config, feature_views=[driver_fv], feature_refs=["driver_stats:conv_rate", "driver_stats:acc_rate"], @@ -287,11 +287,11 @@ def test_get_historical_features_pit_join( @_requires_docker def test_pull_all_from_table_or_query( - repo_config: RepoConfig, sample_data: datetime, driver_source: MongoDBSource + repo_config: RepoConfig, sample_data: datetime, driver_source: MongoDBSourceMany ) -> None: """Test pulling all features within a time range (no deduplication).""" now = sample_data - job = MongoDBOfflineStoreIbis.pull_all_from_table_or_query( + job = MongoDBOfflineStoreMany.pull_all_from_table_or_query( config=repo_config, data_source=driver_source, join_key_columns=["driver_id"], @@ -314,7 +314,7 @@ def test_pull_all_from_table_or_query( def test_ttl_excludes_stale_features( repo_config: RepoConfig, mongodb_connection_string: str, - driver_source: MongoDBSource, + driver_source: MongoDBSourceMany, ) -> None: """Test that TTL causes stale feature values to be returned as NULL. 
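The TTL test above is easier to reason about with the window predicate made explicit. A tiny pandas illustration (values are made up; the test itself uses a 1-day TTL):

```python
from datetime import datetime, timedelta, timezone

import pandas as pd

ttl = timedelta(days=1)
entity_ts = datetime(2024, 1, 15, 12, 0, tzinfo=timezone.utc)

features = pd.DataFrame(
    {
        "driver_id": [1001, 1001],
        "event_timestamp": [
            entity_ts - timedelta(hours=2),  # fresh: inside the TTL window
            entity_ts - timedelta(days=3),   # stale: older than entity_ts - ttl
        ],
        "conv_rate": [0.95, 0.50],
    }
)

# A feature row is joinable iff entity_ts - ttl <= feature_ts <= entity_ts.
eligible = features[
    (features["event_timestamp"] <= entity_ts)
    & (features["event_timestamp"] >= entity_ts - ttl)
]
assert len(eligible) == 1  # only the fresh row; otherwise the feature is NULL
```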
@@ -339,7 +339,7 @@ def test_ttl_excludes_stale_features( client.close() # Create source and feature view with 1-day TTL - ttl_source = MongoDBSource( + ttl_source = MongoDBSourceMany( name="driver_stats_ttl_test", database="feast_test", collection="driver_stats_ttl_test", @@ -367,7 +367,7 @@ def test_ttl_excludes_stale_features( } ) - job = MongoDBOfflineStoreIbis.get_historical_features( + job = MongoDBOfflineStoreMany.get_historical_features( config=repo_config, feature_views=[ttl_fv], feature_refs=["driver_stats_ttl_test:conv_rate"], @@ -429,13 +429,13 @@ def test_multiple_feature_views( client.close() # Create sources for each collection - driver_source = MongoDBSource( + driver_source = MongoDBSourceMany( name="driver_stats_multi", database="feast_test", collection="driver_stats_multi", timestamp_field="event_timestamp", ) - vehicle_source = MongoDBSource( + vehicle_source = MongoDBSourceMany( name="vehicle_stats_multi", database="feast_test", collection="vehicle_stats_multi", @@ -481,7 +481,7 @@ def test_multiple_feature_views( ) # Request features from BOTH feature views - job = MongoDBOfflineStoreIbis.get_historical_features( + job = MongoDBOfflineStoreMany.get_historical_features( config=repo_config, feature_views=[driver_fv, vehicle_fv], feature_refs=[ @@ -566,7 +566,7 @@ def test_compound_join_keys( client.close() # Create source - source = MongoDBSource( + source = MongoDBSourceMany( name="user_device_features", database="feast_test", collection="user_device_features", @@ -594,7 +594,7 @@ def test_compound_join_keys( ) # Test pull_latest: should get one row per unique (user_id, device_id) combination - job = MongoDBOfflineStoreIbis.pull_latest_from_table_or_query( + job = MongoDBOfflineStoreMany.pull_latest_from_table_or_query( config=repo_config, data_source=source, join_key_columns=["user_id", "device_id"], @@ -622,7 +622,7 @@ def test_compound_join_keys( } ) - job = MongoDBOfflineStoreIbis.get_historical_features( + job = MongoDBOfflineStoreMany.get_historical_features( config=repo_config, feature_views=[fv], feature_refs=["user_device_features:app_opens"], diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval_native.py b/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval_native.py index 5c02299254..f18d2d15af 100644 --- a/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval_native.py +++ b/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval_native.py @@ -32,10 +32,10 @@ from feast import Entity, FeatureView, Field from feast.infra.key_encoding_utils import serialize_entity_key -from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_native import ( - MongoDBOfflineStoreNative, - MongoDBOfflineStoreNativeConfig, - MongoDBSourceNative, +from feast.infra.offline_stores.contrib.mongodb.mongodb_one import ( + MongoDBOfflineStoreOne, + MongoDBOfflineStoreOneConfig, + MongoDBSourceOne, ) from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto @@ -109,7 +109,7 @@ def repo_config(mongodb_connection_string: str) -> RepoConfig: project="test_project", registry="memory://", provider="local", - offline_store=MongoDBOfflineStoreNativeConfig( + offline_store=MongoDBOfflineStoreOneConfig( connection_string=mongodb_connection_string, database="feast_test", collection="feature_history", @@ -170,9 +170,9 @@ def sample_data(mongodb_connection_string: str) -> 
datetime: @pytest.fixture -def driver_source() -> MongoDBSourceNative: - """Create a MongoDBSourceNative for driver stats.""" - return MongoDBSourceNative( +def driver_source() -> MongoDBSourceOne: + """Create a MongoDBSourceOne for driver stats.""" + return MongoDBSourceOne( name="driver_stats", timestamp_field="event_timestamp", created_timestamp_column="created_at", @@ -180,7 +180,7 @@ def driver_source() -> MongoDBSourceNative: @pytest.fixture -def driver_fv(driver_source: MongoDBSourceNative) -> FeatureView: +def driver_fv(driver_source: MongoDBSourceOne) -> FeatureView: """Create a FeatureView for driver stats.""" driver_entity = Entity( name="driver_id", join_keys=["driver_id"], value_type=ValueType.INT64 @@ -200,11 +200,11 @@ def driver_fv(driver_source: MongoDBSourceNative) -> FeatureView: @_requires_docker def test_pull_latest_from_table_or_query( - repo_config: RepoConfig, sample_data: datetime, driver_source: MongoDBSourceNative + repo_config: RepoConfig, sample_data: datetime, driver_source: MongoDBSourceOne ) -> None: """Test pulling latest features per entity from the single collection.""" now = sample_data - job = MongoDBOfflineStoreNative.pull_latest_from_table_or_query( + job = MongoDBOfflineStoreOne.pull_latest_from_table_or_query( config=repo_config, data_source=driver_source, join_key_columns=["driver_id"], @@ -246,7 +246,7 @@ def test_get_historical_features_pit_join( } ) - job = MongoDBOfflineStoreNative.get_historical_features( + job = MongoDBOfflineStoreOne.get_historical_features( config=repo_config, feature_views=[driver_fv], feature_refs=["driver_stats:conv_rate", "driver_stats:acc_rate"], @@ -277,11 +277,11 @@ def test_get_historical_features_pit_join( @_requires_docker def test_pull_all_from_table_or_query( - repo_config: RepoConfig, sample_data: datetime, driver_source: MongoDBSourceNative + repo_config: RepoConfig, sample_data: datetime, driver_source: MongoDBSourceOne ) -> None: """Test pulling all features within a time range (no deduplication).""" now = sample_data - job = MongoDBOfflineStoreNative.pull_all_from_table_or_query( + job = MongoDBOfflineStoreOne.pull_all_from_table_or_query( config=repo_config, data_source=driver_source, join_key_columns=["driver_id"], @@ -330,7 +330,7 @@ def test_ttl_excludes_stale_features( collection.insert_many(ttl_docs) client.close() - ttl_source = MongoDBSourceNative( + ttl_source = MongoDBSourceOne( name="driver_stats_ttl", timestamp_field="event_timestamp", ) @@ -355,7 +355,7 @@ def test_ttl_excludes_stale_features( } ) - job = MongoDBOfflineStoreNative.get_historical_features( + job = MongoDBOfflineStoreOne.get_historical_features( config=repo_config, feature_views=[ttl_fv], feature_refs=["driver_stats_ttl:conv_rate"], @@ -422,8 +422,8 @@ def test_multiple_feature_views( client.close() # Create sources and feature views - driver_source = MongoDBSourceNative(name="driver_stats_multi") - vehicle_source = MongoDBSourceNative(name="vehicle_stats_multi") + driver_source = MongoDBSourceOne(name="driver_stats_multi") + vehicle_source = MongoDBSourceOne(name="vehicle_stats_multi") driver_entity = Entity( name="driver_id", join_keys=["driver_id"], value_type=ValueType.INT64 @@ -459,7 +459,7 @@ def test_multiple_feature_views( } ) - job = MongoDBOfflineStoreNative.get_historical_features( + job = MongoDBOfflineStoreOne.get_historical_features( config=repo_config, feature_views=[driver_fv, vehicle_fv], feature_refs=[ @@ -534,7 +534,7 @@ def test_compound_join_keys( collection.insert_many(compound_docs) client.close() - source 
= MongoDBSourceNative(name="user_device_features") + source = MongoDBSourceOne(name="user_device_features") user_entity = Entity( name="user_id", join_keys=["user_id"], value_type=ValueType.INT64 @@ -556,7 +556,7 @@ def test_compound_join_keys( ) # Test pull_latest: should get one row per unique (user_id, device_id) - job = MongoDBOfflineStoreNative.pull_latest_from_table_or_query( + job = MongoDBOfflineStoreOne.pull_latest_from_table_or_query( config=repo_config, data_source=source, join_key_columns=["user_id", "device_id"], @@ -585,7 +585,7 @@ def test_compound_join_keys( } ) - job = MongoDBOfflineStoreNative.get_historical_features( + job = MongoDBOfflineStoreOne.get_historical_features( config=repo_config, feature_views=[fv], feature_refs=["user_device_features:app_opens"], From 2c2549474fa7ba5fba781946d92c4809024e3e82 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Fri, 20 Mar 2026 14:14:49 -0400 Subject: [PATCH 20/30] Add README.md documenting MongoDB offline store implementations Signed-off-by: Casey Clements --- .../offline_stores/contrib/mongodb/README.md | 143 ++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 sdk/python/feast/infra/offline_stores/contrib/mongodb/README.md diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb/README.md b/sdk/python/feast/infra/offline_stores/contrib/mongodb/README.md new file mode 100644 index 0000000000..44983940ff --- /dev/null +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb/README.md @@ -0,0 +1,143 @@ +# MongoDB Offline Store + +Two MongoDB offline store implementations optimized for different use cases. + +## Overview + +| Aspect | `MongoDBOfflineStoreMany` | `MongoDBOfflineStoreOne` | +|--------|---------------------------|--------------------------| +| Collections | One per FeatureView | Single shared collection | +| Schema | Flat documents | Nested `features` subdoc | +| Entity ID | Separate columns | Serialized bytes | +| Best for | Small-medium feature stores | Large feature stores | + +## MongoDBOfflineStoreMany (mongodb_many.py) + +**One collection per FeatureView** — each FeatureView maps to its own MongoDB collection. + +### Schema + +```javascript +// Collection: driver_stats +{ + "driver_id": 1001, + "event_timestamp": ISODate("2024-01-15T10:00:00Z"), + "trips_today": 5, + "rating": 4.8 +} +``` + +### Configuration + +```yaml +offline_store: + type: feast.infra.offline_stores.contrib.mongodb.mongodb_many.MongoDBOfflineStoreMany + connection_string: mongodb://localhost:27017 + database: feast +``` + +### When to Use + +✅ **Small to medium feature stores** — loads entire collection into memory +✅ **Fast PIT joins** — Ibis memtables are highly optimized +✅ **Simple schema** — flat documents, easy to query directly +✅ **Per-collection indexes** — each FV can have tailored indexes + +⚠️ **Caution**: Loads ALL documents from each collection. May OOM on very large collections. + +## MongoDBOfflineStoreOne (mongodb_one.py) + +**Single shared collection** — all FeatureViews store data in one collection with a discriminator field. 
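+The `entity_id` bytes are produced by serializing a Feast entity key. A
+minimal sketch using the same helpers the tests import (the serialization
+version shown is illustrative and may differ in your deployment):
+
+```python
+from feast.infra.key_encoding_utils import serialize_entity_key
+from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto
+from feast.protos.feast.types.Value_pb2 import Value as ValueProto
+
+entity_key = EntityKeyProto(
+    join_keys=["driver_id"],
+    entity_values=[ValueProto(int64_val=1001)],
+)
+# These bytes go into the `entity_id` field of feature_history documents.
+entity_id = serialize_entity_key(entity_key, entity_key_serialization_version=2)
+```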
+
+### Schema
+
+```javascript
+// Collection: feature_history (shared by all FVs)
+{
+  "entity_id": Binary("..."),          // Serialized entity key
+  "feature_view": "driver_stats",      // Discriminator
+  "features": {                        // Nested subdocument
+    "trips_today": 5,
+    "rating": 4.8
+  },
+  "event_timestamp": ISODate("2024-01-15T10:00:00Z"),
+  "created_at": ISODate("2024-01-15T10:00:01Z")
+}
+```
+
+### Configuration
+
+```yaml
+offline_store:
+  type: feast.infra.offline_stores.contrib.mongodb.mongodb_one.MongoDBOfflineStoreOne
+  connection_string: mongodb://localhost:27017
+  database: feast
+  collection: feature_history
+```
+
+### When to Use
+
+✅ **Large feature stores** — filters by entity_id, doesn't load entire collection
+✅ **Memory-safe** — processes in chunks, bounded memory usage
+✅ **Schema consistency** — matches online store pattern
+✅ **Efficient materialization** — MQL aggregation pipeline
+
+⚠️ **Trade-off**: Slightly slower than Many for small workloads due to serialization overhead.
+
+## Performance Comparison
+
+Benchmarks with 10 features, 3 historical rows per entity:
+
+| Entity Rows | Many (time) | One (time) | Winner |
+|-------------|-------------|------------|--------|
+| 1,000       | 0.30s       | 0.06s      | One    |
+| 10,000      | 0.20s       | 0.31s      | Many   |
+| 100,000     | 1.51s       | 5.22s      | Many   |
+| 1,000,000   | 16.08s      | 212s       | Many   |
+
+### Memory Behavior
+
+| Scenario | Many | One |
+|----------|------|-----|
+| Large feature collection, small entity_df | ❌ Loads all | ✅ Filters |
+| Small feature collection, large entity_df | ✅ Fast | ⚠️ Slower |
+
+## Choosing an Implementation
+
+```
+        ┌─────────────────────────────┐
+        │ Is your feature collection  │
+        │ larger than available RAM?  │
+        └─────────────────────────────┘
+                      │
+           ┌──────────┴──────────┐
+           ▼                     ▼
+          YES                   NO
+           │                     │
+           ▼                     ▼
+   ┌───────────────┐     ┌───────────────┐
+   │   Use ONE     │     │   Use MANY    │
+   │ (memory-safe) │     │   (faster)    │
+   └───────────────┘     └───────────────┘
+```
+
+## Index Recommendations
+
+### Many (per-collection)
+
+```javascript
+db.driver_stats.createIndex({ "driver_id": 1, "event_timestamp": -1 })
+```
+
+### One (shared collection)
+
+```javascript
+db.feature_history.createIndex({
+  "entity_id": 1,
+  "feature_view": 1,
+  "event_timestamp": -1
+})
+```
+
+The One implementation creates this index automatically on first use.
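+### How the chunked join stays memory-safe
+
+A sketch of the chunked `_run()` loop described above (helper names such as
+`process_chunk` are illustrative, not the actual implementation):
+
+```python
+import pandas as pd
+import pyarrow
+
+CHUNK_SIZE = 10_000  # illustrative bound on entity rows processed at once
+
+def run_chunked(entity_df: pd.DataFrame, process_chunk) -> pyarrow.Table:
+    working_df = entity_df.copy()
+    working_df["_row_idx"] = range(len(working_df))  # to restore input order
+
+    parts = []
+    for start in range(0, len(working_df), CHUNK_SIZE):
+        chunk = working_df.iloc[start : start + CHUNK_SIZE]
+        # The real store fetches only this chunk's entity keys from MongoDB
+        # and merges them with a vectorized, point-in-time-correct join.
+        parts.append(process_chunk(chunk))
+
+    result_df = (
+        pd.concat(parts, ignore_index=True)
+        .sort_values("_row_idx")
+        .drop(columns=["_row_idx"])
+    )
+    # Small inputs take a single pass, so behavior is unchanged for them.
+    return pyarrow.Table.from_pandas(result_df, preserve_index=False)
+```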
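+### Quick start (Many)
+
+A usage sketch mirroring the test fixtures in this series; it assumes a local
+`mongod` with a populated `driver_stats` collection:
+
+```python
+from datetime import datetime, timedelta, timezone
+
+from feast.infra.offline_stores.contrib.mongodb.mongodb_many import (
+    MongoDBOfflineStoreMany,
+    MongoDBOfflineStoreManyConfig,
+    MongoDBSourceMany,
+)
+from feast.repo_config import RepoConfig
+
+config = RepoConfig(
+    project="demo",
+    registry="memory://",
+    provider="local",
+    offline_store=MongoDBOfflineStoreManyConfig(
+        connection_string="mongodb://localhost:27017",
+        database="feast",
+    ),
+)
+source = MongoDBSourceMany(
+    name="driver_stats",
+    database="feast",
+    collection="driver_stats",
+    timestamp_field="event_timestamp",
+)
+
+now = datetime.now(timezone.utc)
+# Latest value per driver within the last day (deduplicated by timestamp).
+job = MongoDBOfflineStoreMany.pull_latest_from_table_or_query(
+    config=config,
+    data_source=source,
+    join_key_columns=["driver_id"],
+    feature_name_columns=["conv_rate"],
+    timestamp_field="event_timestamp",
+    created_timestamp_column=None,
+    start_date=now - timedelta(days=1),
+    end_date=now,
+)
+df = job.to_df()
+```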
+ From b50e22fc47230dfbb370d45af2e46796dce2e87f Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Fri, 20 Mar 2026 14:25:44 -0400 Subject: [PATCH 21/30] Rename mongodb/ to mongodb_offline_store/, organize tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename module: mongodb/ → mongodb_offline_store/ (follows naming convention) - Move tests to mongodb_offline_store/ subdirectory: - test_mongodb_offline_retrieval.py → mongodb_offline_store/test_many.py - test_mongodb_offline_retrieval_native.py → mongodb_offline_store/test_one.py - benchmark_mongodb_offline_stores.py → mongodb_offline_store/benchmark.py - Update all imports to use mongodb_offline_store path Signed-off-by: Casey Clements --- .../contrib/{mongodb => mongodb_offline_store}/README.md | 4 ++-- .../contrib/{mongodb => mongodb_offline_store}/__init__.py | 0 .../{mongodb => mongodb_offline_store}/mongodb_many.py | 6 +++--- .../{mongodb => mongodb_offline_store}/mongodb_one.py | 6 ++---- .../contrib/mongodb_offline_store/__init__.py | 0 .../benchmark.py} | 4 ++-- .../test_many.py} | 2 +- .../test_one.py} | 2 +- 8 files changed, 11 insertions(+), 13 deletions(-) rename sdk/python/feast/infra/offline_stores/contrib/{mongodb => mongodb_offline_store}/README.md (95%) rename sdk/python/feast/infra/offline_stores/contrib/{mongodb => mongodb_offline_store}/__init__.py (100%) rename sdk/python/feast/infra/offline_stores/contrib/{mongodb => mongodb_offline_store}/mongodb_many.py (98%) rename sdk/python/feast/infra/offline_stores/contrib/{mongodb => mongodb_offline_store}/mongodb_one.py (99%) create mode 100644 sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/__init__.py rename sdk/python/tests/unit/infra/offline_stores/contrib/{benchmark_mongodb_offline_stores.py => mongodb_offline_store/benchmark.py} (99%) rename sdk/python/tests/unit/infra/offline_stores/contrib/{test_mongodb_offline_retrieval.py => mongodb_offline_store/test_many.py} (99%) rename sdk/python/tests/unit/infra/offline_stores/contrib/{test_mongodb_offline_retrieval_native.py => mongodb_offline_store/test_one.py} (99%) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb/README.md b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/README.md similarity index 95% rename from sdk/python/feast/infra/offline_stores/contrib/mongodb/README.md rename to sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/README.md index 44983940ff..23afc6f2f5 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb/README.md +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/README.md @@ -31,7 +31,7 @@ Two MongoDB offline store implementations optimized for different use cases. 
```yaml offline_store: - type: feast.infra.offline_stores.contrib.mongodb.mongodb_many.MongoDBOfflineStoreMany + type: feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_many.MongoDBOfflineStoreMany connection_string: mongodb://localhost:27017 database: feast ``` @@ -69,7 +69,7 @@ offline_store: ```yaml offline_store: - type: feast.infra.offline_stores.contrib.mongodb.mongodb_one.MongoDBOfflineStoreOne + type: feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_one.MongoDBOfflineStoreOne connection_string: mongodb://localhost:27017 database: feast collection: feature_history diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb/__init__.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/__init__.py similarity index 100% rename from sdk/python/feast/infra/offline_stores/contrib/mongodb/__init__.py rename to sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/__init__.py diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb/mongodb_many.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_many.py similarity index 98% rename from sdk/python/feast/infra/offline_stores/contrib/mongodb/mongodb_many.py rename to sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_many.py index 7dac38af02..5c5bf0b0ba 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb/mongodb_many.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_many.py @@ -34,7 +34,7 @@ SavedDatasetLocationAlreadyExists, ) from feast.feature_view import FeatureView -from feast.infra.offline_stores.contrib.mongodb import DRIVER_METADATA +from feast.infra.offline_stores.contrib.mongodb_offline_store import DRIVER_METADATA from feast.infra.offline_stores.ibis import ( get_historical_features_ibis, pull_all_from_table_or_query_ibis, @@ -214,7 +214,7 @@ def _to_proto_impl(self) -> DataSourceProto: data_source_proto = DataSourceProto( name=self.name, type=DataSourceProto.CUSTOM_SOURCE, - data_source_class_type="feast.infra.offline_stores.contrib.mongodb.mongodb_many.MongoDBSourceMany", + data_source_class_type="feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_many.MongoDBSourceMany", field_mapping=self.field_mapping, custom_options=self._mongodb_options.to_proto(), description=self.description, @@ -322,7 +322,7 @@ def to_data_source(self) -> DataSource: class MongoDBOfflineStoreManyConfig(FeastConfigBaseModel): """Configuration for the MongoDB offline store (one collection per FeatureView).""" - type: StrictStr = "feast.infra.offline_stores.contrib.mongodb.mongodb_many.MongoDBOfflineStoreMany" + type: StrictStr = "feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_many.MongoDBOfflineStoreMany" """Offline store type selector""" connection_string: StrictStr = "mongodb://localhost:27017" diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb/mongodb_one.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_one.py similarity index 99% rename from sdk/python/feast/infra/offline_stores/contrib/mongodb/mongodb_one.py rename to sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_one.py index 293b785c86..f40f30df83 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb/mongodb_one.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_one.py @@ -109,7 +109,7 @@ from feast.errors import DataSourceNoNameException, 
FeastExtrasDependencyImportError from feast.feature_view import FeatureView from feast.infra.key_encoding_utils import serialize_entity_key -from feast.infra.offline_stores.contrib.mongodb import DRIVER_METADATA +from feast.infra.offline_stores.contrib.mongodb_offline_store import DRIVER_METADATA from feast.infra.offline_stores.offline_store import ( OfflineStore, RetrievalJob, @@ -130,9 +130,7 @@ class MongoDBOfflineStoreOneConfig(FeastConfigBaseModel): """Configuration for the MongoDB offline store (single shared collection).""" - type: StrictStr = ( - "feast.infra.offline_stores.contrib.mongodb.mongodb_one.MongoDBOfflineStoreOne" - ) + type: StrictStr = "feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_one.MongoDBOfflineStoreOne" """Offline store type selector""" connection_string: StrictStr = "mongodb://localhost:27017" diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/__init__.py b/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/benchmark_mongodb_offline_stores.py b/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/benchmark.py similarity index 99% rename from sdk/python/tests/unit/infra/offline_stores/contrib/benchmark_mongodb_offline_stores.py rename to sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/benchmark.py index 3b663b150c..f0d03e32e4 100644 --- a/sdk/python/tests/unit/infra/offline_stores/contrib/benchmark_mongodb_offline_stores.py +++ b/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/benchmark.py @@ -34,12 +34,12 @@ from feast import Entity, FeatureView, Field from feast.infra.key_encoding_utils import serialize_entity_key -from feast.infra.offline_stores.contrib.mongodb.mongodb_many import ( +from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_many import ( MongoDBOfflineStoreMany, MongoDBOfflineStoreManyConfig, MongoDBSourceMany, ) -from feast.infra.offline_stores.contrib.mongodb.mongodb_one import ( +from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_one import ( MongoDBOfflineStoreOne, MongoDBOfflineStoreOneConfig, MongoDBSourceOne, diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py b/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/test_many.py similarity index 99% rename from sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py rename to sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/test_many.py index 1c9882900d..cbd43ea8d1 100644 --- a/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval.py +++ b/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/test_many.py @@ -19,7 +19,7 @@ from testcontainers.mongodb import MongoDbContainer from feast import Entity, FeatureView, Field -from feast.infra.offline_stores.contrib.mongodb.mongodb_many import ( +from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_many import ( MongoDBOfflineStoreMany, MongoDBOfflineStoreManyConfig, MongoDBSourceMany, diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval_native.py b/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/test_one.py similarity index 99% rename from 
sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval_native.py rename to sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/test_one.py index f18d2d15af..689fef915e 100644 --- a/sdk/python/tests/unit/infra/offline_stores/contrib/test_mongodb_offline_retrieval_native.py +++ b/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/test_one.py @@ -32,7 +32,7 @@ from feast import Entity, FeatureView, Field from feast.infra.key_encoding_utils import serialize_entity_key -from feast.infra.offline_stores.contrib.mongodb.mongodb_one import ( +from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_one import ( MongoDBOfflineStoreOne, MongoDBOfflineStoreOneConfig, MongoDBSourceOne, From bae2648a7583af52128a19866e71fd31e92b3a72 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Fri, 20 Mar 2026 14:30:40 -0400 Subject: [PATCH 22/30] Update docstring in benchmark.py Signed-off-by: Casey Clements --- .../offline_stores/contrib/mongodb_offline_store/benchmark.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/benchmark.py b/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/benchmark.py index f0d03e32e4..49d8b8aeb6 100644 --- a/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/benchmark.py +++ b/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/benchmark.py @@ -1,5 +1,7 @@ """ -Performance benchmarks comparing Ibis vs Native MongoDB offline store implementations. +Performance benchmarks comparing the two MongoDB offline store implementations - +one Collection with all feature views +vs. a schema of N collections for N features views. These tests measure performance across different scaling dimensions: 1. Row count scaling (entity_df size) From e4c79bf8cda64078e9d43f1b86469a65ed33d89c Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Fri, 20 Mar 2026 14:38:59 -0400 Subject: [PATCH 23/30] Update README to show created_at tie-breaker in Many schema Signed-off-by: Casey Clements --- .../offline_stores/contrib/mongodb_offline_store/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/README.md b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/README.md index 23afc6f2f5..30ea64af2a 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/README.md +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/README.md @@ -22,11 +22,14 @@ Two MongoDB offline store implementations optimized for different use cases. { "driver_id": 1001, "event_timestamp": ISODate("2024-01-15T10:00:00Z"), + "created_at": ISODate("2024-01-15T10:00:01Z"), // Optional tie-breaker "trips_today": 5, "rating": 4.8 } ``` +Ties (same `event_timestamp`) are broken by `created_timestamp_column` if configured. 
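A toy pandas rendering of the tie-breaking rule just documented (illustrative only, not the store's actual code path):

```python
# Toy illustration of latest-row selection with the created_at tie-breaker.
import pandas as pd

df = pd.DataFrame(
    {
        "driver_id": [1001, 1001],
        "event_timestamp": pd.to_datetime(["2024-01-15T10:00:00Z"] * 2),
        "created_at": pd.to_datetime(
            ["2024-01-15T10:00:01Z", "2024-01-15T10:00:02Z"]
        ),
        "trips_today": [5, 6],
    }
)

# Equal event_timestamp values: created_at decides, so trips_today == 6 wins.
latest = df.sort_values(
    ["event_timestamp", "created_at"], ascending=False
).drop_duplicates(subset=["driver_id"], keep="first")
```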
+ ### Configuration ```yaml From 548698b590a3f49a374b501e575f641760822424 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Fri, 20 Mar 2026 14:43:44 -0400 Subject: [PATCH 24/30] Update README index recommendations for Many implementation - Clarify that indexes should be on join keys + timestamp - Show example for compound join keys - Note that Many does not auto-create indexes Signed-off-by: Casey Clements --- .../contrib/mongodb_offline_store/README.md | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/README.md b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/README.md index 30ea64af2a..561eb70ed7 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/README.md +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/README.md @@ -128,16 +128,31 @@ Benchmarks with 10 features, 3 historical rows per entity: ### Many (per-collection) +Each collection should have an index on the join keys + timestamp: + ```javascript -db.driver_stats.createIndex({ "driver_id": 1, "event_timestamp": -1 }) +// For a FeatureView with join key "driver_id" +db.driver_stats.createIndex({ + "driver_id": 1, // Join key(s) + "event_timestamp": -1 +}) + +// For a FeatureView with compound join keys +db.order_stats.createIndex({ + "customer_id": 1, + "order_id": 1, + "event_timestamp": -1 +}) ``` +**Note**: The Many implementation does not auto-create indexes. Create them manually or via a migration script. + ### One (shared collection) ```javascript db.feature_history.createIndex({ "entity_id": 1, - "feature_view": 1, + "feature_view": 1, "event_timestamp": -1 }) ``` From 1597264f0338efa21b54b48f742df38adc8bd7c1 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Fri, 20 Mar 2026 14:47:36 -0400 Subject: [PATCH 25/30] Add auto-create index to MongoDBOfflineStoreMany - Add _ensure_index_many() function with module-level cache - Call during pull_latest_from_table_or_query (materialization) - Creates index on join_keys + timestamp + created_timestamp - Checks for existing index before creating - Update README to reflect auto-create behavior Signed-off-by: Casey Clements --- .../contrib/mongodb_offline_store/README.md | 2 +- .../mongodb_offline_store/mongodb_many.py | 61 +++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/README.md b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/README.md index 561eb70ed7..db6318ee17 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/README.md +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/README.md @@ -145,7 +145,7 @@ db.order_stats.createIndex({ }) ``` -**Note**: The Many implementation does not auto-create indexes. Create them manually or via a migration script. +**Note**: The Many implementation auto-creates indexes during `pull_latest_from_table_or_query` (materialization). 
### One (shared collection) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_many.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_many.py index 5c5bf0b0ba..3faec603a3 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_many.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_many.py @@ -359,6 +359,26 @@ def pull_latest_from_table_or_query( "MongoDB offline store is in preview. API may change without notice.", RuntimeWarning, ) + + # Ensure index exists for efficient queries + if MongoClient is not None: + connection_string = config.offline_store.connection_string + db_name = data_source.database or config.offline_store.database + client: Any = MongoClient( + connection_string, driver=DRIVER_METADATA, tz_aware=True + ) + try: + _ensure_index_many( + client=client, + db_name=db_name, + collection_name=data_source.collection, + join_keys=join_key_columns, + timestamp_field=timestamp_field, + created_timestamp_column=created_timestamp_column, + ) + finally: + client.close() + return pull_latest_from_table_or_query_ibis( config=config, data_source=data_source, @@ -475,6 +495,47 @@ def reader(data_source: DataSource, repo_path: str) -> Table: return reader +# Track which collections have had indexes ensured (module-level cache) +_indexes_ensured: set = set() + + +def _ensure_index_many( + client: Any, + db_name: str, + collection_name: str, + join_keys: List[str], + timestamp_field: str, + created_timestamp_column: Optional[str] = None, +) -> None: + """Create recommended index on a Many-schema collection. + + Index is on: join_keys (ascending) + timestamp (descending) + created_at (descending). + Uses a module-level cache to avoid redundant index creation checks. + """ + cache_key = f"{db_name}.{collection_name}" + if cache_key in _indexes_ensured: + return + + coll = client[db_name][collection_name] + + # Build index key: join_keys (asc) + timestamp (desc) + created_at (desc) + index_keys = [(k, 1) for k in join_keys] + index_keys.append((timestamp_field, -1)) + if created_timestamp_column: + index_keys.append((created_timestamp_column, -1)) + + # Check if equivalent index already exists + existing_indexes = coll.index_information() + for idx_info in existing_indexes.values(): + if idx_info.get("key") == index_keys: + _indexes_ensured.add(cache_key) + return + + # Create the index + coll.create_index(index_keys, background=True) + _indexes_ensured.add(cache_key) + + def _build_data_source_writer( config: RepoConfig, ) -> Callable[[Table, DataSource, str, str, bool], None]: From 39afa9a77274cb2eb644db8776ff7493d95a3ae9 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Fri, 20 Mar 2026 15:11:58 -0400 Subject: [PATCH 26/30] Update benchmark.py to use One/Many naming convention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename functions: _generate_ibis_data → _generate_many_data, etc. - Rename fixtures: ibis_config → many_config, native_config → one_config - Rename tests: test_scale_rows_ibis → test_scale_rows_many, etc. 
- Update all docstrings and print statements - Update summary comparison output format Signed-off-by: Casey Clements --- .../mongodb_offline_store/benchmark.py | 162 +++++++++--------- 1 file changed, 82 insertions(+), 80 deletions(-) diff --git a/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/benchmark.py b/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/benchmark.py index 49d8b8aeb6..fa7f99e06b 100644 --- a/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/benchmark.py +++ b/sdk/python/tests/unit/infra/offline_stores/contrib/mongodb_offline_store/benchmark.py @@ -1,7 +1,8 @@ """ -Performance benchmarks comparing the two MongoDB offline store implementations - -one Collection with all feature views -vs. a schema of N collections for N features views. +Performance benchmarks comparing MongoDB offline store implementations: Many vs One. + +- Many: One collection per FeatureView (MongoDBOfflineStoreMany) +- One: Single shared collection for all FeatureViews (MongoDBOfflineStoreOne) These tests measure performance across different scaling dimensions: 1. Row count scaling (entity_df size) @@ -13,8 +14,8 @@ - Memory (peak Python memory via tracemalloc) - MongoDB server metrics (opcounters, execution stats) -Run with: pytest benchmark_mongodb_offline_stores.py -v -s -Skip slow tests: pytest benchmark_mongodb_offline_stores.py -v -s -m "not slow" +Run with: pytest benchmark.py -v -s +Skip slow tests: pytest benchmark.py -v -s -m "not slow" """ import time @@ -145,8 +146,8 @@ def mongodb_connection_string(mongodb_container: MongoDbContainer) -> str: @pytest.fixture -def ibis_config(mongodb_connection_string: str) -> RepoConfig: - """RepoConfig for Ibis implementation.""" +def many_config(mongodb_connection_string: str) -> RepoConfig: + """RepoConfig for Many implementation (one collection per FeatureView).""" return RepoConfig( project="benchmark", registry="memory://", @@ -161,8 +162,8 @@ def ibis_config(mongodb_connection_string: str) -> RepoConfig: @pytest.fixture -def native_config(mongodb_connection_string: str) -> RepoConfig: - """RepoConfig for Native implementation.""" +def one_config(mongodb_connection_string: str) -> RepoConfig: + """RepoConfig for One implementation (single shared collection).""" return RepoConfig( project="benchmark", registry="memory://", @@ -177,7 +178,7 @@ def native_config(mongodb_connection_string: str) -> RepoConfig: ) -def _generate_ibis_data( +def _generate_many_data( client: MongoClient, db_name: str, collection_name: str, @@ -185,7 +186,7 @@ def _generate_ibis_data( num_features: int, rows_per_entity: int = 5, ) -> datetime: - """Generate test data for Ibis (one collection per FV, flat schema).""" + """Generate test data for Many (one collection per FV, flat schema).""" collection = client[db_name][collection_name] collection.drop() @@ -206,7 +207,7 @@ def _generate_ibis_data( return now -def _generate_native_data( +def _generate_one_data( client: MongoClient, db_name: str, collection_name: str, @@ -215,7 +216,7 @@ def _generate_native_data( num_features: int, rows_per_entity: int = 5, ) -> datetime: - """Generate test data for Native (single collection, nested features).""" + """Generate test data for One (single collection, nested features).""" collection = client[db_name][collection_name] # Don't drop - may have multiple FVs in same collection @@ -241,8 +242,8 @@ def _generate_native_data( return now -def _create_ibis_fv(num_features: int) -> tuple: - """Create Ibis source and 
FeatureView.""" +def _create_many_fv(num_features: int) -> tuple: + """Create Many source and FeatureView.""" source = MongoDBSourceMany( name="driver_benchmark", database="benchmark_db", @@ -267,8 +268,8 @@ def _create_ibis_fv(num_features: int) -> tuple: return source, fv -def _create_native_fv(num_features: int) -> tuple: - """Create Native source and FeatureView.""" +def _create_one_fv(num_features: int) -> tuple: + """Create One source and FeatureView.""" source = MongoDBSourceOne( name="driver_benchmark", timestamp_field="event_timestamp", @@ -375,10 +376,10 @@ def _print_benchmark_result( @_requires_docker @pytest.mark.parametrize("num_rows", ROW_COUNTS) -def test_scale_rows_ibis( - mongodb_connection_string: str, ibis_config: RepoConfig, num_rows: int +def test_scale_rows_many( + mongodb_connection_string: str, many_config: RepoConfig, num_rows: int ) -> None: - """Benchmark Ibis implementation with varying entity_df sizes. + """Benchmark Many implementation with varying entity_df sizes. Measures: runtime, peak memory, MongoDB opcounters. """ @@ -387,7 +388,7 @@ def test_scale_rows_ibis( client = MongoClient(mongodb_connection_string) try: - now = _generate_ibis_data( + now = _generate_many_data( client, "benchmark_db", "driver_benchmark", @@ -396,7 +397,7 @@ def test_scale_rows_ibis( rows_per_entity=3, ) - _, fv = _create_ibis_fv(num_features) + _, fv = _create_many_fv(num_features) entity_df = pd.DataFrame( { @@ -409,7 +410,7 @@ def test_scale_rows_ibis( def run_query(): job = MongoDBOfflineStoreMany.get_historical_features( - config=ibis_config, + config=many_config, feature_views=[fv], feature_refs=feature_refs, entity_df=entity_df, @@ -428,10 +429,10 @@ def run_query(): @_requires_docker @pytest.mark.parametrize("num_rows", ROW_COUNTS) -def test_scale_rows_native( - mongodb_connection_string: str, native_config: RepoConfig, num_rows: int +def test_scale_rows_one( + mongodb_connection_string: str, one_config: RepoConfig, num_rows: int ) -> None: - """Benchmark Native implementation with varying entity_df sizes. + """Benchmark One implementation with varying entity_df sizes. Measures: runtime, peak memory, MongoDB opcounters. 
""" @@ -441,7 +442,7 @@ def test_scale_rows_native( client = MongoClient(mongodb_connection_string) try: client["benchmark_db"]["feature_history"].drop() - now = _generate_native_data( + now = _generate_one_data( client, "benchmark_db", "feature_history", @@ -451,7 +452,7 @@ def test_scale_rows_native( rows_per_entity=3, ) - _, fv = _create_native_fv(num_features) + _, fv = _create_one_fv(num_features) entity_df = pd.DataFrame( { @@ -464,7 +465,7 @@ def test_scale_rows_native( def run_query(): job = MongoDBOfflineStoreOne.get_historical_features( - config=native_config, + config=one_config, feature_views=[fv], feature_refs=feature_refs, entity_df=entity_df, @@ -490,15 +491,15 @@ def run_query(): @_requires_docker @pytest.mark.parametrize("num_features", FEATURE_COUNTS) -def test_wide_features_ibis( - mongodb_connection_string: str, ibis_config: RepoConfig, num_features: int +def test_wide_features_many( + mongodb_connection_string: str, many_config: RepoConfig, num_features: int ) -> None: - """Benchmark Ibis with varying feature width.""" + """Benchmark Many with varying feature width.""" num_entities = 1000 client = MongoClient(mongodb_connection_string) try: - now = _generate_ibis_data( + now = _generate_many_data( client, "benchmark_db", "driver_benchmark", @@ -507,7 +508,7 @@ def test_wide_features_ibis( rows_per_entity=3, ) - _, fv = _create_ibis_fv(num_features) + _, fv = _create_many_fv(num_features) entity_df = pd.DataFrame( { @@ -520,7 +521,7 @@ def test_wide_features_ibis( def run_query(): job = MongoDBOfflineStoreMany.get_historical_features( - config=ibis_config, + config=many_config, feature_views=[fv], feature_refs=feature_refs, entity_df=entity_df, @@ -541,16 +542,16 @@ def run_query(): @_requires_docker @pytest.mark.parametrize("num_features", FEATURE_COUNTS) -def test_wide_features_native( - mongodb_connection_string: str, native_config: RepoConfig, num_features: int +def test_wide_features_one( + mongodb_connection_string: str, one_config: RepoConfig, num_features: int ) -> None: - """Benchmark Native with varying feature width.""" + """Benchmark One with varying feature width.""" num_entities = 1000 client = MongoClient(mongodb_connection_string) try: client["benchmark_db"]["feature_history"].drop() - now = _generate_native_data( + now = _generate_one_data( client, "benchmark_db", "feature_history", @@ -560,7 +561,7 @@ def test_wide_features_native( rows_per_entity=3, ) - _, fv = _create_native_fv(num_features) + _, fv = _create_one_fv(num_features) entity_df = pd.DataFrame( { @@ -573,7 +574,7 @@ def test_wide_features_native( def run_query(): job = MongoDBOfflineStoreOne.get_historical_features( - config=native_config, + config=one_config, feature_views=[fv], feature_refs=feature_refs, entity_df=entity_df, @@ -599,10 +600,10 @@ def run_query(): @_requires_docker @pytest.mark.parametrize("unique_ratio", [1.0, 0.5, 0.1]) # 100%, 50%, 10% unique -def test_entity_skew_ibis( - mongodb_connection_string: str, ibis_config: RepoConfig, unique_ratio: float +def test_entity_skew_many( + mongodb_connection_string: str, many_config: RepoConfig, unique_ratio: float ) -> None: - """Benchmark Ibis with varying entity uniqueness in entity_df.""" + """Benchmark Many with varying entity uniqueness in entity_df.""" import numpy as np total_rows = 5000 @@ -612,7 +613,7 @@ def test_entity_skew_ibis( client = MongoClient(mongodb_connection_string) try: - now = _generate_ibis_data( + now = _generate_many_data( client, "benchmark_db", "driver_benchmark", @@ -621,7 +622,7 @@ def 
test_entity_skew_ibis( rows_per_entity=5, ) - _, fv = _create_ibis_fv(num_features) + _, fv = _create_many_fv(num_features) # Create entity_df with repeated entity_ids entity_ids = np.random.choice( @@ -640,7 +641,7 @@ def test_entity_skew_ibis( def run_query(): job = MongoDBOfflineStoreMany.get_historical_features( - config=ibis_config, + config=many_config, feature_views=[fv], feature_refs=feature_refs, entity_df=entity_df, @@ -652,7 +653,7 @@ def run_query(): result = _run_benchmark_full(run_query, mongo_client=client) print( - f"\n[IBIS] Unique ratio: {unique_ratio:.0%} ({num_unique_entities:,} unique / {total_rows:,} rows)" + f"\n[MANY] Unique ratio: {unique_ratio:.0%} ({num_unique_entities:,} unique / {total_rows:,} rows)" ) print(f" Time: {result.elapsed_seconds:.3f}s") print(f" Memory: {result.peak_memory_mb:.1f} MB") @@ -664,10 +665,10 @@ def run_query(): @_requires_docker @pytest.mark.parametrize("unique_ratio", [1.0, 0.5, 0.1]) -def test_entity_skew_native( - mongodb_connection_string: str, native_config: RepoConfig, unique_ratio: float +def test_entity_skew_one( + mongodb_connection_string: str, one_config: RepoConfig, unique_ratio: float ) -> None: - """Benchmark Native with varying entity uniqueness in entity_df.""" + """Benchmark One with varying entity uniqueness in entity_df.""" import numpy as np total_rows = 5000 @@ -678,7 +679,7 @@ def test_entity_skew_native( client = MongoClient(mongodb_connection_string) try: client["benchmark_db"]["feature_history"].drop() - now = _generate_native_data( + now = _generate_one_data( client, "benchmark_db", "feature_history", @@ -688,7 +689,7 @@ def test_entity_skew_native( rows_per_entity=5, ) - _, fv = _create_native_fv(num_features) + _, fv = _create_one_fv(num_features) entity_ids = np.random.choice( num_unique_entities, size=total_rows, replace=True @@ -706,7 +707,7 @@ def test_entity_skew_native( def run_query(): job = MongoDBOfflineStoreOne.get_historical_features( - config=native_config, + config=one_config, feature_views=[fv], feature_refs=feature_refs, entity_df=entity_df, @@ -718,7 +719,7 @@ def run_query(): result = _run_benchmark_full(run_query, mongo_client=client) print( - f"\n[NATIVE] Unique ratio: {unique_ratio:.0%} ({num_unique_entities:,} unique / {total_rows:,} rows)" + f"\n[ONE] Unique ratio: {unique_ratio:.0%} ({num_unique_entities:,} unique / {total_rows:,} rows)" ) print(f" Time: {result.elapsed_seconds:.3f}s") print(f" Memory: {result.peak_memory_mb:.1f} MB") @@ -735,7 +736,7 @@ def run_query(): @_requires_docker def test_summary_comparison( - mongodb_connection_string: str, ibis_config: RepoConfig, native_config: RepoConfig + mongodb_connection_string: str, many_config: RepoConfig, one_config: RepoConfig ) -> None: """Run a standard comparison and print summary with full metrics.""" num_entities = 2000 @@ -743,8 +744,8 @@ def test_summary_comparison( client = MongoClient(mongodb_connection_string) try: - # Setup Ibis data - now = _generate_ibis_data( + # Setup Many data + now = _generate_many_data( client, "benchmark_db", "driver_benchmark", @@ -753,9 +754,9 @@ def test_summary_comparison( rows_per_entity=5, ) - # Setup Native data + # Setup One data client["benchmark_db"]["feature_history"].drop() - _generate_native_data( + _generate_one_data( client, "benchmark_db", "feature_history", @@ -774,13 +775,13 @@ def test_summary_comparison( feature_refs = [f"driver_benchmark:feature_{i}" for i in range(num_features)] - # Ibis benchmark - _, ibis_fv = _create_ibis_fv(num_features) + # Many benchmark + _, many_fv = 
_create_many_fv(num_features) - def run_ibis(): + def run_many(): job = MongoDBOfflineStoreMany.get_historical_features( - config=ibis_config, - feature_views=[ibis_fv], + config=many_config, + feature_views=[many_fv], feature_refs=feature_refs, entity_df=entity_df, registry=MagicMock(), @@ -789,15 +790,15 @@ def run_ibis(): ) return job.to_df() - ibis_result = _run_benchmark_full(run_ibis, mongo_client=client) + many_result = _run_benchmark_full(run_many, mongo_client=client) - # Native benchmark - _, native_fv = _create_native_fv(num_features) + # One benchmark + _, one_fv = _create_one_fv(num_features) - def run_native(): + def run_one(): job = MongoDBOfflineStoreOne.get_historical_features( - config=native_config, - feature_views=[native_fv], + config=one_config, + feature_views=[one_fv], feature_refs=feature_refs, entity_df=entity_df, registry=MagicMock(), @@ -806,30 +807,31 @@ def run_native(): ) return job.to_df() - native_result = _run_benchmark_full(run_native, mongo_client=client) + one_result = _run_benchmark_full(run_one, mongo_client=client) # Print summary print("\n" + "=" * 70) - print("SUMMARY COMPARISON") + print("SUMMARY COMPARISON: Many vs One") print("=" * 70) print(f"Entities: {num_entities:,} | Features: {num_features}") print("-" * 70) - print(f"{'Metric':<20} {'Ibis':>20} {'Native':>20}") + print(f"{'Metric':<20} {'Many':>20} {'One':>20}") print("-" * 70) print( - f"{'Time (s)':<20} {ibis_result.elapsed_seconds:>20.3f} {native_result.elapsed_seconds:>20.3f}" + f"{'Time (s)':<20} {many_result.elapsed_seconds:>20.3f} {one_result.elapsed_seconds:>20.3f}" ) print( - f"{'Memory (MB)':<20} {ibis_result.peak_memory_mb:>20.1f} {native_result.peak_memory_mb:>20.1f}" + f"{'Memory (MB)':<20} {many_result.peak_memory_mb:>20.1f} {one_result.peak_memory_mb:>20.1f}" ) print( - f"{'Rows/sec':<20} {num_entities / ibis_result.elapsed_seconds:>20,.0f} {num_entities / native_result.elapsed_seconds:>20,.0f}" + f"{'Rows/sec':<20} {num_entities / many_result.elapsed_seconds:>20,.0f} {num_entities / one_result.elapsed_seconds:>20,.0f}" ) print("-" * 70) - if native_result.elapsed_seconds > 0: - ratio = native_result.elapsed_seconds / ibis_result.elapsed_seconds - print(f"Ibis is {ratio:.1f}x faster than Native") + if one_result.elapsed_seconds > 0: + ratio = one_result.elapsed_seconds / many_result.elapsed_seconds + faster = "Many" if ratio > 1 else "One" + print(f"{faster} is {max(ratio, 1 / ratio):.1f}x faster") print("=" * 70) finally: From 5146c4e81b5057626e747e31c4f1f269304ba921 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Fri, 20 Mar 2026 15:15:54 -0400 Subject: [PATCH 27/30] Add comprehensive module docstring to mongodb_many.py Documents: - Collection structure (one per FeatureView) - Index creation (auto-created during materialization) - Document schema (flat, top-level features) - Point-in-time join strategy (Ibis memtables) - Performance characteristics and memory considerations - When to use vs MongoDBOfflineStoreOne - Comparison table with One implementation Signed-off-by: Casey Clements --- .../mongodb_offline_store/mongodb_many.py | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_many.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_many.py index 3faec603a3..2fdf67200d 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_many.py +++ 
b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_many.py @@ -12,6 +12,69 @@ # See the License for the specific language governing permissions and # limitations under the License. +""" +MongoDB Offline Store Implementation (Many Collections). + +This module implements a MongoDB offline store using a many-collection schema +where each FeatureView maps to its own dedicated MongoDB collection. It uses +Ibis for point-in-time joins, loading collection data into in-memory tables. + +Collection Structure: + Each FeatureView has its own collection named after the source: + - driver_stats FeatureView → db.driver_stats collection + - vehicle_stats FeatureView → db.vehicle_stats collection + +Collection Index (auto-created during materialization): + db..createIndex({ + "": 1, + "": 1, // if compound key + "event_timestamp": -1, + "created_at": -1 // if created_timestamp_column is set + }) + +Document Schema (example for driver_stats): + { + "_id": ObjectId(), + "driver_id": 1001, + "event_timestamp": ISODate("2026-01-20T12:00:00Z"), + "created_at": ISODate("2026-01-20T12:00:05Z"), + "rating": 4.91, + "trips_last_7d": 132 + } + + Note: Features are stored as top-level fields (flat schema), not nested + in a subdocument. This differs from the "One" implementation. + +Point-in-Time Join Strategy: + 1. Load entire collection into an Ibis memtable + 2. Load entity_df into an Ibis memtable + 3. Use Ibis/pandas merge_asof for point-in-time correctness + 4. Apply TTL filtering per FeatureView + +Performance Characteristics: + - Fast for small to medium collections (fits in memory) + - Optimized Ibis memtable operations for joins + - ⚠️ Loads ENTIRE collection into memory - may OOM on large collections + +When to Use: + ✅ Small to medium feature stores where collections fit in memory + ✅ When query performance is the priority + ✅ When you want simple, flat document schemas + ✅ When each FeatureView has independent scaling needs + + ❌ Avoid when collections are very large (use MongoDBOfflineStoreOne instead) + ❌ Avoid in memory-constrained environments + +Comparison with MongoDBOfflineStoreOne: + | Aspect | Many (this module) | One | + |-----------------|----------------------|------------------------| + | Collections | N (one per FV) | 1 (shared) | + | Schema | Flat top-level | Nested features{} | + | Memory | Loads all docs | Filters by entity | + | Performance | Faster at scale | Memory-efficient | + | Entity ID | Native columns | Serialized bytes | +""" + import json import warnings from datetime import datetime From 612d05ab1c99ca79c67254f0b8c8f568bbd0bbe5 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Fri, 20 Mar 2026 15:20:02 -0400 Subject: [PATCH 28/30] Add Feature Freshness and Schema Evolution docs to mongodb_many.py Add missing documentation sections: - Feature Freshness Semantics: document-level freshness, not per-feature - Schema Evolution ('Feature Creep'): flexible schema implications - Notes: entity keys as native types, PIT correctness, TTL constraints Signed-off-by: Casey Clements --- .../mongodb_offline_store/mongodb_many.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_many.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_many.py index 2fdf67200d..b1112552f3 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_many.py +++ 
b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_many.py @@ -45,6 +45,43 @@ Note: Features are stored as top-level fields (flat schema), not nested in a subdocument. This differs from the "One" implementation. +Feature Freshness Semantics: + This implementation operates at *document-level freshness*, not + per-feature freshness. During retrieval (e.g. point-in-time joins), + the system selects the most recent document for a given entity that + satisfies time constraints, and then extracts all requested features + from that document. + + As a result, if a newer document contains only a subset of features, + missing features will be returned as NULL—even if older documents + contained values for those features. The system does not backfill + individual feature values from earlier events. + + This behavior matches common Feast offline store semantics, but may + differ from systems that compute "latest value per feature". + +Schema Evolution ("Feature Creep"): + Because documents can have varying fields over time, different documents + in the same collection may contain different sets of feature fields. + This supports: + - Adding new features without backfilling historical data + - Partial writes or sparse feature computation + + However, it also implies: + - Newly added features will be NULL for older events + - Partially populated documents may lead to NULL values even + when older data contained those features + + Users should ensure that feature computation pipelines write complete + feature sets when consistent availability is required. + +Notes: + - Entity keys are stored as native MongoDB types (not serialized), + which differs from the "One" implementation. + - Point-in-time correctness is enforced per FeatureView. + - TTL (time-to-live) constraints are applied per FeatureView during + historical retrieval. + Point-in-Time Join Strategy: 1. Load entire collection into an Ibis memtable 2. Load entity_df into an Ibis memtable From 970ec797c1b0f570d50527aaea91472c15d8b9c6 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Fri, 20 Mar 2026 16:27:35 -0400 Subject: [PATCH 29/30] Add MongoDB DataSourceCreators for universal Feast tests Add DataSourceCreator implementations for MongoDB offline stores: - MongoDBManyDataSourceCreator: Fully functional, passes universal tests. Creates one collection per FeatureView with flat document schema. - MongoDBOneDataSourceCreator: Implementation exists but NOT registered. The One schema requires knowing join keys vs features at data creation time, but DataSourceCreator.create_data_source() doesn't receive entity definitions. See TODO in mongodb.py for details on required interface changes. 
Other changes: - Fix data_source_class_type path in mongodb_one.py (mongodb_native -> mongodb_one) - Improve datetime handling in mongodb_one.py for non-datetime columns - Add 'mongodb' marker to pytest.ini - Register MongoDBManyDataSourceCreator in repo_configuration.py Signed-off-by: Casey Clements --- .secrets.baseline | 6 +- .../mongodb_offline_store/mongodb_one.py | 9 +- sdk/python/pytest.ini | 1 + .../feature_repos/repo_configuration.py | 27 ++ .../universal/data_sources/mongodb.py | 316 ++++++++++++++++++ 5 files changed, 354 insertions(+), 5 deletions(-) create mode 100644 sdk/python/tests/universal/feature_repos/universal/data_sources/mongodb.py diff --git a/.secrets.baseline b/.secrets.baseline index 9d27a7b000..0391444334 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -1460,14 +1460,14 @@ "filename": "sdk/python/tests/universal/feature_repos/repo_configuration.py", "hashed_secret": "d90e76ef629fb00c95f4e84fec29fbda111e2392", "is_verified": false, - "line_number": 459 + "line_number": 479 }, { "type": "Secret Keyword", "filename": "sdk/python/tests/universal/feature_repos/repo_configuration.py", "hashed_secret": "5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8", "is_verified": false, - "line_number": 461 + "line_number": 481 } ], "sdk/python/tests/universal/feature_repos/universal/data_sources/file.py": [ @@ -1539,5 +1539,5 @@ } ] }, - "generated_at": "2026-03-18T08:09:25Z" + "generated_at": "2026-03-20T20:27:19Z" } diff --git a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_one.py b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_one.py index f40f30df83..3c1aeaf708 100644 --- a/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_one.py +++ b/sdk/python/feast/infra/offline_stores/contrib/mongodb_offline_store/mongodb_one.py @@ -219,7 +219,7 @@ def _to_proto_impl(self) -> DataSourceProto: return DataSourceProto( name=self.name, type=DataSourceProto.CUSTOM_SOURCE, - data_source_class_type="feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_native.MongoDBSourceOne", + data_source_class_type="feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_one.MongoDBSourceOne", field_mapping=self.field_mapping, custom_options=DataSourceProto.CustomSourceOptions( configuration=json.dumps({"feature_view": self.name}).encode() @@ -655,7 +655,12 @@ def _run_single(entity_subset_df: pd.DataFrame, coll: Any) -> pd.DataFrame: """ # Prepare entity_df: ensure timestamps are UTC result = entity_subset_df.copy() - if result[event_timestamp_col].dt.tz is None: + # Convert timestamp column to datetime if needed + if not pd.api.types.is_datetime64_any_dtype(result[event_timestamp_col]): + result[event_timestamp_col] = pd.to_datetime( + result[event_timestamp_col], utc=True + ) + elif result[event_timestamp_col].dt.tz is None: result[event_timestamp_col] = pd.to_datetime( result[event_timestamp_col], utc=True ) diff --git a/sdk/python/pytest.ini b/sdk/python/pytest.ini index 1ad76b978e..d5ad19660b 100644 --- a/sdk/python/pytest.ini +++ b/sdk/python/pytest.ini @@ -21,6 +21,7 @@ markers = cloud: Tests requiring cloud credentials local_only: Tests that run entirely locally xdist_group: Group tests to run in the same xdist worker + mongodb: Tests requiring MongoDB timeout = 300 timeout_method = thread diff --git a/sdk/python/tests/universal/feature_repos/repo_configuration.py b/sdk/python/tests/universal/feature_repos/repo_configuration.py index ddd952f71d..2033d41603 100644 --- 
a/sdk/python/tests/universal/feature_repos/repo_configuration.py +++ b/sdk/python/tests/universal/feature_repos/repo_configuration.py @@ -108,6 +108,33 @@ ] ) +# MongoDB offline stores (require testcontainers and pymongo) +if os.getenv("FEAST_LOCAL_ONLINE_CONTAINER", "False") == "True": + try: + from tests.universal.feature_repos.universal.data_sources.mongodb import ( + MongoDBManyDataSourceCreator, + # MongoDBOneDataSourceCreator, # TODO: Not registered - see TODO in mongodb.py + ) + + AVAILABLE_OFFLINE_STORES.extend( + [ + ("local", MongoDBManyDataSourceCreator), + # TODO: MongoDBOneDataSourceCreator requires DataSourceCreator interface + # changes to pass entity/join key info. See mongodb.py for details. + # ("local", MongoDBOneDataSourceCreator), + ] + ) + OFFLINE_STORE_TO_PROVIDER_CONFIG["mongodb_many"] = ( + "local", + MongoDBManyDataSourceCreator, + ) + # OFFLINE_STORE_TO_PROVIDER_CONFIG["mongodb_one"] = ( + # "local", + # MongoDBOneDataSourceCreator, + # ) + except ImportError: + pass # pymongo or testcontainers not installed + AVAILABLE_ONLINE_STORES: Dict[ str, Tuple[Union[str, Dict[Any, Any]], Optional[Type[OnlineStoreCreator]]] ] = {"sqlite": ({"type": "sqlite"}, None)} diff --git a/sdk/python/tests/universal/feature_repos/universal/data_sources/mongodb.py b/sdk/python/tests/universal/feature_repos/universal/data_sources/mongodb.py new file mode 100644 index 0000000000..8eedc3b695 --- /dev/null +++ b/sdk/python/tests/universal/feature_repos/universal/data_sources/mongodb.py @@ -0,0 +1,316 @@ +""" +MongoDB DataSourceCreator implementations for universal Feast tests. + +Provides two implementations matching the two offline store schemas: +- MongoDBManyDataSourceCreator: One collection per FeatureView (Many) +- MongoDBOneDataSourceCreator: Single shared collection (One) +""" + +from typing import Any, Dict, Optional + +import pandas as pd +import pytest +from testcontainers.mongodb import MongoDbContainer + +from feast.data_source import DataSource +from feast.feature_logging import LoggingDestination +from feast.infra.key_encoding_utils import serialize_entity_key +from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_many import ( + MongoDBOfflineStoreManyConfig, + MongoDBSourceMany, + SavedDatasetMongoDBStorageMany, +) +from feast.infra.offline_stores.contrib.mongodb_offline_store.mongodb_one import ( + MongoDBOfflineStoreOneConfig, + MongoDBSourceOne, +) +from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto +from feast.protos.feast.types.Value_pb2 import Value as ValueProto +from feast.repo_config import FeastConfigBaseModel +from feast.saved_dataset import SavedDatasetStorage +from tests.universal.feature_repos.universal.data_source_creator import ( + DataSourceCreator, +) + +# Import pymongo - will be available since we're testing MongoDB +try: + from pymongo import MongoClient +except ImportError: + MongoClient = None # type: ignore + + +class MongoDBManyDataSourceCreator(DataSourceCreator): + """DataSourceCreator for MongoDBOfflineStoreMany (one collection per FeatureView).""" + + def __init__(self, project_name: str, *args, **kwargs): + super().__init__(project_name) + self.container = MongoDbContainer( + "mongo:7.0", + username="test", + password="test", # pragma: allowlist secret + ).with_exposed_ports(27017) + self.container.start() + self.port = self.container.get_exposed_port(27017) + self.connection_string = ( + f"mongodb://test:test@localhost:{self.port}" # pragma: allowlist secret + ) + self.database = 
f"feast_test_{project_name}" + self.collections_created: list[str] = [] + + def create_data_source( + self, + df: pd.DataFrame, + destination_name: str, + created_timestamp_column: str = "created_ts", + field_mapping: Optional[Dict[str, str]] = None, + timestamp_field: Optional[str] = "ts", + ) -> DataSource: + """Create a MongoDB data source by inserting df into a collection.""" + collection_name = self.get_prefixed_table_name(destination_name) + + # Insert data into MongoDB + client: Any = MongoClient(self.connection_string, tz_aware=True) + try: + coll = client[self.database][collection_name] + coll.drop() # Clean slate + records = df.to_dict("records") + if records: + coll.insert_many(records) + self.collections_created.append(collection_name) + finally: + client.close() + + return MongoDBSourceMany( + name=destination_name, + database=self.database, + collection=collection_name, + timestamp_field=timestamp_field or "ts", + created_timestamp_column=created_timestamp_column, + field_mapping=field_mapping, + ) + + def get_prefixed_table_name(self, suffix: str) -> str: + return f"{self.project_name}_{suffix}" + + def create_offline_store_config(self) -> FeastConfigBaseModel: + return MongoDBOfflineStoreManyConfig( + connection_string=self.connection_string, + database=self.database, + ) + + def create_saved_dataset_destination(self) -> SavedDatasetStorage: + return SavedDatasetMongoDBStorageMany( + database=self.database, + collection=f"{self.project_name}_saved_dataset", + ) + + def create_logged_features_destination(self) -> LoggingDestination: + # MongoDB doesn't have a native LoggingDestination yet + # Return None or raise NotImplementedError for now + raise NotImplementedError( + "MongoDB LoggingDestination not implemented. " + "Tests requiring logging features will be skipped." + ) + + def teardown(self): + """Clean up: drop collections and stop container.""" + try: + client: Any = MongoClient(self.connection_string, tz_aware=True) + try: + db = client[self.database] + for coll_name in self.collections_created: + db[coll_name].drop() + finally: + client.close() + except Exception: + pass # Container may already be stopped + self.container.stop() + + @staticmethod + def test_markers() -> list: + """Mark tests as requiring MongoDB.""" + return [pytest.mark.mongodb] + + +class MongoDBOneDataSourceCreator(DataSourceCreator): + """DataSourceCreator for MongoDBOfflineStoreOne (single shared collection). + + This implementation uses the nested features schema where all FeatureViews + share a single collection with a discriminator field. + + TODO: This DataSourceCreator has a fundamental limitation. The One schema + requires knowing which columns are join keys vs features to properly + serialize entity_id and nest features. However, create_data_source() only + receives a DataFrame and column names - it doesn't have access to Entity + definitions that specify join keys. + + Current workaround uses heuristics (columns ending in '_id' with int/string + dtype), which is fragile. A proper fix would require modifying the + DataSourceCreator interface to pass entity/join key information to + create_data_source(), which is a Feast core change. + + For now, universal tests may fail for FeatureViews where the heuristic + doesn't correctly identify join keys. Use unit tests in + tests/unit/infra/offline_stores/contrib/mongodb_offline_store/test_one.py + for comprehensive testing of the One implementation. 
+ """ + + ENTITY_KEY_VERSION = 3 + + def __init__(self, project_name: str, *args, **kwargs): + super().__init__(project_name) + self.container = MongoDbContainer( + "mongo:7.0", + username="test", + password="test", # pragma: allowlist secret + ).with_exposed_ports(27017) + self.container.start() + self.port = self.container.get_exposed_port(27017) + self.connection_string = ( + f"mongodb://test:test@localhost:{self.port}" # pragma: allowlist secret + ) + self.database = f"feast_test_{project_name}" + self.collection = "feature_history" + self.feature_views_created: list[str] = [] + # Track entity key columns per feature view for serialization + self._entity_key_columns: Dict[str, list[str]] = {} + + def _serialize_entity_key(self, row: pd.Series, join_keys: list[str]) -> bytes: + """Serialize entity key columns to bytes.""" + entity_key = EntityKeyProto() + for key in join_keys: + entity_key.join_keys.append(key) + val = ValueProto() + value = row[key] + if isinstance(value, int): + val.int64_val = value + elif isinstance(value, str): + val.string_val = value + elif isinstance(value, float): + val.double_val = value + elif isinstance(value, bool): + val.bool_val = value + else: + val.string_val = str(value) + entity_key.entity_values.append(val) + return serialize_entity_key(entity_key, self.ENTITY_KEY_VERSION) + + def create_data_source( + self, + df: pd.DataFrame, + destination_name: str, + created_timestamp_column: str = "created_ts", + field_mapping: Optional[Dict[str, str]] = None, + timestamp_field: Optional[str] = "ts", + ) -> DataSource: + """Create a MongoDB data source by inserting df into the shared collection. + + The data is transformed into the One schema: + - entity_id: serialized entity key + - feature_view: destination_name + - features: nested dict of feature values + - event_timestamp: from timestamp_field + - created_at: from created_timestamp_column + """ + # Determine which columns are join keys vs features + # Join keys must be integer or string types (serializable as entity keys) + timestamp_cols = {timestamp_field, created_timestamp_column} + all_cols = set(df.columns) - timestamp_cols - {None} + + # Heuristic: identify join keys + # 1. Must end with "_id" or be a known key name + # 2. 
Must be integer or string type (not float) + join_keys = [] + for c in all_cols: + if c.endswith("_id") or c in {"driver", "customer", "entity"}: + dtype = df[c].dtype + # Only integer or string types can be join keys + if dtype in ("int64", "int32", "object") or str(dtype).startswith( + "int" + ): + join_keys.append(c) + + if not join_keys: + # Fallback: first integer column + for c in all_cols: + if df[c].dtype in ("int64", "int32") or str(df[c].dtype).startswith( + "int" + ): + join_keys = [c] + break + + feature_cols = [c for c in all_cols if c not in join_keys] + + # Store for later use + self._entity_key_columns[destination_name] = join_keys + + # Transform to One schema + docs = [] + for _, row in df.iterrows(): + entity_id = self._serialize_entity_key(row, join_keys) + features = {col: row[col] for col in feature_cols if pd.notna(row.get(col))} + + doc = { + "entity_id": entity_id, + "feature_view": destination_name, + "features": features, + } + if timestamp_field and timestamp_field in row: + doc["event_timestamp"] = row[timestamp_field] + if created_timestamp_column and created_timestamp_column in row: + doc["created_at"] = row[created_timestamp_column] + + docs.append(doc) + + # Insert into MongoDB + client: Any = MongoClient(self.connection_string, tz_aware=True) + try: + coll = client[self.database][self.collection] + if docs: + coll.insert_many(docs) + self.feature_views_created.append(destination_name) + finally: + client.close() + + return MongoDBSourceOne( + name=destination_name, + timestamp_field="event_timestamp", + created_timestamp_column="created_at" if created_timestamp_column else None, + field_mapping=field_mapping, + ) + + def get_prefixed_table_name(self, suffix: str) -> str: + return f"{self.project_name}_{suffix}" + + def create_offline_store_config(self) -> FeastConfigBaseModel: + return MongoDBOfflineStoreOneConfig( + connection_string=self.connection_string, + database=self.database, + collection=self.collection, + ) + + def create_saved_dataset_destination(self) -> SavedDatasetStorage: + # One implementation doesn't have SavedDatasetStorage yet + raise NotImplementedError( + "MongoDBOfflineStoreOne SavedDatasetStorage not implemented." 
+ ) + + def create_logged_features_destination(self) -> LoggingDestination: + raise NotImplementedError("MongoDB LoggingDestination not implemented.") + + def teardown(self): + """Clean up: drop the collection and stop container.""" + try: + client: Any = MongoClient(self.connection_string, tz_aware=True) + try: + client[self.database][self.collection].drop() + finally: + client.close() + except Exception: + pass + self.container.stop() + + @staticmethod + def test_markers() -> list: + """Mark tests as requiring MongoDB.""" + return [pytest.mark.mongodb] From 9dc9162a428f4d2afcce6f9edb21636c79029915 Mon Sep 17 00:00:00 2001 From: Casey Clements Date: Fri, 20 Mar 2026 16:57:17 -0400 Subject: [PATCH 30/30] Add .secrets.baseline Signed-off-by: Casey Clements --- .secrets.baseline | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.secrets.baseline b/.secrets.baseline index 0391444334..260d37dfee 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -1460,14 +1460,14 @@ "filename": "sdk/python/tests/universal/feature_repos/repo_configuration.py", "hashed_secret": "d90e76ef629fb00c95f4e84fec29fbda111e2392", "is_verified": false, - "line_number": 479 + "line_number": 486 }, { "type": "Secret Keyword", "filename": "sdk/python/tests/universal/feature_repos/repo_configuration.py", "hashed_secret": "5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8", "is_verified": false, - "line_number": 481 + "line_number": 488 } ], "sdk/python/tests/universal/feature_repos/universal/data_sources/file.py": [ @@ -1539,5 +1539,5 @@ } ] }, - "generated_at": "2026-03-20T20:27:19Z" + "generated_at": "2026-03-20T20:55:36Z" }
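To exercise the new suite locally, one option is the sketch below. It assumes Docker is available and that the `mongodb` pytest marker and the `FEAST_LOCAL_ONLINE_CONTAINER` gate land as shown in the patches above; the test path follows this series' layout.

```python
# Sketch: run the MongoDB-marked tests with the container gate enabled.
# Requires Docker plus the pymongo/testcontainers extras.
import os
import subprocess

env = dict(os.environ, FEAST_LOCAL_ONLINE_CONTAINER="True")
subprocess.run(
    ["pytest", "-v", "-s", "-m", "mongodb", "sdk/python/tests"],
    env=env,
    check=False,
)
```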