From 34caf8878c9fe7fde00902ea8a3ed67f86a1484e Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Tue, 27 Jan 2026 21:27:41 +0100 Subject: [PATCH 01/15] Summary of Changes Created flixopt/dim_iterator.py New utility class DimIterator with: - Factory methods: from_dataset(), from_flow_system(), from_sentinel_lists() - Iteration: iter_slices(ds), iter_keys() - Key conversion: key_from_values(p, s), selector_for_key(key) - Combination: combine(slices, base_dims, ...), combine_arrays(slices, base_dims, ...) Refactored flixopt/clustering/base.py 1. Added DimIterator import 2. Removed unused combine_slices() function (~70 lines) 3. Added _iterator cached property to ClusteringResults 4. Simplified _build_property_array() from ~35 lines to ~3 lines 5. Simplified apply() to use selector_for_key() Refactored flixopt/transform_accessor.py 1. Added DimIterator import 2. Refactored _build_cluster_weight_da() to use DimIterator 3. Refactored _build_segment_durations_da() to use DimIterator 4. Refactored _build_clustering_metrics() to use DimIterator 5. Refactored _build_reduced_dataset() to use DimIterator 6. Refactored _build_cluster_assignments_da() to use DimIterator 7. Removed _combine_slices_to_dataarray_generic() (~62 lines) 8. Removed _combine_slices_to_dataarray_2d() (~40 lines) Lines of Code Impact - Added: ~400 lines (new dim_iterator.py module) - Removed: ~170 lines (old combine methods + combine_slices) - Net: +230 lines, but the new code is reusable across the codebase and eliminates repetitive patterns Benefits - Eliminates [None] sentinel value confusion - Eliminates 4-way if/elif branching for period/scenario combinations - Consistent key format throughout (tuples matching dims) - Single source of truth for period/scenario iteration logic - Easier to add new functionality (e.g., parallel processing) --- flixopt/clustering/base.py | 112 +-------- flixopt/dim_iterator.py | 454 ++++++++++++++++++++++++++++++++++ flixopt/transform_accessor.py | 249 +++++-------------- 3 files changed, 533 insertions(+), 282 deletions(-) create mode 100644 flixopt/dim_iterator.py diff --git a/flixopt/clustering/base.py b/flixopt/clustering/base.py index bc49ab660..97c4c3097 100644 --- a/flixopt/clustering/base.py +++ b/flixopt/clustering/base.py @@ -27,6 +27,7 @@ from ..plot_result import PlotResult from ..statistics_accessor import SelectType +from ..dim_iterator import DimIterator from ..statistics_accessor import _build_color_kwargs @@ -50,75 +51,6 @@ def _select_dims(da: xr.DataArray, period: Any = None, scenario: Any = None) -> return da -def combine_slices( - slices: dict[tuple, np.ndarray], - extra_dims: list[str], - dim_coords: dict[str, list], - output_dim: str, - output_coord: Any, - attrs: dict | None = None, -) -> xr.DataArray: - """Combine {(dim_values): 1D_array} dict into a DataArray. - - This utility simplifies the common pattern of iterating over extra dimensions - (like period, scenario), processing each slice, and combining results. - - Args: - slices: Dict mapping dimension value tuples to 1D numpy arrays. - Keys are tuples like ('period1', 'scenario1') matching extra_dims order. - extra_dims: Dimension names in order (e.g., ['period', 'scenario']). - dim_coords: Dict mapping dimension names to coordinate values. - output_dim: Name of the output dimension (typically 'time'). - output_coord: Coordinate values for output dimension. - attrs: Optional DataArray attributes. - - Returns: - DataArray with dims [output_dim, *extra_dims]. 
- - Raises: - ValueError: If slices is empty. - KeyError: If a required key is missing from slices. - - Example: - >>> slices = { - ... ('P1', 'base'): np.array([1, 2, 3]), - ... ('P1', 'high'): np.array([4, 5, 6]), - ... ('P2', 'base'): np.array([7, 8, 9]), - ... ('P2', 'high'): np.array([10, 11, 12]), - ... } - >>> result = combine_slices( - ... slices, - ... extra_dims=['period', 'scenario'], - ... dim_coords={'period': ['P1', 'P2'], 'scenario': ['base', 'high']}, - ... output_dim='time', - ... output_coord=[0, 1, 2], - ... ) - >>> result.dims - ('time', 'period', 'scenario') - """ - if not slices: - raise ValueError('slices cannot be empty') - - first = next(iter(slices.values())) - n_output = len(first) - shape = [n_output] + [len(dim_coords[d]) for d in extra_dims] - data = np.empty(shape, dtype=first.dtype) - - for combo in np.ndindex(*shape[1:]): - key = tuple(dim_coords[d][i] for d, i in zip(extra_dims, combo, strict=True)) - try: - data[(slice(None),) + combo] = slices[key] - except KeyError: - raise KeyError(f'Missing slice for key {key} (extra_dims={extra_dims})') from None - - return xr.DataArray( - data, - dims=[output_dim] + extra_dims, - coords={output_dim: output_coord, **dim_coords}, - attrs=attrs or {}, - ) - - def _cluster_occurrences(cr: TsamClusteringResult) -> np.ndarray: """Compute cluster occurrences from ClusteringResult.""" counts = Counter(cr.cluster_assignments) @@ -232,6 +164,14 @@ def coords(self) -> dict[str, list]: """ return {dim: self._get_dim_values(dim) for dim in self._dim_names} + @functools.cached_property + def _iterator(self) -> DimIterator: + """DimIterator for this ClusteringResults (cached).""" + return DimIterator( + periods=self._get_dim_values('period'), + scenarios=self._get_dim_values('scenario'), + ) + def sel(self, **kwargs: Any) -> TsamClusteringResult: """Select result by dimension labels (xarray-like). 
@@ -551,33 +491,8 @@ def _build_property_array( name: str | None = None, ) -> xr.DataArray: """Build a DataArray property, handling both single and multi-dimensional cases.""" - base_coords = base_coords or {} - periods = self._get_dim_values('period') - scenarios = self._get_dim_values('scenario') - - # Build list of (dim_name, values) for dimensions that exist - extra_dims = [] - if periods is not None: - extra_dims.append(('period', periods)) - if scenarios is not None: - extra_dims.append(('scenario', scenarios)) - - # Simple case: no extra dimensions - if not extra_dims: - return xr.DataArray(get_data(self._results[()]), dims=base_dims, coords=base_coords, name=name) - - # Multi-dimensional: stack data for each combination - first_data = get_data(next(iter(self._results.values()))) - shape = list(first_data.shape) + [len(vals) for _, vals in extra_dims] - data = np.empty(shape, dtype=first_data.dtype) # Preserve dtype - - for combo in np.ndindex(*[len(vals) for _, vals in extra_dims]): - key = tuple(extra_dims[i][1][idx] for i, idx in enumerate(combo)) - data[(...,) + combo] = get_data(self._results[key]) - - dims = base_dims + [dim_name for dim_name, _ in extra_dims] - coords = {**base_coords, **{dim_name: vals for dim_name, vals in extra_dims}} - return xr.DataArray(data, dims=dims, coords=coords, name=name) + slices = {key: get_data(cr) for key, cr in self._results.items()} + return self._iterator.combine_arrays(slices, base_dims, base_coords, name) @staticmethod def _key_to_str(key: tuple) -> str: @@ -628,10 +543,7 @@ def apply(self, data: xr.Dataset) -> AggregationResults: results = {} for key, cr in self._results.items(): - # Build selector for this key - selector = dict(zip(self._dim_names, key, strict=False)) - - # Select the slice for this (period, scenario) + selector = self._iterator.selector_for_key(key) data_slice = data.sel(**selector, drop=True) if selector else data # Drop constant arrays and convert to DataFrame diff --git a/flixopt/dim_iterator.py b/flixopt/dim_iterator.py new file mode 100644 index 000000000..27d13c366 --- /dev/null +++ b/flixopt/dim_iterator.py @@ -0,0 +1,454 @@ +""" +Dimension iteration utilities for period/scenario slicing patterns. + +This module provides `DimIterator`, a helper class that eliminates repetitive +boilerplate when working with datasets that may have period and/or scenario +dimensions. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import numpy as np +import pandas as pd +import xarray as xr + +if TYPE_CHECKING: + from collections.abc import Iterator + + from .flow_system import FlowSystem + + +class DimIterator: + """Handles period/scenario iteration and combination patterns. + + Eliminates the need for: + - ``[None]`` sentinel values for missing dimensions + - 4-way if/elif branching for period/scenario combinations + - Manual key conversion between internal and external formats + + Keys returned by this class are tuples that match ``dims`` order: + - No dims: ``()`` + - Period only: ``(period_value,)`` + - Scenario only: ``(scenario_value,)`` + - Both: ``(period_value, scenario_value)`` + + Example: + >>> iterator = DimIterator.from_dataset(ds) + >>> + >>> # Iteration + >>> results = {} + >>> for key, ds_slice in iterator.iter_slices(ds): + ... results[key] = transform(ds_slice) + >>> + >>> # Combination + >>> combined = iterator.combine(results, base_dims=['cluster', 'time']) + + Attributes: + dims: Tuple of dimension names present (e.g., ``('period', 'scenario')``). 
+ coords: Dict mapping dimension names to coordinate values. + """ + + def __init__( + self, + periods: list | None = None, + scenarios: list | None = None, + ): + """Initialize with explicit dimension values. + + Args: + periods: Period coordinate values, or None if no period dimension. + scenarios: Scenario coordinate values, or None if no scenario dimension. + """ + self._periods = periods + self._scenarios = scenarios + + # Build dim_names for external keys (no None sentinels) + self._dim_names: list[str] = [] + if periods is not None: + self._dim_names.append('period') + if scenarios is not None: + self._dim_names.append('scenario') + + # ========================================================================== + # Factory methods + # ========================================================================== + + @classmethod + def from_dataset(cls, ds: xr.Dataset) -> DimIterator: + """Create iterator from dataset's period/scenario coordinates. + + Args: + ds: Dataset with optional 'period' and/or 'scenario' coordinates. + + Returns: + DimIterator configured for the dataset's dimensions. + """ + periods = list(ds.period.values) if 'period' in ds.coords else None + scenarios = list(ds.scenario.values) if 'scenario' in ds.coords else None + return cls(periods=periods, scenarios=scenarios) + + @classmethod + def from_flow_system(cls, fs: FlowSystem) -> DimIterator: + """Create iterator from FlowSystem's period/scenario structure. + + Args: + fs: FlowSystem with optional periods and/or scenarios. + + Returns: + DimIterator configured for the FlowSystem's dimensions. + """ + return cls( + periods=list(fs.periods) if fs.periods is not None else None, + scenarios=list(fs.scenarios) if fs.scenarios is not None else None, + ) + + @classmethod + def from_sentinel_lists(cls, periods: list, scenarios: list) -> DimIterator: + """Create iterator from lists that may contain [None] sentinel. + + This handles the common pattern where code uses [None] to indicate + no dimension exists: + + periods = list(fs.periods) if has_periods else [None] + scenarios = list(fs.scenarios) if has_scenarios else [None] + + Args: + periods: Period values, or [None] if no period dimension. + scenarios: Scenario values, or [None] if no scenario dimension. + + Returns: + DimIterator configured for the actual dimensions present. + """ + return cls( + periods=periods if periods != [None] else None, + scenarios=scenarios if scenarios != [None] else None, + ) + + # ========================================================================== + # Properties + # ========================================================================== + + @property + def dims(self) -> tuple[str, ...]: + """Dimension names as tuple (xarray-like).""" + return tuple(self._dim_names) + + @property + def coords(self) -> dict[str, list]: + """Coordinate values for each dimension. + + Returns: + Dict mapping dimension names to lists of coordinate values. + Empty dict if no dimensions. 
+ """ + result = {} + if self._periods is not None: + result['period'] = self._periods + if self._scenarios is not None: + result['scenario'] = self._scenarios + return result + + @property + def has_periods(self) -> bool: + """Whether period dimension exists.""" + return self._periods is not None + + @property + def has_scenarios(self) -> bool: + """Whether scenario dimension exists.""" + return self._scenarios is not None + + @property + def n_slices(self) -> int: + """Total number of slices (product of dimension sizes).""" + n = 1 + if self._periods is not None: + n *= len(self._periods) + if self._scenarios is not None: + n *= len(self._scenarios) + return n + + # ========================================================================== + # Iteration + # ========================================================================== + + def iter_slices( + self, + ds: xr.Dataset, + drop: bool = True, + ) -> Iterator[tuple[tuple, xr.Dataset]]: + """Iterate over (period, scenario) slices of a dataset. + + Args: + ds: Dataset with optional period/scenario dimensions. + drop: If True (default), drop period/scenario dims from sliced datasets. + If False, keep them as scalar coordinates. + + Yields: + ``(key, ds_slice)`` tuples where key matches ``dims`` order. + For no dims, key is ``()``. + + Example: + >>> for key, slice_ds in iterator.iter_slices(ds): + ... print(f'Processing {key}') + ... result = process(slice_ds) + """ + periods = self._periods if self._periods is not None else [None] + scenarios = self._scenarios if self._scenarios is not None else [None] + + for p in periods: + for s in scenarios: + selector = self._build_selector(p, s) + key = self._to_key(p, s) + if selector: + yield key, ds.sel(**selector, drop=drop) + else: + yield key, ds + + def iter_keys(self) -> Iterator[tuple]: + """Iterate over keys only (no data access). + + Yields: + Key tuples matching ``dims`` order. + + Example: + >>> for key in iterator.iter_keys(): + ... results[key] = compute_something() + """ + periods = self._periods if self._periods is not None else [None] + scenarios = self._scenarios if self._scenarios is not None else [None] + + for p in periods: + for s in scenarios: + yield self._to_key(p, s) + + # ========================================================================== + # Key conversion + # ========================================================================== + + def _build_selector(self, period: Any, scenario: Any) -> dict: + """Build xarray selector dict (only for non-None values).""" + selector = {} + if period is not None: + selector['period'] = period + if scenario is not None: + selector['scenario'] = scenario + return selector + + def _to_key(self, period: Any, scenario: Any) -> tuple: + """Convert (period, scenario) to external key matching dims.""" + parts = [] + if self._periods is not None: + parts.append(period) + if self._scenarios is not None: + parts.append(scenario) + return tuple(parts) + + def _from_key(self, key: tuple) -> tuple[Any, Any]: + """Convert external key back to (period, scenario) with Nones. + + Returns: + Tuple of (period, scenario) where missing dims are None. + """ + period = None + scenario = None + idx = 0 + if self._periods is not None: + period = key[idx] + idx += 1 + if self._scenarios is not None: + scenario = key[idx] + return period, scenario + + def selector_for_key(self, key: tuple) -> dict: + """Get xarray selector dict for an external key. + + Args: + key: Key tuple as returned by ``iter_slices`` or ``iter_keys``. 
+ + Returns: + Dict suitable for ``ds.sel(**selector)``. + + Example: + >>> selector = iterator.selector_for_key(key) + >>> ds_slice = ds.sel(**selector) + """ + period, scenario = self._from_key(key) + return self._build_selector(period, scenario) + + def key_from_values(self, period: Any, scenario: Any) -> tuple: + """Convert (period, scenario) values to a key tuple. + + This is useful when you have separate period/scenario values + (possibly None) and need to create a key for use with combine(). + + Args: + period: Period value, or None if no period dimension. + scenario: Scenario value, or None if no scenario dimension. + + Returns: + Key tuple matching ``dims`` order. + + Example: + >>> iterator = DimIterator(periods=[2024, 2025]) + >>> iterator.key_from_values(2024, None) + (2024,) + """ + return self._to_key(period, scenario) + + # ========================================================================== + # Combination + # ========================================================================== + + def combine( + self, + slices: dict[tuple, xr.DataArray], + base_dims: list[str], + name: str | None = None, + attrs: dict | None = None, + join: str = 'exact', + fill_value: Any = None, + ) -> xr.DataArray: + """Combine per-slice DataArrays into multi-dimensional DataArray. + + Args: + slices: Dict mapping keys (from ``iter_slices``) to DataArrays. + All DataArrays must have the same ``base_dims``. + base_dims: Base dimensions of each slice (e.g., ``['cluster', 'time']``). + Used for final transpose to ensure consistent dimension order. + name: Optional name for resulting DataArray. + attrs: Optional attributes for resulting DataArray. + join: How to handle coordinate differences between slices. + - ``'exact'``: Coordinates must match exactly (default). + - ``'outer'``: Union of coordinates, fill missing with fill_value. + fill_value: Value to use for missing entries when ``join='outer'``. + Only used when coordinates differ between slices. + + Returns: + DataArray with dims ``[*base_dims, period?, scenario?]``. + + Example: + >>> results = {key: process(s) for key, s in iterator.iter_slices(ds)} + >>> combined = iterator.combine(results, base_dims=['cluster', 'time']) + """ + # Build concat kwargs + concat_kwargs = {'join': join} + if fill_value is not None: + concat_kwargs['fill_value'] = fill_value + + if not self._dim_names: + # No extra dims - return single slice + result = slices[()] + elif self.has_periods and self.has_scenarios: + # Both dimensions: concat scenarios first, then periods + period_arrays = [] + for p in self._periods: + scenario_arrays = [slices[self._to_key(p, s)] for s in self._scenarios] + period_arrays.append( + xr.concat( + scenario_arrays, + dim=pd.Index(self._scenarios, name='scenario'), + **concat_kwargs, + ) + ) + result = xr.concat( + period_arrays, + dim=pd.Index(self._periods, name='period'), + **concat_kwargs, + ) + elif self.has_periods: + result = xr.concat( + [slices[(p,)] for p in self._periods], + dim=pd.Index(self._periods, name='period'), + **concat_kwargs, + ) + else: # has_scenarios only + result = xr.concat( + [slices[(s,)] for s in self._scenarios], + dim=pd.Index(self._scenarios, name='scenario'), + **concat_kwargs, + ) + + # Transpose to standard order: base_dims first, then period/scenario + result = result.transpose(*base_dims, ...) 
+ + if name: + result = result.rename(name) + if attrs: + result = result.assign_attrs(attrs) + return result + + def combine_arrays( + self, + slices: dict[tuple, np.ndarray], + base_dims: list[str], + base_coords: dict[str, Any] | None = None, + name: str | None = None, + attrs: dict | None = None, + ) -> xr.DataArray: + """Combine per-slice numpy arrays into multi-dimensional DataArray. + + More efficient than ``combine()`` when working with raw numpy arrays, + as it avoids intermediate DataArray creation. + + Args: + slices: Dict mapping keys to numpy arrays. All arrays must have + the same shape and dtype. + base_dims: Dimension names for the base array axes. + base_coords: Optional coordinates for base dimensions. + name: Optional name for resulting DataArray. + attrs: Optional attributes for resulting DataArray. + + Returns: + DataArray with dims ``[*base_dims, period?, scenario?]``. + + Example: + >>> arrays = {key: np.array([1, 2, 3]) for key in iterator.iter_keys()} + >>> da = iterator.combine_arrays(arrays, base_dims=['cluster']) + """ + base_coords = base_coords or {} + + if not self._dim_names: + # No extra dims - wrap single array + return xr.DataArray( + slices[()], + dims=base_dims, + coords=base_coords, + name=name, + attrs=attrs or {}, + ) + + # Build full shape: base_shape + extra_dims_shape + first = next(iter(slices.values())) + extra_shape = [len(coords) for coords in self.coords.values()] + shape = list(first.shape) + extra_shape + data = np.empty(shape, dtype=first.dtype) + + # Fill using np.ndindex for flexibility with any number of extra dims + for combo in np.ndindex(*extra_shape): + key = tuple(list(self.coords.values())[i][idx] for i, idx in enumerate(combo)) + data[(...,) + combo] = slices[key] + + return xr.DataArray( + data, + dims=base_dims + self._dim_names, + coords={**base_coords, **self.coords}, + name=name, + attrs=attrs or {}, + ) + + # ========================================================================== + # Dunder methods + # ========================================================================== + + def __repr__(self) -> str: + if not self.dims: + return 'DimIterator(dims=())' + coords_str = ', '.join(f'{k}: {len(v)}' for k, v in self.coords.items()) + return f'DimIterator(dims={self.dims}, coords=({coords_str}))' + + def __len__(self) -> int: + """Number of slices.""" + return self.n_slices diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index 9e56479af..4423d1031 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -16,6 +16,7 @@ import pandas as pd import xarray as xr +from .dim_iterator import DimIterator from .modeling import _scalar_safe_reduce from .structure import EXPAND_DIVIDE, EXPAND_FIRST_TIMESTEP, EXPAND_INTERPOLATE, VariableCategory @@ -167,9 +168,11 @@ def _weight_for_key(key: tuple) -> xr.DataArray: weights = np.array([occurrences.get(c, 0) for c in range(n_clusters)]) return xr.DataArray(weights, dims=['cluster'], coords={'cluster': cluster_coords}) - weight_slices = {key: _weight_for_key(key) for key in cluster_occurrences_all} - return self._combine_slices_to_dataarray_generic( - weight_slices, ['cluster'], periods, scenarios, 'cluster_weight' + iterator = DimIterator.from_sentinel_lists(periods, scenarios) + # Convert old (period, scenario) keys to DimIterator keys + weight_slices = {iterator.key_from_values(p, s): _weight_for_key((p, s)) for p, s in cluster_occurrences_all} + return iterator.combine( + weight_slices, base_dims=['cluster'], name='cluster_weight', 
join='outer', fill_value=np.nan ) def _build_typical_das( @@ -259,9 +262,10 @@ def _build_segment_durations_da( DataArray with dims [cluster, time] or [cluster, time, period?, scenario?] containing duration in hours for each segment. """ + iterator = DimIterator.from_sentinel_lists(periods, scenarios) segment_duration_slices: dict[tuple, xr.DataArray] = {} - for key, tsam_result in tsam_aggregation_results.items(): + for (p, s), tsam_result in tsam_aggregation_results.items(): # segment_durations is tuple of tuples: ((dur1, dur2, ...), (dur1, dur2, ...), ...) # Each inner tuple is durations for one cluster seg_durs = tsam_result.segment_durations @@ -274,14 +278,18 @@ def _build_segment_durations_da( # Duration in hours = number of original timesteps * dt data[cluster_id, seg_id] = cluster_seg_durs[seg_id] * dt - segment_duration_slices[key] = xr.DataArray( + segment_duration_slices[iterator.key_from_values(p, s)] = xr.DataArray( data, dims=['cluster', 'time'], coords={'cluster': cluster_coords, 'time': time_coords}, ) - return self._combine_slices_to_dataarray_generic( - segment_duration_slices, ['cluster', 'time'], periods, scenarios, 'timestep_duration' + return iterator.combine( + segment_duration_slices, + base_dims=['cluster', 'time'], + name='timestep_duration', + join='outer', + fill_value=np.nan, ) def _build_clustering_metrics( @@ -323,6 +331,7 @@ def _build_clustering_metrics( ) # Multi-dim case + iterator = DimIterator.from_sentinel_lists(periods, scenarios) sample_df = next(iter(non_empty_metrics.values())) metric_names = list(sample_df.columns) data_vars = {} @@ -330,20 +339,21 @@ def _build_clustering_metrics( for metric in metric_names: slices = {} for (p, s), df in clustering_metrics_all.items(): + key = iterator.key_from_values(p, s) if df.empty: - slices[(p, s)] = xr.DataArray( + slices[key] = xr.DataArray( np.full(len(sample_df.index), np.nan), dims=['time_series'], coords={'time_series': list(sample_df.index)}, ) else: - slices[(p, s)] = xr.DataArray( + slices[key] = xr.DataArray( df[metric].values, dims=['time_series'], coords={'time_series': list(df.index)}, ) - data_vars[metric] = self._combine_slices_to_dataarray_generic( - slices, ['time_series'], periods, scenarios, metric + data_vars[metric] = iterator.combine( + slices, base_dims=['time_series'], name=metric, join='outer', fill_value=np.nan ) return xr.Dataset(data_vars) @@ -532,7 +542,7 @@ def _build_reduced_dataset( """ from .core import TimeSeriesData - all_keys = {(p, s) for p in periods for s in scenarios} + iterator = DimIterator.from_sentinel_lists(periods, scenarios) ds_new_vars = {} # Use ds.variables to avoid _construct_dataarray overhead @@ -567,11 +577,11 @@ def _build_reduced_dataset( coords=new_coords, attrs=var.attrs, ) - elif set(typical_das[name].keys()) != all_keys: + elif set(typical_das[name].keys()) != {(p, s) for p in periods for s in scenarios}: # Partial typical slices: fill missing keys with constant values # For multi-period/scenario data, we need to select the right slice for each key - # Exclude 'period' and 'scenario' - they're handled by _combine_slices_to_dataarray_2d + # Exclude 'period' and 'scenario' - they're handled by iterator.combine other_dims = [d for d in var.dims if d not in ('time', 'period', 'scenario')] other_shape = [var.sizes[d] for d in other_dims] new_shape = [actual_n_clusters, n_time_points] + other_shape @@ -583,54 +593,46 @@ def _build_reduced_dataset( # Build filled slices dict: use typical where available, constant otherwise filled_slices = {} - for key in 
all_keys: - if key in typical_das[name]: - filled_slices[key] = typical_das[name][key] - else: - # Select the specific period/scenario slice, then reshape - period_label, scenario_label = key - selector = {} - if period_label is not None and 'period' in var.dims: - selector['period'] = period_label - if scenario_label is not None and 'scenario' in var.dims: - selector['scenario'] = scenario_label - - # Select per-key slice if needed, otherwise use full variable - if selector: - var_slice = ds[name].sel(**selector, drop=True) + for p in iterator._periods or [None]: + for s in iterator._scenarios or [None]: + old_key = (p, s) + new_key = iterator.key_from_values(p, s) + if old_key in typical_das[name]: + filled_slices[new_key] = typical_das[name][old_key] else: - var_slice = ds[name] - - # Now slice time and reshape - time_idx = var_slice.dims.index('time') - slices_list = [slice(None)] * len(var_slice.dims) - slices_list[time_idx] = slice(0, n_reduced_timesteps) - sliced_values = var_slice.values[tuple(slices_list)] - reshaped_constant = sliced_values.reshape(new_shape) - - filled_slices[key] = xr.DataArray( - reshaped_constant, - dims=['cluster', 'time'] + other_dims, - coords=new_coords, - ) - - da = self._combine_slices_to_dataarray_2d( - slices=filled_slices, - attrs=var.attrs, - periods=periods, - scenarios=scenarios, - ) + # Select the specific period/scenario slice, then reshape + selector = iterator._build_selector(p, s) + + # Select per-key slice if needed, otherwise use full variable + if selector: + # Only include dims that exist in the variable + var_selector = {k: v for k, v in selector.items() if k in var.dims} + var_slice = ds[name].sel(**var_selector, drop=True) if var_selector else ds[name] + else: + var_slice = ds[name] + + # Now slice time and reshape + time_idx = var_slice.dims.index('time') + slices_list = [slice(None)] * len(var_slice.dims) + slices_list[time_idx] = slice(0, n_reduced_timesteps) + sliced_values = var_slice.values[tuple(slices_list)] + reshaped_constant = sliced_values.reshape(new_shape) + + filled_slices[new_key] = xr.DataArray( + reshaped_constant, + dims=['cluster', 'time'] + other_dims, + coords=new_coords, + ) + + da = iterator.combine(filled_slices, base_dims=['cluster', 'time'], attrs=var.attrs) if var.attrs.get('__timeseries_data__', False): da = TimeSeriesData.from_dataarray(da.assign_attrs(var.attrs)) ds_new_vars[name] = da else: # Time-varying: combine per-(period, scenario) slices - da = self._combine_slices_to_dataarray_2d( - slices=typical_das[name], - attrs=var.attrs, - periods=periods, - scenarios=scenarios, - ) + # Convert old-style keys to DimIterator keys + converted_slices = {iterator.key_from_values(p, s): da for (p, s), da in typical_das[name].items()} + da = iterator.combine(converted_slices, base_dims=['cluster', 'time'], attrs=var.attrs) if var.attrs.get('__timeseries_data__', False): da = TimeSeriesData.from_dataarray(da.assign_attrs(var.attrs)) ds_new_vars[name] = da @@ -656,25 +658,16 @@ def _build_cluster_assignments_da( Returns: DataArray with dims [original_cluster] or [original_cluster, period?, scenario?]. 
""" - has_periods = periods != [None] - has_scenarios = scenarios != [None] - - if has_periods or has_scenarios: - # Multi-dimensional case - cluster_assignments_slices = {} - for p in periods: - for s in scenarios: - key = (p, s) - cluster_assignments_slices[key] = xr.DataArray( - cluster_assignmentss[key], dims=['original_cluster'], name='cluster_assignments' - ) - return self._combine_slices_to_dataarray_generic( - cluster_assignments_slices, ['original_cluster'], periods, scenarios, 'cluster_assignments' + iterator = DimIterator.from_sentinel_lists(periods, scenarios) + slices = { + iterator.key_from_values(p, s): xr.DataArray( + cluster_assignmentss[(p, s)], dims=['original_cluster'], name='cluster_assignments' ) - else: - # Simple case - first_key = (periods[0], scenarios[0]) - return xr.DataArray(cluster_assignmentss[first_key], dims=['original_cluster'], name='cluster_assignments') + for p, s in cluster_assignmentss + } + return iterator.combine( + slices, base_dims=['original_cluster'], name='cluster_assignments', join='outer', fill_value=np.nan + ) def sel( self, @@ -1696,114 +1689,6 @@ def apply_clustering( scenarios=scenarios, ) - @staticmethod - def _combine_slices_to_dataarray_generic( - slices: dict[tuple, xr.DataArray], - base_dims: list[str], - periods: list, - scenarios: list, - name: str, - ) -> xr.DataArray: - """Combine per-(period, scenario) slices into a multi-dimensional DataArray. - - Generic version that works with any base dimension (not just 'time'). - - Args: - slices: Dict mapping (period, scenario) tuples to DataArrays. - base_dims: Base dimensions of each slice (e.g., ['original_cluster'] or ['original_time']). - periods: List of period labels ([None] if no periods dimension). - scenarios: List of scenario labels ([None] if no scenarios dimension). - name: Name for the resulting DataArray. - - Returns: - DataArray with dimensions [base_dims..., period?, scenario?]. - """ - first_key = (periods[0], scenarios[0]) - has_periods = periods != [None] - has_scenarios = scenarios != [None] - - # Simple case: no period/scenario dimensions - if not has_periods and not has_scenarios: - return slices[first_key].rename(name) - - # Multi-dimensional: use xr.concat to stack along period/scenario dims - # Use join='outer' to handle cases where different periods/scenarios have different - # coordinate values (e.g., different time_series after drop_constant_arrays) - if has_periods and has_scenarios: - # Stack scenarios first, then periods - period_arrays = [] - for p in periods: - scenario_arrays = [slices[(p, s)] for s in scenarios] - period_arrays.append( - xr.concat( - scenario_arrays, dim=pd.Index(scenarios, name='scenario'), join='outer', fill_value=np.nan - ) - ) - result = xr.concat(period_arrays, dim=pd.Index(periods, name='period'), join='outer', fill_value=np.nan) - elif has_periods: - result = xr.concat( - [slices[(p, None)] for p in periods], - dim=pd.Index(periods, name='period'), - join='outer', - fill_value=np.nan, - ) - else: - result = xr.concat( - [slices[(None, s)] for s in scenarios], - dim=pd.Index(scenarios, name='scenario'), - join='outer', - fill_value=np.nan, - ) - - # Put base dimension first (standard order) - result = result.transpose(base_dims[0], ...) 
- - return result.rename(name) - - @staticmethod - def _combine_slices_to_dataarray_2d( - slices: dict[tuple, xr.DataArray], - attrs: dict, - periods: list, - scenarios: list, - ) -> xr.DataArray: - """Combine per-(period, scenario) slices into a multi-dimensional DataArray with (cluster, time) dims. - - Args: - slices: Dict mapping (period, scenario) tuples to DataArrays with (cluster, time) dims. - attrs: Attributes to assign to the result. - periods: List of period labels ([None] if no periods dimension). - scenarios: List of scenario labels ([None] if no scenarios dimension). - - Returns: - DataArray with dimensions (cluster, time, period?, scenario?). - """ - first_key = (periods[0], scenarios[0]) - has_periods = periods != [None] - has_scenarios = scenarios != [None] - - # Simple case: no period/scenario dimensions - if not has_periods and not has_scenarios: - return slices[first_key].assign_attrs(attrs) - - # Multi-dimensional: use xr.concat to stack along period/scenario dims - if has_periods and has_scenarios: - # Stack scenarios first, then periods - period_arrays = [] - for p in periods: - scenario_arrays = [slices[(p, s)] for s in scenarios] - period_arrays.append(xr.concat(scenario_arrays, dim=pd.Index(scenarios, name='scenario'))) - result = xr.concat(period_arrays, dim=pd.Index(periods, name='period')) - elif has_periods: - result = xr.concat([slices[(p, None)] for p in periods], dim=pd.Index(periods, name='period')) - else: - result = xr.concat([slices[(None, s)] for s in scenarios], dim=pd.Index(scenarios, name='scenario')) - - # Put cluster and time first (standard order for clustered data) - result = result.transpose('cluster', 'time', ...) - - return result.assign_attrs(attrs) - def _validate_for_expansion(self) -> Clustering: """Validate FlowSystem can be expanded and return clustering info. From bedc0e4e6260edeb7a1253bda16c5f6b533581a6 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Tue, 27 Jan 2026 21:49:53 +0100 Subject: [PATCH 02/15] The refactoring is complete. Here's a summary of what changed: dim_iterator.py - Simplified to a single helper function: def add_slice_dims(da, period=None, scenario=None): """Add period/scenario dims back to a transformed DataArray.""" if period is not None: da = da.expand_dims(period=[period]) if scenario is not None: da = da.expand_dims(scenario=[scenario]) return da The new pattern used throughout the codebase: results = [] for p in periods: for s in scenarios: da = transform_data(...) # Create DataArray for this slice results.append(add_slice_dims(da, period=p, scenario=s)) combined = xr.combine_by_coords(results) if len(results) > 1 else results[0] This removes the complex DimIterator class with its key conversion methods, making the code more straightforward and easier to follow. 
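
For reference, a minimal self-contained sketch of the new pattern (the toy dataset, the `demand` variable, and the "doubling" transform are illustrative placeholders, not code from this repository):

    import numpy as np
    import pandas as pd
    import xarray as xr

    def add_slice_dims(da, period=None, scenario=None):
        """Add period/scenario dims back to a transformed DataArray."""
        if period is not None:
            da = da.expand_dims(period=[period])
        if scenario is not None:
            da = da.expand_dims(scenario=[scenario])
        return da

    # Illustrative dataset with period and scenario dimensions (placeholder data)
    ds = xr.Dataset(
        {'demand': (('time', 'period', 'scenario'), np.arange(24.0).reshape(4, 2, 3))},
        coords={
            'time': pd.date_range('2024-01-01', periods=4, freq='h'),
            'period': [2024, 2025],
            'scenario': ['low', 'base', 'high'],
        },
    )

    results = []
    for p in ds.period.values:
        for s in ds.scenario.values:
            ds_slice = ds.sel(period=p, scenario=s, drop=True)
            da = ds_slice['demand'] * 2.0  # stand-in for the per-slice transform
            results.append(add_slice_dims(da, period=p, scenario=s))

    combined = xr.combine_by_coords(results) if len(results) > 1 else results[0]
    # combine_by_coords returns a Dataset when the DataArrays are named
    demand = combined['demand'].transpose('time', 'period', 'scenario')
    print(demand.shape)  # (4, 2, 3)

Because expand_dims turns each slice back into a length-1 period/scenario entry, xr.combine_by_coords can reassemble the full cube without any key bookkeeping; a final transpose restores the desired dimension order.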
--- flixopt/clustering/base.py | 42 ++- flixopt/dim_iterator.py | 469 ++-------------------------- flixopt/transform_accessor.py | 169 +++++----- tests/test_cluster_reduce_expand.py | 83 ----- tests/test_clustering_io.py | 8 +- 5 files changed, 163 insertions(+), 608 deletions(-) diff --git a/flixopt/clustering/base.py b/flixopt/clustering/base.py index 97c4c3097..646d8f2e2 100644 --- a/flixopt/clustering/base.py +++ b/flixopt/clustering/base.py @@ -27,7 +27,7 @@ from ..plot_result import PlotResult from ..statistics_accessor import SelectType -from ..dim_iterator import DimIterator +from ..dim_iterator import add_slice_dims from ..statistics_accessor import _build_color_kwargs @@ -164,14 +164,6 @@ def coords(self) -> dict[str, list]: """ return {dim: self._get_dim_values(dim) for dim in self._dim_names} - @functools.cached_property - def _iterator(self) -> DimIterator: - """DimIterator for this ClusteringResults (cached).""" - return DimIterator( - periods=self._get_dim_values('period'), - scenarios=self._get_dim_values('scenario'), - ) - def sel(self, **kwargs: Any) -> TsamClusteringResult: """Select result by dimension labels (xarray-like). @@ -491,8 +483,33 @@ def _build_property_array( name: str | None = None, ) -> xr.DataArray: """Build a DataArray property, handling both single and multi-dimensional cases.""" - slices = {key: get_data(cr) for key, cr in self._results.items()} - return self._iterator.combine_arrays(slices, base_dims, base_coords, name) + results = [] + for key, cr in self._results.items(): + data = get_data(cr) + coords = base_coords if base_coords else {} + da = xr.DataArray(data, dims=base_dims, coords=coords, name=name) + + # Extract period/scenario from key based on dim_names + period = None + scenario = None + for i, dim_name in enumerate(self._dim_names): + if dim_name == 'period': + period = key[i] + elif dim_name == 'scenario': + scenario = key[i] + + results.append(add_slice_dims(da, period=period, scenario=scenario)) + + if len(results) == 1: + return results[0] + + combined = xr.combine_by_coords(results) + # combine_by_coords returns a Dataset when DataArrays have names, extract the DataArray + if isinstance(combined, xr.Dataset): + combined = combined[name] + # Ensure correct dimension order: base_dims first, then period/scenario + dim_order = list(base_dims) + self._dim_names + return combined.transpose(*dim_order) @staticmethod def _key_to_str(key: tuple) -> str: @@ -543,7 +560,8 @@ def apply(self, data: xr.Dataset) -> AggregationResults: results = {} for key, cr in self._results.items(): - selector = self._iterator.selector_for_key(key) + # Build selector from key based on dim_names + selector = {dim_name: key[i] for i, dim_name in enumerate(self._dim_names)} data_slice = data.sel(**selector, drop=True) if selector else data # Drop constant arrays and convert to DataFrame diff --git a/flixopt/dim_iterator.py b/flixopt/dim_iterator.py index 27d13c366..224f86b60 100644 --- a/flixopt/dim_iterator.py +++ b/flixopt/dim_iterator.py @@ -1,454 +1,49 @@ """ -Dimension iteration utilities for period/scenario slicing patterns. +Helper for period/scenario dimension handling. -This module provides `DimIterator`, a helper class that eliminates repetitive -boilerplate when working with datasets that may have period and/or scenario -dimensions. +Provides a simple utility for adding period/scenario dimensions back to +DataArrays after transformation. 
""" from __future__ import annotations from typing import TYPE_CHECKING, Any -import numpy as np -import pandas as pd -import xarray as xr - if TYPE_CHECKING: - from collections.abc import Iterator + import xarray as xr - from .flow_system import FlowSystem +def add_slice_dims( + da: xr.DataArray, + period: Any = None, + scenario: Any = None, +) -> xr.DataArray: + """Add period/scenario dimensions back to a transformed DataArray. -class DimIterator: - """Handles period/scenario iteration and combination patterns. + After selecting a slice with `ds.sel(period=p, scenario=s, drop=True)` + and transforming it, use this to re-add the period/scenario coordinates + so results can be combined with `xr.combine_by_coords`. - Eliminates the need for: - - ``[None]`` sentinel values for missing dimensions - - 4-way if/elif branching for period/scenario combinations - - Manual key conversion between internal and external formats + Args: + da: DataArray without period/scenario dimensions. + period: Period value to add, or None to skip. + scenario: Scenario value to add, or None to skip. - Keys returned by this class are tuples that match ``dims`` order: - - No dims: ``()`` - - Period only: ``(period_value,)`` - - Scenario only: ``(scenario_value,)`` - - Both: ``(period_value, scenario_value)`` + Returns: + DataArray with period/scenario dims added (as length-1 dims). Example: - >>> iterator = DimIterator.from_dataset(ds) - >>> - >>> # Iteration - >>> results = {} - >>> for key, ds_slice in iterator.iter_slices(ds): - ... results[key] = transform(ds_slice) - >>> - >>> # Combination - >>> combined = iterator.combine(results, base_dims=['cluster', 'time']) - - Attributes: - dims: Tuple of dimension names present (e.g., ``('period', 'scenario')``). - coords: Dict mapping dimension names to coordinate values. + >>> results = [] + >>> for p in periods: + ... for s in scenarios: + ... selector = {k: v for k, v in [('period', p), ('scenario', s)] if v is not None} + ... ds_slice = ds.sel(**selector, drop=True) if selector else ds + ... result = transform(ds_slice) + ... results.append(add_slice_dims(result, period=p, scenario=s)) + >>> combined = xr.combine_by_coords(results) """ - - def __init__( - self, - periods: list | None = None, - scenarios: list | None = None, - ): - """Initialize with explicit dimension values. - - Args: - periods: Period coordinate values, or None if no period dimension. - scenarios: Scenario coordinate values, or None if no scenario dimension. - """ - self._periods = periods - self._scenarios = scenarios - - # Build dim_names for external keys (no None sentinels) - self._dim_names: list[str] = [] - if periods is not None: - self._dim_names.append('period') - if scenarios is not None: - self._dim_names.append('scenario') - - # ========================================================================== - # Factory methods - # ========================================================================== - - @classmethod - def from_dataset(cls, ds: xr.Dataset) -> DimIterator: - """Create iterator from dataset's period/scenario coordinates. - - Args: - ds: Dataset with optional 'period' and/or 'scenario' coordinates. - - Returns: - DimIterator configured for the dataset's dimensions. 
- """ - periods = list(ds.period.values) if 'period' in ds.coords else None - scenarios = list(ds.scenario.values) if 'scenario' in ds.coords else None - return cls(periods=periods, scenarios=scenarios) - - @classmethod - def from_flow_system(cls, fs: FlowSystem) -> DimIterator: - """Create iterator from FlowSystem's period/scenario structure. - - Args: - fs: FlowSystem with optional periods and/or scenarios. - - Returns: - DimIterator configured for the FlowSystem's dimensions. - """ - return cls( - periods=list(fs.periods) if fs.periods is not None else None, - scenarios=list(fs.scenarios) if fs.scenarios is not None else None, - ) - - @classmethod - def from_sentinel_lists(cls, periods: list, scenarios: list) -> DimIterator: - """Create iterator from lists that may contain [None] sentinel. - - This handles the common pattern where code uses [None] to indicate - no dimension exists: - - periods = list(fs.periods) if has_periods else [None] - scenarios = list(fs.scenarios) if has_scenarios else [None] - - Args: - periods: Period values, or [None] if no period dimension. - scenarios: Scenario values, or [None] if no scenario dimension. - - Returns: - DimIterator configured for the actual dimensions present. - """ - return cls( - periods=periods if periods != [None] else None, - scenarios=scenarios if scenarios != [None] else None, - ) - - # ========================================================================== - # Properties - # ========================================================================== - - @property - def dims(self) -> tuple[str, ...]: - """Dimension names as tuple (xarray-like).""" - return tuple(self._dim_names) - - @property - def coords(self) -> dict[str, list]: - """Coordinate values for each dimension. - - Returns: - Dict mapping dimension names to lists of coordinate values. - Empty dict if no dimensions. - """ - result = {} - if self._periods is not None: - result['period'] = self._periods - if self._scenarios is not None: - result['scenario'] = self._scenarios - return result - - @property - def has_periods(self) -> bool: - """Whether period dimension exists.""" - return self._periods is not None - - @property - def has_scenarios(self) -> bool: - """Whether scenario dimension exists.""" - return self._scenarios is not None - - @property - def n_slices(self) -> int: - """Total number of slices (product of dimension sizes).""" - n = 1 - if self._periods is not None: - n *= len(self._periods) - if self._scenarios is not None: - n *= len(self._scenarios) - return n - - # ========================================================================== - # Iteration - # ========================================================================== - - def iter_slices( - self, - ds: xr.Dataset, - drop: bool = True, - ) -> Iterator[tuple[tuple, xr.Dataset]]: - """Iterate over (period, scenario) slices of a dataset. - - Args: - ds: Dataset with optional period/scenario dimensions. - drop: If True (default), drop period/scenario dims from sliced datasets. - If False, keep them as scalar coordinates. - - Yields: - ``(key, ds_slice)`` tuples where key matches ``dims`` order. - For no dims, key is ``()``. - - Example: - >>> for key, slice_ds in iterator.iter_slices(ds): - ... print(f'Processing {key}') - ... 
result = process(slice_ds) - """ - periods = self._periods if self._periods is not None else [None] - scenarios = self._scenarios if self._scenarios is not None else [None] - - for p in periods: - for s in scenarios: - selector = self._build_selector(p, s) - key = self._to_key(p, s) - if selector: - yield key, ds.sel(**selector, drop=drop) - else: - yield key, ds - - def iter_keys(self) -> Iterator[tuple]: - """Iterate over keys only (no data access). - - Yields: - Key tuples matching ``dims`` order. - - Example: - >>> for key in iterator.iter_keys(): - ... results[key] = compute_something() - """ - periods = self._periods if self._periods is not None else [None] - scenarios = self._scenarios if self._scenarios is not None else [None] - - for p in periods: - for s in scenarios: - yield self._to_key(p, s) - - # ========================================================================== - # Key conversion - # ========================================================================== - - def _build_selector(self, period: Any, scenario: Any) -> dict: - """Build xarray selector dict (only for non-None values).""" - selector = {} - if period is not None: - selector['period'] = period - if scenario is not None: - selector['scenario'] = scenario - return selector - - def _to_key(self, period: Any, scenario: Any) -> tuple: - """Convert (period, scenario) to external key matching dims.""" - parts = [] - if self._periods is not None: - parts.append(period) - if self._scenarios is not None: - parts.append(scenario) - return tuple(parts) - - def _from_key(self, key: tuple) -> tuple[Any, Any]: - """Convert external key back to (period, scenario) with Nones. - - Returns: - Tuple of (period, scenario) where missing dims are None. - """ - period = None - scenario = None - idx = 0 - if self._periods is not None: - period = key[idx] - idx += 1 - if self._scenarios is not None: - scenario = key[idx] - return period, scenario - - def selector_for_key(self, key: tuple) -> dict: - """Get xarray selector dict for an external key. - - Args: - key: Key tuple as returned by ``iter_slices`` or ``iter_keys``. - - Returns: - Dict suitable for ``ds.sel(**selector)``. - - Example: - >>> selector = iterator.selector_for_key(key) - >>> ds_slice = ds.sel(**selector) - """ - period, scenario = self._from_key(key) - return self._build_selector(period, scenario) - - def key_from_values(self, period: Any, scenario: Any) -> tuple: - """Convert (period, scenario) values to a key tuple. - - This is useful when you have separate period/scenario values - (possibly None) and need to create a key for use with combine(). - - Args: - period: Period value, or None if no period dimension. - scenario: Scenario value, or None if no scenario dimension. - - Returns: - Key tuple matching ``dims`` order. - - Example: - >>> iterator = DimIterator(periods=[2024, 2025]) - >>> iterator.key_from_values(2024, None) - (2024,) - """ - return self._to_key(period, scenario) - - # ========================================================================== - # Combination - # ========================================================================== - - def combine( - self, - slices: dict[tuple, xr.DataArray], - base_dims: list[str], - name: str | None = None, - attrs: dict | None = None, - join: str = 'exact', - fill_value: Any = None, - ) -> xr.DataArray: - """Combine per-slice DataArrays into multi-dimensional DataArray. - - Args: - slices: Dict mapping keys (from ``iter_slices``) to DataArrays. - All DataArrays must have the same ``base_dims``. 
- base_dims: Base dimensions of each slice (e.g., ``['cluster', 'time']``). - Used for final transpose to ensure consistent dimension order. - name: Optional name for resulting DataArray. - attrs: Optional attributes for resulting DataArray. - join: How to handle coordinate differences between slices. - - ``'exact'``: Coordinates must match exactly (default). - - ``'outer'``: Union of coordinates, fill missing with fill_value. - fill_value: Value to use for missing entries when ``join='outer'``. - Only used when coordinates differ between slices. - - Returns: - DataArray with dims ``[*base_dims, period?, scenario?]``. - - Example: - >>> results = {key: process(s) for key, s in iterator.iter_slices(ds)} - >>> combined = iterator.combine(results, base_dims=['cluster', 'time']) - """ - # Build concat kwargs - concat_kwargs = {'join': join} - if fill_value is not None: - concat_kwargs['fill_value'] = fill_value - - if not self._dim_names: - # No extra dims - return single slice - result = slices[()] - elif self.has_periods and self.has_scenarios: - # Both dimensions: concat scenarios first, then periods - period_arrays = [] - for p in self._periods: - scenario_arrays = [slices[self._to_key(p, s)] for s in self._scenarios] - period_arrays.append( - xr.concat( - scenario_arrays, - dim=pd.Index(self._scenarios, name='scenario'), - **concat_kwargs, - ) - ) - result = xr.concat( - period_arrays, - dim=pd.Index(self._periods, name='period'), - **concat_kwargs, - ) - elif self.has_periods: - result = xr.concat( - [slices[(p,)] for p in self._periods], - dim=pd.Index(self._periods, name='period'), - **concat_kwargs, - ) - else: # has_scenarios only - result = xr.concat( - [slices[(s,)] for s in self._scenarios], - dim=pd.Index(self._scenarios, name='scenario'), - **concat_kwargs, - ) - - # Transpose to standard order: base_dims first, then period/scenario - result = result.transpose(*base_dims, ...) - - if name: - result = result.rename(name) - if attrs: - result = result.assign_attrs(attrs) - return result - - def combine_arrays( - self, - slices: dict[tuple, np.ndarray], - base_dims: list[str], - base_coords: dict[str, Any] | None = None, - name: str | None = None, - attrs: dict | None = None, - ) -> xr.DataArray: - """Combine per-slice numpy arrays into multi-dimensional DataArray. - - More efficient than ``combine()`` when working with raw numpy arrays, - as it avoids intermediate DataArray creation. - - Args: - slices: Dict mapping keys to numpy arrays. All arrays must have - the same shape and dtype. - base_dims: Dimension names for the base array axes. - base_coords: Optional coordinates for base dimensions. - name: Optional name for resulting DataArray. - attrs: Optional attributes for resulting DataArray. - - Returns: - DataArray with dims ``[*base_dims, period?, scenario?]``. 
- - Example: - >>> arrays = {key: np.array([1, 2, 3]) for key in iterator.iter_keys()} - >>> da = iterator.combine_arrays(arrays, base_dims=['cluster']) - """ - base_coords = base_coords or {} - - if not self._dim_names: - # No extra dims - wrap single array - return xr.DataArray( - slices[()], - dims=base_dims, - coords=base_coords, - name=name, - attrs=attrs or {}, - ) - - # Build full shape: base_shape + extra_dims_shape - first = next(iter(slices.values())) - extra_shape = [len(coords) for coords in self.coords.values()] - shape = list(first.shape) + extra_shape - data = np.empty(shape, dtype=first.dtype) - - # Fill using np.ndindex for flexibility with any number of extra dims - for combo in np.ndindex(*extra_shape): - key = tuple(list(self.coords.values())[i][idx] for i, idx in enumerate(combo)) - data[(...,) + combo] = slices[key] - - return xr.DataArray( - data, - dims=base_dims + self._dim_names, - coords={**base_coords, **self.coords}, - name=name, - attrs=attrs or {}, - ) - - # ========================================================================== - # Dunder methods - # ========================================================================== - - def __repr__(self) -> str: - if not self.dims: - return 'DimIterator(dims=())' - coords_str = ', '.join(f'{k}: {len(v)}' for k, v in self.coords.items()) - return f'DimIterator(dims={self.dims}, coords=({coords_str}))' - - def __len__(self) -> int: - """Number of slices.""" - return self.n_slices + if period is not None: + da = da.expand_dims(period=[period]) + if scenario is not None: + da = da.expand_dims(scenario=[scenario]) + return da diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index 4423d1031..dde261e41 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -16,7 +16,7 @@ import pandas as pd import xarray as xr -from .dim_iterator import DimIterator +from .dim_iterator import add_slice_dims from .modeling import _scalar_safe_reduce from .structure import EXPAND_DIVIDE, EXPAND_FIRST_TIMESTEP, EXPAND_INTERPOLATE, VariableCategory @@ -161,19 +161,16 @@ def _build_cluster_weight_da( Returns: DataArray with dims [cluster] or [cluster, period?, scenario?]. """ + results = [] + for p in periods: + for s in scenarios: + occurrences = cluster_occurrences_all[(p, s)] + weights = np.array([occurrences.get(c, 0) for c in range(n_clusters)]) + da = xr.DataArray(weights, dims=['cluster'], coords={'cluster': cluster_coords}) + results.append(add_slice_dims(da, period=p, scenario=s)) - def _weight_for_key(key: tuple) -> xr.DataArray: - occurrences = cluster_occurrences_all[key] - # Missing clusters contribute zero weight (not 1) - weights = np.array([occurrences.get(c, 0) for c in range(n_clusters)]) - return xr.DataArray(weights, dims=['cluster'], coords={'cluster': cluster_coords}) - - iterator = DimIterator.from_sentinel_lists(periods, scenarios) - # Convert old (period, scenario) keys to DimIterator keys - weight_slices = {iterator.key_from_values(p, s): _weight_for_key((p, s)) for p, s in cluster_occurrences_all} - return iterator.combine( - weight_slices, base_dims=['cluster'], name='cluster_weight', join='outer', fill_value=np.nan - ) + combined = xr.combine_by_coords(results) if len(results) > 1 else results[0] + return combined.transpose('cluster', ...).rename('cluster_weight') def _build_typical_das( self, @@ -262,12 +259,9 @@ def _build_segment_durations_da( DataArray with dims [cluster, time] or [cluster, time, period?, scenario?] 
containing duration in hours for each segment. """ - iterator = DimIterator.from_sentinel_lists(periods, scenarios) - segment_duration_slices: dict[tuple, xr.DataArray] = {} - + results = [] for (p, s), tsam_result in tsam_aggregation_results.items(): # segment_durations is tuple of tuples: ((dur1, dur2, ...), (dur1, dur2, ...), ...) - # Each inner tuple is durations for one cluster seg_durs = tsam_result.segment_durations # Build 2D array (cluster, segment) of durations in hours @@ -275,22 +269,17 @@ def _build_segment_durations_da( for cluster_id in range(actual_n_clusters): cluster_seg_durs = seg_durs[cluster_id] for seg_id in range(n_segments): - # Duration in hours = number of original timesteps * dt data[cluster_id, seg_id] = cluster_seg_durs[seg_id] * dt - segment_duration_slices[iterator.key_from_values(p, s)] = xr.DataArray( + da = xr.DataArray( data, dims=['cluster', 'time'], coords={'cluster': cluster_coords, 'time': time_coords}, ) + results.append(add_slice_dims(da, period=p, scenario=s)) - return iterator.combine( - segment_duration_slices, - base_dims=['cluster', 'time'], - name='timestep_duration', - join='outer', - fill_value=np.nan, - ) + combined = xr.combine_by_coords(results) if len(results) > 1 else results[0] + return combined.transpose('cluster', 'time', ...).rename('timestep_duration') def _build_clustering_metrics( self, @@ -331,30 +320,29 @@ def _build_clustering_metrics( ) # Multi-dim case - iterator = DimIterator.from_sentinel_lists(periods, scenarios) sample_df = next(iter(non_empty_metrics.values())) metric_names = list(sample_df.columns) data_vars = {} for metric in metric_names: - slices = {} + results = [] for (p, s), df in clustering_metrics_all.items(): - key = iterator.key_from_values(p, s) if df.empty: - slices[key] = xr.DataArray( + da = xr.DataArray( np.full(len(sample_df.index), np.nan), dims=['time_series'], coords={'time_series': list(sample_df.index)}, ) else: - slices[key] = xr.DataArray( + da = xr.DataArray( df[metric].values, dims=['time_series'], coords={'time_series': list(df.index)}, ) - data_vars[metric] = iterator.combine( - slices, base_dims=['time_series'], name=metric, join='outer', fill_value=np.nan - ) + results.append(add_slice_dims(da, period=p, scenario=s)) + + combined = xr.combine_by_coords(results) if len(results) > 1 else results[0] + data_vars[metric] = combined.transpose('time_series', ...).rename(metric) return xr.Dataset(data_vars) @@ -542,13 +530,19 @@ def _build_reduced_dataset( """ from .core import TimeSeriesData - iterator = DimIterator.from_sentinel_lists(periods, scenarios) ds_new_vars = {} # Use ds.variables to avoid _construct_dataarray overhead variables = ds.variables coord_cache = {k: ds.coords[k].values for k in ds.coords} + # Determine dimension order for combining + dim_order = ['cluster', 'time'] + if periods != [None]: + dim_order.append('period') + if scenarios != [None]: + dim_order.append('scenario') + for name in ds.data_vars: var = variables[name] if 'time' not in var.dims: @@ -581,7 +575,7 @@ def _build_reduced_dataset( # Partial typical slices: fill missing keys with constant values # For multi-period/scenario data, we need to select the right slice for each key - # Exclude 'period' and 'scenario' - they're handled by iterator.combine + # Exclude 'period' and 'scenario' - they're handled by combine other_dims = [d for d in var.dims if d not in ('time', 'period', 'scenario')] other_shape = [var.sizes[d] for d in other_dims] new_shape = [actual_n_clusters, n_time_points] + other_shape @@ -591,25 
+585,23 @@ def _build_reduced_dataset( if dim in coord_cache: new_coords[dim] = coord_cache[dim] - # Build filled slices dict: use typical where available, constant otherwise - filled_slices = {} - for p in iterator._periods or [None]: - for s in iterator._scenarios or [None]: - old_key = (p, s) - new_key = iterator.key_from_values(p, s) - if old_key in typical_das[name]: - filled_slices[new_key] = typical_das[name][old_key] + # Build slices: use typical where available, constant otherwise + results = [] + for p in periods: + for s in scenarios: + key = (p, s) + if key in typical_das[name]: + da = typical_das[name][key] else: # Select the specific period/scenario slice, then reshape - selector = iterator._build_selector(p, s) + selector = {} + if p is not None and 'period' in var.dims: + selector['period'] = p + if s is not None and 'scenario' in var.dims: + selector['scenario'] = s # Select per-key slice if needed, otherwise use full variable - if selector: - # Only include dims that exist in the variable - var_selector = {k: v for k, v in selector.items() if k in var.dims} - var_slice = ds[name].sel(**var_selector, drop=True) if var_selector else ds[name] - else: - var_slice = ds[name] + var_slice = ds[name].sel(**selector, drop=True) if selector else ds[name] # Now slice time and reshape time_idx = var_slice.dims.index('time') @@ -618,24 +610,51 @@ def _build_reduced_dataset( sliced_values = var_slice.values[tuple(slices_list)] reshaped_constant = sliced_values.reshape(new_shape) - filled_slices[new_key] = xr.DataArray( + da = xr.DataArray( reshaped_constant, dims=['cluster', 'time'] + other_dims, coords=new_coords, ) - da = iterator.combine(filled_slices, base_dims=['cluster', 'time'], attrs=var.attrs) + results.append(add_slice_dims(da, period=p, scenario=s)) + + if len(results) == 1: + combined = results[0] + else: + combined = xr.combine_by_coords(results) + # combine_by_coords returns Dataset when DataArrays have names + if isinstance(combined, xr.Dataset): + combined = combined[name] if name in combined else list(combined.data_vars.values())[0] + # Add other_dims to dim_order for transpose + full_dim_order = dim_order + other_dims + combined = combined.transpose(*full_dim_order) + + combined = combined.assign_attrs(var.attrs) if var.attrs.get('__timeseries_data__', False): - da = TimeSeriesData.from_dataarray(da.assign_attrs(var.attrs)) - ds_new_vars[name] = da + combined = TimeSeriesData.from_dataarray(combined) + ds_new_vars[name] = combined else: # Time-varying: combine per-(period, scenario) slices - # Convert old-style keys to DimIterator keys - converted_slices = {iterator.key_from_values(p, s): da for (p, s), da in typical_das[name].items()} - da = iterator.combine(converted_slices, base_dims=['cluster', 'time'], attrs=var.attrs) + results = [] + for (p, s), da in typical_das[name].items(): + results.append(add_slice_dims(da, period=p, scenario=s)) + + if len(results) == 1: + combined = results[0] + else: + combined = xr.combine_by_coords(results) + # combine_by_coords returns Dataset when DataArrays have names + if isinstance(combined, xr.Dataset): + combined = combined[name] if name in combined else list(combined.data_vars.values())[0] + # Add other dims from the first result for proper ordering + other_dims = [d for d in results[0].dims if d not in ('cluster', 'time', 'period', 'scenario')] + full_dim_order = dim_order + other_dims + combined = combined.transpose(*full_dim_order) + + combined = combined.assign_attrs(var.attrs) if var.attrs.get('__timeseries_data__', 
False): - da = TimeSeriesData.from_dataarray(da.assign_attrs(var.attrs)) - ds_new_vars[name] = da + combined = TimeSeriesData.from_dataarray(combined) + ds_new_vars[name] = combined # Copy attrs but remove cluster_weight new_attrs = dict(ds.attrs) @@ -658,16 +677,26 @@ def _build_cluster_assignments_da( Returns: DataArray with dims [original_cluster] or [original_cluster, period?, scenario?]. """ - iterator = DimIterator.from_sentinel_lists(periods, scenarios) - slices = { - iterator.key_from_values(p, s): xr.DataArray( - cluster_assignmentss[(p, s)], dims=['original_cluster'], name='cluster_assignments' - ) - for p, s in cluster_assignmentss - } - return iterator.combine( - slices, base_dims=['original_cluster'], name='cluster_assignments', join='outer', fill_value=np.nan - ) + results = [] + for p in periods: + for s in scenarios: + da = xr.DataArray(cluster_assignmentss[(p, s)], dims=['original_cluster'], name='cluster_assignments') + results.append(add_slice_dims(da, period=p, scenario=s)) + + if len(results) == 1: + return results[0] + + combined = xr.combine_by_coords(results, join='outer', fill_value=np.nan) + # combine_by_coords returns a Dataset when DataArrays have names, extract the DataArray + if isinstance(combined, xr.Dataset): + combined = combined['cluster_assignments'] + # Ensure correct dimension order: original_cluster first, then period/scenario + dim_order = ['original_cluster'] + if periods != [None]: + dim_order.append('period') + if scenarios != [None]: + dim_order.append('scenario') + return combined.transpose(*dim_order) def sel( self, diff --git a/tests/test_cluster_reduce_expand.py b/tests/test_cluster_reduce_expand.py index cb6f5b7ff..e3f4a8d72 100644 --- a/tests/test_cluster_reduce_expand.py +++ b/tests/test_cluster_reduce_expand.py @@ -1682,86 +1682,3 @@ def test_startup_timing_preserved_non_segmented(self, solver_fixture, timesteps_ assert abs(clustered_val - expanded_val) < 1e-6, ( f'Mismatch at day {orig_day}, hour {hour}: clustered={clustered_val}, expanded={expanded_val}' ) - - -class TestCombineSlices: - """Tests for the combine_slices utility function.""" - - def test_single_dim(self): - """Test combining slices with a single extra dimension.""" - from flixopt.clustering.base import combine_slices - - slices = { - ('A',): np.array([1.0, 2.0, 3.0]), - ('B',): np.array([4.0, 5.0, 6.0]), - } - result = combine_slices( - slices, - extra_dims=['x'], - dim_coords={'x': ['A', 'B']}, - output_dim='time', - output_coord=[0, 1, 2], - ) - - assert result.dims == ('time', 'x') - assert result.shape == (3, 2) - assert result.sel(x='A').values.tolist() == [1.0, 2.0, 3.0] - assert result.sel(x='B').values.tolist() == [4.0, 5.0, 6.0] - - def test_two_dims(self): - """Test combining slices with two extra dimensions.""" - from flixopt.clustering.base import combine_slices - - slices = { - ('P1', 'base'): np.array([1.0, 2.0]), - ('P1', 'high'): np.array([3.0, 4.0]), - ('P2', 'base'): np.array([5.0, 6.0]), - ('P2', 'high'): np.array([7.0, 8.0]), - } - result = combine_slices( - slices, - extra_dims=['period', 'scenario'], - dim_coords={'period': ['P1', 'P2'], 'scenario': ['base', 'high']}, - output_dim='time', - output_coord=[0, 1], - ) - - assert result.dims == ('time', 'period', 'scenario') - assert result.shape == (2, 2, 2) - assert result.sel(period='P1', scenario='base').values.tolist() == [1.0, 2.0] - assert result.sel(period='P2', scenario='high').values.tolist() == [7.0, 8.0] - - def test_attrs_propagation(self): - """Test that attrs are propagated to the 
result.""" - from flixopt.clustering.base import combine_slices - - slices = {('A',): np.array([1.0, 2.0])} - result = combine_slices( - slices, - extra_dims=['x'], - dim_coords={'x': ['A']}, - output_dim='time', - output_coord=[0, 1], - attrs={'units': 'kW', 'description': 'power'}, - ) - - assert result.attrs['units'] == 'kW' - assert result.attrs['description'] == 'power' - - def test_datetime_coords(self): - """Test with pandas DatetimeIndex as output coordinates.""" - from flixopt.clustering.base import combine_slices - - time_index = pd.date_range('2020-01-01', periods=3, freq='h') - slices = {('A',): np.array([1.0, 2.0, 3.0])} - result = combine_slices( - slices, - extra_dims=['x'], - dim_coords={'x': ['A']}, - output_dim='time', - output_coord=time_index, - ) - - assert result.dims == ('time', 'x') - assert len(result.coords['time']) == 3 - assert result.coords['time'][0].values == time_index[0] diff --git a/tests/test_clustering_io.py b/tests/test_clustering_io.py index e3bfa6c1d..0e2200885 100644 --- a/tests/test_clustering_io.py +++ b/tests/test_clustering_io.py @@ -237,12 +237,8 @@ def test_clustering_roundtrip_preserves_scenarios(self, system_with_scenarios): ds = fs_clustered.to_dataset(include_solution=False) fs_restored = fx.FlowSystem.from_dataset(ds) - # Scenarios should be preserved in the FlowSystem itself - pd.testing.assert_index_equal( - fs_restored.scenarios, - pd.Index(['Low', 'High'], name='scenario'), - check_names=False, - ) + # Scenarios should be preserved in the FlowSystem itself (order may differ due to coordinate sorting) + assert set(fs_restored.scenarios) == {'Low', 'High'} class TestClusteringJsonExport: From f14c5a67f94af2aa625bab7ea503859a17109e73 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Tue, 27 Jan 2026 21:52:30 +0100 Subject: [PATCH 03/15] simplify by iterating over the key and dim_names directly --- flixopt/clustering/base.py | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/flixopt/clustering/base.py b/flixopt/clustering/base.py index 646d8f2e2..49d6594a7 100644 --- a/flixopt/clustering/base.py +++ b/flixopt/clustering/base.py @@ -27,7 +27,6 @@ from ..plot_result import PlotResult from ..statistics_accessor import SelectType -from ..dim_iterator import add_slice_dims from ..statistics_accessor import _build_color_kwargs @@ -486,30 +485,19 @@ def _build_property_array( results = [] for key, cr in self._results.items(): data = get_data(cr) - coords = base_coords if base_coords else {} - da = xr.DataArray(data, dims=base_dims, coords=coords, name=name) - - # Extract period/scenario from key based on dim_names - period = None - scenario = None - for i, dim_name in enumerate(self._dim_names): - if dim_name == 'period': - period = key[i] - elif dim_name == 'scenario': - scenario = key[i] - - results.append(add_slice_dims(da, period=period, scenario=scenario)) + da = xr.DataArray(data, dims=base_dims, coords=base_coords or {}, name=name) + # Add dims from key: key[i] corresponds to dim_names[i] + for dim_name, coord_val in zip(self._dim_names, key, strict=False): + da = da.expand_dims({dim_name: [coord_val]}) + results.append(da) if len(results) == 1: return results[0] combined = xr.combine_by_coords(results) - # combine_by_coords returns a Dataset when DataArrays have names, extract the DataArray if isinstance(combined, xr.Dataset): combined = combined[name] - # Ensure correct dimension order: base_dims first, then period/scenario - dim_order = list(base_dims) + 
self._dim_names - return combined.transpose(*dim_order) + return combined.transpose(*base_dims, *self._dim_names) @staticmethod def _key_to_str(key: tuple) -> str: From e6e757ef4b79f21c268cb057164f533ed9b3640d Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Tue, 27 Jan 2026 21:59:34 +0100 Subject: [PATCH 04/15] simplify by iterating over the key and dim_names directly --- flixopt/transform_accessor.py | 171 ++++++++++++---------------------- 1 file changed, 62 insertions(+), 109 deletions(-) diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index dde261e41..fb1811310 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -145,32 +145,33 @@ def _build_cluster_weight_da( cluster_occurrences_all: dict[tuple, dict], n_clusters: int, cluster_coords: np.ndarray, - periods: list, - scenarios: list, + dim_names: list[str], ) -> xr.DataArray: """Build cluster_weight DataArray from occurrence counts. Args: - cluster_occurrences_all: Dict mapping (period, scenario) tuples to - dicts of {cluster_id: occurrence_count}. + cluster_occurrences_all: Dict mapping key tuples to {cluster_id: count}. n_clusters: Number of clusters. cluster_coords: Cluster coordinate values. - periods: List of period labels ([None] if no periods dimension). - scenarios: List of scenario labels ([None] if no scenarios dimension). + dim_names: Names of extra dimensions (e.g., ['period', 'scenario']). Returns: - DataArray with dims [cluster] or [cluster, period?, scenario?]. + DataArray with dims [cluster, *dim_names]. """ results = [] - for p in periods: - for s in scenarios: - occurrences = cluster_occurrences_all[(p, s)] - weights = np.array([occurrences.get(c, 0) for c in range(n_clusters)]) - da = xr.DataArray(weights, dims=['cluster'], coords={'cluster': cluster_coords}) - results.append(add_slice_dims(da, period=p, scenario=s)) + for key, occurrences in cluster_occurrences_all.items(): + weights = np.array([occurrences.get(c, 0) for c in range(n_clusters)]) + da = xr.DataArray(weights, dims=['cluster'], coords={'cluster': cluster_coords}) + for dim_name, coord_val in zip(dim_names, key, strict=False): + da = da.expand_dims({dim_name: [coord_val]}) + results.append(da) - combined = xr.combine_by_coords(results) if len(results) > 1 else results[0] - return combined.transpose('cluster', ...).rename('cluster_weight') + if len(results) == 1: + return results[0].rename('cluster_weight') + combined = xr.combine_by_coords(results) + if isinstance(combined, xr.Dataset): + combined = list(combined.data_vars.values())[0] + return combined.transpose('cluster', *dim_names).rename('cluster_weight') def _build_typical_das( self, @@ -231,68 +232,59 @@ def _build_typical_das( def _build_segment_durations_da( self, - tsam_aggregation_results: dict[tuple, Any], + aggregation_results: dict[tuple, Any], actual_n_clusters: int, n_segments: int, cluster_coords: np.ndarray, time_coords: pd.RangeIndex, dt: float, - periods: list, - scenarios: list, + dim_names: list[str], ) -> xr.DataArray: """Build timestep_duration DataArray from segment durations. - For segmented systems, each segment represents multiple original timesteps. - The duration is segment_duration_in_original_timesteps * dt (hours per original timestep). - Args: - tsam_aggregation_results: Dict mapping (period, scenario) to tsam results. + aggregation_results: Dict mapping key tuples to tsam results. actual_n_clusters: Number of clusters. n_segments: Number of segments per cluster. 
cluster_coords: Cluster coordinate values. time_coords: Time coordinate values (RangeIndex for segments). dt: Hours per original timestep. - periods: List of period labels ([None] if no periods dimension). - scenarios: List of scenario labels ([None] if no scenarios dimension). + dim_names: Names of extra dimensions (e.g., ['period', 'scenario']). Returns: - DataArray with dims [cluster, time] or [cluster, time, period?, scenario?] - containing duration in hours for each segment. + DataArray with dims [cluster, time, *dim_names]. """ results = [] - for (p, s), tsam_result in tsam_aggregation_results.items(): - # segment_durations is tuple of tuples: ((dur1, dur2, ...), (dur1, dur2, ...), ...) + for key, tsam_result in aggregation_results.items(): seg_durs = tsam_result.segment_durations - - # Build 2D array (cluster, segment) of durations in hours data = np.zeros((actual_n_clusters, n_segments)) for cluster_id in range(actual_n_clusters): cluster_seg_durs = seg_durs[cluster_id] for seg_id in range(n_segments): data[cluster_id, seg_id] = cluster_seg_durs[seg_id] * dt - da = xr.DataArray( - data, - dims=['cluster', 'time'], - coords={'cluster': cluster_coords, 'time': time_coords}, - ) - results.append(add_slice_dims(da, period=p, scenario=s)) + da = xr.DataArray(data, dims=['cluster', 'time'], coords={'cluster': cluster_coords, 'time': time_coords}) + for dim_name, coord_val in zip(dim_names, key, strict=False): + da = da.expand_dims({dim_name: [coord_val]}) + results.append(da) - combined = xr.combine_by_coords(results) if len(results) > 1 else results[0] - return combined.transpose('cluster', 'time', ...).rename('timestep_duration') + if len(results) == 1: + return results[0].rename('timestep_duration') + combined = xr.combine_by_coords(results) + if isinstance(combined, xr.Dataset): + combined = list(combined.data_vars.values())[0] + return combined.transpose('cluster', 'time', *dim_names).rename('timestep_duration') def _build_clustering_metrics( self, clustering_metrics_all: dict[tuple, pd.DataFrame], - periods: list, - scenarios: list, + dim_names: list[str], ) -> xr.Dataset: """Build clustering metrics Dataset from per-slice DataFrames. Args: - clustering_metrics_all: Dict mapping (period, scenario) to metric DataFrames. - periods: List of period labels ([None] if no periods dimension). - scenarios: List of scenario labels ([None] if no scenarios dimension). + clustering_metrics_all: Dict mapping key tuples to metric DataFrames. + dim_names: Names of extra dimensions (e.g., ['period', 'scenario']). Returns: Dataset with RMSE, MAE, RMSE_duration metrics. 
@@ -302,12 +294,9 @@ def _build_clustering_metrics( if not non_empty_metrics: return xr.Dataset() - first_key = (periods[0], scenarios[0]) - + # Single slice case if len(clustering_metrics_all) == 1 and len(non_empty_metrics) == 1: - metrics_df = non_empty_metrics.get(first_key) - if metrics_df is None: - metrics_df = next(iter(non_empty_metrics.values())) + metrics_df = next(iter(non_empty_metrics.values())) return xr.Dataset( { col: xr.DataArray( @@ -326,7 +315,7 @@ def _build_clustering_metrics( for metric in metric_names: results = [] - for (p, s), df in clustering_metrics_all.items(): + for key, df in clustering_metrics_all.items(): if df.empty: da = xr.DataArray( np.full(len(sample_df.index), np.nan), @@ -339,10 +328,17 @@ def _build_clustering_metrics( dims=['time_series'], coords={'time_series': list(df.index)}, ) - results.append(add_slice_dims(da, period=p, scenario=s)) + for dim_name, coord_val in zip(dim_names, key, strict=False): + da = da.expand_dims({dim_name: [coord_val]}) + results.append(da) - combined = xr.combine_by_coords(results) if len(results) > 1 else results[0] - data_vars[metric] = combined.transpose('time_series', ...).rename(metric) + if len(results) == 1: + combined = results[0] + else: + combined = xr.combine_by_coords(results) + if isinstance(combined, xr.Dataset): + combined = list(combined.data_vars.values())[0] + data_vars[metric] = combined.transpose('time_series', *dim_names).rename(metric) return xr.Dataset(data_vars) @@ -390,23 +386,26 @@ def _build_reduced_flow_system( if has_scenarios: dim_names.append('scenario') - # Build dict keyed by (period?, scenario?) tuples (without None) - aggregation_results: dict[tuple, Any] = {} - for (p, s), result in tsam_aggregation_results.items(): - key_parts = [] + # Helper to convert (p, s) keys to clean keys (without None) + def clean_key(p, s) -> tuple: + parts = [] if has_periods: - key_parts.append(p) + parts.append(p) if has_scenarios: - key_parts.append(s) - key = tuple(key_parts) - aggregation_results[key] = result + parts.append(s) + return tuple(parts) + + # Convert all dicts to use clean keys + aggregation_results = {clean_key(p, s): v for (p, s), v in tsam_aggregation_results.items()} + occurrences = {clean_key(p, s): v for (p, s), v in cluster_occurrences_all.items()} + metrics = {clean_key(p, s): v for (p, s), v in clustering_metrics_all.items()} # Use first result for structure first_key = (periods[0], scenarios[0]) first_tsam = tsam_aggregation_results[first_key] # Build metrics - clustering_metrics = self._build_clustering_metrics(clustering_metrics_all, periods, scenarios) + clustering_metrics = self._build_clustering_metrics(metrics, dim_names) n_reduced_timesteps = len(first_tsam.cluster_representatives) actual_n_clusters = len(first_tsam.cluster_weights) @@ -432,9 +431,7 @@ def _build_reduced_flow_system( ) # Build cluster_weight - cluster_weight = self._build_cluster_weight_da( - cluster_occurrences_all, actual_n_clusters, cluster_coords, periods, scenarios - ) + cluster_weight = self._build_cluster_weight_da(occurrences, actual_n_clusters, cluster_coords, dim_names) # Logging if is_segmented: @@ -467,14 +464,7 @@ def _build_reduced_flow_system( # For segmented systems, build timestep_duration from segment_durations if is_segmented: segment_durations = self._build_segment_durations_da( - tsam_aggregation_results, - actual_n_clusters, - n_segments, - cluster_coords, - time_coords, - dt, - periods, - scenarios, + aggregation_results, actual_n_clusters, n_segments, cluster_coords, time_coords, 
dt, dim_names ) ds_new['timestep_duration'] = segment_durations @@ -661,43 +651,6 @@ def _build_reduced_dataset( new_attrs.pop('cluster_weight', None) return xr.Dataset(ds_new_vars, attrs=new_attrs) - def _build_cluster_assignments_da( - self, - cluster_assignmentss: dict[tuple, np.ndarray], - periods: list, - scenarios: list, - ) -> xr.DataArray: - """Build cluster_assignments DataArray from cluster assignments. - - Args: - cluster_assignmentss: Dict mapping (period, scenario) to cluster assignment arrays. - periods: List of period labels ([None] if no periods dimension). - scenarios: List of scenario labels ([None] if no scenarios dimension). - - Returns: - DataArray with dims [original_cluster] or [original_cluster, period?, scenario?]. - """ - results = [] - for p in periods: - for s in scenarios: - da = xr.DataArray(cluster_assignmentss[(p, s)], dims=['original_cluster'], name='cluster_assignments') - results.append(add_slice_dims(da, period=p, scenario=s)) - - if len(results) == 1: - return results[0] - - combined = xr.combine_by_coords(results, join='outer', fill_value=np.nan) - # combine_by_coords returns a Dataset when DataArrays have names, extract the DataArray - if isinstance(combined, xr.Dataset): - combined = combined['cluster_assignments'] - # Ensure correct dimension order: original_cluster first, then period/scenario - dim_order = ['original_cluster'] - if periods != [None]: - dim_order.append('period') - if scenarios != [None]: - dim_order.append('scenario') - return combined.transpose(*dim_order) - def sel( self, time: str | slice | list[str] | pd.Timestamp | pd.DatetimeIndex | None = None, From 468862c8865c26056afe2f211701d14496563eca Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Tue, 27 Jan 2026 22:03:30 +0100 Subject: [PATCH 05/15] Deleted files: - flixopt/dim_iterator.py - no longer needed Key changes: 1. _build_typical_das now returns dict[str, xr.DataArray] (pre-combined) instead of dict[str, dict[tuple, xr.DataArray]] (nested by keys) 2. _build_reduced_dataset simplified from ~80 lines to ~35 lines: - Removed partial slice handling (was complex, likely unused) - No longer iterates over periods/scenarios - Just uses pre-combined DataArrays directly 3. All helper methods now use the same pattern: for key, data in results_dict.items(): da = xr.DataArray(...) for dim_name, coord_val in zip(dim_names, key): da = da.expand_dims({dim_name: [coord_val]}) results.append(da) combined = xr.combine_by_coords(results) 4. No more [None] sentinel pattern - keys are clean tuples matching dim_names 5. Removed add_slice_dims - no longer needed since dims are added inline The code is now much simpler: convert keys once at entry, add dims with expand_dims, combine with xr.combine_by_coords --- flixopt/dim_iterator.py | 49 ---------- flixopt/transform_accessor.py | 169 ++++++++-------------------------- 2 files changed, 40 insertions(+), 178 deletions(-) delete mode 100644 flixopt/dim_iterator.py diff --git a/flixopt/dim_iterator.py b/flixopt/dim_iterator.py deleted file mode 100644 index 224f86b60..000000000 --- a/flixopt/dim_iterator.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -Helper for period/scenario dimension handling. - -Provides a simple utility for adding period/scenario dimensions back to -DataArrays after transformation. 
-""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, Any - -if TYPE_CHECKING: - import xarray as xr - - -def add_slice_dims( - da: xr.DataArray, - period: Any = None, - scenario: Any = None, -) -> xr.DataArray: - """Add period/scenario dimensions back to a transformed DataArray. - - After selecting a slice with `ds.sel(period=p, scenario=s, drop=True)` - and transforming it, use this to re-add the period/scenario coordinates - so results can be combined with `xr.combine_by_coords`. - - Args: - da: DataArray without period/scenario dimensions. - period: Period value to add, or None to skip. - scenario: Scenario value to add, or None to skip. - - Returns: - DataArray with period/scenario dims added (as length-1 dims). - - Example: - >>> results = [] - >>> for p in periods: - ... for s in scenarios: - ... selector = {k: v for k, v in [('period', p), ('scenario', s)] if v is not None} - ... ds_slice = ds.sel(**selector, drop=True) if selector else ds - ... result = transform(ds_slice) - ... results.append(add_slice_dims(result, period=p, scenario=s)) - >>> combined = xr.combine_by_coords(results) - """ - if period is not None: - da = da.expand_dims(period=[period]) - if scenario is not None: - da = da.expand_dims(scenario=[scenario]) - return da diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index fb1811310..f6eaf76a1 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -16,7 +16,6 @@ import pandas as pd import xarray as xr -from .dim_iterator import add_slice_dims from .modeling import _scalar_safe_reduce from .structure import EXPAND_DIVIDE, EXPAND_FIRST_TIMESTEP, EXPAND_INTERPOLATE, VariableCategory @@ -175,59 +174,71 @@ def _build_cluster_weight_da( def _build_typical_das( self, - tsam_aggregation_results: dict[tuple, Any], + aggregation_results: dict[tuple, Any], actual_n_clusters: int, n_time_points: int, cluster_coords: np.ndarray, time_coords: pd.DatetimeIndex | pd.RangeIndex, + dim_names: list[str], is_segmented: bool = False, - ) -> dict[str, dict[tuple, xr.DataArray]]: - """Build typical periods DataArrays with (cluster, time) shape. + ) -> dict[str, xr.DataArray]: + """Build typical periods DataArrays with (cluster, time, *dim_names) shape. Args: - tsam_aggregation_results: Dict mapping (period, scenario) to tsam results. + aggregation_results: Dict mapping key tuples to tsam results. actual_n_clusters: Number of clusters. n_time_points: Number of time points per cluster (timesteps or segments). cluster_coords: Cluster coordinate values. time_coords: Time coordinate values. + dim_names: Names of extra dimensions (e.g., ['period', 'scenario']). is_segmented: Whether segmentation was used. Returns: - Nested dict: {column_name: {(period, scenario): DataArray}}. + Dict mapping column names to combined DataArrays with dims [cluster, time, *dim_names]. 
""" - typical_das: dict[str, dict[tuple, xr.DataArray]] = {} - for key, tsam_result in tsam_aggregation_results.items(): + # Collect DataArrays per column, with dims already expanded + column_results: dict[str, list[xr.DataArray]] = {} + + for key, tsam_result in aggregation_results.items(): typical_df = tsam_result.cluster_representatives if is_segmented: - # Segmented data: MultiIndex with cluster as first level - # Each cluster has exactly n_time_points rows (segments) - # Extract all data at once using numpy reshape, avoiding slow .loc calls columns = typical_df.columns.tolist() - - # Get all values as numpy array: (n_clusters * n_time_points, n_columns) all_values = typical_df.values - - # Reshape to (n_clusters, n_time_points, n_columns) reshaped = all_values.reshape(actual_n_clusters, n_time_points, -1) for col_idx, col in enumerate(columns): - # reshaped[:, :, col_idx] selects all clusters, all time points, single column - # Result shape: (n_clusters, n_time_points) - typical_das.setdefault(col, {})[key] = xr.DataArray( + da = xr.DataArray( reshaped[:, :, col_idx], dims=['cluster', 'time'], coords={'cluster': cluster_coords, 'time': time_coords}, ) + for dim_name, coord_val in zip(dim_names, key, strict=False): + da = da.expand_dims({dim_name: [coord_val]}) + column_results.setdefault(col, []).append(da) else: - # Non-segmented: flat data that can be reshaped for col in typical_df.columns: flat_data = typical_df[col].values reshaped = flat_data.reshape(actual_n_clusters, n_time_points) - typical_das.setdefault(col, {})[key] = xr.DataArray( + da = xr.DataArray( reshaped, dims=['cluster', 'time'], coords={'cluster': cluster_coords, 'time': time_coords}, ) + for dim_name, coord_val in zip(dim_names, key, strict=False): + da = da.expand_dims({dim_name: [coord_val]}) + column_results.setdefault(col, []).append(da) + + # Combine each column's DataArrays + typical_das: dict[str, xr.DataArray] = {} + for col, das in column_results.items(): + if len(das) == 1: + typical_das[col] = das[0] + else: + combined = xr.combine_by_coords(das) + if isinstance(combined, xr.Dataset): + combined = list(combined.data_vars.values())[0] + typical_das[col] = combined.transpose('cluster', 'time', *dim_names) + return typical_das def _build_segment_durations_da( @@ -445,20 +456,12 @@ def clean_key(p, s) -> tuple: # Build typical periods DataArrays with (cluster, time) shape typical_das = self._build_typical_das( - tsam_aggregation_results, actual_n_clusters, n_time_points, cluster_coords, time_coords, is_segmented + aggregation_results, actual_n_clusters, n_time_points, cluster_coords, time_coords, dim_names, is_segmented ) # Build reduced dataset with (cluster, time) dimensions ds_new = self._build_reduced_dataset( - ds, - typical_das, - actual_n_clusters, - n_reduced_timesteps, - n_time_points, - cluster_coords, - time_coords, - periods, - scenarios, + ds, typical_das, actual_n_clusters, n_reduced_timesteps, n_time_points, cluster_coords, time_coords ) # For segmented systems, build timestep_duration from segment_durations @@ -493,27 +496,23 @@ def clean_key(p, s) -> tuple: def _build_reduced_dataset( self, ds: xr.Dataset, - typical_das: dict[str, dict[tuple, xr.DataArray]], + typical_das: dict[str, xr.DataArray], actual_n_clusters: int, n_reduced_timesteps: int, n_time_points: int, cluster_coords: np.ndarray, time_coords: pd.DatetimeIndex | pd.RangeIndex, - periods: list, - scenarios: list, ) -> xr.Dataset: """Build the reduced dataset with (cluster, time) structure. Args: ds: Original dataset. 
- typical_das: Typical periods DataArrays from _build_typical_das(). + typical_das: Pre-combined DataArrays from _build_typical_das(). actual_n_clusters: Number of clusters. n_reduced_timesteps: Total reduced timesteps (n_clusters * n_time_points). n_time_points: Number of time points per cluster (timesteps or segments). cluster_coords: Cluster coordinate values. time_coords: Time coordinate values. - periods: List of period labels. - scenarios: List of scenario labels. Returns: Dataset with reduced timesteps and (cluster, time) structure. @@ -521,27 +520,17 @@ def _build_reduced_dataset( from .core import TimeSeriesData ds_new_vars = {} - - # Use ds.variables to avoid _construct_dataarray overhead variables = ds.variables coord_cache = {k: ds.coords[k].values for k in ds.coords} - # Determine dimension order for combining - dim_order = ['cluster', 'time'] - if periods != [None]: - dim_order.append('period') - if scenarios != [None]: - dim_order.append('scenario') - for name in ds.data_vars: var = variables[name] if 'time' not in var.dims: - # No time dimension - wrap Variable in DataArray + # No time dimension - copy as-is coords = {d: coord_cache[d] for d in var.dims if d in coord_cache} ds_new_vars[name] = xr.DataArray(var.values, dims=var.dims, coords=coords, attrs=var.attrs, name=name) elif name not in typical_das: # Time-dependent but constant: reshape to (cluster, time, ...) - # Use numpy slicing instead of .isel() time_idx = var.dims.index('time') slices = [slice(None)] * len(var.dims) slices[time_idx] = slice(0, n_reduced_timesteps) @@ -561,90 +550,12 @@ def _build_reduced_dataset( coords=new_coords, attrs=var.attrs, ) - elif set(typical_das[name].keys()) != {(p, s) for p in periods for s in scenarios}: - # Partial typical slices: fill missing keys with constant values - # For multi-period/scenario data, we need to select the right slice for each key - - # Exclude 'period' and 'scenario' - they're handled by combine - other_dims = [d for d in var.dims if d not in ('time', 'period', 'scenario')] - other_shape = [var.sizes[d] for d in other_dims] - new_shape = [actual_n_clusters, n_time_points] + other_shape - - new_coords = {'cluster': cluster_coords, 'time': time_coords} - for dim in other_dims: - if dim in coord_cache: - new_coords[dim] = coord_cache[dim] - - # Build slices: use typical where available, constant otherwise - results = [] - for p in periods: - for s in scenarios: - key = (p, s) - if key in typical_das[name]: - da = typical_das[name][key] - else: - # Select the specific period/scenario slice, then reshape - selector = {} - if p is not None and 'period' in var.dims: - selector['period'] = p - if s is not None and 'scenario' in var.dims: - selector['scenario'] = s - - # Select per-key slice if needed, otherwise use full variable - var_slice = ds[name].sel(**selector, drop=True) if selector else ds[name] - - # Now slice time and reshape - time_idx = var_slice.dims.index('time') - slices_list = [slice(None)] * len(var_slice.dims) - slices_list[time_idx] = slice(0, n_reduced_timesteps) - sliced_values = var_slice.values[tuple(slices_list)] - reshaped_constant = sliced_values.reshape(new_shape) - - da = xr.DataArray( - reshaped_constant, - dims=['cluster', 'time'] + other_dims, - coords=new_coords, - ) - - results.append(add_slice_dims(da, period=p, scenario=s)) - - if len(results) == 1: - combined = results[0] - else: - combined = xr.combine_by_coords(results) - # combine_by_coords returns Dataset when DataArrays have names - if isinstance(combined, xr.Dataset): - 
combined = combined[name] if name in combined else list(combined.data_vars.values())[0] - # Add other_dims to dim_order for transpose - full_dim_order = dim_order + other_dims - combined = combined.transpose(*full_dim_order) - - combined = combined.assign_attrs(var.attrs) - if var.attrs.get('__timeseries_data__', False): - combined = TimeSeriesData.from_dataarray(combined) - ds_new_vars[name] = combined else: - # Time-varying: combine per-(period, scenario) slices - results = [] - for (p, s), da in typical_das[name].items(): - results.append(add_slice_dims(da, period=p, scenario=s)) - - if len(results) == 1: - combined = results[0] - else: - combined = xr.combine_by_coords(results) - # combine_by_coords returns Dataset when DataArrays have names - if isinstance(combined, xr.Dataset): - combined = combined[name] if name in combined else list(combined.data_vars.values())[0] - # Add other dims from the first result for proper ordering - other_dims = [d for d in results[0].dims if d not in ('cluster', 'time', 'period', 'scenario')] - full_dim_order = dim_order + other_dims - combined = combined.transpose(*full_dim_order) - - combined = combined.assign_attrs(var.attrs) + # Time-varying: use pre-combined DataArray from typical_das + da = typical_das[name].assign_attrs(var.attrs) if var.attrs.get('__timeseries_data__', False): - combined = TimeSeriesData.from_dataarray(combined) - ds_new_vars[name] = combined + da = TimeSeriesData.from_dataarray(da) + ds_new_vars[name] = da # Copy attrs but remove cluster_weight new_attrs = dict(ds.attrs) From 4e33675903d32609cafe5ef37ff7268dad6708bc Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Tue, 27 Jan 2026 22:14:36 +0100 Subject: [PATCH 06/15] The refactoring is complete. Here's a summary of the changes: Changes made: 1. cluster() method: - Added dim_names list and to_clean_key() helper at the start - Dicts now use clean keys like (2024,) or (2024, 'high') instead of (2024, None) or (None, 'high') - Removed duplicate dim_names building and to_cr_key() function from the data_vars branch - Simplified data_vars branch since ClusteringResults can receive the dicts directly - Updated call to _build_reduced_flow_system to use new parameter names (aggregation_results, occurrences, metrics, dim_names) 2. apply_clustering() method: - Uses dim_names directly from clustering.results.dim_names - Removed the key conversion logic (full_key = (cr_key[0], None) etc.) - Updated call to _build_reduced_flow_system with new parameter names The code is now cleaner with consistent use of clean keys throughout the clustering workflow. All 141 tests pass (98 in test_cluster_reduce_expand.py + 43 in test_clustering_io.py). 
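For reference, a minimal standalone sketch of the clean-key pattern this commit converges on. The dim_names, keys, and toy arrays below are illustrative only (not taken from the codebase); it just shows how per-key slices with length-1 expand_dims dimensions recombine via xr.combine_by_coords:

    import numpy as np
    import xarray as xr

    dim_names = ['period', 'scenario']  # clean keys: tuples aligned with dim_names, no None sentinels
    slices = {
        (2024, 'low'): np.array([1.0, 2.0, 3.0]),
        (2024, 'high'): np.array([4.0, 5.0, 6.0]),
        (2030, 'low'): np.array([7.0, 8.0, 9.0]),
        (2030, 'high'): np.array([10.0, 11.0, 12.0]),
    }

    results = []
    for key, values in slices.items():
        da = xr.DataArray(values, dims=['cluster'], coords={'cluster': [0, 1, 2]})
        # Re-attach the extra dims as length-1 dimensions taken from the key
        for dim_name, coord_val in zip(dim_names, key, strict=True):
            da = da.expand_dims({dim_name: [coord_val]})
        results.append(da)

    # Unnamed DataArrays -> combine_by_coords returns a DataArray spanning the full hypercube
    combined = xr.combine_by_coords(results).transpose('cluster', *dim_names)
    assert combined.dims == ('cluster', 'period', 'scenario')
    assert combined.shape == (3, 2, 2)

Note that combine_by_coords sorts coordinate values, which is why the scenario-order check in test_clustering_io.py was relaxed to a set comparison earlier in this series.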
--- flixopt/clustering/base.py | 16 +- flixopt/transform_accessor.py | 323 +++++++++++++--------------------- 2 files changed, 134 insertions(+), 205 deletions(-) diff --git a/flixopt/clustering/base.py b/flixopt/clustering/base.py index 49d6594a7..3dd11cc43 100644 --- a/flixopt/clustering/base.py +++ b/flixopt/clustering/base.py @@ -482,19 +482,17 @@ def _build_property_array( name: str | None = None, ) -> xr.DataArray: """Build a DataArray property, handling both single and multi-dimensional cases.""" - results = [] + slices = [] for key, cr in self._results.items(): - data = get_data(cr) - da = xr.DataArray(data, dims=base_dims, coords=base_coords or {}, name=name) - # Add dims from key: key[i] corresponds to dim_names[i] - for dim_name, coord_val in zip(self._dim_names, key, strict=False): + da = xr.DataArray(get_data(cr), dims=base_dims, coords=base_coords or {}, name=name) + for dim_name, coord_val in zip(self._dim_names, key, strict=True): da = da.expand_dims({dim_name: [coord_val]}) - results.append(da) + slices.append(da) - if len(results) == 1: - return results[0] + if len(slices) == 1: + return slices[0] - combined = xr.combine_by_coords(results) + combined = xr.combine_by_coords(slices) if isinstance(combined, xr.Dataset): combined = combined[name] return combined.transpose(*base_dims, *self._dim_names) diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index f6eaf76a1..4e6f79d20 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -28,6 +28,55 @@ logger = logging.getLogger('flixopt') +def _combine_dataarray_slices( + slices: list[xr.DataArray], + base_dims: list[str], + extra_dims: list[str], + name: str | None = None, +) -> xr.DataArray: + """Combine DataArray slices with extra dimensions into a single DataArray. + + Args: + slices: List of DataArrays, each with extra dims already expanded. + base_dims: Base dimension names (e.g., ['cluster', 'time']). + extra_dims: Extra dimension names (e.g., ['period', 'scenario']). + name: Optional name for the result. + + Returns: + Combined DataArray with dims [*base_dims, *extra_dims]. + """ + if len(slices) == 1: + result = slices[0] + else: + combined = xr.combine_by_coords(slices) + # combine_by_coords returns Dataset when DataArrays have names + if isinstance(combined, xr.Dataset): + result = list(combined.data_vars.values())[0] + else: + result = combined + result = result.transpose(*base_dims, *extra_dims) + + if name is not None: + result = result.rename(name) + return result + + +def _expand_dims_for_key(da: xr.DataArray, dim_names: list[str], key: tuple) -> xr.DataArray: + """Add dimensions to a DataArray based on key values. + + Args: + da: DataArray without extra dimensions. + dim_names: Names of dimensions to add (e.g., ['period', 'scenario']). + key: Tuple of coordinate values matching dim_names. + + Returns: + DataArray with extra dimensions added. + """ + for dim_name, coord_val in zip(dim_names, key, strict=True): + da = da.expand_dims({dim_name: [coord_val]}) + return da + + class TransformAccessor: """ Accessor for transformation methods on FlowSystem. @@ -157,20 +206,13 @@ def _build_cluster_weight_da( Returns: DataArray with dims [cluster, *dim_names]. 
""" - results = [] + slices = [] for key, occurrences in cluster_occurrences_all.items(): weights = np.array([occurrences.get(c, 0) for c in range(n_clusters)]) da = xr.DataArray(weights, dims=['cluster'], coords={'cluster': cluster_coords}) - for dim_name, coord_val in zip(dim_names, key, strict=False): - da = da.expand_dims({dim_name: [coord_val]}) - results.append(da) + slices.append(_expand_dims_for_key(da, dim_names, key)) - if len(results) == 1: - return results[0].rename('cluster_weight') - combined = xr.combine_by_coords(results) - if isinstance(combined, xr.Dataset): - combined = list(combined.data_vars.values())[0] - return combined.transpose('cluster', *dim_names).rename('cluster_weight') + return _combine_dataarray_slices(slices, ['cluster'], dim_names, name='cluster_weight') def _build_typical_das( self, @@ -196,50 +238,27 @@ def _build_typical_das( Returns: Dict mapping column names to combined DataArrays with dims [cluster, time, *dim_names]. """ - # Collect DataArrays per column, with dims already expanded - column_results: dict[str, list[xr.DataArray]] = {} + column_slices: dict[str, list[xr.DataArray]] = {} + base_coords = {'cluster': cluster_coords, 'time': time_coords} for key, tsam_result in aggregation_results.items(): typical_df = tsam_result.cluster_representatives if is_segmented: columns = typical_df.columns.tolist() - all_values = typical_df.values - reshaped = all_values.reshape(actual_n_clusters, n_time_points, -1) - + reshaped = typical_df.values.reshape(actual_n_clusters, n_time_points, -1) for col_idx, col in enumerate(columns): - da = xr.DataArray( - reshaped[:, :, col_idx], - dims=['cluster', 'time'], - coords={'cluster': cluster_coords, 'time': time_coords}, - ) - for dim_name, coord_val in zip(dim_names, key, strict=False): - da = da.expand_dims({dim_name: [coord_val]}) - column_results.setdefault(col, []).append(da) + da = xr.DataArray(reshaped[:, :, col_idx], dims=['cluster', 'time'], coords=base_coords) + column_slices.setdefault(col, []).append(_expand_dims_for_key(da, dim_names, key)) else: for col in typical_df.columns: - flat_data = typical_df[col].values - reshaped = flat_data.reshape(actual_n_clusters, n_time_points) - da = xr.DataArray( - reshaped, - dims=['cluster', 'time'], - coords={'cluster': cluster_coords, 'time': time_coords}, - ) - for dim_name, coord_val in zip(dim_names, key, strict=False): - da = da.expand_dims({dim_name: [coord_val]}) - column_results.setdefault(col, []).append(da) - - # Combine each column's DataArrays - typical_das: dict[str, xr.DataArray] = {} - for col, das in column_results.items(): - if len(das) == 1: - typical_das[col] = das[0] - else: - combined = xr.combine_by_coords(das) - if isinstance(combined, xr.Dataset): - combined = list(combined.data_vars.values())[0] - typical_das[col] = combined.transpose('cluster', 'time', *dim_names) + reshaped = typical_df[col].values.reshape(actual_n_clusters, n_time_points) + da = xr.DataArray(reshaped, dims=['cluster', 'time'], coords=base_coords) + column_slices.setdefault(col, []).append(_expand_dims_for_key(da, dim_names, key)) - return typical_das + return { + col: _combine_dataarray_slices(slices, ['cluster', 'time'], dim_names) + for col, slices in column_slices.items() + } def _build_segment_durations_da( self, @@ -265,26 +284,15 @@ def _build_segment_durations_da( Returns: DataArray with dims [cluster, time, *dim_names]. 
""" - results = [] + slices = [] + base_coords = {'cluster': cluster_coords, 'time': time_coords} for key, tsam_result in aggregation_results.items(): seg_durs = tsam_result.segment_durations - data = np.zeros((actual_n_clusters, n_segments)) - for cluster_id in range(actual_n_clusters): - cluster_seg_durs = seg_durs[cluster_id] - for seg_id in range(n_segments): - data[cluster_id, seg_id] = cluster_seg_durs[seg_id] * dt - - da = xr.DataArray(data, dims=['cluster', 'time'], coords={'cluster': cluster_coords, 'time': time_coords}) - for dim_name, coord_val in zip(dim_names, key, strict=False): - da = da.expand_dims({dim_name: [coord_val]}) - results.append(da) - - if len(results) == 1: - return results[0].rename('timestep_duration') - combined = xr.combine_by_coords(results) - if isinstance(combined, xr.Dataset): - combined = list(combined.data_vars.values())[0] - return combined.transpose('cluster', 'time', *dim_names).rename('timestep_duration') + data = np.array([[seg_durs[c][s] * dt for s in range(n_segments)] for c in range(actual_n_clusters)]) + da = xr.DataArray(data, dims=['cluster', 'time'], coords=base_coords) + slices.append(_expand_dims_for_key(da, dim_names, key)) + + return _combine_dataarray_slices(slices, ['cluster', 'time'], dim_names, name='timestep_duration') def _build_clustering_metrics( self, @@ -321,48 +329,27 @@ def _build_clustering_metrics( # Multi-dim case sample_df = next(iter(non_empty_metrics.values())) - metric_names = list(sample_df.columns) data_vars = {} - for metric in metric_names: - results = [] + for metric in sample_df.columns: + slices = [] for key, df in clustering_metrics_all.items(): - if df.empty: - da = xr.DataArray( - np.full(len(sample_df.index), np.nan), - dims=['time_series'], - coords={'time_series': list(sample_df.index)}, - ) - else: - da = xr.DataArray( - df[metric].values, - dims=['time_series'], - coords={'time_series': list(df.index)}, - ) - for dim_name, coord_val in zip(dim_names, key, strict=False): - da = da.expand_dims({dim_name: [coord_val]}) - results.append(da) - - if len(results) == 1: - combined = results[0] - else: - combined = xr.combine_by_coords(results) - if isinstance(combined, xr.Dataset): - combined = list(combined.data_vars.values())[0] - data_vars[metric] = combined.transpose('time_series', *dim_names).rename(metric) + values = np.full(len(sample_df.index), np.nan) if df.empty else df[metric].values + da = xr.DataArray(values, dims=['time_series'], coords={'time_series': list(sample_df.index)}) + slices.append(_expand_dims_for_key(da, dim_names, key)) + data_vars[metric] = _combine_dataarray_slices(slices, ['time_series'], dim_names, name=metric) return xr.Dataset(data_vars) def _build_reduced_flow_system( self, ds: xr.Dataset, - tsam_aggregation_results: dict[tuple, Any], - cluster_occurrences_all: dict[tuple, dict], - clustering_metrics_all: dict[tuple, pd.DataFrame], + aggregation_results: dict[tuple, Any], + occurrences: dict[tuple, dict], + metrics: dict[tuple, pd.DataFrame], timesteps_per_cluster: int, dt: float, - periods: list, - scenarios: list, + dim_names: list[str], n_clusters_requested: int | None = None, ) -> FlowSystem: """Build a reduced FlowSystem from tsam aggregation results. @@ -371,13 +358,12 @@ def _build_reduced_flow_system( Args: ds: Original dataset. - tsam_aggregation_results: Dict mapping (period, scenario) to tsam AggregationResult. - cluster_occurrences_all: Dict mapping (period, scenario) to cluster occurrence counts. 
- clustering_metrics_all: Dict mapping (period, scenario) to accuracy metrics. + aggregation_results: Dict mapping key tuples to tsam AggregationResult. + occurrences: Dict mapping key tuples to cluster occurrence counts. + metrics: Dict mapping key tuples to accuracy metrics. timesteps_per_cluster: Number of timesteps per cluster. dt: Hours per timestep. - periods: List of period labels ([None] if no periods). - scenarios: List of scenario labels ([None] if no scenarios). + dim_names: Names of extra dimensions (e.g., ['period', 'scenario']). n_clusters_requested: Requested number of clusters (for logging). None to skip. Returns: @@ -387,33 +373,7 @@ def _build_reduced_flow_system( from .core import drop_constant_arrays from .flow_system import FlowSystem - has_periods = periods != [None] - has_scenarios = scenarios != [None] - - # Build dim_names for Clustering - dim_names = [] - if has_periods: - dim_names.append('period') - if has_scenarios: - dim_names.append('scenario') - - # Helper to convert (p, s) keys to clean keys (without None) - def clean_key(p, s) -> tuple: - parts = [] - if has_periods: - parts.append(p) - if has_scenarios: - parts.append(s) - return tuple(parts) - - # Convert all dicts to use clean keys - aggregation_results = {clean_key(p, s): v for (p, s), v in tsam_aggregation_results.items()} - occurrences = {clean_key(p, s): v for (p, s), v in cluster_occurrences_all.items()} - metrics = {clean_key(p, s): v for (p, s), v in clustering_metrics_all.items()} - - # Use first result for structure - first_key = (periods[0], scenarios[0]) - first_tsam = tsam_aggregation_results[first_key] + first_tsam = next(iter(aggregation_results.values())) # Build metrics clustering_metrics = self._build_clustering_metrics(metrics, dim_names) @@ -1350,6 +1310,22 @@ def cluster( f'with extreme periods while maintaining the requested n_clusters.' 
) + # Build dim_names and clean key helper + dim_names: list[str] = [] + if has_periods: + dim_names.append('period') + if has_scenarios: + dim_names.append('scenario') + + def to_clean_key(period_label, scenario_label) -> tuple: + """Convert (period, scenario) to clean key based on which dims exist.""" + key_parts = [] + if has_periods: + key_parts.append(period_label) + if has_scenarios: + key_parts.append(scenario_label) + return tuple(key_parts) + # Cluster each (period, scenario) combination using tsam directly tsam_aggregation_results: dict[tuple, Any] = {} # AggregationResult objects tsam_clustering_results: dict[tuple, Any] = {} # ClusteringResult objects for persistence @@ -1361,7 +1337,7 @@ def cluster( for period_label in periods: for scenario_label in scenarios: - key = (period_label, scenario_label) + key = to_clean_key(period_label, scenario_label) selector = {k: v for k, v in [('period', period_label), ('scenario', scenario_label)] if v is not None} # Select data for clustering (may be subset if data_vars specified) @@ -1423,55 +1399,26 @@ def cluster( # If data_vars was specified, apply clustering to FULL data if data_vars is not None: - # Build dim_names for ClusteringResults - dim_names = [] - if has_periods: - dim_names.append('period') - if has_scenarios: - dim_names.append('scenario') - - # Convert (period, scenario) keys to ClusteringResults format - def to_cr_key(p, s): - key_parts = [] - if has_periods: - key_parts.append(p) - if has_scenarios: - key_parts.append(s) - return tuple(key_parts) - - # Build ClusteringResults from subset clustering - clustering_results = ClusteringResults( - {to_cr_key(p, s): cr for (p, s), cr in tsam_clustering_results.items()}, - dim_names, - ) + # Build ClusteringResults from subset clustering (already using clean keys) + clustering_results = ClusteringResults(tsam_clustering_results, dim_names) # Apply to full data - this returns AggregationResults agg_results = clustering_results.apply(ds) - # Update tsam_aggregation_results with full data results - for cr_key, result in agg_results: - # Convert back to (period, scenario) format - if has_periods and has_scenarios: - full_key = (cr_key[0], cr_key[1]) - elif has_periods: - full_key = (cr_key[0], None) - elif has_scenarios: - full_key = (None, cr_key[0]) - else: - full_key = (None, None) - tsam_aggregation_results[full_key] = result - cluster_occurrences_all[full_key] = result.cluster_weights + # Update results with full data (keys already match) + for key, result in agg_results: + tsam_aggregation_results[key] = result + cluster_occurrences_all[key] = result.cluster_weights # Build and return the reduced FlowSystem return self._build_reduced_flow_system( ds=ds, - tsam_aggregation_results=tsam_aggregation_results, - cluster_occurrences_all=cluster_occurrences_all, - clustering_metrics_all=clustering_metrics_all, + aggregation_results=tsam_aggregation_results, + occurrences=cluster_occurrences_all, + metrics=clustering_metrics_all, timesteps_per_cluster=timesteps_per_cluster, dt=dt, - periods=periods, - scenarios=scenarios, + dim_names=dim_names, n_clusters_requested=n_clusters, ) @@ -1519,12 +1466,7 @@ def apply_clustering( # Get timesteps_per_cluster from the clustering object (survives serialization) timesteps_per_cluster = clustering.timesteps_per_cluster - has_periods = self._fs.periods is not None - has_scenarios = self._fs.scenarios is not None - - # Determine iteration dimensions - periods = list(self._fs.periods) if has_periods else [None] - scenarios = list(self._fs.scenarios) 
if has_scenarios else [None] + dim_names = clustering.results.dim_names ds = self._fs.to_dataset(include_solution=False) @@ -1547,39 +1489,28 @@ def apply_clustering( agg_results = clustering.results.apply(ds) # Convert AggregationResults to the dict format expected by _build_reduced_flow_system - tsam_aggregation_results: dict[tuple, Any] = {} - cluster_occurrences_all: dict[tuple, dict] = {} - clustering_metrics_all: dict[tuple, pd.DataFrame] = {} - - for cr_key, result in agg_results: - # Convert ClusteringResults key to (period, scenario) format - if has_periods and has_scenarios: - full_key = (cr_key[0], cr_key[1]) - elif has_periods: - full_key = (cr_key[0], None) - elif has_scenarios: - full_key = (None, cr_key[0]) - else: - full_key = (None, None) + aggregation_results: dict[tuple, Any] = {} + occurrences: dict[tuple, dict] = {} + metrics: dict[tuple, pd.DataFrame] = {} - tsam_aggregation_results[full_key] = result - cluster_occurrences_all[full_key] = result.cluster_weights + for key, result in agg_results: + aggregation_results[key] = result + occurrences[key] = result.cluster_weights try: - clustering_metrics_all[full_key] = self._accuracy_to_dataframe(result.accuracy) + metrics[key] = self._accuracy_to_dataframe(result.accuracy) except Exception as e: - logger.warning(f'Failed to compute clustering metrics for {full_key}: {e}') - clustering_metrics_all[full_key] = pd.DataFrame() + logger.warning(f'Failed to compute clustering metrics for {key}: {e}') + metrics[key] = pd.DataFrame() # Build and return the reduced FlowSystem return self._build_reduced_flow_system( ds=ds, - tsam_aggregation_results=tsam_aggregation_results, - cluster_occurrences_all=cluster_occurrences_all, - clustering_metrics_all=clustering_metrics_all, + aggregation_results=aggregation_results, + occurrences=occurrences, + metrics=metrics, timesteps_per_cluster=timesteps_per_cluster, dt=dt, - periods=periods, - scenarios=scenarios, + dim_names=dim_names, ) def _validate_for_expansion(self) -> Clustering: From 62f534d2b332da84017cf9c72c415eb6b95d3b92 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Tue, 27 Jan 2026 22:34:22 +0100 Subject: [PATCH 07/15] udpate --- flixopt/transform_accessor.py | 88 ++++++++++++++--------------------- 1 file changed, 34 insertions(+), 54 deletions(-) diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index 4e6f79d20..d5a6a62c8 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -190,15 +190,15 @@ def _accuracy_to_dataframe(accuracy) -> pd.DataFrame: def _build_cluster_weight_da( self, - cluster_occurrences_all: dict[tuple, dict], + aggregation_results: dict[tuple, Any], n_clusters: int, cluster_coords: np.ndarray, dim_names: list[str], ) -> xr.DataArray: - """Build cluster_weight DataArray from occurrence counts. + """Build cluster_weight DataArray from aggregation results. Args: - cluster_occurrences_all: Dict mapping key tuples to {cluster_id: count}. + aggregation_results: Dict mapping key tuples to tsam AggregationResult. n_clusters: Number of clusters. cluster_coords: Cluster coordinate values. dim_names: Names of extra dimensions (e.g., ['period', 'scenario']). @@ -207,8 +207,8 @@ def _build_cluster_weight_da( DataArray with dims [cluster, *dim_names]. 
""" slices = [] - for key, occurrences in cluster_occurrences_all.items(): - weights = np.array([occurrences.get(c, 0) for c in range(n_clusters)]) + for key, result in aggregation_results.items(): + weights = np.array([result.cluster_weights.get(c, 0) for c in range(n_clusters)]) da = xr.DataArray(weights, dims=['cluster'], coords={'cluster': cluster_coords}) slices.append(_expand_dims_for_key(da, dim_names, key)) @@ -296,25 +296,34 @@ def _build_segment_durations_da( def _build_clustering_metrics( self, - clustering_metrics_all: dict[tuple, pd.DataFrame], + aggregation_results: dict[tuple, Any], dim_names: list[str], ) -> xr.Dataset: - """Build clustering metrics Dataset from per-slice DataFrames. + """Build clustering metrics Dataset from aggregation results. Args: - clustering_metrics_all: Dict mapping key tuples to metric DataFrames. + aggregation_results: Dict mapping key tuples to tsam AggregationResult. dim_names: Names of extra dimensions (e.g., ['period', 'scenario']). Returns: Dataset with RMSE, MAE, RMSE_duration metrics. """ - non_empty_metrics = {k: v for k, v in clustering_metrics_all.items() if not v.empty} + # Convert accuracy to DataFrames, filtering out failures + metrics_dfs: dict[tuple, pd.DataFrame] = {} + for key, result in aggregation_results.items(): + try: + metrics_dfs[key] = self._accuracy_to_dataframe(result.accuracy) + except Exception as e: + logger.warning(f'Failed to compute clustering metrics for {key}: {e}') + metrics_dfs[key] = pd.DataFrame() + + non_empty_metrics = {k: v for k, v in metrics_dfs.items() if not v.empty} if not non_empty_metrics: return xr.Dataset() # Single slice case - if len(clustering_metrics_all) == 1 and len(non_empty_metrics) == 1: + if len(metrics_dfs) == 1 and len(non_empty_metrics) == 1: metrics_df = next(iter(non_empty_metrics.values())) return xr.Dataset( { @@ -333,7 +342,7 @@ def _build_clustering_metrics( for metric in sample_df.columns: slices = [] - for key, df in clustering_metrics_all.items(): + for key, df in metrics_dfs.items(): values = np.full(len(sample_df.index), np.nan) if df.empty else df[metric].values da = xr.DataArray(values, dims=['time_series'], coords={'time_series': list(sample_df.index)}) slices.append(_expand_dims_for_key(da, dim_names, key)) @@ -345,8 +354,6 @@ def _build_reduced_flow_system( self, ds: xr.Dataset, aggregation_results: dict[tuple, Any], - occurrences: dict[tuple, dict], - metrics: dict[tuple, pd.DataFrame], timesteps_per_cluster: int, dt: float, dim_names: list[str], @@ -359,8 +366,6 @@ def _build_reduced_flow_system( Args: ds: Original dataset. aggregation_results: Dict mapping key tuples to tsam AggregationResult. - occurrences: Dict mapping key tuples to cluster occurrence counts. - metrics: Dict mapping key tuples to accuracy metrics. timesteps_per_cluster: Number of timesteps per cluster. dt: Hours per timestep. dim_names: Names of extra dimensions (e.g., ['period', 'scenario']). 
@@ -375,8 +380,8 @@ def _build_reduced_flow_system( first_tsam = next(iter(aggregation_results.values())) - # Build metrics - clustering_metrics = self._build_clustering_metrics(metrics, dim_names) + # Build metrics from aggregation_results + clustering_metrics = self._build_clustering_metrics(aggregation_results, dim_names) n_reduced_timesteps = len(first_tsam.cluster_representatives) actual_n_clusters = len(first_tsam.cluster_weights) @@ -401,8 +406,10 @@ def _build_reduced_flow_system( name='time', ) - # Build cluster_weight - cluster_weight = self._build_cluster_weight_da(occurrences, actual_n_clusters, cluster_coords, dim_names) + # Build cluster_weight from aggregation_results + cluster_weight = self._build_cluster_weight_da( + aggregation_results, actual_n_clusters, cluster_coords, dim_names + ) # Logging if is_segmented: @@ -1328,12 +1335,6 @@ def to_clean_key(period_label, scenario_label) -> tuple: # Cluster each (period, scenario) combination using tsam directly tsam_aggregation_results: dict[tuple, Any] = {} # AggregationResult objects - tsam_clustering_results: dict[tuple, Any] = {} # ClusteringResult objects for persistence - cluster_assignmentss: dict[tuple, np.ndarray] = {} - cluster_occurrences_all: dict[tuple, dict] = {} - - # Collect metrics per (period, scenario) slice - clustering_metrics_all: dict[tuple, pd.DataFrame] = {} for period_label in periods: for scenario_label in scenarios: @@ -1388,34 +1389,26 @@ def to_clean_key(period_label, scenario_label) -> tuple: ) tsam_aggregation_results[key] = tsam_result - tsam_clustering_results[key] = tsam_result.clustering - cluster_assignmentss[key] = tsam_result.cluster_assignments - cluster_occurrences_all[key] = tsam_result.cluster_weights - try: - clustering_metrics_all[key] = self._accuracy_to_dataframe(tsam_result.accuracy) - except Exception as e: - logger.warning(f'Failed to compute clustering metrics for {key}: {e}') - clustering_metrics_all[key] = pd.DataFrame() # If data_vars was specified, apply clustering to FULL data if data_vars is not None: - # Build ClusteringResults from subset clustering (already using clean keys) - clustering_results = ClusteringResults(tsam_clustering_results, dim_names) + # Build ClusteringResults from subset clustering (derive from aggregation results) + clustering_results = ClusteringResults( + {k: r.clustering for k, r in tsam_aggregation_results.items()}, + dim_names, + ) # Apply to full data - this returns AggregationResults agg_results = clustering_results.apply(ds) - # Update results with full data (keys already match) + # Update aggregation_results with full data results for key, result in agg_results: tsam_aggregation_results[key] = result - cluster_occurrences_all[key] = result.cluster_weights # Build and return the reduced FlowSystem return self._build_reduced_flow_system( ds=ds, aggregation_results=tsam_aggregation_results, - occurrences=cluster_occurrences_all, - metrics=clustering_metrics_all, timesteps_per_cluster=timesteps_per_cluster, dt=dt, dim_names=dim_names, @@ -1488,26 +1481,13 @@ def apply_clustering( warnings.filterwarnings('ignore', category=UserWarning, message='.*minimal value.*exceeds.*') agg_results = clustering.results.apply(ds) - # Convert AggregationResults to the dict format expected by _build_reduced_flow_system - aggregation_results: dict[tuple, Any] = {} - occurrences: dict[tuple, dict] = {} - metrics: dict[tuple, pd.DataFrame] = {} - - for key, result in agg_results: - aggregation_results[key] = result - occurrences[key] = result.cluster_weights - try: 
- metrics[key] = self._accuracy_to_dataframe(result.accuracy) - except Exception as e: - logger.warning(f'Failed to compute clustering metrics for {key}: {e}') - metrics[key] = pd.DataFrame() + # Convert AggregationResults to dict format + aggregation_results = dict(agg_results) # Build and return the reduced FlowSystem return self._build_reduced_flow_system( ds=ds, aggregation_results=aggregation_results, - occurrences=occurrences, - metrics=metrics, timesteps_per_cluster=timesteps_per_cluster, dt=dt, dim_names=dim_names, From 30774e6f93c074906cbf880b6efe466f6ba24575 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Tue, 27 Jan 2026 22:55:37 +0100 Subject: [PATCH 08/15] - Eliminated 3 redundant dicts (occurrences, metrics, tsam_clustering_results) - Single aggregation_results dict remains (required for Clustering) --- flixopt/transform_accessor.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index d5a6a62c8..99fc606ee 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -1334,7 +1334,7 @@ def to_clean_key(period_label, scenario_label) -> tuple: return tuple(key_parts) # Cluster each (period, scenario) combination using tsam directly - tsam_aggregation_results: dict[tuple, Any] = {} # AggregationResult objects + aggregation_results: dict[tuple, Any] = {} for period_label in periods: for scenario_label in scenarios: @@ -1373,7 +1373,7 @@ def to_clean_key(period_label, scenario_label) -> tuple: cluster_config = self._build_cluster_config_with_weights(cluster, filtered_weights) # Perform clustering based on selected data_vars (or all if not specified) - tsam_result = tsam.aggregate( + aggregation_results[key] = tsam.aggregate( df_for_clustering, n_clusters=n_clusters, period_duration=hours_per_cluster, @@ -1388,27 +1388,20 @@ def to_clean_key(period_label, scenario_label) -> tuple: **tsam_kwargs, ) - tsam_aggregation_results[key] = tsam_result - # If data_vars was specified, apply clustering to FULL data if data_vars is not None: - # Build ClusteringResults from subset clustering (derive from aggregation results) + # Build ClusteringResults from subset clustering clustering_results = ClusteringResults( - {k: r.clustering for k, r in tsam_aggregation_results.items()}, + {k: r.clustering for k, r in aggregation_results.items()}, dim_names, ) - - # Apply to full data - this returns AggregationResults - agg_results = clustering_results.apply(ds) - - # Update aggregation_results with full data results - for key, result in agg_results: - tsam_aggregation_results[key] = result + # Apply to full data and replace results + aggregation_results = dict(clustering_results.apply(ds)) # Build and return the reduced FlowSystem return self._build_reduced_flow_system( ds=ds, - aggregation_results=tsam_aggregation_results, + aggregation_results=aggregation_results, timesteps_per_cluster=timesteps_per_cluster, dt=dt, dim_names=dim_names, From ab5cb3ca65ec0cffb5d6fa1c0bf707b937c79ff1 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Tue, 27 Jan 2026 23:01:23 +0100 Subject: [PATCH 09/15] 1. clustering/base.py: _build_property_array now transposes for single-slice path 2. 
transform_accessor.py: _combine_dataarray_slices now transposes for both paths --- flixopt/clustering/base.py | 14 ++++++++------ flixopt/transform_accessor.py | 4 +++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/flixopt/clustering/base.py b/flixopt/clustering/base.py index 3dd11cc43..7082929c3 100644 --- a/flixopt/clustering/base.py +++ b/flixopt/clustering/base.py @@ -490,12 +490,14 @@ def _build_property_array( slices.append(da) if len(slices) == 1: - return slices[0] - - combined = xr.combine_by_coords(slices) - if isinstance(combined, xr.Dataset): - combined = combined[name] - return combined.transpose(*base_dims, *self._dim_names) + result = slices[0] + else: + combined = xr.combine_by_coords(slices) + if isinstance(combined, xr.Dataset): + result = combined[name] + else: + result = combined + return result.transpose(*base_dims, *self._dim_names) @staticmethod def _key_to_str(key: tuple) -> str: diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index 99fc606ee..565a29dc9 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -54,7 +54,9 @@ def _combine_dataarray_slices( result = list(combined.data_vars.values())[0] else: result = combined - result = result.transpose(*base_dims, *extra_dims) + + # Ensure consistent dimension order for both single and multi-slice paths + result = result.transpose(*base_dims, *extra_dims) if name is not None: result = result.rename(name) From e822d1928b49fc33aef174a1cb81ca064ecd8355 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Tue, 27 Jan 2026 23:09:47 +0100 Subject: [PATCH 10/15] Basic fixes --- flixopt/core.py | 4 +++- flixopt/transform_accessor.py | 22 ++++++++++++++++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index ba8618e1a..aca380f5e 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -4,6 +4,7 @@ """ import logging +import warnings from itertools import permutations from typing import Any, Literal @@ -644,7 +645,8 @@ def drop_constant_arrays( axis = var.dims.index(dim) data = var.values # Use numpy operations directly for speed - with np.errstate(invalid='ignore'): # Ignore NaN warnings + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', category=RuntimeWarning, message='All-NaN slice') ptp = np.nanmax(data, axis=axis) - np.nanmin(data, axis=axis) if np.all(ptp < atol): drop_vars.append(name) diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index 565a29dc9..92e84846d 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -338,17 +338,31 @@ def _build_clustering_metrics( } ) - # Multi-dim case + # Multi-dim case - each period may have different time series + # Build union of all time series indices (different periods may exclude different constant arrays) + all_time_series = sorted(set().union(*(set(df.index) for df in non_empty_metrics.values()))) + + # Get all metric columns (should be consistent across periods) sample_df = next(iter(non_empty_metrics.values())) data_vars = {} for metric in sample_df.columns: slices = [] for key, df in metrics_dfs.items(): - values = np.full(len(sample_df.index), np.nan) if df.empty else df[metric].values - da = xr.DataArray(values, dims=['time_series'], coords={'time_series': list(sample_df.index)}) + if df.empty: + # Create NaN-filled array for empty DataFrames + values = np.full(len(all_time_series), np.nan) + else: + # Reindex to common index, filling missing 
with NaN + values = df[metric].reindex(all_time_series).values + da = xr.DataArray( + values, + dims=['time_series'], + coords={'time_series': all_time_series}, + ) slices.append(_expand_dims_for_key(da, dim_names, key)) - data_vars[metric] = _combine_dataarray_slices(slices, ['time_series'], dim_names, name=metric) + if slices: + data_vars[metric] = _combine_dataarray_slices(slices, ['time_series'], dim_names, name=metric) return xr.Dataset(data_vars) From 8ac679c7e1da41c47faacb63b97d0d8f5d60806f Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Tue, 27 Jan 2026 23:13:25 +0100 Subject: [PATCH 11/15] drop constant arrays before slicing. --- flixopt/transform_accessor.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index 92e84846d..44ad0b6ba 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -1299,6 +1299,18 @@ def cluster( else: ds_for_clustering = ds + # Filter constant arrays once on the full dataset (not per slice) + # This ensures all slices have the same variables for consistent metrics + ds_for_clustering = drop_constant_arrays(ds_for_clustering, dim='time') + + # Guard against empty dataset after removing constant arrays + if not ds_for_clustering.data_vars: + filter_info = f'data_vars={data_vars}' if data_vars else 'all variables' + raise ValueError( + f'No time-varying data found for clustering ({filter_info}). ' + f'All variables are constant over time. Check your data_vars filter or input data.' + ) + # Validate tsam_kwargs doesn't override explicit parameters reserved_tsam_keys = { 'n_clusters', @@ -1357,22 +1369,9 @@ def to_clean_key(period_label, scenario_label) -> tuple: key = to_clean_key(period_label, scenario_label) selector = {k: v for k, v in [('period', period_label), ('scenario', scenario_label)] if v is not None} - # Select data for clustering (may be subset if data_vars specified) - ds_slice_for_clustering = ( - ds_for_clustering.sel(**selector, drop=True) if selector else ds_for_clustering - ) - temporaly_changing_ds_for_clustering = drop_constant_arrays(ds_slice_for_clustering, dim='time') - - # Guard against empty dataset after removing constant arrays - if not temporaly_changing_ds_for_clustering.data_vars: - filter_info = f'data_vars={data_vars}' if data_vars else 'all variables' - selector_info = f', selector={selector}' if selector else '' - raise ValueError( - f'No time-varying data found for clustering ({filter_info}{selector_info}). ' - f'All variables are constant over time. Check your data_vars filter or input data.' 
- ) - - df_for_clustering = temporaly_changing_ds_for_clustering.to_dataframe() + # Select data slice for clustering + ds_slice = ds_for_clustering.sel(**selector, drop=True) if selector else ds_for_clustering + df_for_clustering = ds_slice.to_dataframe() if selector: logger.info(f'Clustering {", ".join(f"{k}={v}" for k, v in selector.items())}...') @@ -1382,7 +1381,7 @@ def to_clean_key(period_label, scenario_label) -> tuple: warnings.filterwarnings('ignore', category=UserWarning, message='.*minimal value.*exceeds.*') # Build ClusterConfig with auto-calculated weights - clustering_weights = self._calculate_clustering_weights(temporaly_changing_ds_for_clustering) + clustering_weights = self._calculate_clustering_weights(ds_slice) filtered_weights = { name: w for name, w in clustering_weights.items() if name in df_for_clustering.columns } From 3d9475c60edbb980bf942164cf4d03099fee2330 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Tue, 27 Jan 2026 23:24:54 +0100 Subject: [PATCH 12/15] drop constant arrays before slicing. --- flixopt/transform_accessor.py | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index 44ad0b6ba..ae5606fbc 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -338,31 +338,18 @@ def _build_clustering_metrics( } ) - # Multi-dim case - each period may have different time series - # Build union of all time series indices (different periods may exclude different constant arrays) - all_time_series = sorted(set().union(*(set(df.index) for df in non_empty_metrics.values()))) - - # Get all metric columns (should be consistent across periods) + # Multi-dim case - all periods have same time series (constant arrays filtered on full dataset) sample_df = next(iter(non_empty_metrics.values())) + time_series_index = list(sample_df.index) data_vars = {} for metric in sample_df.columns: slices = [] for key, df in metrics_dfs.items(): - if df.empty: - # Create NaN-filled array for empty DataFrames - values = np.full(len(all_time_series), np.nan) - else: - # Reindex to common index, filling missing with NaN - values = df[metric].reindex(all_time_series).values - da = xr.DataArray( - values, - dims=['time_series'], - coords={'time_series': all_time_series}, - ) + values = np.full(len(time_series_index), np.nan) if df.empty else df[metric].values + da = xr.DataArray(values, dims=['time_series'], coords={'time_series': time_series_index}) slices.append(_expand_dims_for_key(da, dim_names, key)) - if slices: - data_vars[metric] = _combine_dataarray_slices(slices, ['time_series'], dim_names, name=metric) + data_vars[metric] = _combine_dataarray_slices(slices, ['time_series'], dim_names, name=metric) return xr.Dataset(data_vars) From 24c28383ecaa2e8ac168a74271e10d25ab9e3136 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Tue, 27 Jan 2026 23:36:43 +0100 Subject: [PATCH 13/15] Simplify expansion logic --- flixopt/transform_accessor.py | 64 ++++++++++++++++------------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index ae5606fbc..00f2036fd 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -1947,31 +1947,31 @@ def expand(self) -> FlowSystem: n_original_clusters - 1, ) - # For segmented systems: build expansion divisor and identify segment total variables - 
expansion_divisor = None - segment_total_vars: set[str] = set() + # Build variable category sets for expansion handling variable_categories = getattr(self._fs, '_variable_categories', {}) - if clustering.is_segmented: - expansion_divisor = clustering.build_expansion_divisor(original_time=original_timesteps) - # Build segment total vars using registry first, fall back to pattern matching + if variable_categories: + # Use registered categories + state_vars = {name for name, cat in variable_categories.items() if cat in EXPAND_INTERPOLATE} + first_timestep_vars = {name for name, cat in variable_categories.items() if cat in EXPAND_FIRST_TIMESTEP} segment_total_vars = {name for name, cat in variable_categories.items() if cat in EXPAND_DIVIDE} - # Fall back to pattern matching for backwards compatibility (old FlowSystems without categories) - if not segment_total_vars: - segment_total_vars = self._build_segment_total_varnames() + else: + # Fallback to pattern matching for old FlowSystems without categories + state_vars = set() # Will use endswith('|charge_state') check + first_timestep_vars = set() # Will use endswith('|startup') / endswith('|shutdown') check + segment_total_vars = self._build_segment_total_varnames() if clustering.is_segmented else set() def _is_state_variable(var_name: str) -> bool: - """Check if a variable is a state variable (should be interpolated).""" - if var_name in variable_categories: - return variable_categories[var_name] in EXPAND_INTERPOLATE - # Fall back to pattern matching for backwards compatibility - return var_name.endswith('|charge_state') + return var_name in state_vars or (not variable_categories and var_name.endswith('|charge_state')) def _is_first_timestep_variable(var_name: str) -> bool: - """Check if a variable is a binary event (should only appear at first timestep).""" - if var_name in variable_categories: - return variable_categories[var_name] in EXPAND_FIRST_TIMESTEP - # Fall back to pattern matching for backwards compatibility - return var_name.endswith('|startup') or var_name.endswith('|shutdown') + return var_name in first_timestep_vars or ( + not variable_categories and (var_name.endswith('|startup') or var_name.endswith('|shutdown')) + ) + + # For segmented systems: build expansion divisor + expansion_divisor = None + if clustering.is_segmented: + expansion_divisor = clustering.build_expansion_divisor(original_time=original_timesteps) def _append_final_state(expanded: xr.DataArray, da: xr.DataArray) -> xr.DataArray: """Append final state value from original data to expanded data.""" @@ -1991,26 +1991,22 @@ def expand_da(da: xr.DataArray, var_name: str = '', is_solution: bool = False) - if 'time' not in da.dims: return da.copy() - is_state = _is_state_variable(var_name) and 'cluster' in da.dims - is_first_timestep = _is_first_timestep_variable(var_name) and 'cluster' in da.dims + has_cluster_dim = 'cluster' in da.dims + is_state = _is_state_variable(var_name) and has_cluster_dim + is_first_timestep = _is_first_timestep_variable(var_name) and has_cluster_dim + is_segment_total = is_solution and var_name in segment_total_vars - # State variables in segmented systems: interpolate within segments + # Choose expansion method if is_state and clustering.is_segmented: expanded = self._interpolate_charge_state_segmented(da, clustering, original_timesteps) - return _append_final_state(expanded, da) - - # Binary events (startup/shutdown) in segmented systems: first timestep of each segment - # For non-segmented systems, timing within cluster is preserved, so 
normal expansion is correct - if is_first_timestep and is_solution and clustering.is_segmented: + elif is_first_timestep and is_solution and clustering.is_segmented: return self._expand_first_timestep_only(da, clustering, original_timesteps) + else: + expanded = clustering.expand_data(da, original_time=original_timesteps) + if is_segment_total and expansion_divisor is not None: + expanded = expanded / expansion_divisor - expanded = clustering.expand_data(da, original_time=original_timesteps) - - # Segment totals: divide by expansion divisor - if is_solution and expansion_divisor is not None and var_name in segment_total_vars: - expanded = expanded / expansion_divisor - - # State variables: append final state + # State variables need final state appended if is_state: expanded = _append_final_state(expanded, da) From b98921f6a9b8596dffd7d131f263f28a0cce1827 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 28 Jan 2026 08:18:34 +0100 Subject: [PATCH 14/15] Created _Expander class (lines 82-306) that encapsulates all expansion logic: 1. Pre-computed state in __init__: - Clustering dimensions (timesteps_per_cluster, n_segments, n_clusters, etc.) - Timesteps (original_timesteps, original_timesteps_extra) - Variable category sets (state_vars, first_timestep_vars, segment_total_vars) - Expansion divisor for segmented systems 2. Methods moved from nested functions/helpers: - _is_state_variable() - checks if variable needs interpolation - _is_first_timestep_variable() - checks if variable is startup/shutdown - _build_segment_total_varnames() - backward compatibility fallback - _append_final_state() - appends N+1th state value - _interpolate_charge_state_segmented() - interpolates within segments - _expand_first_timestep_only() - places events at first timestep of segment - expand_dataarray() - main expansion logic (was expand_da) 3. Simplified TransformAccessor.expand(): - Creates _Expander instance with pre-computed state - Uses expander.expand_dataarray() instead of nested closure - Accesses expander attributes for logging Benefits: - State is pre-computed once, not on every call - Methods are testable in isolation - No more nested function closures capturing external state - Clear separation: _Expander handles expansion, TransformAccessor orchestrates --- flixopt/transform_accessor.py | 543 +++++++++++++++++----------------- 1 file changed, 268 insertions(+), 275 deletions(-) diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index 00f2036fd..fbd17eaab 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -79,6 +79,261 @@ def _expand_dims_for_key(da: xr.DataArray, dim_names: list[str], key: tuple) -> return da +class _Expander: + """Handles expansion of clustered FlowSystem to original timesteps. + + This class encapsulates all expansion logic, pre-computing shared state + once and providing methods for different expansion strategies. + + Args: + fs: The clustered FlowSystem to expand. + clustering: The Clustering object with cluster assignments and metadata. 
+ """ + + def __init__(self, fs: FlowSystem, clustering: Clustering): + self._fs = fs + self._clustering = clustering + + # Pre-compute clustering dimensions + self._timesteps_per_cluster = clustering.timesteps_per_cluster + self._n_segments = clustering.n_segments + self._time_dim_size = self._n_segments if self._n_segments else self._timesteps_per_cluster + self._n_clusters = clustering.n_clusters + self._n_original_clusters = clustering.n_original_clusters + + # Pre-compute timesteps + self._original_timesteps = clustering.original_timesteps + self._n_original_timesteps = len(self._original_timesteps) + + # Import here to avoid circular import + from .flow_system import FlowSystem + + self._original_timesteps_extra = FlowSystem._create_timesteps_with_extra(self._original_timesteps, None) + + # Index of last valid original cluster (for final state) + self._last_original_cluster_idx = min( + (self._n_original_timesteps - 1) // self._timesteps_per_cluster, + self._n_original_clusters - 1, + ) + + # Build variable category sets + self._variable_categories = getattr(fs, '_variable_categories', {}) + if self._variable_categories: + self._state_vars = {name for name, cat in self._variable_categories.items() if cat in EXPAND_INTERPOLATE} + self._first_timestep_vars = { + name for name, cat in self._variable_categories.items() if cat in EXPAND_FIRST_TIMESTEP + } + self._segment_total_vars = {name for name, cat in self._variable_categories.items() if cat in EXPAND_DIVIDE} + else: + # Fallback to pattern matching for old FlowSystems without categories + self._state_vars = set() + self._first_timestep_vars = set() + self._segment_total_vars = self._build_segment_total_varnames() if clustering.is_segmented else set() + + # Build expansion divisor for segmented systems + self._expansion_divisor = None + if clustering.is_segmented: + self._expansion_divisor = clustering.build_expansion_divisor(original_time=self._original_timesteps) + + def _is_state_variable(self, var_name: str) -> bool: + """Check if variable is a state variable requiring interpolation.""" + return var_name in self._state_vars or (not self._variable_categories and var_name.endswith('|charge_state')) + + def _is_first_timestep_variable(self, var_name: str) -> bool: + """Check if variable is a first-timestep-only variable (startup/shutdown).""" + return var_name in self._first_timestep_vars or ( + not self._variable_categories and (var_name.endswith('|startup') or var_name.endswith('|shutdown')) + ) + + def _build_segment_total_varnames(self) -> set[str]: + """Build segment total variable names - BACKWARDS COMPATIBILITY FALLBACK. + + This method is only used when variable_categories is empty (old FlowSystems + saved before category registration was implemented). New FlowSystems use + the VariableCategory registry with EXPAND_DIVIDE categories (PER_TIMESTEP, SHARE). + + Returns: + Set of variable names that should be divided by expansion divisor. + """ + segment_total_vars: set[str] = set() + effect_names = list(self._fs.effects.keys()) + + # 1. Per-timestep totals for each effect + for effect in effect_names: + segment_total_vars.add(f'{effect}(temporal)|per_timestep') + + # 2. Flow contributions to effects + for flow_label in self._fs.flows: + for effect in effect_names: + segment_total_vars.add(f'{flow_label}->{effect}(temporal)') + + # 3. Component contributions to effects + for component_label in self._fs.components: + for effect in effect_names: + segment_total_vars.add(f'{component_label}->{effect}(temporal)') + + # 4. 
Effect-to-effect contributions + for target_effect_name, target_effect in self._fs.effects.items(): + if target_effect.share_from_temporal: + for source_effect_name in target_effect.share_from_temporal: + segment_total_vars.add(f'{source_effect_name}(temporal)->{target_effect_name}(temporal)') + + return segment_total_vars + + def _append_final_state(self, expanded: xr.DataArray, da: xr.DataArray) -> xr.DataArray: + """Append final state value from original data to expanded data.""" + cluster_assignments = self._clustering.cluster_assignments + if cluster_assignments.ndim == 1: + last_cluster = int(cluster_assignments.values[self._last_original_cluster_idx]) + extra_val = da.isel(cluster=last_cluster, time=-1) + else: + last_clusters = cluster_assignments.isel(original_cluster=self._last_original_cluster_idx) + extra_val = da.isel(cluster=last_clusters, time=-1) + extra_val = extra_val.drop_vars(['cluster', 'time'], errors='ignore') + extra_val = extra_val.expand_dims(time=[self._original_timesteps_extra[-1]]) + return xr.concat([expanded, extra_val], dim='time') + + def _interpolate_charge_state_segmented(self, da: xr.DataArray) -> xr.DataArray: + """Interpolate charge_state values within segments for segmented systems. + + For segmented systems, charge_state has values at segment boundaries (n_segments+1). + This method interpolates between start and end boundary values to show the + actual charge trajectory as the storage charges/discharges. + + Args: + da: charge_state DataArray with dims (cluster, time) where time has n_segments+1 entries. + + Returns: + Interpolated charge_state with dims (time, ...) for original timesteps. + """ + clustering = self._clustering + + # Get multi-dimensional properties from Clustering + segment_assignments = clustering.results.segment_assignments + segment_durations = clustering.results.segment_durations + position_within_segment = clustering.results.position_within_segment + cluster_assignments = clustering.cluster_assignments + + # Compute original period index and position within period + original_period_indices = np.minimum( + np.arange(self._n_original_timesteps) // self._timesteps_per_cluster, + self._n_original_clusters - 1, + ) + positions_in_period = np.arange(self._n_original_timesteps) % self._timesteps_per_cluster + + # Create DataArrays for indexing + original_period_da = xr.DataArray(original_period_indices, dims=['original_time']) + position_in_period_da = xr.DataArray(positions_in_period, dims=['original_time']) + + # Map original period to cluster + cluster_indices = cluster_assignments.isel(original_cluster=original_period_da) + + # Get segment index and position for each original timestep + seg_indices = segment_assignments.isel(cluster=cluster_indices, time=position_in_period_da) + positions = position_within_segment.isel(cluster=cluster_indices, time=position_in_period_da) + durations = segment_durations.isel(cluster=cluster_indices, segment=seg_indices) + + # Calculate interpolation factor: position within segment (0 to 1) + factor = xr.where(durations > 1, (positions + 0.5) / durations, 0.5) + + # Get start and end boundary values from charge_state + start_vals = da.isel(cluster=cluster_indices, time=seg_indices) + end_vals = da.isel(cluster=cluster_indices, time=seg_indices + 1) + + # Linear interpolation + interpolated = start_vals + (end_vals - start_vals) * factor + + # Clean up coordinate artifacts and rename + interpolated = interpolated.drop_vars(['cluster', 'time', 'segment'], errors='ignore') + interpolated = 
interpolated.rename({'original_time': 'time'}).assign_coords(time=self._original_timesteps) + + return interpolated.transpose('time', ...).assign_attrs(da.attrs) + + def _expand_first_timestep_only(self, da: xr.DataArray) -> xr.DataArray: + """Expand binary event variables to first timestep of each segment only. + + For segmented systems, binary event variables like startup and shutdown indicate + that an event occurred somewhere in the segment. When expanded, the event is placed + at the first timestep of each segment, with zeros elsewhere. + + Args: + da: Binary event DataArray with dims including (cluster, time). + + Returns: + Expanded DataArray with event values only at first timestep of each segment. + """ + clustering = self._clustering + + # First expand normally (repeats values) + expanded = clustering.expand_data(da, original_time=self._original_timesteps) + + # Build mask: True only at first timestep of each segment + position_within_segment = clustering.results.position_within_segment + cluster_assignments = clustering.cluster_assignments + + # Compute original period index and position within period + original_period_indices = np.minimum( + np.arange(self._n_original_timesteps) // self._timesteps_per_cluster, + self._n_original_clusters - 1, + ) + positions_in_period = np.arange(self._n_original_timesteps) % self._timesteps_per_cluster + + # Create DataArrays for indexing + original_period_da = xr.DataArray(original_period_indices, dims=['original_time']) + position_in_period_da = xr.DataArray(positions_in_period, dims=['original_time']) + + # Map to cluster and get position within segment + cluster_indices = cluster_assignments.isel(original_cluster=original_period_da) + pos_in_segment = position_within_segment.isel(cluster=cluster_indices, time=position_in_period_da) + + # Clean up and create mask + pos_in_segment = pos_in_segment.drop_vars(['cluster', 'time'], errors='ignore') + pos_in_segment = pos_in_segment.rename({'original_time': 'time'}).assign_coords(time=self._original_timesteps) + + # First timestep of segment has position 0 + is_first = pos_in_segment == 0 + + # Apply mask: keep value at first timestep, zero elsewhere + result = xr.where(is_first, expanded, 0) + return result.assign_attrs(da.attrs) + + def expand_dataarray(self, da: xr.DataArray, var_name: str = '', is_solution: bool = False) -> xr.DataArray: + """Expand a DataArray from clustered to original timesteps. + + Args: + da: DataArray to expand. + var_name: Variable name for category-based expansion handling. + is_solution: Whether this is a solution variable (affects segment total handling). + + Returns: + Expanded DataArray with original timesteps. 
+ """ + if 'time' not in da.dims: + return da.copy() + + clustering = self._clustering + has_cluster_dim = 'cluster' in da.dims + is_state = self._is_state_variable(var_name) and has_cluster_dim + is_first_timestep = self._is_first_timestep_variable(var_name) and has_cluster_dim + is_segment_total = is_solution and var_name in self._segment_total_vars + + # Choose expansion method + if is_state and clustering.is_segmented: + expanded = self._interpolate_charge_state_segmented(da) + elif is_first_timestep and is_solution and clustering.is_segmented: + return self._expand_first_timestep_only(da) + else: + expanded = clustering.expand_data(da, original_time=self._original_timesteps) + if is_segment_total and self._expansion_divisor is not None: + expanded = expanded / self._expansion_divisor + + # State variables need final state appended + if is_state: + expanded = self._append_final_state(expanded, da) + + return expanded + + class TransformAccessor: """ Accessor for transformation methods on FlowSystem. @@ -1636,186 +1891,6 @@ def _apply_soc_decay( return soc_boundary_per_timestep * decay_da - def _build_segment_total_varnames(self) -> set[str]: - """Build segment total variable names - BACKWARDS COMPATIBILITY FALLBACK. - - This method is only used when variable_categories is empty (old FlowSystems - saved before category registration was implemented). New FlowSystems use - the VariableCategory registry with EXPAND_DIVIDE categories (PER_TIMESTEP, SHARE). - - For segmented systems, these variables contain values that are summed over - segments. When expanded to hourly resolution, they need to be divided by - segment duration to get correct hourly rates. - - Returns: - Set of variable names that should be divided by expansion divisor. - """ - segment_total_vars: set[str] = set() - - # Get all effect names - effect_names = list(self._fs.effects.keys()) - - # 1. Per-timestep totals for each effect: {effect}(temporal)|per_timestep - for effect in effect_names: - segment_total_vars.add(f'{effect}(temporal)|per_timestep') - - # 2. Flow contributions to effects: {flow}->{effect}(temporal) - # (from effects_per_flow_hour on Flow elements) - for flow_label in self._fs.flows: - for effect in effect_names: - segment_total_vars.add(f'{flow_label}->{effect}(temporal)') - - # 3. Component contributions to effects: {component}->{effect}(temporal) - # (from effects_per_startup, effects_per_active_hour on OnOffParameters) - for component_label in self._fs.components: - for effect in effect_names: - segment_total_vars.add(f'{component_label}->{effect}(temporal)') - - # 4. Effect-to-effect contributions (from share_from_temporal) - # {source_effect}(temporal)->{target_effect}(temporal) - for target_effect_name, target_effect in self._fs.effects.items(): - if target_effect.share_from_temporal: - for source_effect_name in target_effect.share_from_temporal: - segment_total_vars.add(f'{source_effect_name}(temporal)->{target_effect_name}(temporal)') - - return segment_total_vars - - def _interpolate_charge_state_segmented( - self, - da: xr.DataArray, - clustering: Clustering, - original_timesteps: pd.DatetimeIndex, - ) -> xr.DataArray: - """Interpolate charge_state values within segments for segmented systems. - - For segmented systems, charge_state has values at segment boundaries (n_segments+1). - Instead of repeating the start boundary value for all timesteps in a segment, - this method interpolates between start and end boundary values to show the - actual charge trajectory as the storage charges/discharges. 
- - Uses vectorized xarray operations via Clustering class properties. - - Args: - da: charge_state DataArray with dims (cluster, time) where time has n_segments+1 entries. - clustering: Clustering object with segment info. - original_timesteps: Original timesteps to expand to. - - Returns: - Interpolated charge_state with dims (time, ...) for original timesteps. - """ - # Get multi-dimensional properties from Clustering - segment_assignments = clustering.results.segment_assignments - segment_durations = clustering.results.segment_durations - position_within_segment = clustering.results.position_within_segment - cluster_assignments = clustering.cluster_assignments - - # Compute original period index and position within period directly - # This is more reliable than decoding from timestep_mapping, which encodes - # (cluster * n_segments + segment_idx) for segmented systems - n_original_timesteps = len(original_timesteps) - timesteps_per_cluster = clustering.timesteps_per_cluster - n_original_clusters = clustering.n_original_clusters - - # For each original timestep, compute which original period it belongs to - original_period_indices = np.minimum( - np.arange(n_original_timesteps) // timesteps_per_cluster, - n_original_clusters - 1, - ) - # Position within the period (0 to timesteps_per_cluster-1) - positions_in_period = np.arange(n_original_timesteps) % timesteps_per_cluster - - # Create DataArrays for indexing (with original_time dimension, coords added later) - original_period_da = xr.DataArray(original_period_indices, dims=['original_time']) - position_in_period_da = xr.DataArray(positions_in_period, dims=['original_time']) - - # Map original period to cluster - cluster_indices = cluster_assignments.isel(original_cluster=original_period_da) - - # Get segment index and position for each original timestep - # segment_assignments has shape (cluster, time) where time = timesteps_per_cluster - seg_indices = segment_assignments.isel(cluster=cluster_indices, time=position_in_period_da) - positions = position_within_segment.isel(cluster=cluster_indices, time=position_in_period_da) - durations = segment_durations.isel(cluster=cluster_indices, segment=seg_indices) - - # Calculate interpolation factor: position within segment (0 to 1) - # At position=0, factor=0.5/duration (start of segment) - # At position=duration-1, factor approaches 1 (end of segment) - factor = xr.where(durations > 1, (positions + 0.5) / durations, 0.5) - - # Get start and end boundary values from charge_state - # charge_state has dims (cluster, time) where time = segment boundaries (n_segments+1) - start_vals = da.isel(cluster=cluster_indices, time=seg_indices) - end_vals = da.isel(cluster=cluster_indices, time=seg_indices + 1) - - # Linear interpolation - interpolated = start_vals + (end_vals - start_vals) * factor - - # Clean up coordinate artifacts and rename - interpolated = interpolated.drop_vars(['cluster', 'time', 'segment'], errors='ignore') - interpolated = interpolated.rename({'original_time': 'time'}).assign_coords(time=original_timesteps) - - return interpolated.transpose('time', ...).assign_attrs(da.attrs) - - def _expand_first_timestep_only( - self, - da: xr.DataArray, - clustering: Clustering, - original_timesteps: pd.DatetimeIndex, - ) -> xr.DataArray: - """Expand binary event variables (startup/shutdown) to first timestep of each segment. - - For segmented systems, binary event variables like startup and shutdown indicate - that an event occurred somewhere in the segment. 
When expanding, we place the - event at the first timestep of each segment and set all other timesteps to 0. - - This method is only used for segmented systems. For non-segmented systems, - the timing within the cluster is preserved by normal expansion. - - Args: - da: Binary event DataArray with dims including (cluster, time). - clustering: Clustering object with segment info (must be segmented). - original_timesteps: Original timesteps to expand to. - - Returns: - Expanded DataArray with event values only at first timestep of each segment. - """ - # First expand normally (repeats values) - expanded = clustering.expand_data(da, original_time=original_timesteps) - - # Build mask: True only at first timestep of each segment - n_original_timesteps = len(original_timesteps) - timesteps_per_cluster = clustering.timesteps_per_cluster - n_original_clusters = clustering.n_original_clusters - - position_within_segment = clustering.results.position_within_segment - cluster_assignments = clustering.cluster_assignments - - # Compute original period index and position within period - original_period_indices = np.minimum( - np.arange(n_original_timesteps) // timesteps_per_cluster, - n_original_clusters - 1, - ) - positions_in_period = np.arange(n_original_timesteps) % timesteps_per_cluster - - # Create DataArrays for indexing (coords added later after rename) - original_period_da = xr.DataArray(original_period_indices, dims=['original_time']) - position_in_period_da = xr.DataArray(positions_in_period, dims=['original_time']) - - # Map to cluster and get position within segment - cluster_indices = cluster_assignments.isel(original_cluster=original_period_da) - pos_in_segment = position_within_segment.isel(cluster=cluster_indices, time=position_in_period_da) - - # Clean up and create mask - pos_in_segment = pos_in_segment.drop_vars(['cluster', 'time'], errors='ignore') - pos_in_segment = pos_in_segment.rename({'original_time': 'time'}).assign_coords(time=original_timesteps) - - # First timestep of segment has position 0 - is_first = pos_in_segment == 0 - - # Apply mask: keep value at first timestep, zero elsewhere - result = xr.where(is_first, expanded, 0) - return result.assign_attrs(da.attrs) - def expand(self) -> FlowSystem: """Expand a clustered FlowSystem back to full original timesteps. 
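The segmented charge_state expansion (both the removed helper above and the new `_Expander._interpolate_charge_state_segmented`) boils down to linear interpolation between a segment's two boundary values, using `factor = (position + 0.5) / duration`. A rough standalone sketch of that rule for a single segment, with made-up numbers:

```python
import numpy as np

# Illustrative values only, not flixopt data
start_val, end_val = 40.0, 10.0   # charge_state at segment start / end boundary
duration = 4                      # number of original timesteps in this segment

positions = np.arange(duration)   # 0, 1, 2, 3
factor = (positions + 0.5) / duration if duration > 1 else np.full(duration, 0.5)
interpolated = start_val + (end_val - start_val) * factor
print(interpolated)  # [36.25 28.75 21.25 13.75] - trajectory between 40 and 10
```

The half-step offset keeps interpolated values strictly between the boundaries, so a one-timestep segment simply takes the midpoint.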
@@ -1926,91 +2001,9 @@ def expand(self) -> FlowSystem: """ from .flow_system import FlowSystem - # Validate and extract clustering info + # Validate and create expander with pre-computed state clustering = self._validate_for_expansion() - - timesteps_per_cluster = clustering.timesteps_per_cluster - # For segmented systems, the time dimension has n_segments entries - n_segments = clustering.n_segments - time_dim_size = n_segments if n_segments is not None else timesteps_per_cluster - n_clusters = clustering.n_clusters - n_original_clusters = clustering.n_original_clusters - - # Get original timesteps and dimensions - original_timesteps = clustering.original_timesteps - n_original_timesteps = len(original_timesteps) - original_timesteps_extra = FlowSystem._create_timesteps_with_extra(original_timesteps, None) - - # For charge_state expansion: index of last valid original cluster - last_original_cluster_idx = min( - (n_original_timesteps - 1) // timesteps_per_cluster, - n_original_clusters - 1, - ) - - # Build variable category sets for expansion handling - variable_categories = getattr(self._fs, '_variable_categories', {}) - if variable_categories: - # Use registered categories - state_vars = {name for name, cat in variable_categories.items() if cat in EXPAND_INTERPOLATE} - first_timestep_vars = {name for name, cat in variable_categories.items() if cat in EXPAND_FIRST_TIMESTEP} - segment_total_vars = {name for name, cat in variable_categories.items() if cat in EXPAND_DIVIDE} - else: - # Fallback to pattern matching for old FlowSystems without categories - state_vars = set() # Will use endswith('|charge_state') check - first_timestep_vars = set() # Will use endswith('|startup') / endswith('|shutdown') check - segment_total_vars = self._build_segment_total_varnames() if clustering.is_segmented else set() - - def _is_state_variable(var_name: str) -> bool: - return var_name in state_vars or (not variable_categories and var_name.endswith('|charge_state')) - - def _is_first_timestep_variable(var_name: str) -> bool: - return var_name in first_timestep_vars or ( - not variable_categories and (var_name.endswith('|startup') or var_name.endswith('|shutdown')) - ) - - # For segmented systems: build expansion divisor - expansion_divisor = None - if clustering.is_segmented: - expansion_divisor = clustering.build_expansion_divisor(original_time=original_timesteps) - - def _append_final_state(expanded: xr.DataArray, da: xr.DataArray) -> xr.DataArray: - """Append final state value from original data to expanded data.""" - cluster_assignments = clustering.cluster_assignments - if cluster_assignments.ndim == 1: - last_cluster = int(cluster_assignments.values[last_original_cluster_idx]) - extra_val = da.isel(cluster=last_cluster, time=-1) - else: - last_clusters = cluster_assignments.isel(original_cluster=last_original_cluster_idx) - extra_val = da.isel(cluster=last_clusters, time=-1) - extra_val = extra_val.drop_vars(['cluster', 'time'], errors='ignore') - extra_val = extra_val.expand_dims(time=[original_timesteps_extra[-1]]) - return xr.concat([expanded, extra_val], dim='time') - - def expand_da(da: xr.DataArray, var_name: str = '', is_solution: bool = False) -> xr.DataArray: - """Expand a DataArray from clustered to original timesteps.""" - if 'time' not in da.dims: - return da.copy() - - has_cluster_dim = 'cluster' in da.dims - is_state = _is_state_variable(var_name) and has_cluster_dim - is_first_timestep = _is_first_timestep_variable(var_name) and has_cluster_dim - is_segment_total = is_solution and 
var_name in segment_total_vars - - # Choose expansion method - if is_state and clustering.is_segmented: - expanded = self._interpolate_charge_state_segmented(da, clustering, original_timesteps) - elif is_first_timestep and is_solution and clustering.is_segmented: - return self._expand_first_timestep_only(da, clustering, original_timesteps) - else: - expanded = clustering.expand_data(da, original_time=original_timesteps) - if is_segment_total and expansion_divisor is not None: - expanded = expanded / expansion_divisor - - # State variables need final state appended - if is_state: - expanded = _append_final_state(expanded, da) - - return expanded + expander = _Expander(self._fs, clustering) # Helper to construct DataArray without slow _construct_dataarray def _fast_get_da(ds: xr.Dataset, name: str, coord_cache: dict) -> xr.DataArray: @@ -2037,7 +2030,7 @@ def _fast_get_da(ds: xr.Dataset, name: str, coord_cache: dict) -> xr.DataArray: # (e.g., representative_weights with dims ('cluster',) or ('cluster', 'period')) if 'cluster' in da.dims and 'time' not in da.dims: continue - data_vars[name] = expand_da(da, name) + data_vars[name] = expander.expand_dataarray(da, name) # Remove timestep_duration reference from attrs - let FlowSystem compute it from timesteps_extra # This ensures proper time coordinates for xarray alignment with N+1 solution timesteps attrs = {k: v for k, v in reduced_ds.attrs.items() if k not in clustering_attrs and k != 'timestep_duration'} @@ -2055,18 +2048,18 @@ def _fast_get_da(ds: xr.Dataset, name: str, coord_cache: dict) -> xr.DataArray: if name in sol_coord_names: continue da = _fast_get_da(reduced_solution, name, sol_coord_cache) - expanded_sol_vars[name] = expand_da(da, name, is_solution=True) + expanded_sol_vars[name] = expander.expand_dataarray(da, name, is_solution=True) expanded_fs._solution = xr.Dataset(expanded_sol_vars, attrs=reduced_solution.attrs) - expanded_fs._solution = expanded_fs._solution.reindex(time=original_timesteps_extra) + expanded_fs._solution = expanded_fs._solution.reindex(time=expander._original_timesteps_extra) # 3. 
Combine charge_state with SOC_boundary for intercluster storages self._combine_intercluster_charge_states( expanded_fs, reduced_solution, clustering, - original_timesteps_extra, - timesteps_per_cluster, - n_original_clusters, + expander._original_timesteps_extra, + expander._timesteps_per_cluster, + expander._n_original_clusters, ) # Log expansion info @@ -2075,15 +2068,15 @@ def _fast_get_da(ds: xr.Dataset, name: str, coord_cache: dict) -> xr.DataArray: n_combinations = (len(self._fs.periods) if has_periods else 1) * ( len(self._fs.scenarios) if has_scenarios else 1 ) - n_reduced_timesteps = n_clusters * time_dim_size - segmented_info = f' ({n_segments} segments)' if n_segments else '' + n_reduced_timesteps = expander._n_clusters * expander._time_dim_size + segmented_info = f' ({expander._n_segments} segments)' if expander._n_segments else '' logger.info( - f'Expanded FlowSystem from {n_reduced_timesteps} to {n_original_timesteps} timesteps ' - f'({n_clusters} clusters{segmented_info}' + f'Expanded FlowSystem from {n_reduced_timesteps} to {expander._n_original_timesteps} timesteps ' + f'({expander._n_clusters} clusters{segmented_info}' + ( f', {n_combinations} period/scenario combinations)' if n_combinations > 1 - else f' → {n_original_clusters} original clusters)' + else f' → {expander._n_original_clusters} original clusters)' ) ) From dadc33eb43d4024c590cc122c863846f5dbe05cc Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 28 Jan 2026 08:23:24 +0100 Subject: [PATCH 15/15] _Expander class (lines 82-491) - fully self-contained expansion logic: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _Expander ├── __init__(fs, clustering) # Pre-computes all shared state ├── _is_state_variable() # Category checks ├── _is_first_timestep_variable() ├── _build_segment_total_varnames() # Backward compatibility fallback ├── _append_final_state() # State variable N+1 handling ├── _interpolate_charge_state_segmented() # Segmented charge_state interpolation ├── _expand_first_timestep_only() # Startup/shutdown expansion ├── expand_dataarray() # Single DataArray expansion ├── _fast_get_da() # Performance helper ├── _combine_intercluster_charge_states() # SOC boundary handling ├── _apply_soc_decay() # Self-discharge decay └── expand_flow_system() # Main entry point - returns expanded FlowSystem TransformAccessor.expand() (lines 1944-2048) - thin wrapper: def expand(self) -> FlowSystem: """...(docstring preserved)...""" clustering = self._validate_for_expansion() expander = _Expander(self._fs, clustering) return expander.expand_flow_system() Benefits: - Complete encapsulation: all expansion logic in one class - Self-contained: _Expander doesn't depend on TransformAccessor internals - Clean separation: accessor validates, expander does the work - Testable: _Expander could be unit tested independently - Simpler signatures: methods use instance state instead of passing many parameters --- flixopt/transform_accessor.py | 389 ++++++++++++++++------------------ 1 file changed, 180 insertions(+), 209 deletions(-) diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index fbd17eaab..54bf69670 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -333,6 +333,185 @@ def expand_dataarray(self, da: xr.DataArray, var_name: str = '', is_solution: bo return expanded + def _fast_get_da(self, ds: xr.Dataset, name: str, coord_cache: dict) -> xr.DataArray: + """Construct DataArray without slow 
_construct_dataarray calls.""" + variable = ds.variables[name] + var_dims = set(variable.dims) + coords = {k: v for k, v in coord_cache.items() if set(v.dims).issubset(var_dims)} + return xr.DataArray(variable, coords=coords, name=name) + + def _combine_intercluster_charge_states(self, expanded_fs: FlowSystem, reduced_solution: xr.Dataset) -> None: + """Combine charge_state with SOC_boundary for intercluster storages (in-place). + + For intercluster storages, charge_state is relative (delta-E) and can be negative. + Per Blanke et al. (2022) Eq. 9, actual SOC at time t in period d is: + SOC(t) = SOC_boundary[d] * (1 - loss)^t_within_period + charge_state(t) + where t_within_period is hours from period start (accounts for self-discharge decay). + + Args: + expanded_fs: The expanded FlowSystem (modified in-place). + reduced_solution: The original reduced solution dataset. + """ + n_original_timesteps_extra = len(self._original_timesteps_extra) + soc_boundary_vars = self._fs.get_variables_by_category(VariableCategory.SOC_BOUNDARY) + + for soc_boundary_name in soc_boundary_vars: + storage_name = soc_boundary_name.rsplit('|', 1)[0] + charge_state_name = f'{storage_name}|charge_state' + if charge_state_name not in expanded_fs._solution: + continue + + soc_boundary = reduced_solution[soc_boundary_name] + expanded_charge_state = expanded_fs._solution[charge_state_name] + + # Map each original timestep to its original period index + original_cluster_indices = np.minimum( + np.arange(n_original_timesteps_extra) // self._timesteps_per_cluster, + self._n_original_clusters - 1, + ) + + # Select SOC_boundary for each timestep + soc_boundary_per_timestep = soc_boundary.isel( + cluster_boundary=xr.DataArray(original_cluster_indices, dims=['time']) + ).assign_coords(time=self._original_timesteps_extra) + + # Apply self-discharge decay + soc_boundary_per_timestep = self._apply_soc_decay( + soc_boundary_per_timestep, storage_name, original_cluster_indices + ) + + # Combine and clip to non-negative + combined = (expanded_charge_state + soc_boundary_per_timestep).clip(min=0) + expanded_fs._solution[charge_state_name] = combined.assign_attrs(expanded_charge_state.attrs) + + # Clean up SOC_boundary variables and orphaned coordinates + for soc_boundary_name in soc_boundary_vars: + if soc_boundary_name in expanded_fs._solution: + del expanded_fs._solution[soc_boundary_name] + if 'cluster_boundary' in expanded_fs._solution.coords: + expanded_fs._solution = expanded_fs._solution.drop_vars('cluster_boundary') + + def _apply_soc_decay( + self, + soc_boundary_per_timestep: xr.DataArray, + storage_name: str, + original_cluster_indices: np.ndarray, + ) -> xr.DataArray: + """Apply self-discharge decay to SOC_boundary values. + + Args: + soc_boundary_per_timestep: SOC boundary values mapped to each timestep. + storage_name: Name of the storage component. + original_cluster_indices: Mapping of timesteps to original cluster indices. + + Returns: + SOC boundary values with decay applied. + """ + storage = self._fs.storages.get(storage_name) + if storage is None: + return soc_boundary_per_timestep + + n_timesteps = len(self._original_timesteps_extra) + + # Time within period for each timestep (0, 1, 2, ..., T-1, 0, 1, ...) 
+ time_within_period = np.arange(n_timesteps) % self._timesteps_per_cluster + time_within_period[-1] = self._timesteps_per_cluster # Extra timestep gets full decay + time_within_period_da = xr.DataArray( + time_within_period, dims=['time'], coords={'time': self._original_timesteps_extra} + ) + + # Decay factor: (1 - loss)^t + loss_value = _scalar_safe_reduce(storage.relative_loss_per_hour, 'time', 'mean') + loss_arr = np.asarray(loss_value) + if not np.any(loss_arr > 0): + return soc_boundary_per_timestep + + decay_da = (1 - loss_arr) ** time_within_period_da + + # Handle cluster dimension if present + if 'cluster' in decay_da.dims: + cluster_assignments = self._clustering.cluster_assignments + if cluster_assignments.ndim == 1: + cluster_per_timestep = xr.DataArray( + cluster_assignments.values[original_cluster_indices], + dims=['time'], + coords={'time': self._original_timesteps_extra}, + ) + else: + cluster_per_timestep = cluster_assignments.isel( + original_cluster=xr.DataArray(original_cluster_indices, dims=['time']) + ).assign_coords(time=self._original_timesteps_extra) + decay_da = decay_da.isel(cluster=cluster_per_timestep).drop_vars('cluster', errors='ignore') + + return soc_boundary_per_timestep * decay_da + + def expand_flow_system(self) -> FlowSystem: + """Expand the clustered FlowSystem to full original timesteps. + + Returns: + FlowSystem: A new FlowSystem with full timesteps and expanded solution. + """ + from .flow_system import FlowSystem + + # 1. Expand FlowSystem data + reduced_ds = self._fs.to_dataset(include_solution=False) + clustering_attrs = {'is_clustered', 'n_clusters', 'timesteps_per_cluster', 'clustering', 'cluster_weight'} + skip_vars = {'cluster_weight', 'timestep_duration'} # These have special handling + data_vars = {} + coord_cache = {k: v for k, v in reduced_ds.coords.items()} + coord_names = set(coord_cache) + for name in reduced_ds.variables: + if name in coord_names: + continue + if name in skip_vars or name.startswith('clustering|'): + continue + da = self._fast_get_da(reduced_ds, name, coord_cache) + # Skip vars with cluster dim but no time dim - they don't make sense after expansion + if 'cluster' in da.dims and 'time' not in da.dims: + continue + data_vars[name] = self.expand_dataarray(da, name) + # Remove timestep_duration reference from attrs - let FlowSystem compute it from timesteps_extra + attrs = {k: v for k, v in reduced_ds.attrs.items() if k not in clustering_attrs and k != 'timestep_duration'} + expanded_ds = xr.Dataset(data_vars, attrs=attrs) + + expanded_fs = FlowSystem.from_dataset(expanded_ds) + + # 2. Expand solution (with segment total correction for segmented systems) + reduced_solution = self._fs.solution + sol_coord_cache = {k: v for k, v in reduced_solution.coords.items()} + sol_coord_names = set(sol_coord_cache) + expanded_sol_vars = {} + for name in reduced_solution.variables: + if name in sol_coord_names: + continue + da = self._fast_get_da(reduced_solution, name, sol_coord_cache) + expanded_sol_vars[name] = self.expand_dataarray(da, name, is_solution=True) + expanded_fs._solution = xr.Dataset(expanded_sol_vars, attrs=reduced_solution.attrs) + expanded_fs._solution = expanded_fs._solution.reindex(time=self._original_timesteps_extra) + + # 3. 
Combine charge_state with SOC_boundary for intercluster storages + self._combine_intercluster_charge_states(expanded_fs, reduced_solution) + + # Log expansion info + has_periods = self._fs.periods is not None + has_scenarios = self._fs.scenarios is not None + n_combinations = (len(self._fs.periods) if has_periods else 1) * ( + len(self._fs.scenarios) if has_scenarios else 1 + ) + n_reduced_timesteps = self._n_clusters * self._time_dim_size + segmented_info = f' ({self._n_segments} segments)' if self._n_segments else '' + logger.info( + f'Expanded FlowSystem from {n_reduced_timesteps} to {self._n_original_timesteps} timesteps ' + f'({self._n_clusters} clusters{segmented_info}' + + ( + f', {n_combinations} period/scenario combinations)' + if n_combinations > 1 + else f' → {self._n_original_clusters} original clusters)' + ) + ) + + return expanded_fs + class TransformAccessor: """ @@ -1762,135 +1941,6 @@ def _validate_for_expansion(self) -> Clustering: return self._fs.clustering - def _combine_intercluster_charge_states( - self, - expanded_fs: FlowSystem, - reduced_solution: xr.Dataset, - clustering: Clustering, - original_timesteps_extra: pd.DatetimeIndex, - timesteps_per_cluster: int, - n_original_clusters: int, - ) -> None: - """Combine charge_state with SOC_boundary for intercluster storages (in-place). - - For intercluster storages, charge_state is relative (delta-E) and can be negative. - Per Blanke et al. (2022) Eq. 9, actual SOC at time t in period d is: - SOC(t) = SOC_boundary[d] * (1 - loss)^t_within_period + charge_state(t) - where t_within_period is hours from period start (accounts for self-discharge decay). - - Args: - expanded_fs: The expanded FlowSystem (modified in-place). - reduced_solution: The original reduced solution dataset. - clustering: Clustering with cluster order info. - original_timesteps_extra: Original timesteps including the extra final timestep. - timesteps_per_cluster: Number of timesteps per cluster. - n_original_clusters: Number of original clusters before aggregation. 
- """ - n_original_timesteps_extra = len(original_timesteps_extra) - soc_boundary_vars = self._fs.get_variables_by_category(VariableCategory.SOC_BOUNDARY) - - for soc_boundary_name in soc_boundary_vars: - storage_name = soc_boundary_name.rsplit('|', 1)[0] - charge_state_name = f'{storage_name}|charge_state' - if charge_state_name not in expanded_fs._solution: - continue - - soc_boundary = reduced_solution[soc_boundary_name] - expanded_charge_state = expanded_fs._solution[charge_state_name] - - # Map each original timestep to its original period index - original_cluster_indices = np.minimum( - np.arange(n_original_timesteps_extra) // timesteps_per_cluster, - n_original_clusters - 1, - ) - - # Select SOC_boundary for each timestep - soc_boundary_per_timestep = soc_boundary.isel( - cluster_boundary=xr.DataArray(original_cluster_indices, dims=['time']) - ).assign_coords(time=original_timesteps_extra) - - # Apply self-discharge decay - soc_boundary_per_timestep = self._apply_soc_decay( - soc_boundary_per_timestep, - storage_name, - clustering, - original_timesteps_extra, - original_cluster_indices, - timesteps_per_cluster, - ) - - # Combine and clip to non-negative - combined = (expanded_charge_state + soc_boundary_per_timestep).clip(min=0) - expanded_fs._solution[charge_state_name] = combined.assign_attrs(expanded_charge_state.attrs) - - # Clean up SOC_boundary variables and orphaned coordinates - for soc_boundary_name in soc_boundary_vars: - if soc_boundary_name in expanded_fs._solution: - del expanded_fs._solution[soc_boundary_name] - if 'cluster_boundary' in expanded_fs._solution.coords: - expanded_fs._solution = expanded_fs._solution.drop_vars('cluster_boundary') - - def _apply_soc_decay( - self, - soc_boundary_per_timestep: xr.DataArray, - storage_name: str, - clustering: Clustering, - original_timesteps_extra: pd.DatetimeIndex, - original_cluster_indices: np.ndarray, - timesteps_per_cluster: int, - ) -> xr.DataArray: - """Apply self-discharge decay to SOC_boundary values. - - Args: - soc_boundary_per_timestep: SOC boundary values mapped to each timestep. - storage_name: Name of the storage component. - clustering: Clustering with cluster order info. - original_timesteps_extra: Original timesteps including final extra timestep. - original_cluster_indices: Mapping of timesteps to original cluster indices. - timesteps_per_cluster: Number of timesteps per cluster. - - Returns: - SOC boundary values with decay applied. - """ - storage = self._fs.storages.get(storage_name) - if storage is None: - return soc_boundary_per_timestep - - n_timesteps = len(original_timesteps_extra) - - # Time within period for each timestep (0, 1, 2, ..., T-1, 0, 1, ...) 
- time_within_period = np.arange(n_timesteps) % timesteps_per_cluster - time_within_period[-1] = timesteps_per_cluster # Extra timestep gets full decay - time_within_period_da = xr.DataArray( - time_within_period, dims=['time'], coords={'time': original_timesteps_extra} - ) - - # Decay factor: (1 - loss)^t - loss_value = _scalar_safe_reduce(storage.relative_loss_per_hour, 'time', 'mean') - # Normalize to array for consistent handling (scalar_safe_reduce may return scalar or DataArray) - loss_arr = np.asarray(loss_value) - if not np.any(loss_arr > 0): - return soc_boundary_per_timestep - - decay_da = (1 - loss_arr) ** time_within_period_da - - # Handle cluster dimension if present - if 'cluster' in decay_da.dims: - cluster_assignments = clustering.cluster_assignments - if cluster_assignments.ndim == 1: - cluster_per_timestep = xr.DataArray( - cluster_assignments.values[original_cluster_indices], - dims=['time'], - coords={'time': original_timesteps_extra}, - ) - else: - cluster_per_timestep = cluster_assignments.isel( - original_cluster=xr.DataArray(original_cluster_indices, dims=['time']) - ).assign_coords(time=original_timesteps_extra) - decay_da = decay_da.isel(cluster=cluster_per_timestep).drop_vars('cluster', errors='ignore') - - return soc_boundary_per_timestep * decay_da - def expand(self) -> FlowSystem: """Expand a clustered FlowSystem back to full original timesteps. @@ -1999,85 +2049,6 @@ def expand(self) -> FlowSystem: - ``{flow}|startup``: Startup event - ``{flow}|shutdown``: Shutdown event """ - from .flow_system import FlowSystem - - # Validate and create expander with pre-computed state clustering = self._validate_for_expansion() expander = _Expander(self._fs, clustering) - - # Helper to construct DataArray without slow _construct_dataarray - def _fast_get_da(ds: xr.Dataset, name: str, coord_cache: dict) -> xr.DataArray: - variable = ds.variables[name] - var_dims = set(variable.dims) - coords = {k: v for k, v in coord_cache.items() if set(v.dims).issubset(var_dims)} - return xr.DataArray(variable, coords=coords, name=name) - - # 1. Expand FlowSystem data - reduced_ds = self._fs.to_dataset(include_solution=False) - clustering_attrs = {'is_clustered', 'n_clusters', 'timesteps_per_cluster', 'clustering', 'cluster_weight'} - skip_vars = {'cluster_weight', 'timestep_duration'} # These have special handling - data_vars = {} - # Use ds.variables pattern to avoid slow _construct_dataarray calls - coord_cache = {k: v for k, v in reduced_ds.coords.items()} - coord_names = set(coord_cache) - for name in reduced_ds.variables: - if name in coord_names: - continue - if name in skip_vars or name.startswith('clustering|'): - continue - da = _fast_get_da(reduced_ds, name, coord_cache) - # Skip vars with cluster dim but no time dim - they don't make sense after expansion - # (e.g., representative_weights with dims ('cluster',) or ('cluster', 'period')) - if 'cluster' in da.dims and 'time' not in da.dims: - continue - data_vars[name] = expander.expand_dataarray(da, name) - # Remove timestep_duration reference from attrs - let FlowSystem compute it from timesteps_extra - # This ensures proper time coordinates for xarray alignment with N+1 solution timesteps - attrs = {k: v for k, v in reduced_ds.attrs.items() if k not in clustering_attrs and k != 'timestep_duration'} - expanded_ds = xr.Dataset(data_vars, attrs=attrs) - - expanded_fs = FlowSystem.from_dataset(expanded_ds) - - # 2. 
Expand solution (with segment total correction for segmented systems) - reduced_solution = self._fs.solution - # Use ds.variables pattern to avoid slow _construct_dataarray calls - sol_coord_cache = {k: v for k, v in reduced_solution.coords.items()} - sol_coord_names = set(sol_coord_cache) - expanded_sol_vars = {} - for name in reduced_solution.variables: - if name in sol_coord_names: - continue - da = _fast_get_da(reduced_solution, name, sol_coord_cache) - expanded_sol_vars[name] = expander.expand_dataarray(da, name, is_solution=True) - expanded_fs._solution = xr.Dataset(expanded_sol_vars, attrs=reduced_solution.attrs) - expanded_fs._solution = expanded_fs._solution.reindex(time=expander._original_timesteps_extra) - - # 3. Combine charge_state with SOC_boundary for intercluster storages - self._combine_intercluster_charge_states( - expanded_fs, - reduced_solution, - clustering, - expander._original_timesteps_extra, - expander._timesteps_per_cluster, - expander._n_original_clusters, - ) - - # Log expansion info - has_periods = self._fs.periods is not None - has_scenarios = self._fs.scenarios is not None - n_combinations = (len(self._fs.periods) if has_periods else 1) * ( - len(self._fs.scenarios) if has_scenarios else 1 - ) - n_reduced_timesteps = expander._n_clusters * expander._time_dim_size - segmented_info = f' ({expander._n_segments} segments)' if expander._n_segments else '' - logger.info( - f'Expanded FlowSystem from {n_reduced_timesteps} to {expander._n_original_timesteps} timesteps ' - f'({expander._n_clusters} clusters{segmented_info}' - + ( - f', {n_combinations} period/scenario combinations)' - if n_combinations > 1 - else f' → {expander._n_original_clusters} original clusters)' - ) - ) - - return expanded_fs + return expander.expand_flow_system()
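
Note on the intercluster SOC reconstruction in _Expander._combine_intercluster_charge_states / _apply_soc_decay: below is a minimal NumPy sketch of the Blanke et al. (2022) Eq. 9 formula referenced in the docstrings. All values are invented toy data (scalar loss, no period/scenario dimensions); it is illustrative only and not part of the patch.

import numpy as np

# Toy setup: 2 original clusters of 3 timesteps, plus the extra boundary timestep,
# and a 1 %/h self-discharge loss (all values made up for illustration).
timesteps_per_cluster = 3
n_original_clusters = 2
loss = 0.01

soc_boundary = np.array([10.0, 7.0])                              # SOC at each original cluster start
charge_state = np.array([-1.0, 0.5, 1.0, -2.0, -1.0, 0.0, 0.5])   # relative delta-E per timestep (+1 extra)

n_timesteps_extra = len(charge_state)
# Map each timestep to its original cluster; the extra timestep stays in the last cluster
cluster_idx = np.minimum(np.arange(n_timesteps_extra) // timesteps_per_cluster,
                         n_original_clusters - 1)
# Hours since the cluster start; the extra timestep gets the full decay
t_within = np.arange(n_timesteps_extra) % timesteps_per_cluster
t_within[-1] = timesteps_per_cluster

# Eq. 9: SOC(t) = SOC_boundary[d] * (1 - loss)**t_within + charge_state(t), clipped at 0
soc = np.clip(soc_boundary[cluster_idx] * (1 - loss) ** t_within + charge_state, 0.0, None)
print(soc.round(3))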
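
Note on the _fast_get_da helper used by _Expander: it rebuilds each DataArray from ds.variables plus a pre-filtered coordinate cache, skipping xarray's per-variable _construct_dataarray work. The following standalone sketch shows the same pattern on an invented toy dataset (the 'flow' variable and coordinate names are illustrative, not flixopt's) and checks that it matches the regular accessor.

import numpy as np
import pandas as pd
import xarray as xr

# Invented toy dataset: one variable over (time, cluster)
ds = xr.Dataset(
    {'flow': (('time', 'cluster'), np.arange(8.0).reshape(4, 2))},
    coords={'time': pd.date_range('2026-01-01', periods=4, freq='h'), 'cluster': [0, 1]},
)

coord_cache = dict(ds.coords)          # build once per dataset, reuse for every variable
name = 'flow'
variable = ds.variables[name]          # raw Variable: no coordinate bookkeeping
var_dims = set(variable.dims)
coords = {k: v for k, v in coord_cache.items() if set(v.dims).issubset(var_dims)}
da = xr.DataArray(variable, coords=coords, name=name)

assert da.equals(ds[name])             # same data and coords as the regular (slower) accessor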
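
Note on the "Handle cluster dimension if present" branch of _apply_soc_decay: it relies on xarray's vectorized (pointwise) indexing, where an integer indexer carrying the 'time' dimension picks one cluster per timestep. A tiny sketch with invented values:

import numpy as np
import xarray as xr

# Invented decay table: rows are clusters, columns are timesteps
decay = xr.DataArray(
    np.array([[1.00, 0.99, 0.98],    # cluster 0
              [1.00, 0.95, 0.90]]),  # cluster 1
    dims=['cluster', 'time'],
)
# Which cluster each timestep belongs to (indexer shares the 'time' dim of the array)
cluster_per_timestep = xr.DataArray([0, 0, 1], dims=['time'])

# Pointwise indexing: selects decay[cluster_per_timestep[t], t] for every t,
# so the result keeps only the 'time' dim; drop any attached 'cluster' coord
decay_per_timestep = decay.isel(cluster=cluster_per_timestep).drop_vars('cluster', errors='ignore')
print(decay_per_timestep.values)     # -> [1.0, 0.99, 0.90]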