# Eager vs Lazy DataFrames: One Fix to Make Your Code Work Anywhere

## Motivation

In [None]:
from datetime import datetime

import pandas as pd
import polars as pl

data1 = {"store": [1, 1, 2], "date_id": [4, 5, 6]}
data2 = {"store": [1, 2], "sales": [7, 8]}

pandas_df1 = pd.DataFrame(data1)
pandas_df2 = pd.DataFrame(data2)

# Repeat the same eager pipeline five times: every iteration prints the
# same frame.
for _ in range(5):
    # Left join: keep every row of the first frame and attach `sales`.
    pandas_df = pandas_df1.merge(pandas_df2, on="store", how="left")

    # Running total of `sales` within each store.
    pandas_df = pandas_df.assign(
        cumulative_sales=pandas_df.groupby("store")["sales"].cumsum()
    )

    print(pandas_df)

In [None]:
polars_df1 = pl.DataFrame(data1).lazy()
polars_df2 = pl.DataFrame(data2).lazy()

# Same pipeline as the pandas cell, but lazy and collected with the streaming
# engine: the printed frames can differ from one iteration to the next
# (presumably because row order is not guaranteed here).
for _ in range(5):
    lazy_result = polars_df1.join(
        polars_df2, on="store", how="left"
    ).with_columns(
        # Running total of `sales` within each store.
        cumulative_sales=pl.col("sales").cum_sum().over("store")
    )
    print(lazy_result.collect(engine="streaming"))

## Eager-only solution

In [None]:
# Three consecutive days of sales for two stores; `None` marks a missing
# sales figure.
data = {
    "sale_date": [
        datetime(2025, 5, day) for _ in range(2) for day in (22, 23, 24)
    ],
    "store": ["Thimphu"] * 3 + ["Paro"] * 3,
    "sales": [1100, None, 1450, 501, 500, None],
}

pdf = pd.DataFrame(data)
print(pdf)

In [None]:
import narwhals as nw
from narwhals.typing import IntoFrameT


def agnostic_ffill_by_store(df_native: IntoFrameT) -> IntoFrameT:
    """Forward-fill null ``sales`` values within each ``store``.

    The native frame is wrapped with Narwhals, the ``sales`` column is
    replaced by its forward-filled version partitioned by ``store``, and the
    result is converted back with ``to_native()``.

    The window has no ``order_by``, so this supports pandas and
    polars.DataFrame inputs but not lazy ones.
    """
    frame = nw.from_native(df_native)
    filled_sales = nw.col("sales").fill_null(strategy="forward").over("store")
    return frame.with_columns(filled_sales).to_native()

In [None]:
# pandas.DataFrame: an eager input, which the version without `order_by` supports.
df_pandas = pd.DataFrame(data)
agnostic_ffill_by_store(df_pandas)

In [None]:
# polars.DataFrame: an eager Polars frame is supported as well.
df_polars = pl.DataFrame(data)
agnostic_ffill_by_store(df_polars)

In [None]:
import duckdb

# Build a DuckDB relation by table name.
# NOTE(review): "df_polars" presumably resolves to the Python variable of that
# name through DuckDB's replacement scans — confirm.
duckdb_rel = duckdb.table("df_polars")
# Bare expression so the notebook displays the relation.
duckdb_rel

In [None]:
# Left commented out on purpose: this call fails for a lazy input such as a DuckDB relation.
# agnostic_ffill_by_store(duckdb_rel)
# Error: narwhals.exceptions.OrderDependentExprError: Order-dependent expressions are not supported for use in LazyFrame.

## Eager and lazy solution

In [None]:
def agnostic_ffill_by_store_improved(df_native: IntoFrameT) -> IntoFrameT:
    """Forward-fill null ``sales`` values within each ``store``, by date.

    The native frame is wrapped with Narwhals, the ``sales`` column is
    replaced by its forward-filled version partitioned by ``store`` and
    ordered by ``sale_date``, and the result is converted back with
    ``to_native()``.
    """
    frame = nw.from_native(df_native)
    # The explicit `order_by` is what distinguishes this from the version
    # that only partitions by store.
    filled_sales = (
        nw.col("sales")
        .fill_null(strategy="forward")
        .over("store", order_by="sale_date")
    )
    return frame.with_columns(filled_sales).to_native()

In [None]:
# DuckDB relation: the input the version without `order_by` rejected.
agnostic_ffill_by_store_improved(duckdb_rel)

In [None]:
# Polars LazyFrame: the function hands back a lazy frame, so collect() materializes it.
agnostic_ffill_by_store_improved(df_polars.lazy()).collect()

In [None]:
# The improved version still accepts an eager pandas DataFrame.
print(agnostic_ffill_by_store_improved(df_pandas))