diff --git a/sdks/java/io/iceberg/build.gradle b/sdks/java/io/iceberg/build.gradle
index 8142c5f5b90b..f874df14ee45 100644
--- a/sdks/java/io/iceberg/build.gradle
+++ b/sdks/java/io/iceberg/build.gradle
@@ -50,6 +50,7 @@ dependencies {
     implementation library.java.slf4j_api
     implementation library.java.joda_time
     implementation "org.apache.parquet:parquet-column:$parquet_version"
+    implementation "org.apache.parquet:parquet-common:$parquet_version"
     implementation "org.apache.parquet:parquet-hadoop:$parquet_version"
     implementation "org.apache.parquet:parquet-common:$parquet_version"
     implementation project(":sdks:java:io:parquet")
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergCdcReadSchemaTransformProvider.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergCdcReadSchemaTransformProvider.java
index 31ff57a668bb..c450beefeab3 100644
--- a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergCdcReadSchemaTransformProvider.java
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergCdcReadSchemaTransformProvider.java
@@ -118,12 +118,17 @@ public PCollectionRowTuple expand(PCollectionRowTuple input) {
               .streaming(configuration.getStreaming())
               .keeping(configuration.getKeep())
               .dropping(configuration.getDrop())
-              .withFilter(configuration.getFilter());
+              .withFilter(configuration.getFilter())
+              .withWatermarkColumn(configuration.getWatermarkColumn());
 
       @Nullable Integer pollIntervalSeconds = configuration.getPollIntervalSeconds();
       if (pollIntervalSeconds != null) {
         readRows = readRows.withPollInterval(Duration.standardSeconds(pollIntervalSeconds));
       }
+      @Nullable Long maxDelay = configuration.getMaxSnapshotDiscoveryDelay();
+      if (maxDelay != null) {
+        readRows = readRows.withMaxSnapshotDiscoveryDelay(Duration.standardSeconds(maxDelay));
+      }
 
       PCollection<Row> output = input.getPipeline().apply(readRows);
 
@@ -194,6 +199,18 @@ static Builder builder() {
         "A subset of column names to exclude from reading. If null or empty, all columns will be read.")
     abstract @Nullable List<String> getDrop();
 
+    @SchemaFieldDescription(
+        "Column used to derive the source's output watermark. "
+            + "Must be an existing, required, top-level column of type 'long' or 'timestamp'. "
+            + "If not set, the watermark advances according to snapshot commit timestamp.")
+    abstract @Nullable String getWatermarkColumn();
+
+    @SchemaFieldDescription(
+        "Maximum expected snapshot discovery delay in seconds. While idle, the source may advance "
+            + "the watermark to now() minus this delay; snapshots discovered later with older commit "
+            + "timestamps may be treated as late by downstream windowing. Default: 600 seconds.")
+    abstract @Nullable Long getMaxSnapshotDiscoveryDelay();
+
     @AutoValue.Builder
     abstract static class Builder {
       abstract Builder setTable(String table);
@@ -224,6 +241,10 @@ abstract static class Builder {
 
       abstract Builder setFilter(String filter);
 
+      abstract Builder setWatermarkColumn(String watermarkColumn);
+
+      abstract Builder setMaxSnapshotDiscoveryDelay(Long seconds);
+
       abstract Configuration build();
     }
 
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergIO.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergIO.java
index a5a3beef8f51..78a101201f8f 100644
--- a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergIO.java
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergIO.java
@@ -24,6 +24,7 @@
 import java.util.List;
 import org.apache.beam.sdk.annotations.Internal;
 import org.apache.beam.sdk.io.Read;
+import org.apache.beam.sdk.io.iceberg.cdc.IncrementalChangelogSource;
 import org.apache.beam.sdk.schemas.Schema;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.values.PBegin;
@@ -569,6 +570,10 @@ public enum StartingStrategy {
 
     abstract @Nullable String getFilter();
 
+    abstract @Nullable String getWatermarkColumn();
+
+    abstract @Nullable Duration getMaxSnapshotDiscoveryDelay();
+
     abstract Builder toBuilder();
 
     @AutoValue.Builder
@@ -599,6 +604,10 @@ abstract static class Builder {
 
       abstract Builder setFilter(@Nullable String filter);
 
+      abstract Builder setWatermarkColumn(@Nullable String watermarkColumn);
+
+      abstract Builder setMaxSnapshotDiscoveryDelay(@Nullable Duration delay);
+
       abstract ReadRows build();
     }
 
@@ -650,12 +659,27 @@ public ReadRows withFilter(@Nullable String filter) {
       return toBuilder().setFilter(filter).build();
     }
 
+    public ReadRows withWatermarkColumn(@Nullable String watermarkColumn) {
+      return toBuilder().setWatermarkColumn(watermarkColumn).build();
+    }
+
+    public ReadRows withMaxSnapshotDiscoveryDelay(@Nullable Duration delay) {
+      return toBuilder().setMaxSnapshotDiscoveryDelay(delay).build();
+    }
+
     @Override
     public PCollection<Row> expand(PBegin input) {
       TableIdentifier tableId =
           checkStateNotNull(getTableIdentifier(), "Must set a table to read from.");
-
-      Table table = getCatalogConfig().catalog().loadTable(tableId);
+      Table table;
+      try {
+        table = getCatalogConfig().catalog().loadTable(tableId);
+      } catch (Exception e) {
+        throw new RuntimeException(
+            "Could not fetch table at expansion time. Doing so is needed to "
+                + "determine the output Row schema.",
+            e);
+      }
 
       IcebergScanConfig scanConfig =
           IcebergScanConfig.builder()
@@ -674,12 +698,14 @@ public PCollection<Row> expand(PBegin input) {
               .setKeepFields(getKeep())
               .setDropFields(getDrop())
               .setFilterString(getFilter())
+              .setWatermarkColumn(getWatermarkColumn())
+              .setMaxSnapshotDiscoveryDelay(getMaxSnapshotDiscoveryDelay())
               .build();
       scanConfig.validate(table);
 
       PTransform<PBegin, PCollection<Row>> source =
           getUseCdc()
-              ? new IncrementalScanSource(scanConfig)
+              ? new IncrementalChangelogSource(scanConfig)
               : Read.from(new ScanSource(scanConfig));
 
       return input.apply(source);
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergScanConfig.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergScanConfig.java
index 45ecc7cf71c3..fb85594b3183 100644
--- a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergScanConfig.java
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergScanConfig.java
@@ -17,27 +17,37 @@
  */
 package org.apache.beam.sdk.io.iceberg;
 
+import static org.apache.beam.sdk.io.iceberg.IcebergUtils.icebergSchemaToBeamSchema;
+import static org.apache.beam.sdk.util.Preconditions.checkArgumentNotNull;
 import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument;
-import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull;
+import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState;
 import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Sets.newHashSet;
+import static org.apache.iceberg.types.Type.TypeID.LONG;
+import static org.apache.iceberg.types.Type.TypeID.TIMESTAMP;
 
 import com.google.auto.value.AutoValue;
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Set;
 import org.apache.beam.sdk.io.iceberg.IcebergIO.ReadRows.StartingStrategy;
 import org.apache.beam.sdk.schemas.Schema;
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting;
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.MoreObjects;
-import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList;
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.MetadataColumns;
+import org.apache.iceberg.StructLike;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.catalog.TableIdentifier;
 import org.apache.iceberg.expressions.Evaluator;
 import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.types.Comparators;
 import org.apache.iceberg.types.TypeUtil;
+import org.apache.iceberg.types.Types.NestedField;
 import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
 import org.checkerframework.checker.nullness.qual.Nullable;
 import org.checkerframework.dataflow.qual.Pure;
@@ -50,6 +60,8 @@ public abstract class IcebergScanConfig implements Serializable {
   private transient org.apache.iceberg.@MonotonicNonNull Schema cachedRequiredSchema;
   private transient @MonotonicNonNull Evaluator cachedEvaluator;
   private transient @MonotonicNonNull Expression cachedFilter;
+  private transient org.apache.iceberg.@MonotonicNonNull Schema cachedRecordIdSchema;
+  private transient @MonotonicNonNull Schema cachedRowIdBeamSchema;
 
   public enum ScanType {
     TABLE,
@@ -89,9 +101,9 @@ static org.apache.iceberg.Schema resolveSchema(
       @Nullable List<String> keep,
       @Nullable List<String> drop,
       @Nullable Set<String> fieldsInFilter) {
-    ImmutableList.Builder<String> selectedFieldsBuilder = ImmutableList.builder();
+    Set<String> selectedFields = new LinkedHashSet<>();
     if (keep != null && !keep.isEmpty()) {
-      selectedFieldsBuilder.addAll(keep);
+      selectedFields.addAll(keep);
     } else if (drop != null && !drop.isEmpty()) {
       List<String> paths = new ArrayList<>(TypeUtil.indexNameById(schema.asStruct()).values());
       Collections.sort(paths);
@@ -100,7 +112,7 @@ static org.apache.iceberg.Schema resolveSchema(
         boolean isParent = i + 1 < paths.size() && paths.get(i + 1).startsWith(path + ".");
         boolean isDrop = drop.stream().anyMatch(d -> path.equals(d) || path.startsWith(d + "."));
         if (!isParent && !isDrop) {
-          selectedFieldsBuilder.add(path);
+          selectedFields.add(path);
         }
       }
     } else {
@@ -111,9 +123,8 @@ static org.apache.iceberg.Schema resolveSchema(
     if (fieldsInFilter != null && !fieldsInFilter.isEmpty()) {
       fieldsInFilter.stream()
           .map(f -> schema.caseInsensitiveFindField(f).name())
-          .forEach(selectedFieldsBuilder::add);
+          .forEach(selectedFields::add);
     }
-    ImmutableList<String> selectedFields = selectedFieldsBuilder.build();
     return selectedFields.isEmpty() ? schema : schema.select(selectedFields);
   }
 
@@ -141,15 +152,34 @@ public org.apache.iceberg.Schema getRequiredSchema() {
     return cachedRequiredSchema;
   }
 
+  public org.apache.iceberg.Schema recordIdSchema() {
+    if (cachedRecordIdSchema == null) {
+      org.apache.iceberg.Schema fullSchema = TableCache.get(getTableIdentifier()).schema();
+      cachedRecordIdSchema = TypeUtil.select(fullSchema, fullSchema.identifierFieldIds());
+    }
+    return cachedRecordIdSchema;
+  }
+
+  public Schema rowIdBeamSchema() {
+    if (cachedRowIdBeamSchema == null) {
+      cachedRowIdBeamSchema = icebergSchemaToBeamSchema(recordIdSchema());
+    }
+    return cachedRowIdBeamSchema;
+  }
+
+  public Comparator<StructLike> recordIdComparator() {
+    return Comparators.forType(recordIdSchema().asStruct());
+  }
+
   @Pure
   @Nullable
-  public Evaluator getEvaluator() {
+  public Evaluator getEvaluator(org.apache.iceberg.Schema requiredSchema) {
     @Nullable Expression filter = getFilter();
     if (filter == null) {
       return null;
     }
     if (cachedEvaluator == null) {
-      cachedEvaluator = new Evaluator(getRequiredSchema().asStruct(), filter);
+      cachedEvaluator = new Evaluator(requiredSchema.asStruct(), filter);
     }
     return cachedEvaluator;
   }
@@ -226,6 +256,12 @@ public Expression getFilter() {
   @Pure
   public abstract @Nullable List<String> getDropFields();
 
+  @Pure
+  public abstract @Nullable String getWatermarkColumn();
+
+  @Pure
+  public abstract @Nullable Duration getMaxSnapshotDiscoveryDelay();
+
   @Pure
   public static Builder builder() {
     return new AutoValue_IcebergScanConfig.Builder()
@@ -248,7 +284,8 @@ public static Builder builder() {
         .setPollInterval(null)
         .setStartingStrategy(null)
         .setTag(null)
-        .setBranch(null);
+        .setBranch(null)
+        .setWatermarkColumn(null);
   }
 
   @AutoValue.Builder
@@ -311,6 +348,10 @@ public Builder setTableIdentifier(String... names) {
 
     public abstract Builder setDropFields(@Nullable List<String> fields);
 
+    public abstract Builder setWatermarkColumn(@Nullable String watermarkColumn);
+
+    public abstract Builder setMaxSnapshotDiscoveryDelay(@Nullable Duration delay);
+
     public abstract IcebergScanConfig build();
   }
 
@@ -328,16 +369,19 @@ void validate(Table table) {
       String param;
       if (keep != null) {
         param = "keep";
-        fieldsSpecified = newHashSet(checkNotNull(keep));
+        fieldsSpecified = newHashSet(checkArgumentNotNull(keep));
       } else { // drop != null
         param = "drop";
-        fieldsSpecified = newHashSet(checkNotNull(drop));
+        fieldsSpecified = newHashSet(checkArgumentNotNull(drop));
       }
       fieldsSpecified.removeIf(name -> table.schema().findField(name) != null);
 
       checkArgument(
-          fieldsSpecified.isEmpty(),
-          error(String.format("'%s' specifies unknown field(s): %s", param, fieldsSpecified)));
+          fieldsSpecified.isEmpty()
+              || fieldsSpecified.stream().allMatch(MetadataColumns::isMetadataColumn),
+          error("'%s' specifies unknown field(s): %s"),
+          param,
+          fieldsSpecified);
     }
 
     // TODO(#34168, ahmedabu98): fill these gaps for the existing batch source
@@ -371,6 +415,18 @@ void validate(Table table) {
                     + "reading with Managed.ICEBERG_CDC: "
                     + invalidOptions));
       }
+    } else {
+      Set<Integer> primaryKeyIds = new HashSet<>(table.schema().identifierFieldIds());
+      checkState(
+          !primaryKeyIds.isEmpty(),
+          "Cannot read CDC records as the table schema does not specify any primary key fields.");
+      Set<Integer> projectedPrimaryKeyIds = getProjectedSchema().identifierFieldIds();
+      primaryKeyIds.removeAll(projectedPrimaryKeyIds);
+      checkArgument(
+          primaryKeyIds.isEmpty(),
+          "When reading CDC records, the projected schema must not drop primary key fields. "
+              + "The specified configuration drops the following PK fields: %s",
+          primaryKeyIds);
     }
 
     if (getStartingStrategy() != null) {
@@ -385,12 +441,46 @@ void validate(Table table) {
     checkArgument(
         getToTimestamp() == null || getToSnapshot() == null,
         error("only one of 'to_timestamp' or 'to_snapshot' can be set"));
+    @Nullable Long fromSnapshotId = ReadUtils.getFromSnapshotInclusive(table, this);
+    @Nullable Long toSnapshotId = ReadUtils.getToSnapshot(table, this);
+    if (fromSnapshotId != null) {
+      checkArgumentNotNull(
+          table.snapshot(fromSnapshotId),
+          error("configured starting snapshot does not exist: '%s'"),
+          fromSnapshotId);
+    }
+    if (toSnapshotId != null) {
+      checkArgumentNotNull(
+          table.snapshot(toSnapshotId),
+          error("configured end snapshot does not exist: '%s'"),
+          toSnapshotId);
+    }
 
     if (getPollInterval() != null) {
       checkArgument(
           Boolean.TRUE.equals(getStreaming()),
           error("'poll_interval_seconds' can only be set when streaming is true"));
     }
+
+    @Nullable String watermarkColumn = getWatermarkColumn();
+    if (watermarkColumn != null) {
+      checkArgument(getUseCdc(), error("'watermark_column' is only supported in CDC mode"));
+      NestedField field = table.schema().findField(watermarkColumn);
+      checkArgument(
+          field != null, error("'watermark_column' refers to unknown column: %s"), watermarkColumn);
+      checkArgument(
+          field.isRequired(),
+          error("'watermark_column' needs to be a non-nullable column: %s"),
+          watermarkColumn);
+      checkArgument(
+          field.type().typeId() == TIMESTAMP || field.type().typeId() == LONG,
+          error("'watermark_column' must be of type 'long' or 'timestamp', but '%s' has type %s"),
+          watermarkColumn,
+          field.type().typeId());
+      checkArgumentNotNull(
+          getProjectedSchema().findField(watermarkColumn),
+          error("'watermark_column' column should not be dropped."));
+    }
   }
 
   private String error(String message) {
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergUtils.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergUtils.java
index d0d24532ff39..2b7a8f956bae 100644
--- a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergUtils.java
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/IcebergUtils.java
@@ -46,6 +46,7 @@
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting;
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList;
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.StructLike;
 import org.apache.iceberg.data.GenericRecord;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.types.Type;
@@ -469,120 +470,141 @@ private static Object getIcebergTimestampValue(Object beamValue, boolean shouldA
     }
   }
 
+  /** Converts a {@link StructLike} to a Beam {@link Row}. */
+  public static Row structToRow(Schema schema, StructLike struct) {
+    checkState(
+        schema.getFieldCount() == struct.size(),
+        "Struct of size %s does not match expected schema size %s",
+        struct.size(),
+        schema.getFieldCount());
+    Row.Builder rowBuilder = Row.withSchema(schema);
+    for (int i = 0; i < schema.getFieldCount(); i++) {
+      Schema.Field field = schema.getField(i);
+      @Nullable Object icebergValue = struct.get(i, Object.class);
+      addIcebergValue(rowBuilder, field, icebergValue);
+    }
+    return rowBuilder.build();
+  }
+
   /** Converts an Iceberg {@link Record} to a Beam {@link Row}. */
   public static Row icebergRecordToBeamRow(Schema schema, Record record) {
     Row.Builder rowBuilder = Row.withSchema(schema);
     for (Schema.Field field : schema.getFields()) {
-      boolean isNullable = field.getType().getNullable();
       @Nullable Object icebergValue = record.getField(field.getName());
-      if (icebergValue == null) {
-        if (isNullable) {
-          rowBuilder.addValue(null);
-          continue;
-        }
-        throw new RuntimeException(
-            String.format("Received null value for required field '%s'.", field.getName()));
+      addIcebergValue(rowBuilder, field, icebergValue);
+    }
+    return rowBuilder.build();
+  }
+
+  private static void addIcebergValue(
+      Row.Builder rowBuilder, Schema.Field field, @Nullable Object icebergValue) {
+    boolean isNullable = field.getType().getNullable();
+    if (icebergValue == null) {
+      if (isNullable) {
+        rowBuilder.addValue(null);
+        return;
       }
-      switch (field.getType().getTypeName()) {
-        case BYTE:
-        case INT16:
-        case INT32:
-        case INT64:
-        case DECIMAL: // Iceberg and Beam both use BigDecimal
-        case FLOAT: // Iceberg and Beam both use float
-        case DOUBLE: // Iceberg and Beam both use double
-        case STRING: // Iceberg and Beam both use String
-        case BOOLEAN: // Iceberg and Beam both use boolean
-          rowBuilder.addValue(icebergValue);
-          break;
-        case ARRAY:
-          checkState(
-              icebergValue instanceof List,
-              "Expected List type for field '%s' but received %s",
-              field.getName(),
-              icebergValue.getClass());
-          List<@NonNull ?> beamList = (List<@NonNull ?>) icebergValue;
-          Schema.FieldType collectionType =
-              checkStateNotNull(field.getType().getCollectionElementType());
-          // recurse on struct types
-          if (collectionType.getTypeName().isCompositeType()) {
-            Schema innerSchema = checkStateNotNull(collectionType.getRowSchema());
-            beamList =
-                beamList.stream()
-                    .map(v -> icebergRecordToBeamRow(innerSchema, (Record) v))
-                    .collect(Collectors.toList());
-          }
-          rowBuilder.addValue(beamList);
-          break;
-        case ITERABLE:
-          checkState(
-              icebergValue instanceof Iterable,
-              "Expected Iterable type for field '%s' but received %s",
-              field.getName(),
-              icebergValue.getClass());
-          Iterable<@NonNull ?> beamIterable = (Iterable<@NonNull ?>) icebergValue;
-          Schema.FieldType iterableCollectionType =
-              checkStateNotNull(field.getType().getCollectionElementType());
-          // recurse on struct types
-          if (iterableCollectionType.getTypeName().isCompositeType()) {
-            Schema innerSchema = checkStateNotNull(iterableCollectionType.getRowSchema());
-            ImmutableList.Builder<Row> builder = ImmutableList.builder();
-            for (Record v : (Iterable<@NonNull Record>) icebergValue) {
-              builder.add(icebergRecordToBeamRow(innerSchema, v));
-            }
-            beamIterable = builder.build();
+      throw new RuntimeException(
+          String.format("Received null value for required field '%s'.", field.getName()));
+    }
+    switch (field.getType().getTypeName()) {
+      case BYTE:
+      case INT16:
+      case INT32:
+      case INT64:
+      case DECIMAL: // Iceberg and Beam both use BigDecimal
+      case FLOAT: // Iceberg and Beam both use float
+      case DOUBLE: // Iceberg and Beam both use double
+      case STRING: // Iceberg and Beam both use String
+      case BOOLEAN: // Iceberg and Beam both use boolean
+        rowBuilder.addValue(icebergValue);
+        break;
+      case ARRAY:
+        checkState(
+            icebergValue instanceof List,
+            "Expected List type for field '%s' but received %s",
+            field.getName(),
+            icebergValue.getClass());
+        List<@NonNull ?> beamList = (List<@NonNull ?>) icebergValue;
+        Schema.FieldType collectionType =
+            checkStateNotNull(field.getType().getCollectionElementType());
+        // recurse on struct types
+        if (collectionType.getTypeName().isCompositeType()) {
+          Schema innerSchema = checkStateNotNull(collectionType.getRowSchema());
+          beamList =
+              beamList.stream()
+                  .map(v -> icebergRecordToBeamRow(innerSchema, (Record) v))
+                  .collect(Collectors.toList());
+        }
+        rowBuilder.addValue(beamList);
+        break;
+      case ITERABLE:
+        checkState(
+            icebergValue instanceof Iterable,
+            "Expected Iterable type for field '%s' but received %s",
+            field.getName(),
+            icebergValue.getClass());
+        Iterable<@NonNull ?> beamIterable = (Iterable<@NonNull ?>) icebergValue;
+        Schema.FieldType iterableCollectionType =
+            checkStateNotNull(field.getType().getCollectionElementType());
+        // recurse on struct types
+        if (iterableCollectionType.getTypeName().isCompositeType()) {
+          Schema innerSchema = checkStateNotNull(iterableCollectionType.getRowSchema());
+          ImmutableList.Builder<Row> builder = ImmutableList.builder();
+          for (Record v : (Iterable<@NonNull Record>) icebergValue) {
+            builder.add(icebergRecordToBeamRow(innerSchema, v));
           }
-          rowBuilder.addValue(beamIterable);
-          break;
-        case MAP:
-          checkState(
-              icebergValue instanceof Map,
-              "Expected Map type for field '%s' but received %s",
-              field.getName(),
-              icebergValue.getClass());
-          Map<?, ?> beamMap = (Map<?, ?>) icebergValue;
-          Schema.FieldType valueType = checkStateNotNull(field.getType().getMapValueType());
-          // recurse on struct types
-          if (valueType.getTypeName().isCompositeType()) {
-            Schema innerSchema = checkStateNotNull(valueType.getRowSchema());
-            ImmutableMap.Builder<Object, Row> newMap = ImmutableMap.builder();
-            for (Map.Entry<?, ?> entry : ((Map<?, ?>) icebergValue).entrySet()) {
-              Record rec = ((Record) entry.getValue());
-              newMap.put(
-                  checkStateNotNull(entry.getKey()),
-                  icebergRecordToBeamRow(innerSchema, checkStateNotNull(rec)));
-            }
-            beamMap = newMap.build();
+          beamIterable = builder.build();
+        }
+        rowBuilder.addValue(beamIterable);
+        break;
+      case MAP:
+        checkState(
+            icebergValue instanceof Map,
+            "Expected Map type for field '%s' but received %s",
+            field.getName(),
+            icebergValue.getClass());
+        Map<?, ?> beamMap = (Map<?, ?>) icebergValue;
+        Schema.FieldType valueType = checkStateNotNull(field.getType().getMapValueType());
+        // recurse on struct types
+        if (valueType.getTypeName().isCompositeType()) {
+          Schema innerSchema = checkStateNotNull(valueType.getRowSchema());
+          ImmutableMap.Builder<Object, Row> newMap = ImmutableMap.builder();
+          for (Map.Entry<?, ?> entry : ((Map<?, ?>) icebergValue).entrySet()) {
+            Record rec = ((Record) entry.getValue());
+            newMap.put(
+                checkStateNotNull(entry.getKey()),
+                icebergRecordToBeamRow(innerSchema, checkStateNotNull(rec)));
           }
-          rowBuilder.addValue(beamMap);
-          break;
-        case DATETIME:
-          // Iceberg uses a long for micros.
-          // Beam DATETIME uses joda's DateTime, which only supports millis,
-          // so we do lose some precision here
-          rowBuilder.addValue(getBeamDateTimeValue(icebergValue));
-          break;
-        case BYTES:
-          // Iceberg uses ByteBuffer; Beam uses byte[]
-          rowBuilder.addValue(((ByteBuffer) icebergValue).array());
-          break;
-        case ROW:
-          Record nestedRecord = (Record) icebergValue;
-          Schema nestedSchema =
-              checkArgumentNotNull(
-                  field.getType().getRowSchema(),
-                  "Corrupted schema: Row type did not have associated nested schema.");
-          rowBuilder.addValue(icebergRecordToBeamRow(nestedSchema, nestedRecord));
-          break;
-        case LOGICAL_TYPE:
-          rowBuilder.addValue(getLogicalTypeValue(icebergValue, field.getType()));
-          break;
-        default:
-          throw new UnsupportedOperationException(
-              "Unsupported Beam type: " + field.getType().getTypeName());
-      }
+          beamMap = newMap.build();
+        }
+        rowBuilder.addValue(beamMap);
+        break;
+      case DATETIME:
+        // Iceberg uses a long for micros.
+        // Beam DATETIME uses joda's DateTime, which only supports millis,
+        // so we do lose some precision here
+        rowBuilder.addValue(getBeamDateTimeValue(icebergValue));
+        break;
+      case BYTES:
+        // Iceberg uses ByteBuffer; Beam uses byte[]
+        rowBuilder.addValue(((ByteBuffer) icebergValue).array());
+        break;
+      case ROW:
+        Record nestedRecord = (Record) icebergValue;
+        Schema nestedSchema =
+            checkArgumentNotNull(
+                field.getType().getRowSchema(),
+                "Corrupted schema: Row type did not have associated nested schema.");
+        rowBuilder.addValue(icebergRecordToBeamRow(nestedSchema, nestedRecord));
+        break;
+      case LOGICAL_TYPE:
+        rowBuilder.addValue(getLogicalTypeValue(icebergValue, field.getType()));
+        break;
+      default:
+        throw new UnsupportedOperationException(
+            "Unsupported Beam type: " + field.getType().getTypeName());
     }
-    return rowBuilder.build();
   }
 
   private static DateTime getBeamDateTimeValue(Object icebergValue) {
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/PartitionUtils.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/PartitionUtils.java
index 805cc0672940..91031fc69e9e 100644
--- a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/PartitionUtils.java
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/PartitionUtils.java
@@ -18,6 +18,7 @@
 package org.apache.beam.sdk.io.iceberg;
 
 import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
+import static org.apache.iceberg.data.IdentityPartitionConverters.convertConstant;
 
 import java.util.List;
 import java.util.Map;
@@ -25,11 +26,20 @@
 import java.util.function.Function;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions;
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Maps;
+import org.apache.iceberg.ChangelogScanTask;
+import org.apache.iceberg.ContentFile;
+import org.apache.iceberg.ContentScanTask;
+import org.apache.iceberg.MetadataColumns;
+import org.apache.iceberg.PartitionField;
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Schema;
+import org.apache.iceberg.StructLike;
 import org.apache.iceberg.expressions.Expressions;
 import org.apache.iceberg.expressions.Term;
+import org.apache.iceberg.types.Types;
 import org.checkerframework.checker.nullness.qual.Nullable;
 
 class PartitionUtils {
@@ -130,4 +140,61 @@ static Term toIcebergTerm(String field) {
 
     throw new IllegalArgumentException("Could not find a partition term for '" + field + "'.");
   }
+
+  /**
+   * Builds a map from field ID to constant value for {@code file}: metadata columns plus identity
+   * partition values. Adapted from Apache Iceberg's <a
+   * href="https://github.com/apache/iceberg/blob/main/core/src/main/java/org/apache/iceberg/util/PartitionUtil.java">PartitionUtil</a>
+   * to accommodate CDC reads, where scans produce {@link ChangelogScanTask}s instead of {@link
+   * ContentScanTask}s. A non-null {@code fileSequenceNumber} overrides the file's own value.
+   */
+  public static Map<Integer, ?> constantsMap(
+      PartitionSpec spec, ContentFile<?> file, @Nullable Long fileSequenceNumber) {
+    Preconditions.checkState(
+        spec.specId() == file.specId(),
+        "File spec ID (%s) does not match PartitionSpec ID (%s)",
+        file.specId(),
+        spec.specId());
+    StructLike partitionData = file.partition();
+
+    // use java.util.HashMap because partition data may contain null values
+    Map<Integer, Object> idToConstant = Maps.newHashMap();
+
+    // add first_row_id as _row_id
+    if (file.firstRowId() != null) {
+      idToConstant.put(
+          MetadataColumns.ROW_ID.fieldId(),
+          convertConstant(Types.LongType.get(), file.firstRowId()));
+    }
+
+    // A DataFile rebuilt from its serializable form cannot carry its file sequence number, so
+    // callers pass it in explicitly; when absent, fall back to the value stored on the file.
+    fileSequenceNumber =
+        fileSequenceNumber != null ? fileSequenceNumber : file.fileSequenceNumber();
+    idToConstant.put(
+        MetadataColumns.LAST_UPDATED_SEQUENCE_NUMBER.fieldId(),
+        convertConstant(Types.LongType.get(), fileSequenceNumber));
+
+    // add _file
+    idToConstant.put(
+        MetadataColumns.FILE_PATH.fieldId(),
+        convertConstant(Types.StringType.get(), file.location()));
+
+    // add _spec_id
+    idToConstant.put(
+        MetadataColumns.SPEC_ID.fieldId(), convertConstant(Types.IntegerType.get(), file.specId()));
+
+    List<Types.NestedField> partitionFields = spec.partitionType().fields();
+    List<PartitionField> fields = spec.fields();
+    for (int pos = 0; pos < fields.size(); pos += 1) {
+      PartitionField field = fields.get(pos);
+      if (field.transform().isIdentity()) {
+        Object converted =
+            convertConstant(partitionFields.get(pos).type(), partitionData.get(pos, Object.class));
+        idToConstant.put(field.sourceId(), converted);
+      }
+    }
+
+    return idToConstant;
+  }
 }
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/ReadFromTasks.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/ReadFromTasks.java
index 528b89c203bf..fea62356e431 100644
--- a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/ReadFromTasks.java
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/ReadFromTasks.java
@@ -75,9 +75,7 @@ public void process(
       }
       FileScanTask task = fileScanTasks.get((int) l);
       Schema beamSchema = IcebergUtils.icebergSchemaToBeamSchema(scanConfig.getProjectedSchema());
-      try (CloseableIterable<Record> fullIterable =
-          ReadUtils.createReader(task, table, scanConfig.getRequiredSchema())) {
-        CloseableIterable<Record> reader = ReadUtils.maybeApplyFilter(fullIterable, scanConfig);
+      try (CloseableIterable<Record> reader = ReadUtils.createReader(task, table, scanConfig)) {
 
         for (Record record : reader) {
           Row row = IcebergUtils.icebergRecordToBeamRow(beamSchema, record);
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/ReadUtils.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/ReadUtils.java
index e7f50882f433..4baa35030c46 100644
--- a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/ReadUtils.java
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/ReadUtils.java
@@ -20,25 +20,22 @@
 import static org.apache.iceberg.util.SnapshotUtil.ancestorsOf;
 
 import java.util.Collection;
-import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
-import java.util.function.BiFunction;
 import java.util.stream.Collectors;
 import org.apache.beam.sdk.io.iceberg.IcebergIO.ReadRows.StartingStrategy;
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.MoreObjects;
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists;
 import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Sets;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.iceberg.FileScanTask;
+import org.apache.iceberg.ContentFile;
+import org.apache.iceberg.ContentScanTask;
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.Snapshot;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.TableProperties;
-import org.apache.iceberg.data.IdentityPartitionConverters;
 import org.apache.iceberg.data.InternalRecordWrapper;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.data.parquet.GenericParquetReaders;
@@ -46,17 +43,13 @@
 import org.apache.iceberg.encryption.EncryptedInputFile;
 import org.apache.iceberg.expressions.Evaluator;
 import org.apache.iceberg.expressions.Expression;
-import org.apache.iceberg.expressions.Expressions;
 import org.apache.iceberg.hadoop.HadoopInputFile;
 import org.apache.iceberg.io.CloseableIterable;
+import org.apache.iceberg.io.FileIO;
 import org.apache.iceberg.io.InputFile;
-import org.apache.iceberg.mapping.MappingUtil;
 import org.apache.iceberg.mapping.NameMapping;
 import org.apache.iceberg.mapping.NameMappingParser;
 import org.apache.iceberg.parquet.ParquetReader;
-import org.apache.iceberg.types.Type;
-import org.apache.iceberg.types.TypeUtil;
-import org.apache.iceberg.util.PartitionUtil;
 import org.apache.iceberg.util.SnapshotUtil;
 import org.apache.parquet.HadoopReadOptions;
 import org.apache.parquet.ParquetReadOptions;
@@ -73,13 +66,37 @@ public class ReadUtils {
           "parquet.read.support.class",
           "parquet.crypto.factory.class");
 
-  static ParquetReader<Record> createReader(FileScanTask task, Table table, Schema schema) {
-    String filePath = task.file().path().toString();
-    EncryptedInputFile encryptedInput =
-        EncryptedFiles.encryptedInput(table.io().newInputFile(filePath), task.file().keyMetadata());
-    InputFile inputFile = table.encryption().decrypt(encryptedInput);
-    Map<Integer, ?> idToConstants =
-        ReadUtils.constantsMap(task, IdentityPartitionConverters::convertConstant, table.schema());
+  public static CloseableIterable<Record> createReader(
+      ContentScanTask<?> task, Table table, IcebergScanConfig scanConfig) {
+    return createReader(
+        table,
+        scanConfig,
+        scanConfig.getRequiredSchema(),
+        task.spec(),
+        task.file(),
+        null,
+        task.start(),
+        task.length(),
+        task.residual());
+  }
+
+  public static CloseableIterable<Record> createReader(
+      Table table,
+      IcebergScanConfig scanConfig,
+      Schema requiredSchema,
+      PartitionSpec spec,
+      ContentFile<?> file,
+      @Nullable Long fileSequenceNumber,
+      long start,
+      long length,
+      Expression residual) {
+    InputFile inputFile;
+    try (FileIO io = table.io()) {
+      EncryptedInputFile encryptedInput =
+          EncryptedFiles.encryptedInput(io.newInputFile(file.location()), file.keyMetadata());
+      inputFile = table.encryption().decrypt(encryptedInput);
+    }
+    Map<Integer, ?> idToConstants = PartitionUtils.constantsMap(spec, file, fileSequenceNumber);
 
     ParquetReadOptions.Builder optionsBuilder;
     if (inputFile instanceof HadoopInputFile) {
@@ -94,71 +111,30 @@ static ParquetReader<Record> createReader(FileScanTask task, Table table, Schema
     }
     optionsBuilder =
         optionsBuilder
-            .withRange(task.start(), task.start() + task.length())
+            .withRange(start, start + length)
             .withMaxAllocationInBytes(MAX_FILE_BUFFER_SIZE);
 
     @Nullable String nameMapping = table.properties().get(TableProperties.DEFAULT_NAME_MAPPING);
     NameMapping mapping =
         nameMapping != null ? NameMappingParser.fromJson(nameMapping) : NameMapping.empty();
 
-    return new ParquetReader<>(
-        inputFile,
-        schema,
-        optionsBuilder.build(),
-        // TODO(ahmedabu98): Implement a Parquet-to-Beam Row reader, bypassing conversion to Iceberg
-        // Record
-        fileSchema -> GenericParquetReaders.buildReader(schema, fileSchema, idToConstants),
-        mapping,
-        task.residual(),
-        false,
-        true);
-  }
-
-  static ParquetReader<Record> createReader(InputFile inputFile, Schema schema) {
-    ParquetReadOptions.Builder optionsBuilder;
-    if (inputFile instanceof HadoopInputFile) {
-      // remove read properties already set that may conflict with this read
-      Configuration conf = new Configuration(((HadoopInputFile) inputFile).getConf());
-      for (String property : READ_PROPERTIES_TO_REMOVE) {
-        conf.unset(property);
-      }
-      optionsBuilder = HadoopReadOptions.builder(conf);
-    } else {
-      optionsBuilder = ParquetReadOptions.builder();
-    }
-    optionsBuilder =
-        optionsBuilder
-            .withRange(0, inputFile.getLength())
-            .withMaxAllocationInBytes(MAX_FILE_BUFFER_SIZE);
-
-    return new ParquetReader<>(
-        inputFile,
-        schema,
-        optionsBuilder.build(),
-        fileSchema -> GenericParquetReaders.buildReader(schema, fileSchema),
-        MappingUtil.create(schema),
-        Expressions.alwaysTrue(),
-        false,
-        true);
-  }
-
-  static Map<Integer, ?> constantsMap(
-      FileScanTask task,
-      BiFunction<Type, Object, Object> converter,
-      org.apache.iceberg.Schema schema) {
-    PartitionSpec spec = task.spec();
-    Set<Integer> idColumns = spec.identitySourceIds();
-    org.apache.iceberg.Schema partitionSchema = TypeUtil.select(schema, idColumns);
-    boolean projectsIdentityPartitionColumns = !partitionSchema.columns().isEmpty();
-
-    if (projectsIdentityPartitionColumns) {
-      return PartitionUtil.constantsMap(task, converter);
-    } else {
-      return Collections.emptyMap();
-    }
+    ParquetReader<Record> records =
+        new ParquetReader<>(
+            inputFile,
+            requiredSchema,
+            optionsBuilder.build(),
+            // TODO(ahmedabu98): Implement a Parquet-to-Beam Row reader, bypassing conversion to
+            // Iceberg Record
+            fileSchema ->
+                GenericParquetReaders.buildReader(requiredSchema, fileSchema, idToConstants),
+            mapping,
+            residual,
+            false,
+            true);
+    return maybeApplyFilter(records, scanConfig, requiredSchema);
   }
 
-  static @Nullable Long getFromSnapshotExclusive(Table table, IcebergScanConfig scanConfig) {
+  public static @Nullable Long getFromSnapshotInclusive(Table table, IcebergScanConfig scanConfig) {
     @Nullable StartingStrategy startingStrategy = scanConfig.getStartingStrategy();
     boolean isStreaming = MoreObjects.firstNonNull(scanConfig.getStreaming(), false);
     if (startingStrategy == null) {
@@ -179,6 +155,13 @@ static ParquetReader<Record> createReader(InputFile inputFile, Schema schema) {
         fromSnapshot = currentSnapshot.snapshotId();
       }
     }
+
+    return fromSnapshot;
+  }
+
+  public static @Nullable Long getFromSnapshotExclusive(Table table, IcebergScanConfig scanConfig) {
+    @Nullable Long fromSnapshot = getFromSnapshotInclusive(table, scanConfig);
+
     // incremental append scan can only be configured with an *exclusive* starting snapshot,
     // so we need to provide this snapshot's parent id.
     if (fromSnapshot != null) {
@@ -189,7 +172,7 @@ static ParquetReader<Record> createReader(InputFile inputFile, Schema schema) {
     return fromSnapshot;
   }
 
-  static @Nullable Long getToSnapshot(Table table, IcebergScanConfig scanConfig) {
+  public static @Nullable Long getToSnapshot(Table table, IcebergScanConfig scanConfig) {
     // 1. fetch from to_snapshot
     @Nullable Long toSnapshot = scanConfig.getToSnapshot();
     // 2. fetch from to_timestamp
@@ -205,7 +188,7 @@ static ParquetReader<Record> createReader(InputFile inputFile, Schema schema) {
    * Returns a list of snapshots in the range (fromSnapshotId, toSnapshotId], ordered
    * chronologically.
    */
-  static List<SnapshotInfo> snapshotsBetween(
+  public static List<SnapshotInfo> snapshotsBetween(
       Table table, String tableIdentifier, @Nullable Long fromSnapshotId, long toSnapshotId) {
     long from = MoreObjects.firstNonNull(fromSnapshotId, -1L);
     @SuppressWarnings("return")
@@ -225,10 +208,14 @@ static List<SnapshotInfo> snapshotsBetween(
 
   public static CloseableIterable<Record> maybeApplyFilter(
       CloseableIterable<Record> iterable, IcebergScanConfig scanConfig) {
-    InternalRecordWrapper wrapper =
-        new InternalRecordWrapper(scanConfig.getRequiredSchema().asStruct());
+    return maybeApplyFilter(iterable, scanConfig, scanConfig.getRequiredSchema());
+  }
+
+  public static CloseableIterable<Record> maybeApplyFilter(
+      CloseableIterable<Record> iterable, IcebergScanConfig scanConfig, Schema requiredSchema) {
+    InternalRecordWrapper wrapper = new InternalRecordWrapper(requiredSchema.asStruct());
     Expression filter = scanConfig.getFilter();
-    Evaluator evaluator = scanConfig.getEvaluator();
+    Evaluator evaluator = scanConfig.getEvaluator(requiredSchema);
     if (filter != null && evaluator != null && filter.op() != Expression.Operation.TRUE) {
       return CloseableIterable.filter(iterable, record -> evaluator.eval(wrapper.wrap(record)));
     }
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/ScanTaskReader.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/ScanTaskReader.java
index b3485a7bcc4f..c9ad372a0751 100644
--- a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/ScanTaskReader.java
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/ScanTaskReader.java
@@ -36,7 +36,6 @@
 import org.apache.iceberg.TableProperties;
 import org.apache.iceberg.avro.Avro;
 import org.apache.iceberg.data.GenericDeleteFilter;
-import org.apache.iceberg.data.IdentityPartitionConverters;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.data.avro.DataReader;
 import org.apache.iceberg.data.orc.GenericOrcReader;
@@ -121,8 +120,7 @@ public boolean advance() throws IOException {
       DataFile file = fileTask.file();
       InputFile input = decryptor.getInputFile(fileTask);
       Map<Integer, ?> idToConstants =
-          ReadUtils.constantsMap(
-              fileTask, IdentityPartitionConverters::convertConstant, requiredSchema);
+          PartitionUtils.constantsMap(fileTask.spec(), fileTask.file(), null);
 
       CloseableIterable<Record> iterable;
       switch (file.format()) {
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/SerializableDataFile.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/SerializableDataFile.java
index 9e75be0a1987..e1291601d149 100644
--- a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/SerializableDataFile.java
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/SerializableDataFile.java
@@ -54,13 +54,13 @@
  */
 @DefaultSchema(AutoValueSchema.class)
 @AutoValue
-abstract class SerializableDataFile {
+public abstract class SerializableDataFile {
   public static Builder builder() {
     return new AutoValue_SerializableDataFile.Builder();
   }
 
   @SchemaFieldNumber("0")
-  abstract String getPath();
+  public abstract String getPath();
 
   @SchemaFieldNumber("1")
   abstract String getFileFormat();
@@ -69,10 +69,10 @@ public static Builder builder() {
   abstract long getRecordCount();
 
   @SchemaFieldNumber("3")
-  abstract long getFileSizeInBytes();
+  public abstract long getFileSizeInBytes();
 
   @SchemaFieldNumber("4")
-  abstract String getPartitionPath();
+  public abstract String getPartitionPath();
 
   @SchemaFieldNumber("5")
   abstract int getPartitionSpecId();
@@ -96,13 +96,22 @@ public static Builder builder() {
   abstract @Nullable Map<Integer, Long> getNanValueCounts();
 
   @SchemaFieldNumber("12")
-  abstract @Nullable Map<Integer, byte[]> getLowerBounds();
+  public abstract @Nullable Map<Integer, byte[]> getLowerBounds();
 
   @SchemaFieldNumber("13")
-  abstract @Nullable Map<Integer, byte[]> getUpperBounds();
+  public abstract @Nullable Map<Integer, byte[]> getUpperBounds();
+
+  @SchemaFieldNumber("14")
+  public abstract @Nullable Long getDataSequenceNumber();
+
+  @SchemaFieldNumber("15")
+  public abstract @Nullable Long getFileSequenceNumber();
+
+  @SchemaFieldNumber("16")
+  public abstract @Nullable Long getFirstRowId();
 
   @AutoValue.Builder
-  abstract static class Builder {
+  public abstract static class Builder {
     abstract Builder setPath(String path);
 
     abstract Builder setFileFormat(String fileFormat);
@@ -131,31 +140,49 @@ abstract static class Builder {
 
     abstract Builder setUpperBounds(@Nullable Map<Integer, byte[]> upperBounds);
 
+    abstract Builder setDataSequenceNumber(@Nullable Long number);
+
+    abstract Builder setFileSequenceNumber(@Nullable Long number);
+
+    abstract Builder setFirstRowId(@Nullable Long id);
+
     abstract SerializableDataFile build();
   }
 
+  public static SerializableDataFile from(DataFile f, String partitionPath) {
+    return from(f, partitionPath, true);
+  }
+
   /**
    * Create a {@link SerializableDataFile} from a {@link DataFile} and its associated {@link
    * PartitionKey}.
    */
-  static SerializableDataFile from(DataFile f, String partitionPath) {
-
-    return SerializableDataFile.builder()
-        .setPath(f.location().toString())
-        .setFileFormat(f.format().toString())
-        .setRecordCount(f.recordCount())
-        .setFileSizeInBytes(f.fileSizeInBytes())
-        .setPartitionPath(partitionPath)
-        .setPartitionSpecId(f.specId())
-        .setKeyMetadata(f.keyMetadata())
-        .setSplitOffsets(f.splitOffsets())
-        .setColumnSizes(f.columnSizes())
-        .setValueCounts(f.valueCounts())
-        .setNullValueCounts(f.nullValueCounts())
-        .setNanValueCounts(f.nanValueCounts())
-        .setLowerBounds(toByteArrayMap(f.lowerBounds()))
-        .setUpperBounds(toByteArrayMap(f.upperBounds()))
-        .build();
+  public static SerializableDataFile from(
+      DataFile f, String partitionPath, boolean includeMetrics) {
+    SerializableDataFile.Builder builder =
+        SerializableDataFile.builder()
+            .setPath(f.location())
+            .setFileFormat(f.format().toString())
+            .setRecordCount(f.recordCount())
+            .setFileSizeInBytes(f.fileSizeInBytes())
+            .setPartitionPath(partitionPath)
+            .setPartitionSpecId(f.specId())
+            .setKeyMetadata(f.keyMetadata())
+            .setSplitOffsets(f.splitOffsets())
+            .setColumnSizes(f.columnSizes())
+            .setValueCounts(f.valueCounts())
+            .setNullValueCounts(f.nullValueCounts())
+            .setNanValueCounts(f.nanValueCounts())
+            .setDataSequenceNumber(f.dataSequenceNumber())
+            .setFileSequenceNumber(f.fileSequenceNumber())
+            .setFirstRowId(f.firstRowId());
+    if (includeMetrics) {
+      builder =
+          builder
+              .setLowerBounds(toByteArrayMap(f.lowerBounds()))
+              .setUpperBounds(toByteArrayMap(f.upperBounds()));
+    }
+    return builder.build();
   }
 
   /**
@@ -165,7 +192,7 @@ static SerializableDataFile from(DataFile f, String partitionPath) {
    * it from Beam-compatible types.
    */
   @SuppressWarnings("nullness")
-  DataFile createDataFile(Map<Integer, PartitionSpec> partitionSpecs) {
+  public DataFile createDataFile(Map<Integer, PartitionSpec> partitionSpecs) {
     PartitionSpec partitionSpec =
         checkStateNotNull(
             partitionSpecs.get(getPartitionSpecId()),
@@ -192,14 +219,14 @@ DataFile createDataFile(Map<Integer, PartitionSpec> partitionSpecs) {
         .withFileSizeInBytes(getFileSizeInBytes())
         .withMetrics(dataFileMetrics)
         .withSplitOffsets(getSplitOffsets())
+        .withFirstRowId(getFirstRowId())
         .build();
   }
 
   // ByteBuddyUtils has trouble converting Map value type ByteBuffer
   // to byte[] and back to ByteBuffer, so we perform these conversions manually
   // TODO(https://github.com/apache/beam/issues/32701)
-  private static @Nullable Map<Integer, byte[]> toByteArrayMap(
-      @Nullable Map<Integer, ByteBuffer> input) {
+  static @Nullable Map<Integer, byte[]> toByteArrayMap(@Nullable Map<Integer, ByteBuffer> input) {
     if (input == null) {
       return null;
     }
@@ -222,8 +249,7 @@ private static byte[] toByteArray(ByteBuffer buf) {
     return bytes;
   }
 
-  private static @Nullable Map<Integer, ByteBuffer> toByteBufferMap(
-      @Nullable Map<Integer, byte[]> input) {
+  static @Nullable Map<Integer, ByteBuffer> toByteBufferMap(@Nullable Map<Integer, byte[]> input) {
     if (input == null) {
       return null;
     }
@@ -256,10 +282,13 @@ && getPartitionSpecId() == that.getPartitionSpecId()
         && Objects.equals(getNullValueCounts(), that.getNullValueCounts())
         && Objects.equals(getNanValueCounts(), that.getNanValueCounts())
         && mapEquals(getLowerBounds(), that.getLowerBounds())
-        && mapEquals(getUpperBounds(), that.getUpperBounds());
+        && mapEquals(getUpperBounds(), that.getUpperBounds())
+        && Objects.equals(getDataSequenceNumber(), that.getDataSequenceNumber())
+        && Objects.equals(getFileSequenceNumber(), that.getFileSequenceNumber())
+        && Objects.equals(getFirstRowId(), that.getFirstRowId());
   }
 
-  private static boolean mapEquals(
+  static boolean mapEquals(
       @Nullable Map<Integer, byte[]> map1, @Nullable Map<Integer, byte[]> map2) {
     if (map1 == null && map2 == null) {
       return true;
@@ -297,13 +326,16 @@ public final int hashCode() {
             getColumnSizes(),
             getValueCounts(),
             getNullValueCounts(),
-            getNanValueCounts());
+            getNanValueCounts(),
+            getDataSequenceNumber(),
+            getFileSequenceNumber(),
+            getFirstRowId());
     hashCode = 31 * hashCode + computeMapByteHashCode(getLowerBounds());
     hashCode = 31 * hashCode + computeMapByteHashCode(getUpperBounds());
     return hashCode;
   }
 
-  private static int computeMapByteHashCode(@Nullable Map<Integer, byte[]> map) {
+  static int computeMapByteHashCode(@Nullable Map<Integer, byte[]> map) {
     if (map == null) {
       return 0;
     }
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/SerializableDeleteFile.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/SerializableDeleteFile.java
new file mode 100644
index 000000000000..e2aeeff6d7fd
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/SerializableDeleteFile.java
@@ -0,0 +1,333 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg;
+
+import static org.apache.beam.sdk.io.iceberg.SerializableDataFile.computeMapByteHashCode;
+import static org.apache.beam.sdk.io.iceberg.SerializableDataFile.mapEquals;
+import static org.apache.beam.sdk.io.iceberg.SerializableDataFile.toByteArrayMap;
+import static org.apache.beam.sdk.io.iceberg.SerializableDataFile.toByteBufferMap;
+import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
+
+import com.google.auto.value.AutoValue;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import org.apache.beam.sdk.schemas.AutoValueSchema;
+import org.apache.beam.sdk.schemas.annotations.DefaultSchema;
+import org.apache.beam.sdk.schemas.annotations.SchemaFieldNumber;
+import org.apache.iceberg.DeleteFile;
+import org.apache.iceberg.FileContent;
+import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.FileMetadata;
+import org.apache.iceberg.Metrics;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.SortOrder;
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+@DefaultSchema(AutoValueSchema.class)
+@AutoValue
+public abstract class SerializableDeleteFile {
+  public static SerializableDeleteFile.Builder builder() {
+    return new AutoValue_SerializableDeleteFile.Builder();
+  }
+
+  @SchemaFieldNumber("0")
+  public abstract FileContent getContentType();
+
+  @SchemaFieldNumber("1")
+  public abstract String getLocation();
+
+  @SchemaFieldNumber("2")
+  public abstract String getFileFormat();
+
+  @SchemaFieldNumber("3")
+  public abstract long getRecordCount();
+
+  @SchemaFieldNumber("4")
+  public abstract long getFileSizeInBytes();
+
+  @SchemaFieldNumber("5")
+  public abstract String getPartitionPath();
+
+  @SchemaFieldNumber("6")
+  public abstract int getPartitionSpecId();
+
+  @SchemaFieldNumber("7")
+  public abstract @Nullable Integer getSortOrderId();
+
+  @SchemaFieldNumber("8")
+  public abstract @Nullable List<Integer> getEqualityFieldIds();
+
+  @SchemaFieldNumber("9")
+  public abstract @Nullable ByteBuffer getKeyMetadata();
+
+  @SchemaFieldNumber("10")
+  public abstract @Nullable List<Long> getSplitOffsets();
+
+  @SchemaFieldNumber("11")
+  public abstract @Nullable Map<Integer, Long> getColumnSizes();
+
+  @SchemaFieldNumber("12")
+  public abstract @Nullable Map<Integer, Long> getValueCounts();
+
+  @SchemaFieldNumber("13")
+  public abstract @Nullable Map<Integer, Long> getNullValueCounts();
+
+  @SchemaFieldNumber("14")
+  public abstract @Nullable Map<Integer, Long> getNanValueCounts();
+
+  @SchemaFieldNumber("15")
+  public abstract @Nullable Map<Integer, byte[]> getLowerBounds();
+
+  @SchemaFieldNumber("16")
+  public abstract @Nullable Map<Integer, byte[]> getUpperBounds();
+
+  @SchemaFieldNumber("17")
+  public abstract @Nullable Long getContentOffset();
+
+  @SchemaFieldNumber("18")
+  public abstract @Nullable Long getContentSizeInBytes();
+
+  @SchemaFieldNumber("19")
+  public abstract @Nullable String getReferencedDataFile();
+
+  @SchemaFieldNumber("20")
+  public abstract @Nullable Long getDataSequenceNumber();
+
+  @SchemaFieldNumber("21")
+  public abstract @Nullable Long getFileSequenceNumber();
+
+  @AutoValue.Builder
+  abstract static class Builder {
+    abstract Builder setContentType(FileContent content);
+
+    abstract Builder setLocation(String path);
+
+    abstract Builder setFileFormat(String fileFormat);
+
+    abstract Builder setRecordCount(long recordCount);
+
+    abstract Builder setFileSizeInBytes(long fileSizeInBytes);
+
+    abstract Builder setPartitionPath(String partitionPath);
+
+    abstract Builder setPartitionSpecId(int partitionSpec);
+    // Boxed to match the @Nullable Integer property; an int parameter would NPE when unboxing null.
+    abstract Builder setSortOrderId(@Nullable Integer sortOrderId);
+
+    abstract Builder setEqualityFieldIds(@Nullable List<Integer> equalityFieldIds);
+
+    abstract Builder setKeyMetadata(@Nullable ByteBuffer keyMetadata);
+
+    abstract Builder setSplitOffsets(@Nullable List<Long> splitOffsets);
+
+    abstract Builder setColumnSizes(@Nullable Map<Integer, Long> columnSizes);
+
+    abstract Builder setValueCounts(@Nullable Map<Integer, Long> valueCounts);
+
+    abstract Builder setNullValueCounts(@Nullable Map<Integer, Long> nullValueCounts);
+
+    abstract Builder setNanValueCounts(@Nullable Map<Integer, Long> nanValueCounts);
+
+    abstract Builder setLowerBounds(@Nullable Map<Integer, byte[]> lowerBounds);
+
+    abstract Builder setUpperBounds(@Nullable Map<Integer, byte[]> upperBounds);
+
+    abstract Builder setContentOffset(@Nullable Long offset);
+
+    abstract Builder setContentSizeInBytes(@Nullable Long sizeInBytes);
+
+    abstract Builder setReferencedDataFile(@Nullable String dataFile);
+
+    abstract Builder setDataSequenceNumber(@Nullable Long number);
+
+    abstract Builder setFileSequenceNumber(@Nullable Long number);
+
+    abstract SerializableDeleteFile build();
+  }
+
+  public static SerializableDeleteFile from(
+      DeleteFile deleteFile, String partitionPath, boolean includeMetrics) {
+
+    SerializableDeleteFile.Builder builder =
+        SerializableDeleteFile.builder()
+            .setLocation(deleteFile.location())
+            .setFileFormat(deleteFile.format().name())
+            .setFileSizeInBytes(deleteFile.fileSizeInBytes())
+            .setPartitionPath(partitionPath)
+            .setPartitionSpecId(deleteFile.specId())
+            .setRecordCount(deleteFile.recordCount())
+            .setColumnSizes(deleteFile.columnSizes())
+            .setValueCounts(deleteFile.valueCounts())
+            .setNullValueCounts(deleteFile.nullValueCounts())
+            .setNanValueCounts(deleteFile.nanValueCounts())
+            .setSplitOffsets(deleteFile.splitOffsets())
+            .setKeyMetadata(deleteFile.keyMetadata())
+            .setEqualityFieldIds(deleteFile.equalityFieldIds())
+            .setSortOrderId(deleteFile.sortOrderId())
+            .setContentOffset(deleteFile.contentOffset())
+            .setContentSizeInBytes(deleteFile.contentSizeInBytes())
+            .setReferencedDataFile(deleteFile.referencedDataFile())
+            .setContentType(deleteFile.content())
+            .setDataSequenceNumber(deleteFile.dataSequenceNumber())
+            .setFileSequenceNumber(deleteFile.fileSequenceNumber());
+
+    if (includeMetrics) {
+      builder =
+          builder
+              .setLowerBounds(toByteArrayMap(deleteFile.lowerBounds()))
+              .setUpperBounds(toByteArrayMap(deleteFile.upperBounds()));
+    }
+
+    return builder.build();
+  }
+
+  /**
+   * Rebuilds the Iceberg {@link DeleteFile} described by this serializable wrapper.
+   *
+   * <p>Position deletes are rebuilt from the common file attributes alone. Equality deletes also
+   * carry their equality field ids and a sort order. Files whose format is Puffin additionally
+   * require the content offset, content size and referenced data file.
+   *
+   * @param partitionSpecs partition specs keyed by spec id; must contain this file's spec id
+   * @param sortOrders sort orders keyed by sort order id, or {@code null} to treat every equality
+   *     delete as unsorted
+   * @return the reconstructed {@link DeleteFile}
+   * @throws IllegalStateException if the partition spec id or a recorded sort order id is unknown,
+   *     if the content type is not a delete type, or if a Puffin file lacks one of its required
+   *     attributes
+   */
+  @SuppressWarnings("nullness")
+  public DeleteFile createDeleteFile(
+      Map<Integer, PartitionSpec> partitionSpecs, @Nullable Map<Integer, SortOrder> sortOrders) {
+    PartitionSpec partitionSpec =
+        checkStateNotNull(
+            partitionSpecs.get(getPartitionSpecId()),
+            "This DeleteFile was originally created with spec id '%s', "
+                + "but table only has spec ids: %s.",
+            getPartitionSpecId(),
+            partitionSpecs.keySet());
+
+    Metrics metrics =
+        new Metrics(
+            getRecordCount(),
+            getColumnSizes(),
+            getValueCounts(),
+            getNullValueCounts(),
+            getNanValueCounts(),
+            toByteBufferMap(getLowerBounds()),
+            toByteBufferMap(getUpperBounds()));
+
+    FileMetadata.Builder deleteFileBuilder =
+        FileMetadata.deleteFileBuilder(partitionSpec)
+            .withPath(getLocation())
+            .withFormat(getFileFormat())
+            .withFileSizeInBytes(getFileSizeInBytes())
+            .withRecordCount(getRecordCount())
+            .withMetrics(metrics)
+            .withSplitOffsets(getSplitOffsets())
+            .withEncryptionKeyMetadata(getKeyMetadata())
+            .withPartitionPath(getPartitionPath());
+
+    switch (getContentType()) {
+      case POSITION_DELETES:
+        deleteFileBuilder = deleteFileBuilder.ofPositionDeletes();
+        break;
+      case EQUALITY_DELETES:
+        // A missing list of equality field ids is treated as an empty list.
+        int[] equalityFieldIds =
+            Objects.requireNonNullElse(getEqualityFieldIds(), new ArrayList<Integer>()).stream()
+                .mapToInt(Integer::intValue)
+                .toArray();
+        // getSortOrderId() may be null when no sort order was recorded for the file. Looking a
+        // null id up would fail with a misleading "sort order id 'null'" error, so such files
+        // are treated as unsorted instead.
+        @Nullable Integer sortOrderId = getSortOrderId();
+        SortOrder sortOrder = SortOrder.unsorted();
+        if (sortOrders != null && sortOrderId != null) {
+          sortOrder =
+              checkStateNotNull(
+                  sortOrders.get(sortOrderId),
+                  "This DeleteFile was originally created with sort order id '%s', "
+                      + "but table only has sort order ids: %s.",
+                  sortOrderId,
+                  sortOrders.keySet());
+        }
+        deleteFileBuilder =
+            deleteFileBuilder.ofEqualityDeletes(equalityFieldIds).withSortOrder(sortOrder);
+        break;
+      default:
+        throw new IllegalStateException(
+            "Unexpected content type for DeleteFile: " + getContentType());
+    }
+
+    // needed for puffin files
+    if (getFileFormat().equalsIgnoreCase(FileFormat.PUFFIN.name())) {
+      deleteFileBuilder =
+          deleteFileBuilder
+              .withContentOffset(checkStateNotNull(getContentOffset()))
+              .withContentSizeInBytes(checkStateNotNull(getContentSizeInBytes()))
+              .withReferencedDataFile(checkStateNotNull(getReferencedDataFile()));
+    }
+    return deleteFileBuilder.build();
+  }
+
+  @Override
+  public final boolean equals(@Nullable Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof SerializableDeleteFile)) {
+      return false;
+    }
+    SerializableDeleteFile that = (SerializableDeleteFile) o;
+    return getContentType().equals(that.getContentType())
+        && getLocation().equals(that.getLocation())
+        && getFileFormat().equals(that.getFileFormat())
+        && getRecordCount() == that.getRecordCount()
+        && getFileSizeInBytes() == that.getFileSizeInBytes()
+        && getPartitionPath().equals(that.getPartitionPath())
+        && getPartitionSpecId() == that.getPartitionSpecId()
+        && Objects.equals(getSortOrderId(), that.getSortOrderId())
+        && Objects.equals(getEqualityFieldIds(), that.getEqualityFieldIds())
+        && Objects.equals(getKeyMetadata(), that.getKeyMetadata())
+        && Objects.equals(getSplitOffsets(), that.getSplitOffsets())
+        && Objects.equals(getColumnSizes(), that.getColumnSizes())
+        && Objects.equals(getValueCounts(), that.getValueCounts())
+        && Objects.equals(getNullValueCounts(), that.getNullValueCounts())
+        && Objects.equals(getNanValueCounts(), that.getNanValueCounts())
+        && mapEquals(getLowerBounds(), that.getLowerBounds())
+        && mapEquals(getUpperBounds(), that.getUpperBounds())
+        && Objects.equals(getContentOffset(), that.getContentOffset())
+        && Objects.equals(getContentSizeInBytes(), that.getContentSizeInBytes())
+        && Objects.equals(getReferencedDataFile(), that.getReferencedDataFile())
+        && Objects.equals(getDataSequenceNumber(), that.getDataSequenceNumber())
+        && Objects.equals(getFileSequenceNumber(), that.getFileSequenceNumber());
+  }
+
+  /**
+   * Hashes the same attributes that {@link #equals(Object)} compares. The lower and upper bound
+   * maps are folded in through {@code computeMapByteHashCode} instead of {@link Objects#hash}.
+   */
+  @Override
+  public final int hashCode() {
+    int attributesHash =
+        Objects.hash(
+            getContentType(),
+            getLocation(),
+            getFileFormat(),
+            getRecordCount(),
+            getFileSizeInBytes(),
+            getPartitionPath(),
+            getPartitionSpecId(),
+            getSortOrderId(),
+            getEqualityFieldIds(),
+            getKeyMetadata(),
+            getSplitOffsets(),
+            getColumnSizes(),
+            getValueCounts(),
+            getNullValueCounts(),
+            getNanValueCounts(),
+            getContentOffset(),
+            getContentSizeInBytes(),
+            getReferencedDataFile(),
+            getDataSequenceNumber(),
+            getFileSequenceNumber());
+    int withLowerBounds = 31 * attributesHash + computeMapByteHashCode(getLowerBounds());
+    return 31 * withLowerBounds + computeMapByteHashCode(getUpperBounds());
+  }
+}
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/TableCache.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/TableCache.java
index cb00d90f7fb3..d9d8802e2b49 100644
--- a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/TableCache.java
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/TableCache.java
@@ -33,7 +33,7 @@
 import org.apache.iceberg.catalog.TableIdentifier;
 
 /** Utility to fetch and cache Iceberg {@link Table}s. */
-class TableCache {
+public class TableCache {
   private static final Map<String, IcebergCatalogConfig> CATALOG_CACHE = new ConcurrentHashMap<>();
   private static final LoadingCache<String, Table> INTERNAL_CACHE =
       CacheBuilder.newBuilder()
@@ -55,7 +55,7 @@ public ListenableFuture<Table> reload(String unusedIdentifier, Table table) {
                 }
               });;
 
-  static Table get(String identifier) {
+  public static Table get(String identifier) {
     try {
       return INTERNAL_CACHE.get(identifier);
     } catch (ExecutionException e) {
@@ -65,12 +65,12 @@ static Table get(String identifier) {
   }
 
   /** Forces a table refresh and returns. */
-  static Table getRefreshed(String identifier) {
+  public static Table getRefreshed(String identifier) {
     INTERNAL_CACHE.refresh(identifier);
     return get(identifier);
   }
 
-  static void setup(IcebergScanConfig scanConfig) {
+  public static void setup(IcebergScanConfig scanConfig) {
     String tableIdentifier = scanConfig.getTableIdentifier();
     IcebergCatalogConfig catalogConfig = scanConfig.getCatalogConfig();
     if (CATALOG_CACHE.containsKey(tableIdentifier)) {
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/ApplyWatermarkColumn.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/ApplyWatermarkColumn.java
new file mode 100644
index 000000000000..9c1e627f64fd
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/ApplyWatermarkColumn.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg.cdc;
+
+import static java.util.concurrent.TimeUnit.MICROSECONDS;
+
+import java.time.LocalDateTime;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.util.Preconditions;
+import org.apache.beam.sdk.values.Row;
+import org.apache.iceberg.util.DateTimeUtil;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+
+/**
+ * Re-stamps each output row using the configured {@code watermarkColumn}'s value, so the source's
+ * output watermark advances per record rather than per snapshot.
+ *
+ * <p>If the column's value on a record is {@code null}, the row is emitted unchanged and keeps the
+ * timestamp it arrived with (presumably the snapshot commit timestamp assigned upstream).
+ *
+ * <p>NOTE(review): a column that is absent from the row's schema is not a pass-through. The value
+ * and field lookups by name run unconditionally and are expected to throw for an unknown field
+ * name, so the column's existence should be validated before this DoFn is applied.
+ *
+ * <p>Supported column types: {@code INT64} (interpreted as microseconds since the epoch), {@code
+ * DATETIME}, and the {@link SqlTypes#DATETIME}, {@link SqlTypes#TIMESTAMP} and {@link
+ * org.apache.beam.sdk.schemas.logicaltypes.Timestamp} logical types. Sub-millisecond precision is
+ * dropped because the output timestamp has millisecond resolution.
+ *
+ * <p>The {@link #getAllowedTimestampSkew()} return is intentionally generous: the user's watermark
+ * column may produce values well before the snapshot commit time (event-time data can lag
+ * wall-clock by hours or days). NOTE(review): a column value more than 365 days earlier than the
+ * row's incoming timestamp still exceeds this skew; confirm that is acceptable for backfills.
+ */
+class ApplyWatermarkColumn extends DoFn<Row, Row> {
+  private final String watermarkColumn;
+
+  /**
+   * @param watermarkColumn name of the row field whose value becomes the output timestamp
+   */
+  ApplyWatermarkColumn(String watermarkColumn) {
+    this.watermarkColumn = watermarkColumn;
+  }
+
+  /** Emits {@code row} stamped with the watermark column's value, or unchanged if it is null. */
+  @ProcessElement
+  public void process(@Element Row row, OutputReceiver<Row> out) {
+    @Nullable
+    Instant instant =
+        getInstant(row.getValue(watermarkColumn), row.getSchema().getField(watermarkColumn));
+    if (instant != null) {
+      out.outputWithTimestamp(row, instant);
+    } else {
+      out.output(row);
+    }
+  }
+
+  /**
+   * Converts a watermark column value to a joda {@link Instant}.
+   *
+   * @param value the raw field value, possibly {@code null}
+   * @param field the schema field describing {@code value}
+   * @return the corresponding instant, or {@code null} if {@code value} is {@code null}
+   * @throws UnsupportedOperationException if the field's type is not one of the supported types
+   */
+  private static @Nullable Instant getInstant(@Nullable Object value, Schema.Field field) {
+    if (value == null) {
+      return null;
+    }
+    switch (field.getType().getTypeName()) {
+      case INT64:
+        // Raw longs are interpreted as microseconds since the epoch.
+        return Instant.ofEpochMilli(MICROSECONDS.toMillis((Long) value));
+      case DATETIME:
+        // Accept any joda ReadableInstant (Instant, DateTime, ...) rather than casting to the
+        // concrete Instant class, which would throw ClassCastException for the other variants.
+        return ((org.joda.time.ReadableInstant) value).toInstant();
+      case LOGICAL_TYPE:
+        String logicalType =
+            Preconditions.checkStateNotNull(field.getType().getLogicalType()).getIdentifier();
+        if (logicalType.equals(SqlTypes.DATETIME.getIdentifier())) {
+          return Instant.ofEpochMilli(
+              MICROSECONDS.toMillis(DateTimeUtil.microsFromTimestamp((LocalDateTime) value)));
+        } else if (logicalType.equals(SqlTypes.TIMESTAMP.getIdentifier())
+            || logicalType.equals(org.apache.beam.sdk.schemas.logicaltypes.Timestamp.IDENTIFIER)) {
+          return Instant.ofEpochMilli(
+              MICROSECONDS.toMillis(DateTimeUtil.microsFromInstant((java.time.Instant) value)));
+        } else {
+          throw new UnsupportedOperationException("Unexpected logical type: " + logicalType);
+        }
+      default:
+        throw new UnsupportedOperationException("Unexpected Beam type: " + field.getType());
+    }
+  }
+
+  @Override
+  public Duration getAllowedTimestampSkew() {
+    // Generous skew to cover backfill of historical data and late-arriving CDC patterns.
+    return Duration.standardDays(365);
+  }
+}
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/CdcReadUtils.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/CdcReadUtils.java
new file mode 100644
index 000000000000..b8ec49dfcc1c
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/CdcReadUtils.java
@@ -0,0 +1,694 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg.cdc;
+
+import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.beam.sdk.io.iceberg.IcebergScanConfig;
+import org.apache.beam.sdk.io.iceberg.ReadUtils;
+import org.apache.beam.sdk.io.iceberg.SerializableDeleteFile;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables;
+import org.apache.iceberg.DeleteFile;
+import org.apache.iceberg.FileContent;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.data.BaseDeleteLoader;
+import org.apache.iceberg.data.DeleteFilter;
+import org.apache.iceberg.data.DeleteLoader;
+import org.apache.iceberg.data.InternalRecordWrapper;
+import org.apache.iceberg.data.Record;
+import org.apache.iceberg.deletes.PositionDeleteIndex;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.expressions.Expressions;
+import org.apache.iceberg.io.CloseableIterable;
+import org.apache.iceberg.io.FileIO;
+import org.apache.iceberg.io.InputFile;
+import org.apache.iceberg.io.SeekableInputStream;
+import org.apache.iceberg.parquet.ParquetMetricsRowGroupFilter;
+import org.apache.iceberg.types.TypeUtil;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.StructLikeSet;
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.io.DelegatingSeekableInputStream;
+import org.apache.parquet.schema.MessageType;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Read-side helpers specific to the CDC source. Keeps {@link ReadUtils} focused on the
+ * general-purpose append-only read path; everything that takes a {@link SerializableChangelogTask},
+ * references {@link DeleteReader}, or implements the delete-pushdown row-group skipping lives here.
+ *
+ * <p>This class still delegates to {@link ReadUtils} for the low-level Parquet reader construction
+ * — the goal is decoupling, not duplication.
+ */
+public final class CdcReadUtils {
+  private static final Logger LOG = LoggerFactory.getLogger(CdcReadUtils.class);
+
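+  /**
+   * Maximum size of an equality delete set to push down as a Parquet residual {@code IN}
+   * expression. Intended to match the {@code IN_PREDICATE_LIMIT} constant used by Iceberg's
+   * {@link ParquetMetricsRowGroupFilter} (that constant appears not to be public, so it cannot be
+   * linked or referenced directly; verify the value when upgrading Iceberg).
+   */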
+  private static final int IN_PREDICATE_LIMIT = 200;
+
+  public static CloseableIterable<Record> createReader(
+      SerializableChangelogTask task,
+      Table table,
+      IcebergScanConfig scanConfig,
+      Schema outputSchema) {
+    return createReader(task, table, scanConfig, outputSchema, Expressions.alwaysTrue());
+  }
+
+  /**
+   * Same as {@link #createReader(SerializableChangelogTask, Table, IcebergScanConfig, Schema)} but
+   * ANDs {@code extraResidual} into the task's residual expression. The combined expression is
+   * passed to Iceberg's Parquet reader, which uses it as a row-group-level filter (skips row groups
+   * whose column statistics cannot match). The caller is still responsible for applying the
+   * residual at the row level.
+   *
+   * <p>This is used to push extra predicates (e.g. an equality-delete {@code IN} expression) down
+   * to the reader for cheap row-group skipping.
+   */
+  public static CloseableIterable<Record> createReader(
+      SerializableChangelogTask task,
+      Table table,
+      IcebergScanConfig scanConfig,
+      Schema outputSchema,
+      Expression extraResidual) {
+    return createReader(
+        task, table, scanConfig, outputSchema, extraResidual, task.getStart(), task.getLength());
+  }
+
+  /**
+   * Opens a reader for {@code task} restricted to the byte range {@code [start, start + length)}
+   * of the task's DataFile, which lets callers prune row groups by byte range.
+   *
+   * <p>The task's residual expression is used as the reader filter; {@code extraResidual} is ANDed
+   * into it unless it is the constant {@code true} expression.
+   *
+   * <p>NOTE(review): which row groups the underlying Parquet reader attributes to a byte range
+   * (e.g. by starting offset or by midpoint) is decided outside this class; confirm before relying
+   * on a specific rule.
+   *
+   * <p>Callers are responsible for keeping the requested range within the task's assigned range,
+   * to avoid reading a section that is meant for another worker.
+   */
+  public static CloseableIterable<Record> createReader(
+      SerializableChangelogTask task,
+      Table table,
+      IcebergScanConfig scanConfig,
+      Schema outputSchema,
+      Expression extraResidual,
+      long start,
+      long length) {
+    Expression residual = task.getExpression(table.schema());
+    if (extraResidual.op() != Expression.Operation.TRUE) {
+      // Only wrap in an AND when there is something to add.
+      residual = Expressions.and(residual, extraResidual);
+    }
+    return ReadUtils.createReader(
+        table,
+        scanConfig,
+        outputSchema,
+        checkStateNotNull(table.specs().get(task.getSpecId())),
+        task.getDataFile().createDataFile(table.specs()),
+        task.getDataFile().getFileSequenceNumber(),
+        start,
+        length,
+        residual);
+  }
+
+  /**
+   * Builds a {@link DeleteFilter} for the data file at {@code dataFilePath} from the given
+   * serialized delete files; the filter skips records marked for deletion.
+   */
+  public static DeleteFilter<Record> genericDeleteFilter(
+      Table table, Schema outputSchema, String dataFilePath, List<SerializableDeleteFile> deletes) {
+    FileIO io = table.io();
+    Schema tableSchema = table.schema();
+    // Rehydrate each serialized delete file against the table's specs and sort orders.
+    List<DeleteFile> deleteFiles = new ArrayList<>(deletes.size());
+    for (SerializableDeleteFile serialized : deletes) {
+      deleteFiles.add(serialized.createDeleteFile(table.specs(), table.sortOrders()));
+    }
+    return new GenericDeleteFilter(io, dataFilePath, tableSchema, outputSchema, deleteFiles);
+  }
+
+  /**
+   * Builds a {@link DeleteReader} for the data file at {@code dataFilePath} that reuses the delete
+   * structures already loaded by CDC planning ({@code preloadedDeletes}).
+   */
+  public static DeleteReader<Record> genericDeleteReader(
+      Table table,
+      Schema outputSchema,
+      String dataFilePath,
+      List<SerializableDeleteFile> deletes,
+      DeleteReader.PreloadedDeletes preloadedDeletes) {
+    FileIO io = table.io();
+    Schema tableSchema = table.schema();
+    // Rehydrate each serialized delete file against the table's specs and sort orders.
+    List<DeleteFile> rehydrated = new ArrayList<>(deletes.size());
+    for (SerializableDeleteFile serialized : deletes) {
+      rehydrated.add(serialized.createDeleteFile(table.specs(), table.sortOrders()));
+    }
+    return new GenericDeleteReader(
+        io, dataFilePath, tableSchema, outputSchema, rehydrated, preloadedDeletes);
+  }
+
+  /**
+   * Opens the records a CDC reader should process for one {@link SerializableChangelogTask}. The
+   * delete handling depends on the task's type:
+   *
+   * <ul>
+   *   <li>{@code ADDED_ROWS}: the records that became live in this commit.
+   *       <ul>
+   *         <li>1. Iterate over records in the added DataFile
+   *         <li>2. Filter out records matched by any added deletes
+   *       </ul>
+   *   <li>{@code DELETED_FILE}: every record in the DataFile that wasn't already deleted.
+   *       <ul>
+   *         <li>1. Iterate over records in the referenced DataFile
+   *         <li>2. Filter out records matched by existing deletes
+   *       </ul>
+   *   <li>{@code DELETED_ROWS}: the records newly marked for deletion by added DeleteFiles,
+   *       ignoring records that previous DeleteFiles had already removed. Handled by {@code
+   *       deletedRowsForTask}.
+   *       <ul>
+   *         <li>1. Iterate over records in the referenced DataFile
+   *         <li>2. Filter out records matched by existing deletes
+   *         <li>3. Keep only records matched by added deletes
+   *       </ul>
+   * </ul>
+   *
+   * <p>Projection pushdown should not be used when reading bi-directional tasks because all record
+   * columns must be compared to accurately identify updates. Otherwise, a user-configured
+   * projection may drop a column that contains real updates, and the downstream resolver will
+   * mistakenly determine the (delete, insert) pair to be a duplicate.
+   *
+   * @param useProjectedSchema if true, read with {@code scanConfig.getRequiredSchema()}; otherwise
+   *     read with the full table schema
+   * @throws IllegalStateException if the task type is not one of the three types above
+   */
+  public static CloseableIterable<Record> changelogRecordsForTask(
+      SerializableChangelogTask task,
+      Table table,
+      IcebergScanConfig scanConfig,
+      boolean useProjectedSchema) {
+    String dataFilePath = task.getDataFile().getPath();
+    Schema outputSchema = useProjectedSchema ? scanConfig.getRequiredSchema() : table.schema();
+
+    // ADDED_ROWS and DELETED_FILE share the same reader chain and differ only in which delete
+    // files are filtered out; DELETED_ROWS has its own chain.
+    List<SerializableDeleteFile> deletesToFilterOut;
+    switch (task.getType()) {
+      case ADDED_ROWS:
+        deletesToFilterOut = task.getAddedDeletes();
+        break;
+      case DELETED_FILE:
+        deletesToFilterOut = task.getExistingDeletes();
+        break;
+      case DELETED_ROWS:
+        return deletedRowsForTask(task, table, scanConfig, outputSchema);
+      default:
+        throw new IllegalStateException("Unknown ChangelogScanTask type: " + task.getType());
+    }
+
+    DeleteFilter<Record> deleteFilter =
+        genericDeleteFilter(table, outputSchema, dataFilePath, deletesToFilterOut);
+    CloseableIterable<Record> records =
+        createReader(task, table, scanConfig, deleteFilter.requiredSchema());
+    return deleteFilter.filter(records);
+  }
+
+  /**
+   * Builds the reader chain for a {@code DELETED_ROWS} task, using row-group pushdown when
+   * possible so that the reader can skip entire row groups. Row groups that are not skipped still
+   * get per-record position + equality checks at the row level.
+   *
+   * <p>Two pushdown strategies exist, selected by the type of {@link DeleteFile} in the task:
+   *
+   * <ol>
+   *   <li><b>Byte-range pushdown for Position Deletes:</b> pre-load the {@link
+   *       PositionDeleteIndex}, read the Parquet footer, and compute a single contiguous byte range
+   *       covering the row groups that contain at least one deleted position.
+   *   <li><b>IN-expression pushdown for Equality Deletes:</b> build an Iceberg {@code IN}
+   *       expression and pass it as a Parquet residual so the metrics row-group filter can skip
+   *       non-matching row groups.
+   * </ol>
+   *
+   * <p>When both Position and Equality deletes are present, the byte-range strategy accounts for
+   * both and yields one contiguous range; only that range is read, skipping leading and trailing
+   * row groups that contain no deletions. The IN-expression residual is used only when the task
+   * has no position deletes.
+   *
+   * <p>Note: Equality pushdown is only used when all delete files share a single equality field.
+   * Multi-column equality requires an exploded OR expression that Parquet's metrics filter handles
+   * poorly.
+   */
+  private static CloseableIterable<Record> deletedRowsForTask(
+      SerializableChangelogTask task,
+      Table table,
+      IcebergScanConfig scanConfig,
+      Schema outputSchema) {
+    String dataFilePath = task.getDataFile().getPath();
+    List<SerializableDeleteFile> addedDeletes = task.getAddedDeletes();
+
+    // Partition the added deletes by content type; any other content type is ignored.
+    List<DeleteFile> positionDeleteFiles = new ArrayList<>();
+    List<DeleteFile> equalityDeleteFiles = new ArrayList<>();
+    for (SerializableDeleteFile serialized : addedDeletes) {
+      DeleteFile deleteFile = serialized.createDeleteFile(table.specs(), table.sortOrders());
+      FileContent content = deleteFile.content();
+      if (content == FileContent.POSITION_DELETES) {
+        positionDeleteFiles.add(deleteFile);
+      } else if (content == FileContent.EQUALITY_DELETES) {
+        equalityDeleteFiles.add(deleteFile);
+      }
+    }
+
+    DeleteReader.PreloadedDeletes preloaded = DeleteReader.PreloadedDeletes.empty();
+    Expression eqResidual = Expressions.alwaysTrue();
+    if (!positionDeleteFiles.isEmpty()) {
+      // Strategy 1: byte-range pushdown around row groups with position deletes (plus equality
+      // matches). A null result, or a result without records, means "use the default chain".
+      @Nullable
+      PositionPushdownResult pushdown =
+          tryPositionByteRangePushdown(
+              task,
+              table,
+              scanConfig,
+              outputSchema,
+              positionDeleteFiles,
+              equalityDeleteFiles,
+              addedDeletes);
+      if (pushdown != null) {
+        if (pushdown.deletedRecords != null) {
+          return pushdown.deletedRecords;
+        }
+        preloaded = pushdown.preloadedDeletes;
+      }
+    } else if (!equalityDeleteFiles.isEmpty()) {
+      // Strategy 2: equality IN-expression pushdown applied as a reader residual. Only used when
+      // no position deletes are present; when both exist, the byte-range path above already
+      // incorporates the equality filter.
+      EqualityPushdownResult eqPushdown = buildEqualityDeletePushdown(table, equalityDeleteFiles);
+      if (eqPushdown.applicable) {
+        eqResidual = eqPushdown.residual;
+      }
+      preloaded = eqPushdown.preloadedDeletes(null);
+    }
+
+    // Default chain: drop already-deleted records, then keep only the newly deleted ones.
+    DeleteFilter<Record> liveRecordsFilter =
+        genericDeleteFilter(table, outputSchema, dataFilePath, task.getExistingDeletes());
+    DeleteReader<Record> newlyDeletedReader =
+        genericDeleteReader(table, outputSchema, dataFilePath, addedDeletes, preloaded);
+    Schema readSchema =
+        TypeUtil.join(liveRecordsFilter.requiredSchema(), newlyDeletedReader.requiredSchema());
+
+    CloseableIterable<Record> allRecords =
+        createReader(task, table, scanConfig, readSchema, eqResidual);
+    return newlyDeletedReader.read(liveRecordsFilter.filter(allRecords));
+  }
+
+  /**
+   * Byte-range position-delete pushdown: narrows the read to one contiguous byte range covering
+   * the row groups (inside the task's own range) that contain a deleted position or, when
+   * equality deletes are eligible for pushdown, whose statistics match the equality filter.
+   *
+   * <p>Outcomes, as seen by the caller:
+   *
+   * <ul>
+   *   <li>{@code null}: pre-loading the position deletes threw, so nothing was pre-loaded and the
+   *       caller should fall back to its default reader chain.
+   *   <li>{@code PositionPushdownResult.fallback(...)}: pushdown was abandoned because the
+   *       position index is empty, the equality deletes are not eligible for pushdown, or reading
+   *       the Parquet footer failed. The deletes loaded so far are handed back so the caller's
+   *       default chain can reuse them (presumably with no records attached; see {@code
+   *       PositionPushdownResult}).
+   *   <li>{@code PositionPushdownResult.of(...)}: the deleted records read from the narrowed byte
+   *       range, or an empty iterable when no row group inside the task's range qualifies.
+   * </ul>
+   *
+   * @param posFiles position delete files of the task, already rehydrated
+   * @param eqFiles equality delete files of the task, already rehydrated; may be empty
+   * @param addedDeletes all added delete files of the task in serialized form, used to build the
+   *     delete reader for the narrowed read
+   */
+  private static @Nullable PositionPushdownResult tryPositionByteRangePushdown(
+      SerializableChangelogTask task,
+      Table table,
+      IcebergScanConfig scanConfig,
+      Schema outputSchema,
+      List<DeleteFile> posFiles,
+      List<DeleteFile> eqFiles,
+      List<SerializableDeleteFile> addedDeletes) {
+    String dataFilePath = task.getDataFile().getPath();
+
+    // 1. pre-load the position index for this data file.
+    PositionDeleteIndex posIndex;
+    try {
+      DeleteLoader loader = new BaseDeleteLoader(df -> table.io().newInputFile(df.location()));
+      posIndex = loader.loadPositionDeletes(posFiles, dataFilePath);
+    } catch (RuntimeException e) {
+      LOG.info(
+          "Failed to pre-load position deletes for {}; falling back to default reader chain.",
+          dataFilePath,
+          e);
+      return null;
+    }
+    if (posIndex.isEmpty()) {
+      // the pos-delete files don't actually target this data file (rare but possible
+      // after metadata operations). Fall back so the eq pushdown does not run here either.
+      return PositionPushdownResult.fallback(
+          DeleteReader.PreloadedDeletes.of(posIndex, Collections.emptyMap()));
+    }
+
+    // 2. optional equality filter (used to extend the byte range to include row groups
+    // whose stats match the equality IN values).
+    @Nullable ParquetMetricsRowGroupFilter eqFilter = null;
+    EqualityPushdownResult eqPushdown = EqualityPushdownResult.notApplicable();
+    if (!eqFiles.isEmpty()) {
+      eqPushdown = buildEqualityDeletePushdown(table, eqFiles);
+      if (!eqPushdown.applicable) {
+        // eq deletes are present but we can't safely identify which row groups they target.
+        // A narrowed position-only range could drop eq-deleted rows, so fall back to the
+        // default full-range reader. DeleteReader will still apply residual per record.
+        return PositionPushdownResult.fallback(eqPushdown.preloadedDeletes(posIndex));
+      }
+      eqFilter = new ParquetMetricsRowGroupFilter(table.schema(), eqPushdown.residual);
+    }
+    DeleteReader.PreloadedDeletes preloadedDeletes = eqPushdown.preloadedDeletes(posIndex);
+
+    // 3. read the footer and compute the task byte range covering every row group that
+    // contains a position delete or matches the eq filter.
+    long taskStart = task.getStart();
+    long taskEnd = taskStart + task.getLength();
+    long minStart = Long.MAX_VALUE;
+    long maxEnd = Long.MIN_VALUE;
+    long[] sortedDeletePositions = sortedDeletePositions(posIndex);
+
+    try {
+      InputFile inputFile = table.io().newInputFile(dataFilePath);
+      try (ParquetFileReader reader = ParquetFileReader.open(asParquetInputFile(inputFile))) {
+        ParquetMetadata footer = reader.getFooter();
+        MessageType parquetSchema = footer.getFileMetaData().getSchema();
+
+        // track cumulative row count ourselves. not all Parquet writers will include
+        // it in BlockMetaData.getRowIndexOffset
+        long cumulativeRows = 0;
+        for (BlockMetaData rowGroup : footer.getBlocks()) {
+          long rgStartPos = cumulativeRows;
+          long rgEndPos = cumulativeRows + rowGroup.getRowCount();
+          cumulativeRows = rgEndPos;
+
+          long rgByteStart = rowGroup.getStartingPos();
+          long rgByteEnd = rgByteStart + rowGroup.getCompressedSize();
+
+          // skip row groups that do not overlap this task's byte range at all.
+          if (rgByteEnd <= taskStart || rgByteStart >= taskEnd) {
+            continue;
+          }
+
+          // if row group has a position and/or an equality delete, include it in the global range
+          boolean rowGroupHasPosDelete = anyInRange(sortedDeletePositions, rgStartPos, rgEndPos);
+          boolean rowGroupMatchesEq =
+              eqFilter != null && eqFilter.shouldRead(parquetSchema, rowGroup);
+
+          if (rowGroupHasPosDelete || rowGroupMatchesEq) {
+            minStart = Math.min(minStart, rgByteStart);
+            maxEnd = Math.max(maxEnd, rgByteEnd);
+          }
+        }
+      }
+    } catch (IOException | RuntimeException e) {
+      LOG.info(
+          "Failed to read Parquet footer for {}; falling back to default reader chain.",
+          dataFilePath,
+          e);
+      return PositionPushdownResult.fallback(preloadedDeletes);
+    }
+
+    long readStart = Math.max(minStart, taskStart);
+    long readEnd = Math.min(maxEnd, taskEnd);
+    if (readStart >= readEnd) {
+      // no row group qualified (minStart/maxEnd kept their sentinels) or the clamped range is empty.
+      return PositionPushdownResult.of(CloseableIterable.empty(), preloadedDeletes);
+    }
+
+    // 4. Open the reader with the narrowed byte range. This range represents the union
+    // of "has position delete" + "matches eq stats"
+    DeleteFilter<Record> existingDeletesFilter =
+        genericDeleteFilter(table, outputSchema, dataFilePath, task.getExistingDeletes());
+    DeleteReader<Record> addedDeletesReader =
+        genericDeleteReader(table, outputSchema, dataFilePath, addedDeletes, preloadedDeletes);
+    Schema requiredSchema =
+        TypeUtil.join(existingDeletesFilter.requiredSchema(), addedDeletesReader.requiredSchema());
+    CloseableIterable<Record> records =
+        createReader(
+            task,
+            table,
+            scanConfig,
+            requiredSchema,
+            Expressions.alwaysTrue(),
+            readStart,
+            readEnd - readStart);
+    CloseableIterable<Record> liveRecords = existingDeletesFilter.filter(records);
+    return PositionPushdownResult.of(addedDeletesReader.read(liveRecords), preloadedDeletes);
+  }
+
+  /** Materializes a sorted long[] of the positions in {@code posIndex} for binary-search lookup. */
+  private static long[] sortedDeletePositions(PositionDeleteIndex posIndex) {
+    long cardinality = posIndex.cardinality();
+    if (cardinality > Integer.MAX_VALUE) {
+      throw new IllegalStateException(
+          "Position delete index cardinality exceeds Integer.MAX_VALUE: " + cardinality);
+    }
+    long[] arr = new long[(int) cardinality];
+    int[] idx = {0};
+    posIndex.forEach(p -> arr[idx[0]++] = p);
+    // forEach is ordered for the bitmap-backed implementation, but the interface doesn't
+    // promise it, so sort defensively. Cheap relative to the I/O it gates.
+    Arrays.sort(arr);
+    return arr;
+  }
+
+  /**
+   * Reports whether the ascending-sorted array {@code sortedDeletes} holds at least one value in
+   * the half-open interval {@code [startInclusive, endExclusive)}.
+   */
+  private static boolean anyInRange(long[] sortedDeletes, long startInclusive, long endExclusive) {
+    int found = Arrays.binarySearch(sortedDeletes, startInclusive);
+    // A negative result encodes the insertion point, i.e. the index of the first value that is
+    // greater than startInclusive. For an empty array this is 0, which fails the bounds check.
+    int firstAtOrAfterStart = found >= 0 ? found : -found - 1;
+    if (firstAtOrAfterStart >= sortedDeletes.length) {
+      return false;
+    }
+    return sortedDeletes[firstAtOrAfterStart] < endExclusive;
+  }
+
+  /**
+   * Pre-loads the given equality-delete files and, when the rules described on {@link
+   * #deletedRowsForTask} are met, builds an {@code IN} residual from the deleted values. In every
+   * other case the result is marked not applicable and its residual is {@code alwaysTrue()}.
+   */
+  private static EqualityPushdownResult buildEqualityDeletePushdown(
+      Table table, List<DeleteFile> eqFiles) {
+    // All eq delete files in this task must share a single equality field id.
+    Set<Integer> sharedIds = null;
+    for (DeleteFile df : eqFiles) {
+      Set<Integer> ids = new HashSet<>(df.equalityFieldIds());
+      if (sharedIds == null) {
+        sharedIds = ids;
+      } else if (!sharedIds.equals(ids)) {
+        return EqualityPushdownResult.notApplicable();
+      }
+    }
+    if (sharedIds == null || sharedIds.size() != 1) {
+      return EqualityPushdownResult.notApplicable();
+    }
+    // Top-level columns only; nested fields are not at index 0 of the projected delete struct.
+    int fieldId = Iterables.getOnlyElement(sharedIds);
+    Types.NestedField field = table.schema().asStruct().field(fieldId);
+    if (field == null) {
+      return EqualityPushdownResult.notApplicable();
+    }
+    Schema deleteSchema = TypeUtil.select(table.schema(), sharedIds);
+
+    DeleteLoader loader = new BaseDeleteLoader(df -> table.io().newInputFile(df.location()));
+    StructLikeSet set;
+    try {
+      set = loader.loadEqualityDeletes(eqFiles, deleteSchema);
+    } catch (RuntimeException e) {
+      LOG.info(
+          "Failed to pre-load equality deletes for pushdown; falling back to per-record check.", e);
+      return EqualityPushdownResult.notApplicable();
+    }
+
+    Map<Set<Integer>, StructLikeSet> preloadedSets = new HashMap<>();
+    preloadedSets.put(sharedIds, set);
+
+    if (set.size() > IN_PREDICATE_LIMIT) {
+      return EqualityPushdownResult.notApplicable(preloadedSets);
+    }
+    Class<?> javaClass = field.type().typeId().javaClass();
+    List<Object> values = new ArrayList<>(set.size());
+    for (StructLike s : set) {
+      @Nullable Object v = s.get(0, javaClass);
+      if (v == null) {
+        // A null never matches an IN-expression, so pushing down would drop that deletion.
+        return EqualityPushdownResult.notApplicable(preloadedSets);
+      }
+      values.add(v);
+    }
+    if (values.isEmpty()) {
+      return EqualityPushdownResult.notApplicable(preloadedSets);
+    }
+    return EqualityPushdownResult.applicable(Expressions.in(field.name(), values), preloadedSets);
+  }
+
+  /** Outcome of position-delete pushdown; {@code deletedRecords} is null on fallback. */
+  private static final class PositionPushdownResult {
+    private final @Nullable CloseableIterable<Record> deletedRecords;
+    private final DeleteReader.PreloadedDeletes preloadedDeletes;
+
+    private PositionPushdownResult(
+        @Nullable CloseableIterable<Record> rows, DeleteReader.PreloadedDeletes deletes) {
+      this.deletedRecords = rows;
+      this.preloadedDeletes = deletes;
+    }
+
+    private static PositionPushdownResult fallback(DeleteReader.PreloadedDeletes preloadedDeletes) {
+      return new PositionPushdownResult(null, preloadedDeletes);
+    }
+
+    private static PositionPushdownResult of(
+        CloseableIterable<Record> deletedRecords, DeleteReader.PreloadedDeletes preloadedDeletes) {
+      return new PositionPushdownResult(deletedRecords, preloadedDeletes);
+    }
+  }
+
+  /** Outcome of equality-delete pushdown: residual filter, preloaded sets, applicability flag. */
+  private static final class EqualityPushdownResult {
+    private static final EqualityPushdownResult NOT_APPLICABLE =
+        new EqualityPushdownResult(Expressions.alwaysTrue(), Collections.emptyMap(), false);
+    private final Expression residual;
+    private final Map<Set<Integer>, StructLikeSet> preloadedSets;
+    private final boolean applicable;
+
+    private EqualityPushdownResult(
+        Expression filter, Map<Set<Integer>, StructLikeSet> sets, boolean usable) {
+      this.residual = filter;
+      this.preloadedSets = sets;
+      this.applicable = usable;
+    }
+
+    private static EqualityPushdownResult notApplicable() {
+      return NOT_APPLICABLE;
+    }
+
+    private static EqualityPushdownResult notApplicable(
+        Map<Set<Integer>, StructLikeSet> preloadedSets) {
+      // Nothing was loaded, so the shared empty instance says the same thing.
+      return preloadedSets.isEmpty()
+          ? NOT_APPLICABLE
+          : new EqualityPushdownResult(Expressions.alwaysTrue(), preloadedSets, false);
+    }
+
+    private static EqualityPushdownResult applicable(
+        Expression residual, Map<Set<Integer>, StructLikeSet> preloadedSets) {
+      return new EqualityPushdownResult(residual, preloadedSets, true);
+    }
+
+    private DeleteReader.PreloadedDeletes preloadedDeletes(
+        @Nullable PositionDeleteIndex positionDeleteIndex) {
+      return DeleteReader.PreloadedDeletes.of(positionDeleteIndex, preloadedSets);
+    }
+  }
+
+  public static class GenericDeleteFilter extends DeleteFilter<Record> {
+    private final InternalRecordWrapper recordWrapper; // presents a Record as a StructLike
+    private final FileIO fileIo; // opens delete files by location
+
+    @SuppressWarnings("method.invocation")
+    public GenericDeleteFilter(
+        FileIO io,
+        String dataFilePath,
+        Schema tableSchema,
+        Schema requiredSchema,
+        List<DeleteFile> deleteFiles) {
+      super(dataFilePath, deleteFiles, tableSchema, requiredSchema);
+      this.fileIo = io;
+      this.recordWrapper = new InternalRecordWrapper(requiredSchema().asStruct());
+    }
+
+    @Override
+    protected InputFile getInputFile(String location) {
+      return fileIo.newInputFile(location);
+    }
+
+    @Override
+    protected StructLike asStructLike(Record record) {
+      return recordWrapper.wrap(record);
+    }
+  }
+
+  public static class GenericDeleteReader extends DeleteReader<Record> {
+    private final InternalRecordWrapper recordWrapper; // presents a Record as a StructLike
+    private final FileIO fileIo; // opens delete files by location
+
+    @SuppressWarnings("method.invocation")
+    public GenericDeleteReader(
+        FileIO io,
+        String dataFilePath,
+        Schema tableSchema,
+        Schema requiredSchema,
+        List<DeleteFile> deleteFiles,
+        DeleteReader.PreloadedDeletes preloadedDeletes) {
+      super(dataFilePath, deleteFiles, tableSchema, requiredSchema, true, preloadedDeletes);
+      this.fileIo = io;
+      this.recordWrapper = new InternalRecordWrapper(requiredSchema().asStruct());
+    }
+
+    @Override
+    protected InputFile getInputFile(String location) {
+      return fileIo.newInputFile(location);
+    }
+
+    @Override
+    protected StructLike asStructLike(Record record) {
+      return recordWrapper.wrap(record);
+    }
+  }
+
+  /**
+   * Wraps an Iceberg {@link InputFile} so it can be handed to Parquet APIs that expect an {@link
+   * org.apache.parquet.io.InputFile}, e.g. to open a file directly and read its footer. Iceberg's
+   * own {@code ParquetIO} adapter is package-private, hence this local equivalent.
+   */
+  public static org.apache.parquet.io.InputFile asParquetInputFile(InputFile icebergFile) {
+    org.apache.parquet.io.InputFile adapted = new IcebergParquetInputFile(icebergFile);
+    return adapted;
+  }
+
+  private static final class IcebergParquetInputFile implements org.apache.parquet.io.InputFile {
+    private final InputFile source; // the Iceberg file every call is forwarded to
+
+    IcebergParquetInputFile(InputFile delegate) {
+      this.source = delegate;
+    }
+
+    @Override
+    public org.apache.parquet.io.SeekableInputStream newStream() {
+      return new IcebergParquetSeekableStream(source.newStream());
+    }
+
+    @Override
+    public long getLength() {
+      return source.getLength();
+    }
+  }
+
+  private static final class IcebergParquetSeekableStream extends DelegatingSeekableInputStream {
+    private final SeekableInputStream icebergStream; // same stream the superclass wraps
+
+    IcebergParquetSeekableStream(SeekableInputStream delegate) {
+      super(delegate);
+      this.icebergStream = delegate;
+    }
+
+    @Override
+    public void seek(long newPos) throws java.io.IOException {
+      icebergStream.seek(newPos);
+    }
+
+    @Override
+    public long getPos() throws java.io.IOException {
+      return icebergStream.getPos();
+    }
+  }
+}
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/CdcResolver.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/CdcResolver.java
new file mode 100644
index 000000000000..0323c621e169
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/CdcResolver.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg.cdc;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiConsumer;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.ValueKind;
+import org.apache.iceberg.data.Record;
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+/**
+ * Helper class to reconcile CDC rows. Used by {@link ResolveChanges} (with Beam {@link Row}s) and
+ * {@link LocalResolveDoFn} (with Iceberg {@link Record}s).
+ *
+ * <p>We determine the output ValueKind as follows:
+ *
+ * <ul>
+ *   <li>(delete, insert) pairs become {@code UPDATE_BEFORE} + {@code UPDATE_AFTER}
+ *   <li>singletons remain {@code DELETE} or {@code INSERT}
+ *   <li>matching delete+insert with identical non-PK fields are considered Copy-on-Write side
+ *       effects and are dropped
+ * </ul>
+ *
+ * <p>General implementation:
+ *
+ * <ol>
+ *   <li>Hash-index inserts by their non-PK field hash, for efficient Copy-on-Write detection.
+ *   <li>Skip matching (delete, insert) pairs with identical non-PK columns. A CoW operation deletes
+ *       and rewrites the whole file (minus some records that are actually marked for deletion).
+ *       Unchanged records are no-ops and should not be mistaken for updates.
+ *   <li>Walk the remaining deletes and inserts, emitting matched pairs as {@link
+ *       ValueKind#UPDATE_BEFORE} / {@link ValueKind#UPDATE_AFTER}.
+ *   <li>Emit any unmatched extras as {@link ValueKind#DELETE} / {@link ValueKind#INSERT}.
+ * </ol>
+ */
+abstract class CdcResolver<T> {
+  /** Hashes an element's non-PK fields; {@link #resolve} keys its insert index by this value. */
+  protected abstract int nonPkHash(T element);
+
+  /**
+   * Returns true if two records (already known to share a PK) share identical non-PK fields. Called
+   * only when the two elements collide in the {@link #nonPkHash} index, so the implementation can
+   * stay simple (linear scan of non-PK fields).
+   */
+  protected abstract boolean nonPkEquals(T delete, T insert);
+
+  /**
+   * Resolves a Primary Key group of deletes and inserts. Caller provides {@code emit} which decides
+   * how to materialize each output.
+   *
+   * <p>Both input lists are walked in their given order, which fixes the order of emitted rows.
+   */
+  final void resolve(List<T> deletes, List<T> inserts, BiConsumer<ValueKind, T> emit) {
+    boolean hasDeletes = !deletes.isEmpty();
+    boolean hasInserts = !inserts.isEmpty();
+
+    if (hasInserts && hasDeletes) {
+      // Pass 1: flag (delete, insert) pairs whose non-PK fields are identical. Flagged
+      // entries are excluded from every output below.
+      boolean[] dupDeletes = new boolean[deletes.size()];
+      boolean[] dupInserts = new boolean[inserts.size()];
+
+      // non-PK hash -> indices into `inserts` whose elements produced that hash
+      Map<Integer, List<Integer>> insertHashToIdx = new HashMap<>();
+      for (int insertIdx = 0; insertIdx < inserts.size(); insertIdx++) {
+        int insertHash = nonPkHash(inserts.get(insertIdx));
+        insertHashToIdx.computeIfAbsent(insertHash, k -> new ArrayList<>()).add(insertIdx);
+      }
+      for (int deleteIdx = 0; deleteIdx < deletes.size(); deleteIdx++) {
+        int deleteHash = nonPkHash(deletes.get(deleteIdx));
+        @Nullable List<Integer> candidates = insertHashToIdx.get(deleteHash);
+        if (candidates != null) {
+          // same hash is only a hint; nonPkEquals confirms a true duplicate (e.g. from CoW)
+          for (int idx = 0; idx < candidates.size(); idx++) {
+            int insertIdx = candidates.get(idx);
+            if (!dupInserts[insertIdx]
+                && nonPkEquals(deletes.get(deleteIdx), inserts.get(insertIdx))) {
+              // duplicate pair: flag both sides so neither is emitted
+              dupDeletes[deleteIdx] = true;
+              dupInserts[insertIdx] = true;
+              candidates.remove(idx); // index-based remove(int): drops position idx
+              break; // each delete cancels at most one insert
+            }
+          }
+        }
+      }
+
+      // Pass 2: pair surviving deletes and inserts in order as UPDATE_BEFORE / UPDATE_AFTER.
+      int d = 0;
+      int i = 0;
+      while (d < deletes.size() && i < inserts.size()) {
+        // advance both cursors past entries flagged in pass 1
+        while (d < deletes.size() && dupDeletes[d]) {
+          d++;
+        }
+        while (i < inserts.size() && dupInserts[i]) {
+          i++;
+        }
+
+        if (d < deletes.size() && i < inserts.size()) {
+          emit.accept(ValueKind.UPDATE_BEFORE, deletes.get(d));
+          emit.accept(ValueKind.UPDATE_AFTER, inserts.get(i));
+          d++;
+          i++;
+        }
+      }
+
+      // Pass 3: leftovers on either side (minus flagged ones) go out as plain DELETE / INSERT.
+      while (d < deletes.size()) {
+        if (!dupDeletes[d]) {
+          emit.accept(ValueKind.DELETE, deletes.get(d));
+        }
+        d++;
+      }
+      while (i < inserts.size()) {
+        if (!dupInserts[i]) {
+          emit.accept(ValueKind.INSERT, inserts.get(i));
+        }
+        i++;
+      }
+    } else if (hasInserts) {
+      for (T r : inserts) {
+        emit.accept(ValueKind.INSERT, r);
+      }
+    } else if (hasDeletes) {
+      for (T r : deletes) {
+        emit.accept(ValueKind.DELETE, r);
+      }
+    }
+  }
+}
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/ChangelogDescriptor.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/ChangelogDescriptor.java
new file mode 100644
index 000000000000..103d38be4012
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/ChangelogDescriptor.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg.cdc;
+
+import com.google.auto.value.AutoValue;
+import org.apache.beam.sdk.schemas.AutoValueSchema;
+import org.apache.beam.sdk.schemas.NoSuchSchemaException;
+import org.apache.beam.sdk.schemas.SchemaCoder;
+import org.apache.beam.sdk.schemas.SchemaRegistry;
+import org.apache.beam.sdk.schemas.annotations.DefaultSchema;
+import org.apache.beam.sdk.schemas.annotations.SchemaFieldNumber;
+import org.apache.beam.sdk.values.Row;
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+/** Describes a batch of {@link SerializableChangelogTask}s; encodable via {@link #coder()}. */
+@DefaultSchema(AutoValueSchema.class)
+@AutoValue
+public abstract class ChangelogDescriptor {
+  public static Builder builder() {
+    return new AutoValue_ChangelogDescriptor.Builder();
+  }
+
+  public static SchemaCoder<ChangelogDescriptor> coder() {
+    try { // a fresh default SchemaRegistry is created on every call
+      return SchemaRegistry.createDefault().getSchemaCoder(ChangelogDescriptor.class);
+    } catch (NoSuchSchemaException e) {
+      throw new RuntimeException(e); // rethrown unchecked with the original cause attached
+    }
+  }
+
+  @SchemaFieldNumber("0")
+  abstract String getTableIdentifierString();
+
+  @SchemaFieldNumber("1")
+  public abstract @Nullable Row getOverlapLower(); // presumably the overlap's lower bound; confirm
+
+  @SchemaFieldNumber("2")
+  public abstract @Nullable Row getOverlapUpper(); // presumably the overlap's upper bound; confirm
+
+  @AutoValue.Builder
+  public abstract static class Builder {
+    abstract Builder setTableIdentifierString(String table);
+
+    abstract Builder setOverlapLower(@Nullable Row overlapLower);
+
+    abstract Builder setOverlapUpper(@Nullable Row overlapUpper);
+
+    abstract ChangelogDescriptor build();
+  }
+}
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/ChangelogScanner.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/ChangelogScanner.java
new file mode 100644
index 000000000000..8e6dd3dc5015
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/ChangelogScanner.java
@@ -0,0 +1,1047 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg.cdc;
+
+import static java.lang.String.format;
+import static org.apache.beam.sdk.io.iceberg.cdc.SerializableChangelogTask.Type.ADDED_ROWS;
+import static org.apache.beam.sdk.io.iceberg.cdc.SerializableChangelogTask.getAddedDeleteFiles;
+import static org.apache.beam.sdk.io.iceberg.cdc.SerializableChangelogTask.getDataFile;
+import static org.apache.beam.sdk.io.iceberg.cdc.SerializableChangelogTask.getLength;
+import static org.apache.beam.sdk.io.iceberg.cdc.SerializableChangelogTask.getPartition;
+import static org.apache.beam.sdk.io.iceberg.cdc.SerializableChangelogTask.getSpec;
+import static org.apache.beam.sdk.io.iceberg.cdc.SerializableChangelogTask.getType;
+import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.ListCoder;
+import org.apache.beam.sdk.io.iceberg.IcebergScanConfig;
+import org.apache.beam.sdk.io.iceberg.IcebergUtils;
+import org.apache.beam.sdk.io.iceberg.TableCache;
+import org.apache.beam.sdk.metrics.Counter;
+import org.apache.beam.sdk.metrics.Metrics;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Splitter;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables;
+import org.apache.iceberg.AddedRowsScanTask;
+import org.apache.iceberg.ChangelogScanTask;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DataOperations;
+import org.apache.iceberg.DeleteFile;
+import org.apache.iceberg.DeletedDataFileScanTask;
+import org.apache.iceberg.DeletedRowsScanTask;
+import org.apache.iceberg.IncrementalChangelogScan;
+import org.apache.iceberg.MetricsConfig;
+import org.apache.iceberg.MetricsModes;
+import org.apache.iceberg.PartitionField;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.ScanTaskGroup;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.SerializableTable;
+import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.TableProperties;
+import org.apache.iceberg.data.GenericRecord;
+import org.apache.iceberg.data.Record;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.io.CloseableIterable;
+import org.apache.iceberg.types.Conversions;
+import org.apache.iceberg.types.Type;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.PropertyUtil;
+import org.apache.iceberg.util.StructLikeMap;
+import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * DoFn that takes incoming Iceberg snapshots and scans them for changelogs using Iceberg's {@link
+ * IncrementalChangelogScan}. Changelog tasks are organized into batches and routed to different
+ * downstream PCollections based on complexity.
+ *
+ * <p>The Iceberg scan generates batches of changelog scan tasks, each of size {@link
+ * TableProperties#SPLIT_SIZE}. This can be configured with the table's <a
+ * href="https://iceberg.apache.org/docs/latest/configuration/#read-properties">read.split.target-size
+ * property</a>.
+ *
+ * <p>This DoFn analyzes the nature of changes within the snapshot, partition, and file level, then
+ * routes the changes accordingly:
+ *
+ * <ol>
+ *   <li><b>Unidirectional (Fast Path):</b> If an isolated level contains only inserts OR only
+ *       deletes, its tasks are emitted to {@link #UNIDIRECTIONAL_TASKS}. These records <b>bypass
+ *       the CoGBK shuffle</b> and are output immediately.
+ *   <li><b>Small Bidirectional (Medium Path):</b> If an isolated level contains a mix of inserts
+ *       and deletes, and is small enough, its tasks are emitted to {@link
+ *       #SMALL_BIDIRECTIONAL_TASKS}. These records are resolved in memory to identify potential
+ *       updates. Task groups are considered small enough if the estimated overlap region is within
+ *       {@link TableProperties#SPLIT_SIZE}.
+ *   <li><b>Bidirectional (Slow Path):</b> If an isolated level contains a mix of inserts and
+ *       deletes, and is too large, its tasks are emitted to {@link #LARGE_BIDIRECTIONAL_TASKS}.
+ *       These records are grouped by Primary Key and processed by {@link ResolveChanges} to
+ *       identify potential updates.
+ * </ol>
+ *
+ * <h2>Optimizing by Shuffling Less Data</h2>
+ *
+ * <p>We take a three-layered approach to identify data that can bypass the expensive downstream
+ * CoGroupByKey shuffle:
+ *
+ * <h3>Snapshots</h3>
+ *
+ * We start by analyzing the nature of changes at the snapshot level. If a snapshot's operation is
+ * not of type {@link DataOperations#OVERWRITE}, then it's a uni-directional change.
+ *
+ * <h3>Pinned Partitions</h3>
+ *
+ * <p>If the table's partition fields are derived entirely from Primary Key fields, we know that a
+ * record will not migrate between partitions. This narrows down the isolated level and allows us to
+ * only check for bi-directional changes <b>within a partition</b>. Doing this will allow partitions
+ * with uni-directional changes to bypass the expensive CoGBK shuffle. It also gives partitions with
+ * small bi-directional changes a chance to be processed in-memory instead of needing to pass
+ * through the CoGBK.
+ *
+ * <h3>Optimization for Individual Files</h3>
+ *
+ * When we have narrowed down our group of tasks with bi-directional changes, we start analyzing the
+ * metadata of their underlying files. We compare the upper and lower bounds of Primary Keys
+ * relevant to each file, and consider any overlaps as potentially containing an update. If a given
+ * task's Primary Key bounds have no overlap with any opposing task's Primary Key bounds, then we
+ * know it's not possible to create an (insert, delete) pair with it. Such a task can safely bypass
+ * the shuffle.
+ *
+ * <p>Note: "opposing" refers to a change that happens in the opposite direction (e.g. insert is
+ * "positive", delete is "negative")
+ *
+ * <p>For example, say we have a group of tasks:
+ *
+ * <ol>
+ *   <li>Task A (adds rows): bounds [3, 8]
+ *   <li>Task B (adds rows): bounds [2, 4]
+ *   <li>Task C (deletes rows): bounds [1, 5]
+ *   <li>Task D (adds rows): bounds [6, 12]
+ * </ol>
+ *
+ * <p>Tasks A and B add rows and overlap with Task C, which deletes rows. We must resolve the rows
+ * in these 3 tasks because they might all contain (insert, delete) pairs that lead to an update.
+ *
+ * <p>Task D however, does not overlap with any delete rows. It will never produce an (insert,
+ * delete) pair, so we can directly emit it without resolving its output rows.
+ */
+class ChangelogScanner
+    extends DoFn<Long, KV<ChangelogDescriptor, List<SerializableChangelogTask>>> {
+  private static final Logger LOG = LoggerFactory.getLogger(ChangelogScanner.class);
+  private static final Counter totalChangelogScanTasks =
+      Metrics.counter(ChangelogScanner.class, "totalChangelogScanTasks");
+  private static final Counter numAddedRowsScanTasks =
+      Metrics.counter(ChangelogScanner.class, "numAddedRowsScanTasks");
+  private static final Counter numDeletedRowsScanTasks =
+      Metrics.counter(ChangelogScanner.class, "numDeletedRowsScanTasks");
+  private static final Counter numDeletedDataFileScanTasks =
+      Metrics.counter(ChangelogScanner.class, "numDeletedDataFileScanTasks");
+  private static final Counter numUniDirectionalTasks =
+      Metrics.counter(ChangelogScanner.class, "numUniDirectionalTasks");
+  private static final Counter numLargeBiDirectionalTasks =
+      Metrics.counter(ChangelogScanner.class, "numLargeBiDirectionalTasks");
+  private static final Counter numSmallBiDirectionalTasks =
+      Metrics.counter(ChangelogScanner.class, "numSmallBiDirectionalTasks");
+  static final TupleTag<KV<ChangelogDescriptor, List<SerializableChangelogTask>>>
+      UNIDIRECTIONAL_TASKS = new TupleTag<>(); // fast path: bypasses the CoGBK shuffle
+  static final TupleTag<KV<ChangelogDescriptor, List<SerializableChangelogTask>>>
+      SMALL_BIDIRECTIONAL_TASKS = new TupleTag<>(); // medium path: resolved in memory
+  static final TupleTag<KV<ChangelogDescriptor, List<SerializableChangelogTask>>>
+      LARGE_BIDIRECTIONAL_TASKS = new TupleTag<>(); // slow path: grouped by Primary Key
+
+  static final KvCoder<ChangelogDescriptor, List<SerializableChangelogTask>> OUTPUT_CODER =
+      KvCoder.of(ChangelogDescriptor.coder(), ListCoder.of(SerializableChangelogTask.coder()));
+  private final IcebergScanConfig scanConfig;
+  private @MonotonicNonNull Table table;
+  private @MonotonicNonNull Snapshot snapshot;
+  private @MonotonicNonNull TaskBatcher uniBatcher;
+  private boolean canDoPartitionOptimization = false;
+  // tallies for metrics; presumably zeroed by resetLocalMetrics() at the start of process()
+  private int numAddedRowsTasks = 0;
+  private int numDeletedRowsTasks = 0;
+  private int numDeletedFileTasks = 0;
+  private int numUniDirTasks = 0;
+  private int numSmallBiDirTasks = 0;
+  private int numLargeBiDirTasks = 0;
+  private int numUniDirSplits = 0;
+  private int numSmallBiDirSplits = 0;
+  private int numLargeBiDirSplits = 0;
+
+  ChangelogScanner(IcebergScanConfig scanConfig) {
+    this.scanConfig = scanConfig; // supplies the table identifier, filter and projected schema
+  }
+
+  @Setup
+  public void setup() {
+    TableCache.setup(scanConfig); // process() later reads the table through TableCache.get
+  }
+
+  /** Plans the changelog scan for {@code snapshotId} (relative to its parent) and emits tasks. */
+  @ProcessElement
+  public void process(@Element Long snapshotId, MultiOutputReceiver out) throws IOException {
+    resetLocalMetrics();
+    long startMs = System.currentTimeMillis();
+    LOG.debug("Started scanning snapshot {} at {}", snapshotId, Instant.ofEpochMilli(startMs));
+    // not using getRefreshed because upstream Watch should have already refreshed the
+    // table to a state where this snapshot exists
+    this.table = SerializableTable.copyOf(TableCache.get(scanConfig.getTableIdentifier()));
+    this.snapshot =
+        checkStateNotNull(
+            table.snapshot(snapshotId), "Could not retrieve table snapshot: %s", snapshotId);
+
+    @Nullable Long fromSnapshotId = snapshot.parentId();
+    @Nullable Expression filter = scanConfig.getFilter();
+
+    IncrementalChangelogScan scan =
+        table
+            .newIncrementalChangelogScan()
+            .toSnapshot(snapshotId)
+            .project(scanConfig.getProjectedSchema());
+    if (fromSnapshotId != null) {
+      scan = scan.fromSnapshotExclusive(fromSnapshotId);
+    }
+    if (filter != null) {
+      scan = scan.filter(filter);
+    }
+
+    // configure the scan to store upper/lower bound metrics only
+    // if it's available for primary key fields
+    scan = maybeIncludeColumnStats(scan, table);
+    createAndOutputReadTasks(scan, out);
+    long elapsedMs = System.currentTimeMillis() - startMs;
+    LOG.debug(
+        "Finished scanning snapshot {} after {} seconds",
+        snapshotId, Duration.millis(elapsedMs).getStandardSeconds());
+  }
+
+  /** Adds PK column stats to {@code scan} if every PK column's metrics mode records bounds. */
+  private IncrementalChangelogScan maybeIncludeColumnStats(
+      IncrementalChangelogScan scan, Table table) {
+    Collection<String> pkFields = table.schema().identifierFieldNames();
+    if (pkFields.isEmpty()) {
+      return scan; // NOTE(review): an empty column set appears to mean "keep all stats"
+    }
+    MetricsConfig metricsConfig = MetricsConfig.forTable(table);
+    // Stats are only requested when the mode is Full or Truncate for every PK column.
+    for (String field : pkFields) {
+      MetricsModes.MetricsMode mode = metricsConfig.columnMode(field);
+      if (!(mode instanceof MetricsModes.Full) && !(mode instanceof MetricsModes.Truncate)) {
+        return scan;
+      }
+    }
+    return scan.includeColumnStats(pkFields);
+  }
+
+  /**
+   * Plans the given changelog scan for the current snapshot and routes every planned task.
+   *
+   * <p>Tasks of non-{@code OVERWRITE} snapshots go straight to the uni-directional batcher. Tasks
+   * of {@code OVERWRITE} snapshots are buffered during planning and analyzed afterwards by {@link
+   * #processOverwriteTasks}, since they may be bi-directional. While planning, per-partition
+   * change types are gathered to decide whether the partition optimization is applicable.
+   *
+   * @param scan changelog scan to plan
+   * @param multiOutputReceiver provides the tagged outputs that task batches are emitted to
+   * @throws IOException propagated from planning or closing the scan's task groups
+   */
+  @SuppressWarnings("Slf4jFormatShouldBeConst")
+  private void createAndOutputReadTasks(
+      IncrementalChangelogScan scan, MultiOutputReceiver multiOutputReceiver) throws IOException {
+    Snapshot snapshot = checkStateNotNull(this.snapshot);
+    Table table = checkStateNotNull(this.table);
+
+    // ******** Partition Optimization ********
+    // Determine which partition specs "pin" records to their partition
+    // (i.e. partition fields are sourced entirely from a record's PK).
+    // If records are pinned, we can optimize by only shuffling bi-directional changes
+    // *within* a partition, since no cross-partition changes will occur.
+    Set<Integer> pinnedSpecs =
+        table.specs().entrySet().stream()
+            .filter(e -> doesSpecPinRecordsToPartition(e.getValue()))
+            .map(Map.Entry::getKey)
+            .collect(Collectors.toSet());
+    boolean tableHasPinnedSpecs = !pinnedSpecs.isEmpty();
+
+    // The optimization cannot apply if any file in this snapshot uses an unpinned spec
+    boolean snapshotHasUnpinnedSpec = false;
+    Set<Integer> specsInSnapshot = new HashSet<>();
+    ChangeTypesInPartition changeTypesInPartition = new ChangeTypesInPartition();
+
+    // Buffer tasks from OVERWRITE snapshots, because they are potentially bi-directional
+    OverwriteTasks overwriteTasks = new OverwriteTasks();
+
+    // batcher for uni-directional tasks, which can be directly emitted when splitSize is reached
+    uniBatcher =
+        new TaskBatcher(
+            scanConfig.getTableIdentifier(),
+            snapshot.timestampMillis(),
+            splitSize(),
+            multiOutputReceiver.get(UNIDIRECTIONAL_TASKS));
+
+    // === collect partition metadata and route/buffer tasks ===
+    LOG.info(
+        "Planning to scan snapshot {} (seq: {})", snapshot.snapshotId(), snapshot.sequenceNumber());
+    try (CloseableIterable<ScanTaskGroup<ChangelogScanTask>> scanTaskGroups = scan.planTasks()) {
+      for (ScanTaskGroup<ChangelogScanTask> scanTaskGroup : scanTaskGroups) {
+        for (ChangelogScanTask task : scanTaskGroup.tasks()) {
+          SerializableChangelogTask.Type type = getType(task);
+          StructLike partition = getPartition(task);
+          PartitionSpec spec = getSpec(task);
+          gatherTaskTypeMetrics(type);
+
+          // Collect partition metadata for pinned-spec optimization
+          if (tableHasPinnedSpecs) {
+            if (!pinnedSpecs.contains(spec.specId())) {
+              snapshotHasUnpinnedSpec = true;
+            } else {
+              changeTypesInPartition.add(spec, partition, type);
+              specsInSnapshot.add(spec.specId());
+            }
+          }
+
+          // non-overwrite tasks are always unidirectional (the scan planner
+          // skips REPLACE ops).
+          if (!DataOperations.OVERWRITE.equals(snapshot.operation())) {
+            uniBatcher.add(makeTask(task, table), getLength(task));
+            numUniDirTasks++;
+            continue;
+          }
+
+          // Overwrite tasks need further analysis — buffer for post-loop processing
+          overwriteTasks.add(spec, partition, task);
+        }
+      }
+    }
+    // a snapshot using multiple specs is also not safe for the partition optimization,
+    // unless we account for the spec ID in the file-to-file comparison, which complicates things
+    canDoPartitionOptimization =
+        tableHasPinnedSpecs && !snapshotHasUnpinnedSpec && specsInSnapshot.size() <= 1;
+
+    // === analyze buffered overwrite tasks using the partition metadata ===
+    processOverwriteTasks(overwriteTasks, changeTypesInPartition, multiOutputReceiver);
+    uniBatcher.flush();
+
+    int totalTasks = updateTaskCounters();
+
+    LOG.info(scanResultMessage(totalTasks));
+  }
+
+  /**
+   * Analyzes the buffered tasks of an OVERWRITE snapshot and routes each one as uni-directional or
+   * bi-directional.
+   *
+   * <p>Without the partition optimization, all buffered tasks are compared against each other.
+   * With it, every partition is handled on its own, and partitions whose recorded change types are
+   * not bi-directional skip file analysis entirely.
+   *
+   * <p>{@code numLargeBiDirTasks} accumulates every bi-directional task found here, in both
+   * branches, regardless of where {@link #routeBidirectional} ends up sending it.
+   *
+   * @param overwriteTasks tasks buffered during planning; nothing happens when empty
+   * @param changeTypesInPartition change types observed per spec ID and partition during planning
+   * @param multiOutputReceiver provides the tagged outputs that task batches are emitted to
+   */
+  private void processOverwriteTasks(
+      OverwriteTasks overwriteTasks,
+      ChangeTypesInPartition changeTypesInPartition,
+      MultiOutputReceiver multiOutputReceiver) {
+    if (overwriteTasks.isEmpty()) {
+      return;
+    }
+    Snapshot snapshot = checkStateNotNull(this.snapshot);
+    Table table = checkStateNotNull(this.table);
+
+    TaskBatcher uniBatcher = checkStateNotNull(this.uniBatcher);
+    TaskBatcher largeBiBatcher =
+        new TaskBatcher(
+            scanConfig.getTableIdentifier(),
+            snapshot.timestampMillis(),
+            splitSize(),
+            multiOutputReceiver.get(LARGE_BIDIRECTIONAL_TASKS));
+
+    if (!canDoPartitionOptimization) {
+      // Records are not pinned to partition (or no pinned specs at all).
+      // We need to compare underlying files across the whole snapshot.
+      analyzeAndRouteOverwriteTasks(
+          overwriteTasks.allTasks(), table, uniBatcher, largeBiBatcher, multiOutputReceiver);
+    } else {
+      // Records are pinned to partition.
+      // Narrow down by comparing the files within each partition independently.
+      for (Map.Entry<Integer, StructLikeMap<List<ChangelogScanTask>>> tasksPerSpec :
+          overwriteTasks.tasks.entrySet()) {
+        int specId = tasksPerSpec.getKey();
+        for (Map.Entry<StructLike, List<ChangelogScanTask>> tasksInPartition :
+            tasksPerSpec.getValue().entrySet()) {
+          @Nullable
+          Set<SerializableChangelogTask.Type> partitionChangeTypes =
+              changeTypesInPartition.typesFor(specId, tasksInPartition.getKey());
+
+          // If this partition has only uni-directional changes, output to UNIDIRECTIONAL and bypass
+          // file analysis
+          if (partitionChangeTypes != null && !containsBiDirectionalChanges(partitionChangeTypes)) {
+            uniBatcher.add(tasksInPartition.getValue(), table);
+            numUniDirTasks += tasksInPartition.getValue().size();
+            continue;
+          }
+
+          // Partition has bi-directional changes — analyze file-level overlaps
+          analyzeAndRouteOverwriteTasks(
+              tasksInPartition.getValue(), table, uniBatcher, largeBiBatcher, multiOutputReceiver);
+        }
+      }
+    }
+    largeBiBatcher.flush();
+    numLargeBiDirSplits = largeBiBatcher.totalSplits;
+  }
+
+  /**
+   * Runs file analysis over one group of overwrite tasks, batches the uni-directional ones, routes
+   * the bi-directional ones and updates the local task counters for both.
+   */
+  private void analyzeAndRouteOverwriteTasks(
+      List<ChangelogScanTask> tasks,
+      Table table,
+      TaskBatcher uniBatcher,
+      TaskBatcher largeBiBatcher,
+      MultiOutputReceiver multiOutputReceiver) {
+    AnalysisResult result =
+        analyzeFiles(tasks, scanConfig.recordIdSchema(), scanConfig.recordIdComparator());
+
+    uniBatcher.add(result.unidirectional, table);
+    routeBidirectional(result, largeBiBatcher, multiOutputReceiver);
+
+    // Counted for every group so the total stays consistent with updateTaskCounters(), which
+    // subtracts the small (locally resolved) tasks from this number.
+    numUniDirTasks += result.unidirectional.size();
+    numLargeBiDirTasks += result.bidirectional.size();
+  }
+
+  /**
+   * Buffer of {@link ChangelogScanTask}s, grouped first by partition spec ID and then by partition
+   * value.
+   */
+  static class OverwriteTasks {
+    Map<Integer, StructLikeMap<List<ChangelogScanTask>>> tasks = new HashMap<>();
+
+    /** Files {@code task} under the given spec and partition, creating the groups on demand. */
+    void add(PartitionSpec spec, StructLike partition, ChangelogScanTask task) {
+      StructLikeMap<List<ChangelogScanTask>> byPartition =
+          tasks.computeIfAbsent(spec.specId(), id -> StructLikeMap.create(spec.partitionType()));
+      List<ChangelogScanTask> group = byPartition.computeIfAbsent(partition, p -> new ArrayList<>());
+      group.add(task);
+    }
+
+    /** Returns true when no task has been added for any spec. */
+    boolean isEmpty() {
+      return tasks.isEmpty();
+    }
+
+    /** Returns every buffered task in a single flat list, across all specs and partitions. */
+    List<ChangelogScanTask> allTasks() {
+      List<ChangelogScanTask> flattened = new ArrayList<>();
+      for (StructLikeMap<List<ChangelogScanTask>> byPartition : tasks.values()) {
+        for (List<ChangelogScanTask> group : byPartition.values()) {
+          flattened.addAll(group);
+        }
+      }
+      return flattened;
+    }
+  }
+
+  /**
+   * Records which {@link ChangelogScanTask} types were seen for each spec ID and partition. The
+   * result feeds the decision on whether a snapshot is eligible for partition optimization.
+   */
+  static class ChangeTypesInPartition {
+    Map<Integer, StructLikeMap<Set<SerializableChangelogTask.Type>>> changeTypesPerPartition =
+        new HashMap<>();
+
+    /** Notes that a task of the given type was seen in the given spec and partition. */
+    void add(PartitionSpec spec, StructLike partition, SerializableChangelogTask.Type type) {
+      StructLikeMap<Set<SerializableChangelogTask.Type>> byPartition =
+          changeTypesPerPartition.computeIfAbsent(
+              spec.specId(), id -> StructLikeMap.create(spec.partitionType()));
+      Set<SerializableChangelogTask.Type> seenTypes =
+          byPartition.computeIfAbsent(partition, p -> new HashSet<>());
+      seenTypes.add(type);
+    }
+
+    /**
+     * Returns the types recorded for the given spec ID and partition, or null when nothing was
+     * recorded for that spec or that partition.
+     */
+    @Nullable
+    Set<SerializableChangelogTask.Type> typesFor(Integer specId, StructLike partition) {
+      @Nullable
+      StructLikeMap<Set<SerializableChangelogTask.Type>> byPartition =
+          changeTypesPerPartition.get(specId);
+      if (byPartition == null) {
+        return null;
+      }
+      return byPartition.get(partition);
+    }
+  }
+
+  /**
+   * Returns true when the set holds {@code ADDED_ROWS} together with at least one other change
+   * type, i.e. it includes both inserts and deletes.
+   */
+  private static boolean containsBiDirectionalChanges(
+      Set<SerializableChangelogTask.Type> changeTypes) {
+    if (!changeTypes.contains(ADDED_ROWS)) {
+      return false;
+    }
+    return changeTypes.size() > 1;
+  }
+
+  /**
+   * Outcome of an overlap analysis: the tasks split into uni-directional and bi-directional lists,
+   * plus the optional lower and upper ends of the overlap range.
+   */
+  static class AnalysisResult {
+    final List<ChangelogScanTask> unidirectional;
+    final List<ChangelogScanTask> bidirectional;
+    final @Nullable StructLike overlapLower;
+    final @Nullable StructLike overlapUpper;
+
+    AnalysisResult(
+        List<ChangelogScanTask> unidirectional,
+        List<ChangelogScanTask> bidirectional,
+        @Nullable StructLike overlapLower,
+        @Nullable StructLike overlapUpper) {
+      this.unidirectional = unidirectional;
+      this.bidirectional = bidirectional;
+      this.overlapLower = overlapLower;
+      this.overlapUpper = overlapUpper;
+    }
+
+    /** Returns the lower end of the overlap range as a Beam {@link Row}, or null if unset. */
+    @Nullable
+    Row overlapLowerRow(org.apache.beam.sdk.schemas.Schema idSchema) {
+      return toRowOrNull(idSchema, this.overlapLower);
+    }
+
+    /** Returns the upper end of the overlap range as a Beam {@link Row}, or null if unset. */
+    @Nullable
+    Row overlapUpperRow(org.apache.beam.sdk.schemas.Schema idSchema) {
+      return toRowOrNull(idSchema, this.overlapUpper);
+    }
+
+    /** Converts a bound (expected to be a {@link Record}) to a Beam row; null stays null. */
+    @Nullable
+    private static Row toRowOrNull(
+        org.apache.beam.sdk.schemas.Schema idSchema, @Nullable StructLike bound) {
+      if (bound == null) {
+        return null;
+      }
+      return IcebergUtils.icebergRecordToBeamRow(idSchema, (Record) bound);
+    }
+  }
+
+  /**
+   * Analyzes all tasks in the given list by comparing the record ID bounds of each task's
+   * underlying files. If a task's bounds overlap with an opposing task's bounds (insert vs.
+   * delete), both are considered bi-directional changes. If a task's bounds do not overlap with
+   * any opposing task's bounds, it is considered a uni-directional change.
+   *
+   * <p>Bounds are compared inclusively: two ranges that share only a single boundary ID are
+   * treated as overlapping.
+   *
+   * <p>If bounds cannot be built for any task, every task is returned as bi-directional and no
+   * overlap range is reported.
+   *
+   * @param tasks tasks to classify
+   * @param recIdSchema schema of the record ID used to build each task's bounds
+   * @param idComp comparator used to order record IDs
+   * @return the classified tasks; the overlap range is non-null only when bi-directional tasks
+   *     were identified through bound comparison
+   * @throws IllegalStateException if a task is neither an added-rows, deleted-data-file nor
+   *     deleted-rows task
+   */
+  static AnalysisResult analyzeFiles(
+      List<ChangelogScanTask> tasks, Schema recIdSchema, Comparator<StructLike> idComp) {
+    List<TaskAndBounds> insertTasks = new ArrayList<>();
+    List<TaskAndBounds> deleteTasks = new ArrayList<>();
+
+    try {
+      for (ChangelogScanTask task : tasks) {
+        if (task instanceof AddedRowsScanTask) {
+          insertTasks.add(TaskAndBounds.of(task, recIdSchema, idComp));
+        } else if (task instanceof DeletedDataFileScanTask || task instanceof DeletedRowsScanTask) {
+          deleteTasks.add(TaskAndBounds.of(task, recIdSchema, idComp));
+        } else {
+          throw new IllegalStateException("Unknown ChangelogScanTask type: " + task.getClass());
+        }
+      }
+    } catch (TaskAndBounds.NoBoundMetricsException e) {
+      // if metrics are not fully available, we need to play it safe and shuffle all the tasks.
+      return new AnalysisResult(Collections.emptyList(), tasks, null, null);
+    }
+
+    if (!insertTasks.isEmpty() && !deleteTasks.isEmpty()) {
+      Comparator<TaskAndBounds> lowerBoundComp = (t1, t2) -> idComp.compare(t1.lowerId, t2.lowerId);
+      Comparator<TaskAndBounds> upperBoundComp = (t1, t2) -> idComp.compare(t1.upperId, t2.upperId);
+
+      insertTasks.sort(lowerBoundComp);
+      deleteTasks.sort(lowerBoundComp);
+
+      TaskAndBounds firstInsert = insertTasks.get(0);
+      TaskAndBounds firstDelete = deleteTasks.get(0);
+      TaskAndBounds lastInsert = insertTasks.stream().max(upperBoundComp).orElseThrow();
+      TaskAndBounds lastDelete = deleteTasks.stream().max(upperBoundComp).orElseThrow();
+
+      // Cheap pre-check on the global insert and delete windows. It must be inclusive, like the
+      // per-task checks below: windows that touch at a single ID can still hold the same record.
+      boolean overlapExists =
+          idComp.compare(firstDelete.lowerId, lastInsert.upperId) <= 0
+              && idComp.compare(firstInsert.lowerId, lastDelete.upperId) <= 0;
+
+      if (overlapExists) {
+        // Iterate through inserts and only check relevant deletes
+        for (TaskAndBounds insert : insertTasks) {
+          // First check if the insert task overlaps with the global delete window.
+          // If not, we can just skip it.
+          if (idComp.compare(insert.upperId, firstDelete.lowerId) < 0
+              || idComp.compare(insert.lowerId, lastDelete.upperId) > 0) {
+            continue;
+          }
+
+          for (TaskAndBounds del : deleteTasks) {
+            // if the delete task's lower bound is already past the insert task's upper bound,
+            // no subsequent delete can overlap this insert (because we sorted above).
+            // We can break inner loop.
+            if (idComp.compare(del.lowerId, insert.upperId) > 0) {
+              break;
+            }
+
+            del.checkOverlapWith(insert, idComp);
+          }
+        }
+      }
+    }
+
+    // collect results and return.
+    // overlapping tasks are bidirectional.
+    // otherwise they are unidirectional.
+    List<ChangelogScanTask> unidirectional = new ArrayList<>();
+    List<ChangelogScanTask> bidirectional = new ArrayList<>();
+
+    for (TaskAndBounds taskAndBounds : Iterables.concat(deleteTasks, insertTasks)) {
+      if (taskAndBounds.overlaps) {
+        bidirectional.add(taskAndBounds.task);
+      } else {
+        unidirectional.add(taskAndBounds.task);
+      }
+    }
+
+    StructLike overlapLower = null;
+    StructLike overlapUpper = null;
+    if (!bidirectional.isEmpty()) {
+      // Overlaps are always flagged in insert/delete pairs, so both sides have at least one
+      // overlapping task here.
+      StructLike globalInsertLower =
+          insertTasks.stream()
+              .filter(t -> t.overlaps)
+              .map(t -> t.lowerId)
+              .min(idComp)
+              .orElseThrow();
+      StructLike globalInsertUpper =
+          insertTasks.stream()
+              .filter(t -> t.overlaps)
+              .map(t -> t.upperId)
+              .max(idComp)
+              .orElseThrow();
+      StructLike globalDeleteLower =
+          deleteTasks.stream()
+              .filter(t -> t.overlaps)
+              .map(t -> t.lowerId)
+              .min(idComp)
+              .orElseThrow();
+      StructLike globalDeleteUpper =
+          deleteTasks.stream()
+              .filter(t -> t.overlaps)
+              .map(t -> t.upperId)
+              .max(idComp)
+              .orElseThrow();
+
+      // The overlap range is the intersection of the two windows:
+      // the larger of the lower ends and the smaller of the upper ends.
+      overlapLower =
+          idComp.compare(globalInsertLower, globalDeleteLower) > 0
+              ? globalInsertLower
+              : globalDeleteLower;
+      overlapUpper =
+          idComp.compare(globalInsertUpper, globalDeleteUpper) < 0
+              ? globalInsertUpper
+              : globalDeleteUpper;
+    }
+
+    return new AnalysisResult(unidirectional, bidirectional, overlapLower, overlapUpper);
+  }
+
+  /**
+   * Routes bi-directional tasks from an {@link AnalysisResult} to either the in-memory local
+   * resolve path (when their total byte size fits in one split) or the CoGroupByKey shuffle path
+   * otherwise.
+   *
+   * <p>For LOCAL routing, all bi-directional tasks of the result are emitted immediately as one
+   * batch to {@link #SMALL_BIDIRECTIONAL_TASKS}, so that they can be resolved together downstream,
+   * and the small task/split counters are updated.
+   *
+   * <p>Otherwise each task is handed to {@code largeBiBatcher}, keyed by a descriptor that carries
+   * the result's overlap range.
+   *
+   * <p>Does nothing when the result holds no bi-directional tasks.
+   *
+   * @param result analysis result whose bi-directional tasks are routed
+   * @param largeBiBatcher batcher for tasks that are too large for local resolution
+   * @param multiOutputReceiver provides the tagged output for locally resolved tasks
+   */
+  private void routeBidirectional(
+      AnalysisResult result, TaskBatcher largeBiBatcher, MultiOutputReceiver multiOutputReceiver) {
+    Snapshot snapshot = checkStateNotNull(this.snapshot);
+
+    if (result.bidirectional.isEmpty()) {
+      return;
+    }
+    Table table = checkStateNotNull(this.table);
+
+    long totalBytes =
+        result.bidirectional.stream().mapToLong(SerializableChangelogTask::getLength).sum();
+
+    @Nullable Row overlapLowerRow = result.overlapLowerRow(scanConfig.rowIdBeamSchema());
+    @Nullable Row overlapUpperRow = result.overlapUpperRow(scanConfig.rowIdBeamSchema());
+    ChangelogDescriptor descriptor =
+        ChangelogDescriptor.builder()
+            .setTableIdentifierString(scanConfig.getTableIdentifier())
+            .setOverlapLower(overlapLowerRow)
+            .setOverlapUpper(overlapUpperRow)
+            .build();
+
+    List<SerializableChangelogTask> serializedTasks =
+        result.bidirectional.stream().map(t -> makeTask(t, table)).collect(Collectors.toList());
+
+    // If the batch is small enough, we can route to LOCAL (in-memory) resolver
+    if (totalBytes <= splitSize()) {
+      Instant ts = Instant.ofEpochMilli(snapshot.timestampMillis());
+      multiOutputReceiver
+          .get(SMALL_BIDIRECTIONAL_TASKS)
+          .outputWithTimestamp(KV.of(descriptor, serializedTasks), ts);
+      LOG.debug(
+          "Snapshot {}: routing {} bi-directional tasks (~{} bytes) to local resolution",
+          snapshot.snapshotId(),
+          result.bidirectional.size(),
+          totalBytes);
+      numSmallBiDirTasks += result.bidirectional.size();
+      numSmallBiDirSplits++;
+      return;
+    }
+
+    // If the batch is too big, we need to route to the CoGBK for distributed resolution
+    for (SerializableChangelogTask t : serializedTasks) {
+      largeBiBatcher.add(descriptor, t, t.getLength());
+    }
+    LOG.debug(
+        "Snapshot {}: routing {} bi-directional tasks (~{} bytes) to distributed resolution",
+        snapshot.snapshotId(),
+        result.bidirectional.size(),
+        totalBytes);
+  }
+
+  /**
+   * Builds the {@link SerializableChangelogTask} for {@code task}, passing along the partition
+   * specs of {@code table}.
+   */
+  private static SerializableChangelogTask makeTask(ChangelogScanTask task, Table table) {
+    Map<Integer, PartitionSpec> specsById = table.specs();
+    return SerializableChangelogTask.from(task, specsById);
+  }
+
+  /**
+   * Wraps the {@link ChangelogScanTask}, and stores its lower and upper Primary Keys. Identifies
+   * overlaps with other tasks by comparing lower and upper keys using Iceberg libraries.
+   */
+  static class TaskAndBounds {
+    ChangelogScanTask task;
+    StructLike lowerId;
+    StructLike upperId;
+    boolean overlaps = false;
+
+    private TaskAndBounds(ChangelogScanTask task, StructLike lowerId, StructLike upperId) {
+      this.task = task;
+      this.lowerId = lowerId;
+      this.upperId = upperId;
+    }
+
+    static TaskAndBounds of(
+        ChangelogScanTask task, Schema recIdSchema, Comparator<StructLike> idComp)
+        throws NoBoundMetricsException {
+      @MonotonicNonNull GenericRecord lowerId = null;
+      @MonotonicNonNull GenericRecord upperId = null;
+
+      if (task instanceof AddedRowsScanTask || task instanceof DeletedDataFileScanTask) {
+        // just store the bounds of the DataFile
+        DataFile df = getDataFile(task);
+        @Nullable Map<Integer, ByteBuffer> lowerBounds = df.lowerBounds();
+        @Nullable Map<Integer, ByteBuffer> upperBounds = df.upperBounds();
+        if (lowerBounds == null || upperBounds == null) {
+          throw new NoBoundMetricsException(
+              format(
+                  "Upper and/or lower bounds are missing for %s with DataFile: %s.",
+                  task.getClass().getSimpleName(), df.location()));
+        }
+
+        lowerId = createRecId(recIdSchema, lowerBounds);
+        upperId = createRecId(recIdSchema, upperBounds);
+      } else if (task instanceof DeletedRowsScanTask) {
+        // iterate over all added DeleteFiles and keep track of only the
+        // minimum and maximum bounds over the list
+        for (DeleteFile deleteFile : ((DeletedRowsScanTask) task).addedDeletes()) {
+          @Nullable Map<Integer, ByteBuffer> lowerDelBounds = deleteFile.lowerBounds();
+          @Nullable Map<Integer, ByteBuffer> upperDelBounds = deleteFile.upperBounds();
+          if (lowerDelBounds == null || upperDelBounds == null) {
+            throw new NoBoundMetricsException(
+                format(
+                    "Upper and/or lower bounds are missing for %s with "
+                        + "DataFile '%s' and DeleteFile '%s'",
+                    task.getClass().getSimpleName(),
+                    getDataFile(task).location(),
+                    deleteFile.location()));
+          }
+
+          GenericRecord delFileLower = createRecId(recIdSchema, lowerDelBounds);
+          GenericRecord delFileUpper = createRecId(recIdSchema, upperDelBounds);
+
+          if (lowerId == null || idComp.compare(delFileLower, lowerId) < 0) {
+            lowerId = delFileLower;
+          }
+          if (upperId == null || idComp.compare(delFileUpper, upperId) > 0) {
+            upperId = delFileUpper;
+          }
+        }
+      } else {
+        throw new UnsupportedOperationException(
+            "Unsupported task type: " + task.getClass().getSimpleName());
+      }
+
+      if (lowerId == null || upperId == null) {
+        throw new NoBoundMetricsException(
+            format(
+                "Could not compute min and/or max bounds for %s with DataFile: %s",
+                task.getClass().getSimpleName(), getDataFile(task).location()));
+      }
+      return new TaskAndBounds(task, lowerId, upperId);
+    }
+
+    /**
+     * Compares itself with another task. If the bounds overlap, sets {@link #overlaps} to true for
+     * both tasks.
+     */
+    private void checkOverlapWith(TaskAndBounds other, Comparator<StructLike> idComp) {
+      if (overlaps && other.overlaps) {
+        return;
+      }
+
+      int left = idComp.compare(lowerId, other.upperId);
+      int right = idComp.compare(other.lowerId, upperId);
+
+      if (left <= 0 && right <= 0) {
+        overlaps = true;
+        other.overlaps = true;
+      }
+    }
+
+    private static GenericRecord createRecId(Schema recIdSchema, Map<Integer, ByteBuffer> bounds)
+        throws NoBoundMetricsException {
+      GenericRecord recId = GenericRecord.create(recIdSchema);
+
+      for (Types.NestedField field : recIdSchema.columns()) {
+        int fieldId = field.fieldId();
+        Type type = field.type();
+        String name = field.name();
+        @Nullable ByteBuffer value = bounds.get(fieldId);
+        if (value == null) {
+          throw new NoBoundMetricsException("Could not fetch metric value for column: " + name);
+        }
+        Object data = checkStateNotNull(Conversions.fromByteBuffer(type, value));
+        recId.setField(name, data);
+      }
+      return recId;
+    }
+
+    static class NoBoundMetricsException extends Exception {
+      public NoBoundMetricsException(String msg) {
+        super(msg);
+      }
+    }
+  }
+
+  /**
+   * Returns true when the spec is partitioned, its schema declares identifier fields, and every
+   * partition field is sourced from one of those identifier fields.
+   */
+  private static boolean doesSpecPinRecordsToPartition(PartitionSpec spec) {
+    Set<Integer> pkFieldIds = spec.schema().identifierFieldIds();
+    if (spec.isUnpartitioned() || pkFieldIds.isEmpty()) {
+      return false;
+    }
+    return spec.fields().stream().allMatch(field -> pkFieldIds.contains(field.sourceId()));
+  }
+
+  /**
+   * Accumulates tasks headed for one tagged PCollection and emits them in batches.
+   *
+   * <p>One instance batches uni-directional tasks for the {@link #UNIDIRECTIONAL_TASKS} tag.
+   *
+   * <p>Another batches large bi-directional tasks for the {@link #LARGE_BIDIRECTIONAL_TASKS} tag.
+   *
+   * <p>The pending batch is flushed before a task that would push it past {@link #splitSize()}.
+   *
+   * <p>Note: small bi-directional tasks bypass this class. They are emitted immediately to {@link
+   * #SMALL_BIDIRECTIONAL_TASKS}.
+   */
+  static class TaskBatcher {
+    Map<ChangelogDescriptor, List<SerializableChangelogTask>> tasks = new HashMap<>();
+    long byteSize = 0L;
+    final long maxSplitSize;
+    final String tableIdentifier;
+    final Instant timestamp;
+    final OutputReceiver<KV<ChangelogDescriptor, List<SerializableChangelogTask>>> output;
+    int totalSplits = 0;
+
+    TaskBatcher(
+        String tableIdentifier,
+        Long timestampMillis,
+        long maxSplitSize,
+        OutputReceiver<KV<ChangelogDescriptor, List<SerializableChangelogTask>>> output) {
+      this.output = output;
+      this.maxSplitSize = maxSplitSize;
+      this.timestamp = Instant.ofEpochMilli(timestampMillis);
+      this.tableIdentifier = tableIdentifier;
+    }
+
+    /** Returns true when {@code sizeBytes} more bytes still fit in the pending batch. */
+    boolean canTake(long sizeBytes) {
+      return byteSize + sizeBytes <= maxSplitSize;
+    }
+
+    /** Converts and adds each scan task under a descriptor holding only the table identifier. */
+    void add(List<ChangelogScanTask> tasks, Table table) {
+      for (ChangelogScanTask scanTask : tasks) {
+        add(makeTask(scanTask, table), getLength(scanTask));
+      }
+    }
+
+    /** Adds a task under a descriptor holding only the table identifier. */
+    void add(SerializableChangelogTask task, long sizeBytes) {
+      ChangelogDescriptor tableOnly =
+          ChangelogDescriptor.builder().setTableIdentifierString(tableIdentifier).build();
+      add(tableOnly, task, sizeBytes);
+    }
+
+    /** Adds a task under the given descriptor, flushing the pending batch first if it is full. */
+    void add(ChangelogDescriptor descriptor, SerializableChangelogTask task, long sizeBytes) {
+      if (!canTake(sizeBytes)) {
+        flush();
+      }
+      List<SerializableChangelogTask> group =
+          tasks.computeIfAbsent(descriptor, d -> new ArrayList<>());
+      group.add(task);
+      byteSize += sizeBytes;
+    }
+
+    /**
+     * Emits one element per descriptor in the pending batch, all at this batcher's timestamp,
+     * then starts a fresh batch and counts one more split. Does nothing when nothing is pending.
+     */
+    void flush() {
+      if (tasks.isEmpty()) {
+        return;
+      }
+
+      tasks.forEach(
+          (descriptor, taskList) ->
+              output.outputWithTimestamp(KV.of(descriptor, taskList), timestamp));
+
+      tasks = new HashMap<>();
+      byteSize = 0;
+      totalSplits++;
+    }
+  }
+
+  /**
+   * Returns the target split size for the downstream read DoFn, read from the table's properties
+   * with a fallback to the default. Tasks are grouped to roughly this size, so users can tune the
+   * load per worker through <a
+   * href="https://iceberg.apache.org/docs/latest/configuration/#read-properties">`read.split.target-size`</a>
+   */
+  long splitSize() {
+    Map<String, String> tableProperties = checkStateNotNull(table).properties();
+    return PropertyUtil.propertyAsLong(
+        tableProperties, TableProperties.SPLIT_SIZE, TableProperties.SPLIT_SIZE_DEFAULT);
+  }
+
+  /** Returns the last {@code "-"}-separated segment of {@code path}. */
+  static String name(String path) {
+    Iterable<String> dashSeparated = Splitter.on("-").split(path);
+    return Iterables.getLast(dashSeparated);
+  }
+
+  /** Zeroes every local task and split counter. */
+  private void resetLocalMetrics() {
+    // split counters
+    numLargeBiDirSplits = 0;
+    numSmallBiDirSplits = 0;
+    numUniDirSplits = 0;
+
+    // counters per routing direction
+    numSmallBiDirTasks = 0;
+    numLargeBiDirTasks = 0;
+    numUniDirTasks = 0;
+
+    // counters per task type
+    numDeletedFileTasks = 0;
+    numDeletedRowsTasks = 0;
+    numAddedRowsTasks = 0;
+  }
+
+  /** Bumps the local counter that matches the given task type. */
+  private void gatherTaskTypeMetrics(SerializableChangelogTask.Type type) {
+    switch (type) {
+      case DELETED_FILE:
+        numDeletedFileTasks++;
+        break;
+      case DELETED_ROWS:
+        numDeletedRowsTasks++;
+        break;
+      case ADDED_ROWS:
+        numAddedRowsTasks++;
+        break;
+    }
+  }
+
+  /**
+   * Publishes the local counters to the metric counters and records the uni-directional split
+   * count from the batcher.
+   *
+   * @return the total number of tasks, i.e. the sum of the three per-type counters
+   */
+  private int updateTaskCounters() {
+    numUniDirSplits = checkStateNotNull(uniBatcher).totalSplits;
+
+    // per task type
+    numAddedRowsScanTasks.inc(numAddedRowsTasks);
+    numDeletedRowsScanTasks.inc(numDeletedRowsTasks);
+    numDeletedDataFileScanTasks.inc(numDeletedFileTasks);
+
+    int taskCount = numAddedRowsTasks + numDeletedRowsTasks + numDeletedFileTasks;
+    totalChangelogScanTasks.inc(taskCount);
+
+    // per routing direction; the large metric excludes the small tasks
+    numUniDirectionalTasks.inc(numUniDirTasks);
+    numSmallBiDirectionalTasks.inc(numSmallBiDirTasks);
+    numLargeBiDirectionalTasks.inc(numLargeBiDirTasks - numSmallBiDirTasks);
+
+    return taskCount;
+  }
+
+  /**
+   * Builds the log summary for the scanned snapshot: the total number of changelog tasks and, per
+   * routing category with at least one task, the number of splits and tasks emitted.
+   *
+   * <p>The large bi-directional count is reported as {@code numLargeBiDirTasks -
+   * numSmallBiDirTasks}, matching what {@link #updateTaskCounters()} publishes for that metric.
+   *
+   * @param totalTasks total number of changelog tasks produced by the snapshot
+   * @return the formatted, possibly multi-line, message
+   */
+  private String scanResultMessage(int totalTasks) {
+    StringBuilder message = new StringBuilder();
+    message.append(
+        format(
+            "Snapshot %s (seq: %s) produced %s changelog tasks.",
+            checkStateNotNull(snapshot).snapshotId(),
+            checkStateNotNull(snapshot).sequenceNumber(),
+            totalTasks));
+    if (totalTasks > 0) {
+      // leading space keeps this apart from the sentence above
+      message.append(" Emitted:");
+      if (numUniDirTasks > 0) {
+        message.append(
+            format(
+                "\n\t%s splits containing %s uni-directional tasks",
+                numUniDirSplits, numUniDirTasks));
+      }
+      if (numSmallBiDirTasks > 0) {
+        message.append(
+            format(
+                "\n\t%s splits containing %s small bi-directional tasks (for local resolution)",
+                numSmallBiDirSplits, numSmallBiDirTasks));
+      }
+      // numLargeBiDirTasks also counts the tasks that were routed to local resolution
+      long largeBiDirTasks = numLargeBiDirTasks - numSmallBiDirTasks;
+      if (largeBiDirTasks > 0) {
+        message.append(
+            format(
+                "\n\t%s splits containing %s large bi-directional tasks (to be shuffled)",
+                numLargeBiDirSplits, largeBiDirTasks));
+      }
+    }
+    return message.toString();
+  }
+}
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/DeleteReader.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/DeleteReader.java
new file mode 100644
index 000000000000..e1b9a9c98583
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/DeleteReader.java
@@ -0,0 +1,309 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg.cdc;
+
+import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Predicate;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Maps;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Multimap;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Multimaps;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Sets;
+import org.apache.iceberg.Accessor;
+import org.apache.iceberg.DeleteFile;
+import org.apache.iceberg.MetadataColumns;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.data.BaseDeleteLoader;
+import org.apache.iceberg.data.DeleteLoader;
+import org.apache.iceberg.deletes.PositionDeleteIndex;
+import org.apache.iceberg.io.CloseableIterable;
+import org.apache.iceberg.io.InputFile;
+import org.apache.iceberg.types.TypeUtil;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.StructLikeSet;
+import org.apache.iceberg.util.StructProjection;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Reads a {@link org.apache.iceberg.DataFile} and returns records marked deleted by the given
+ * {@link DeleteFile}s.
+ *
+ * <p>This is mostly a copy of {@link org.apache.iceberg.data.DeleteFilter}, but flipping the logic
+ * to output deleted records instead of filtering them out.
+ */
+public abstract class DeleteReader<T> {
+  private static final Logger LOG = LoggerFactory.getLogger(DeleteReader.class);
+
+  // Location of the data file being read; handed to the loader when loading position deletes.
+  private final String filePath;
+  private final List<DeleteFile> posDeletes;
+  private final List<DeleteFile> eqDeletes;
+  private final PreloadedDeletes preloadedDeletes;
+  private final Schema requiredSchema;
+  // NOTE(review): accessorForField presumably yields null when requiredSchema has no _pos
+  // column, in which case pos(...) would fail with an NPE -- confirm against callers.
+  private final Accessor<StructLike> posAccessor;
+  // Lazily initialized state. Only deleteLoader is guarded for concurrent initialization.
+  private volatile @Nullable DeleteLoader deleteLoader = null;
+  private @Nullable PositionDeleteIndex deleteRowPositions = null;
+  private @Nullable List<Predicate<T>> isInDeleteSets = null;
+
+  /**
+   * Creates a reader for one data file and the delete files that apply to it.
+   *
+   * @param filePath location of the data file whose deleted rows are read
+   * @param deletes position and/or equality delete files; they are split by content type
+   * @param tableSchema table schema used to look up fields that the deletes require but that are
+   *     missing from {@code expectedSchema}
+   * @param expectedSchema the schema requested by the caller
+   * @param needRowPosCol whether the row position column should be added to the required schema
+   *     when position deletes are present
+   * @param preloadedDeletes delete data that was already loaded; consulted before any delete file
+   *     is read
+   * @throws UnsupportedOperationException if a delete file has an unrecognized content type
+   */
+  protected DeleteReader(
+      String filePath,
+      List<DeleteFile> deletes,
+      Schema tableSchema,
+      Schema expectedSchema,
+      boolean needRowPosCol,
+      PreloadedDeletes preloadedDeletes) {
+    this.filePath = filePath;
+    this.preloadedDeletes = preloadedDeletes;
+
+    ImmutableList.Builder<DeleteFile> posDeleteBuilder = ImmutableList.builder();
+    ImmutableList.Builder<DeleteFile> eqDeleteBuilder = ImmutableList.builder();
+    for (DeleteFile delete : deletes) {
+      switch (delete.content()) {
+        case POSITION_DELETES:
+          LOG.debug("Adding position delete file {} to reader", delete.location());
+          posDeleteBuilder.add(delete);
+          break;
+        case EQUALITY_DELETES:
+          LOG.debug("Adding equality delete file {} to reader", delete.location());
+          eqDeleteBuilder.add(delete);
+          break;
+        default:
+          throw new UnsupportedOperationException(
+              "Unknown delete file content: " + delete.content());
+      }
+    }
+
+    this.posDeletes = posDeleteBuilder.build();
+    this.eqDeletes = eqDeleteBuilder.build();
+    this.requiredSchema =
+        fileProjection(tableSchema, expectedSchema, posDeletes, eqDeletes, needRowPosCol);
+    this.posAccessor = requiredSchema.accessorForField(MetadataColumns.ROW_POSITION.fieldId());
+  }
+
+  /**
+   * Returns the schema that records passed to {@link #read} must have: the expected schema plus
+   * any columns the delete files need in order to be applied.
+   */
+  public Schema requiredSchema() {
+    return requiredSchema;
+  }
+
+  /** Adapts a record of type {@code T} to Iceberg's {@link StructLike} view. */
+  protected abstract StructLike asStructLike(T record);
+
+  /** Opens the file at the given location. */
+  protected abstract InputFile getInputFile(String location);
+
+  /** Opens a delete file by delegating to {@link #getInputFile} with its location. */
+  protected InputFile loadInputFile(DeleteFile deleteFile) {
+    return getInputFile(deleteFile.location());
+  }
+
+  /** Returns the row position of {@code record}, read through the {@code _pos} accessor. */
+  protected long pos(T record) {
+    return (Long) posAccessor.get(asStructLike(record));
+  }
+
+  /** Creates the loader used to read delete files; subclasses may override it. */
+  protected DeleteLoader newDeleteLoader() {
+    return new BaseDeleteLoader(this::loadInputFile);
+  }
+
+  /** Returns the delete loader, creating it on first use under a lock on this reader. */
+  private DeleteLoader deleteLoader() {
+    if (deleteLoader == null) {
+      synchronized (this) {
+        if (deleteLoader == null) {
+          this.deleteLoader = newDeleteLoader();
+        }
+      }
+    }
+
+    return deleteLoader;
+  }
+
+  /**
+   * Returns records that are deleted by <b>either</b> the position deletes <b>or</b> the equality
+   * deletes attached to this reader — i.e. the union of the two delete predicates.
+   *
+   * <p>Each delete-type predicate is built independently and defaults to "false" (no contribution
+   * to the union) when its side has no delete files. Both predicates are then OR-combined and
+   * applied in a single pass over {@code records}. This guarantees that:
+   *
+   * <ul>
+   *   <li>A task with only position deletes emits all records whose position is in the index.
+   *   <li>A task with only equality deletes emits all records matching any equality delete value.
+   *   <li>A task with both emits the union of the two (without duplication).
+   * </ul>
+   *
+   * @param records records of the data file, laid out according to {@link #requiredSchema()}
+   * @return a filtered view of {@code records} containing only the deleted ones
+   */
+  public CloseableIterable<T> read(CloseableIterable<T> records) {
+    Predicate<T> isPosDeleted =
+        posDeletes.isEmpty() ? t -> false : positionDeletePredicate(deletedRowPositions());
+    Predicate<T> isEqDeleted = applyEqDeletes().stream().reduce(Predicate::or).orElse(t -> false);
+    return CloseableIterable.filter(records, isPosDeleted.or(isEqDeleted));
+  }
+
+  /** Builds a predicate that matches records whose position is marked deleted in the index. */
+  private Predicate<T> positionDeletePredicate(PositionDeleteIndex positionIndex) {
+    return record -> positionIndex.isDeleted(pos(record));
+  }
+
+  /**
+   * Returns one predicate per distinct set of equality field IDs; each predicate matches records
+   * found in the corresponding equality delete set. The result is memoized.
+   *
+   * <p>The memoized list is assigned only after every predicate has been built, so a failure
+   * while loading delete files never leaves a partially populated list behind for later calls.
+   */
+  private List<Predicate<T>> applyEqDeletes() {
+    List<Predicate<T>> cached = isInDeleteSets;
+    if (cached != null) {
+      return cached;
+    }
+
+    List<Predicate<T>> predicates = Lists.newArrayList();
+    if (!eqDeletes.isEmpty()) {
+      // Group delete files by the set of field IDs they compare on.
+      Multimap<Set<Integer>, DeleteFile> filesByDeleteIds =
+          Multimaps.newMultimap(Maps.newHashMap(), Lists::newArrayList);
+      for (DeleteFile delete : eqDeletes) {
+        filesByDeleteIds.put(Sets.newHashSet(delete.equalityFieldIds()), delete);
+      }
+
+      for (Map.Entry<Set<Integer>, Collection<DeleteFile>> entry :
+          filesByDeleteIds.asMap().entrySet()) {
+        Set<Integer> ids = entry.getKey();
+        Iterable<DeleteFile> deletes = entry.getValue();
+
+        Schema deleteSchema = TypeUtil.select(requiredSchema, ids);
+
+        // a projection to select and reorder fields of the file schema to match the delete rows
+        StructProjection projectRow = StructProjection.create(requiredSchema, deleteSchema);
+
+        // Prefer a set that was already loaded; otherwise read the delete files now.
+        StructLikeSet deleteSet = preloadedDeletes.equalityDeleteSet(ids);
+        if (deleteSet == null) {
+          deleteSet = deleteLoader().loadEqualityDeletes(deletes, deleteSchema);
+        }
+        StructLikeSet deleteSetForPredicate = deleteSet;
+        predicates.add(
+            record -> deleteSetForPredicate.contains(projectRow.wrap(asStructLike(record))));
+      }
+    }
+
+    isInDeleteSets = predicates;
+    return predicates;
+  }
+
+  /**
+   * Returns the index of deleted row positions for this data file, resolving it on first use from
+   * the preloaded deletes or, failing that, by loading the position delete files.
+   *
+   * @throws IllegalStateException if no index is available, i.e. nothing was preloaded and this
+   *     reader has no position delete files
+   */
+  public PositionDeleteIndex deletedRowPositions() {
+    if (deleteRowPositions == null) {
+      deleteRowPositions = preloadedDeletes.positionDeleteIndex();
+      if (deleteRowPositions == null && !posDeletes.isEmpty()) {
+        deleteRowPositions = deleteLoader().loadPositionDeletes(posDeletes, filePath);
+      }
+    }
+
+    return checkStateNotNull(deleteRowPositions);
+  }
+
+  /** Delete data already loaded by a planning/pushdown path for one task read. */
+  public static final class PreloadedDeletes {
+    private static final PreloadedDeletes EMPTY =
+        new PreloadedDeletes(null, Collections.emptyMap());
+
+    private final @Nullable PositionDeleteIndex positionDeleteIndex;
+    // Keyed by the set of equality field IDs the delete set was built for.
+    private final Map<Set<Integer>, StructLikeSet> equalityDeleteSets;
+
+    /** Returns the shared instance that carries no preloaded deletes. */
+    public static PreloadedDeletes empty() {
+      return EMPTY;
+    }
+
+    /**
+     * Wraps already-loaded delete data, returning the shared empty instance when there is none.
+     *
+     * @param positionDeleteIndex preloaded position deletes, or null if not preloaded
+     * @param equalityDeleteSets preloaded equality delete sets keyed by their equality field IDs
+     */
+    public static PreloadedDeletes of(
+        @Nullable PositionDeleteIndex positionDeleteIndex,
+        Map<Set<Integer>, StructLikeSet> equalityDeleteSets) {
+      if (positionDeleteIndex == null && equalityDeleteSets.isEmpty()) {
+        return EMPTY;
+      }
+      return new PreloadedDeletes(positionDeleteIndex, equalityDeleteSets);
+    }
+
+    private PreloadedDeletes(
+        @Nullable PositionDeleteIndex positionDeleteIndex,
+        Map<Set<Integer>, StructLikeSet> equalityDeleteSets) {
+      this.positionDeleteIndex = positionDeleteIndex;
+      // Copy the map and its key sets so later changes by the caller cannot alter lookups.
+      // The StructLikeSet values themselves are shared, not copied.
+      Map<Set<Integer>, StructLikeSet> copied = new HashMap<>();
+      for (Map.Entry<Set<Integer>, StructLikeSet> entry : equalityDeleteSets.entrySet()) {
+        copied.put(Collections.unmodifiableSet(Sets.newHashSet(entry.getKey())), entry.getValue());
+      }
+      this.equalityDeleteSets = Collections.unmodifiableMap(copied);
+    }
+
+    /** Returns the preloaded position delete index, or null if none was preloaded. */
+    public @Nullable PositionDeleteIndex positionDeleteIndex() {
+      return positionDeleteIndex;
+    }
+
+    /** Returns the preloaded equality delete set for the given field IDs, or null if absent. */
+    public @Nullable StructLikeSet equalityDeleteSet(Set<Integer> equalityFieldIds) {
+      return equalityDeleteSets.get(equalityFieldIds);
+    }
+  }
+
+  /**
+   * Computes the schema to read from the data file: {@code requestedSchema} extended with any
+   * columns needed to apply the deletes.
+   *
+   * <p>Missing equality fields are appended in the order they are first required, followed by the
+   * {@code _pos} and {@code _deleted} metadata columns when those are missing. The requested
+   * schema is returned unchanged when there are no deletes or nothing is missing.
+   *
+   * @throws IllegalArgumentException if a required field ID is not a top-level field of {@code
+   *     tableSchema}
+   */
+  private static Schema fileProjection(
+      Schema tableSchema,
+      Schema requestedSchema,
+      List<DeleteFile> posDeletes,
+      List<DeleteFile> eqDeletes,
+      boolean needRowPosCol) {
+    if (posDeletes.isEmpty() && eqDeletes.isEmpty()) {
+      return requestedSchema;
+    }
+
+    Set<Integer> requiredIds = Sets.newLinkedHashSet();
+    if (needRowPosCol && !posDeletes.isEmpty()) {
+      requiredIds.add(MetadataColumns.ROW_POSITION.fieldId());
+    }
+
+    for (DeleteFile eqDelete : eqDeletes) {
+      requiredIds.addAll(eqDelete.equalityFieldIds());
+    }
+
+    Set<Integer> missingIds =
+        Sets.newLinkedHashSet(
+            Sets.difference(requiredIds, TypeUtil.getProjectedIds(requestedSchema)));
+
+    if (missingIds.isEmpty()) {
+      return requestedSchema;
+    }
+
+    // TODO: support adding nested columns. this will currently fail when finding nested columns to
+    // add
+    List<Types.NestedField> columns = Lists.newArrayList(requestedSchema.columns());
+    for (int fieldId : missingIds) {
+      if (fieldId == MetadataColumns.ROW_POSITION.fieldId()
+          || fieldId == MetadataColumns.IS_DELETED.fieldId()) {
+        continue; // add _pos and _deleted at the end
+      }
+
+      Types.NestedField field = tableSchema.asStruct().field(fieldId);
+      Preconditions.checkArgument(field != null, "Cannot find required field for ID %s", fieldId);
+
+      columns.add(field);
+    }
+
+    if (missingIds.contains(MetadataColumns.ROW_POSITION.fieldId())) {
+      columns.add(MetadataColumns.ROW_POSITION);
+    }
+
+    if (missingIds.contains(MetadataColumns.IS_DELETED.fieldId())) {
+      columns.add(MetadataColumns.IS_DELETED);
+    }
+
+    return new Schema(columns);
+  }
+}
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/IncrementalChangelogSource.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/IncrementalChangelogSource.java
new file mode 100644
index 000000000000..003465bbfe78
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/IncrementalChangelogSource.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg.cdc;
+
+import static org.apache.beam.sdk.io.iceberg.cdc.ChangelogScanner.LARGE_BIDIRECTIONAL_TASKS;
+import static org.apache.beam.sdk.io.iceberg.cdc.ChangelogScanner.SMALL_BIDIRECTIONAL_TASKS;
+import static org.apache.beam.sdk.io.iceberg.cdc.ChangelogScanner.UNIDIRECTIONAL_TASKS;
+import static org.apache.beam.sdk.io.iceberg.cdc.ResolveChanges.DELETES;
+import static org.apache.beam.sdk.io.iceberg.cdc.ResolveChanges.INSERTS;
+import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
+
+import java.util.List;
+import java.util.stream.Collectors;
+import org.apache.beam.sdk.io.iceberg.IcebergScanConfig;
+import org.apache.beam.sdk.io.iceberg.IcebergUtils;
+import org.apache.beam.sdk.io.iceberg.ReadUtils;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.Flatten;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.Redistribute;
+import org.apache.beam.sdk.transforms.join.CoGroupByKey;
+import org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple;
+import org.apache.beam.sdk.transforms.windowing.AfterWatermark;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
+import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PBegin;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionList;
+import org.apache.beam.sdk.values.PCollectionTuple;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.TimestampedValue;
+import org.apache.beam.sdk.values.TupleTagList;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.MoreObjects;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.catalog.TableIdentifier;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+
+/**
+ * An Iceberg source that incrementally reads a table's changelogs, processing one snapshot at a
+ * time.
+ *
+ * <p>The streaming path uses {@link WatchForSnapshotsSdf} for proper per-snapshot watermarks. The
+ * bounded path creates the snapshot range up front.
+ */
+public class IncrementalChangelogSource extends PTransform<PBegin, PCollection<Row>> {
+  private static final Duration DEFAULT_POLL_INTERVAL = Duration.standardSeconds(60);
+
+  private final IcebergScanConfig scanConfig;
+
+  public IncrementalChangelogSource(IcebergScanConfig scanConfig) {
+    this.scanConfig = scanConfig;
+  }
+
+  @Override
+  public PCollection<Row> expand(PBegin input) {
+    // emit one SnapshotInfo per element, with element timestamp -> snapshot commit time.
+    PCollection<Long> snapshots =
+        MoreObjects.firstNonNull(scanConfig.getStreaming(), false)
+            ? unboundedSnapshots(input)
+            : boundedSnapshots(input);
+
+    // process one snapshot at a time and produce batches of changelog scan tasks.
+    // tasks are emitted to three outputs:
+    // 1. unidirectional tasks: we know these won't have any updates
+    // 2. small bidirectional tasks: these may contain an update, but the batch is small enough to
+    // resolve in-memory
+    // 2. large bidirectional tasks: may contain an update, but are too large for in-memory
+    // resolution. will
+    //    need to run these output rows through a CoGBK
+    PCollectionTuple changelogTasks =
+        snapshots.apply(
+            "Create Changelog Tasks",
+            ParDo.of(new ChangelogScanner(scanConfig))
+                .withOutputTags(
+                    UNIDIRECTIONAL_TASKS,
+                    TupleTagList.of(LARGE_BIDIRECTIONAL_TASKS).and(SMALL_BIDIRECTIONAL_TASKS)));
+    changelogTasks.get(UNIDIRECTIONAL_TASKS).setCoder(ChangelogScanner.OUTPUT_CODER);
+    changelogTasks.get(SMALL_BIDIRECTIONAL_TASKS).setCoder(ChangelogScanner.OUTPUT_CODER);
+    changelogTasks.get(LARGE_BIDIRECTIONAL_TASKS).setCoder(ChangelogScanner.OUTPUT_CODER);
+
+    Schema projectedRowSchema =
+        IcebergUtils.icebergSchemaToBeamSchema(scanConfig.getProjectedSchema());
+
+    // reads UNIDIRECTIONAL and BIDIRECTIONAL tags and produces rows.
+    ReadFromChangelogs.CdcOutput outputRows =
+        changelogTasks.apply(new ReadFromChangelogs(scanConfig));
+
+    // Small overlapping groups get resolved entirely in memory with no shuffle.
+    PCollection<Row> smallBidirectionalCdcRows =
+        changelogTasks
+            .get(SMALL_BIDIRECTIONAL_TASKS)
+            .apply("Redistribute Small Bidirectional Changes", Redistribute.arbitrarily())
+            .apply("Resolve Locally", ParDo.of(new LocalResolveDoFn(scanConfig)))
+            .setRowSchema(projectedRowSchema);
+
+    // BIDIRECTIONAL records go through a CoGBK and ResolveChanges
+    // We window locally using a custom WindowFn based on the snapsot's commit time. Each snapshot
+    // exists in its own window.
+    // We re-window the resolved output back to GlobalWindows before the final Flatten
+    // to align with the other branches.
+    Window<KV<KV<Long, Row>, Row>> keyedWindowing =
+        Window.<KV<KV<Long, Row>, Row>>into(new SnapshotWindowFn())
+            .triggering(AfterWatermark.pastEndOfWindow())
+            .withAllowedLateness(Duration.ZERO)
+            .discardingFiredPanes();
+    PCollection<KV<KV<Long, Row>, Row>> keyedInserts =
+        outputRows.biDirectionalInserts().apply("Window Inserts", keyedWindowing);
+    PCollection<KV<KV<Long, Row>, Row>> keyedDeletes =
+        outputRows.biDirectionalDeletes().apply("Window Deletes", keyedWindowing);
+    PCollection<Row> biDirectionalCdcRows =
+        KeyedPCollectionTuple.of(INSERTS, keyedInserts)
+            .and(DELETES, keyedDeletes)
+            .apply("CoGroupBy Primary Key", CoGroupByKey.create())
+            .apply("Resolve Delete-Insert Pairs", ParDo.of(new ResolveChanges(scanConfig)))
+            .setRowSchema(projectedRowSchema)
+            .apply("Re-window to Global", Window.into(new GlobalWindows()));
+
+    // Merge all three paths into a single output. All three are in GlobalWindows.
+    PCollection<Row> merged =
+        PCollectionList.of(outputRows.uniDirectionalRows())
+            .and(smallBidirectionalCdcRows)
+            .and(biDirectionalCdcRows)
+            .apply(Flatten.pCollections());
+
+    // If the user configures a watermark column, restamp each record by
+    // that column's value. Output watermark then advances per-record rather than per-snapshot.
+    @Nullable String watermarkColumn = scanConfig.getWatermarkColumn();
+    if (watermarkColumn != null) {
+      merged =
+          merged.apply(
+              "Apply Watermark Column", ParDo.of(new ApplyWatermarkColumn(watermarkColumn)));
+    }
+
+    return merged.setRowSchema(projectedRowSchema);
+  }
+
+  /**
+   * Continuously watches the Iceberg table for new snapshots via {@link WatchForSnapshotsSdf} and
+   * emits per snapshot.
+   */
+  private PCollection<Long> unboundedSnapshots(PBegin input) {
+    Duration pollInterval =
+        MoreObjects.firstNonNull(scanConfig.getPollInterval(), DEFAULT_POLL_INTERVAL);
+    return input
+        .apply("Impulse", Create.of(""))
+        .apply("Watch for Snapshots", ParDo.of(new WatchForSnapshotsSdf(scanConfig, pollInterval)));
+  }
+
+  /**
+   * Reads the full snapshot range up front and emits each snapshot individually, each carrying its
+   * own commit time as the element timestamp.
+   */
+  private PCollection<Long> boundedSnapshots(PBegin input) {
+    Table table =
+        scanConfig
+            .getCatalogConfig()
+            .catalog()
+            .loadTable(TableIdentifier.parse(scanConfig.getTableIdentifier()));
+    checkStateNotNull(
+        table.currentSnapshot(),
+        "Table %s does not have any snapshots to read from.",
+        scanConfig.getTableIdentifier());
+
+    @Nullable Long from = ReadUtils.getFromSnapshotExclusive(table, scanConfig);
+    long to =
+        MoreObjects.firstNonNull(
+            ReadUtils.getToSnapshot(table, scanConfig), table.currentSnapshot().snapshotId());
+    List<TimestampedValue<Long>> timestamped =
+        ReadUtils.snapshotsBetween(table, scanConfig.getTableIdentifier(), from, to).stream()
+            .map(
+                s ->
+                    TimestampedValue.of(
+                        s.getSnapshotId(), Instant.ofEpochMilli(s.getTimestampMillis())))
+            .collect(Collectors.toList());
+    return input.apply("Create Snapshot Range", Create.timestamped(timestamped));
+  }
+}
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/LocalResolveDoFn.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/LocalResolveDoFn.java
new file mode 100644
index 000000000000..0d9398d7043c
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/LocalResolveDoFn.java
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg.cdc;
+
+import static org.apache.beam.sdk.io.iceberg.IcebergUtils.icebergSchemaToBeamSchema;
+import static org.apache.beam.sdk.io.iceberg.cdc.SerializableChangelogTask.Type.ADDED_ROWS;
+import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.beam.sdk.io.iceberg.IcebergScanConfig;
+import org.apache.beam.sdk.io.iceberg.IcebergUtils;
+import org.apache.beam.sdk.io.iceberg.TableCache;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.join.CoGroupByKey;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.ValueKind;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.TableProperties;
+import org.apache.iceberg.data.Record;
+import org.apache.iceberg.io.CloseableIterable;
+import org.apache.iceberg.types.Comparators;
+import org.apache.iceberg.types.TypeUtil;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.StructLikeMap;
+import org.apache.iceberg.util.StructLikeUtil;
+import org.apache.iceberg.util.StructProjection;
+import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Resolves a small bi-directional changelog group entirely in memory. This is the equivalent of
+ * {@link ReadFromChangelogs} + {@link CoGroupByKey} + {@link ResolveChanges}.
+ *
+ * <p>All tasks in a changelog group belong to the same Iceberg {@link Snapshot}. The upstream
+ * {@link ChangelogScanner} routes here only when the total size of the bi-directional group fits
+ * within {@link TableProperties#SPLIT_SIZE}.
+ *
+ * <p>The incoming batch's overlap region has already been computed in the scanning phase by {@link
+ * ChangelogScanner}. In this DoFn, we just process each task and route records:
+ *
+ * <ul>
+ *   <li>Records whose PK falls <b>outside</b> the overlap range cannot have an opposing-side match,
+ *       so they are emitted directly with {@code INSERT} or {@code DELETE} kind.
+ *   <li>Records whose PK falls <b>inside</b> the overlap range are stashed in a {@link
+ *       StructLikeMap} keyed by PK, then resolved by {@link CdcResolver}.
+ * </ul>
+ */
+class LocalResolveDoFn extends DoFn<KV<ChangelogDescriptor, List<SerializableChangelogTask>>, Row> {
+  private static final Logger LOG = LoggerFactory.getLogger(LocalResolveDoFn.class);
+
+  private final IcebergScanConfig scanConfig;
+  private final org.apache.beam.sdk.schemas.Schema projectedBeamSchema;
+
+  // Per-instance helpers built in setup(); transient, so they are not serialized with the DoFn.
+  private transient @MonotonicNonNull OverlapRange overlap;
+  private transient @MonotonicNonNull List<Types.NestedField> nonPkFields;
+  private transient @MonotonicNonNull StructProjection projector;
+
+  LocalResolveDoFn(IcebergScanConfig scanConfig) {
+    this.scanConfig = scanConfig;
+    this.projectedBeamSchema = icebergSchemaToBeamSchema(scanConfig.getProjectedSchema());
+  }
+
+  /**
+   * Prepares the helpers reused across elements: the overlap helper, the list of non-PK columns
+   * of the full table schema, and the projection from the full schema to the projected schema.
+   */
+  @Setup
+  public void setup() {
+    TableCache.setup(scanConfig);
+    Schema fullSchema = TableCache.get(scanConfig.getTableIdentifier()).schema();
+    this.overlap = OverlapRange.forScanConfig(scanConfig);
+    Set<String> pkFieldNames = new HashSet<>(overlap.recordIdSchema().identifierFieldNames());
+    // The dedup logic only inspects non-PK fields, so precompute them once.
+    List<Types.NestedField> nonPk = new ArrayList<>();
+    for (Types.NestedField f : fullSchema.columns()) {
+      if (!pkFieldNames.contains(f.name())) {
+        nonPk.add(f);
+      }
+    }
+    this.nonPkFields = nonPk;
+    this.projector = StructProjection.create(fullSchema, scanConfig.getProjectedSchema());
+  }
+
+  /**
+   * Reads every task of one changelog group, emits out-of-overlap records right away, and
+   * resolves the in-overlap records per primary key once all tasks have been read.
+   *
+   * @param element the group's descriptor (carrying the overlap bounds) and its tasks
+   * @param out receiver for the resulting rows
+   * @throws IOException if closing a task's record iterable fails
+   */
+  @ProcessElement
+  public void process(
+      @Element KV<ChangelogDescriptor, List<SerializableChangelogTask>> element,
+      OutputReceiver<Row> out)
+      throws IOException {
+    ChangelogDescriptor descriptor = element.getKey();
+    Table table = TableCache.get(scanConfig.getTableIdentifier());
+    OverlapRange ovl = checkStateNotNull(overlap);
+
+    // {PK: (inserts | deletes)} for in-overlap records that need resolution.
+    // Records outside the overlap are emitted directly
+    StructLikeMap<PkGroup> pkGroups = StructLikeMap.create(ovl.recordIdSchema().asStruct());
+
+    @Nullable StructLike overlapLower = ovl.toStructLike(descriptor.getOverlapLower());
+    @Nullable StructLike overlapUpper = ovl.toStructLike(descriptor.getOverlapUpper());
+    for (SerializableChangelogTask task : element.getValue()) {
+      readAndRoute(task, table, overlapLower, overlapUpper, pkGroups, out);
+    }
+
+    resolveAndEmit(pkGroups, table.schema(), out);
+  }
+
+  /**
+   * Processes a {@link SerializableChangelogTask} and routes each record:
+   *
+   * <ul>
+   *   <li>Out of overlap: emit directly
+   *   <li>Inside overlap: stash in {@code pkGroups} to resolve in {@link #resolveAndEmit}
+   * </ul>
+   *
+   * <p>Records from {@code ADDED_ROWS} tasks are treated as inserts; records from any other task
+   * type are treated as deletes.
+   */
+  private void readAndRoute(
+      SerializableChangelogTask task,
+      Table table,
+      @Nullable StructLike overlapLower,
+      @Nullable StructLike overlapUpper,
+      StructLikeMap<PkGroup> pkGroups,
+      OutputReceiver<Row> out)
+      throws IOException {
+    OverlapRange ovl = checkStateNotNull(overlap);
+    boolean isInsert = task.getType() == ADDED_ROWS;
+    try (CloseableIterable<Record> records =
+        CdcReadUtils.changelogRecordsForTask(task, table, scanConfig, false)) {
+      for (Record rec : records) {
+        if (ovl.contains(rec, overlapLower, overlapUpper)) { // needs resolution
+          // NOTE(review): presumably contains(...) has just wrapped `rec` in the shared PK
+          // projection, which is why it is copied here before the next record reuses it --
+          // confirm in OverlapRange.
+          StructLike pk = StructLikeUtil.copy(ovl.recordIdProjection());
+          PkGroup group = pkGroups.computeIfAbsent(pk, k -> new PkGroup());
+          if (isInsert) {
+            group.inserts.add(rec);
+          } else {
+            group.deletes.add(rec);
+          }
+        } else { // safe to emit directly
+          emit(rec, isInsert ? ValueKind.INSERT : ValueKind.DELETE, out);
+          logEmit(isInsert ? ValueKind.INSERT : ValueKind.DELETE, rec);
+        }
+      }
+    }
+  }
+
+  /** Resolves each PK group using {@link CdcResolver}. */
+  private void resolveAndEmit(
+      StructLikeMap<PkGroup> pkGroups, Schema fullSchema, OutputReceiver<Row> out) {
+    CdcResolver<Record> resolver = new RecordResolver(checkStateNotNull(nonPkFields), fullSchema);
+    for (PkGroup group : pkGroups.values()) {
+      resolver.resolve(
+          group.deletes,
+          group.inserts,
+          (kind, rec) -> {
+            emit(rec, kind, out);
+            logEmit(kind, rec);
+          });
+    }
+  }
+
+  /** Resolver specialization that hashes Iceberg Record non-PK fields. */
+  private static final class RecordResolver extends CdcResolver<Record> {
+    private final List<Types.NestedField> nonPkFields;
+    private final Comparator<StructLike> nonPkComparator;
+    // Separate projections for the two sides, so wrapping one record does not clobber the other.
+    private final StructProjection left;
+    private final StructProjection right;
+
+    RecordResolver(List<Types.NestedField> nonPkFields, Schema recSchema) {
+      this.nonPkFields = nonPkFields;
+      Set<Integer> nonPkFieldIds =
+          nonPkFields.stream().map(Types.NestedField::fieldId).collect(Collectors.toSet());
+      this.left = StructProjection.create(recSchema, nonPkFieldIds);
+      this.right = StructProjection.create(recSchema, nonPkFieldIds);
+      this.nonPkComparator =
+          Comparators.forType(TypeUtil.select(recSchema, nonPkFieldIds).asStruct());
+    }
+
+    /** Combines the hashes of all non-PK field values of {@code rec}, in field order. */
+    @Override
+    protected int nonPkHash(Record rec) {
+      int hash = 1;
+      for (Types.NestedField field : nonPkFields) {
+        Object value = rec.getField(field.name());
+        // byte[] only has an identity hashCode, so hash it by content. Otherwise two rows with
+        // equal bytes would hash differently even though nonPkEquals compares through a type
+        // comparator (presumably content-based for byte values -- confirm for nested types).
+        int valueHash =
+            value instanceof byte[] ? Arrays.hashCode((byte[]) value) : Objects.hashCode(value);
+        hash = 31 * hash + valueHash;
+      }
+      return hash;
+    }
+
+    /** Returns true when the non-PK columns of both records compare as equal. */
+    @Override
+    protected boolean nonPkEquals(Record delete, Record insert) {
+      return nonPkComparator.compare(left.wrap(delete), right.wrap(insert)) == 0;
+    }
+  }
+
+  /**
+   * Logs an emitted change at DEBUG level. Parameterized logging keeps the record from being
+   * rendered unless DEBUG is enabled, so nothing is written on the per-record path by default.
+   */
+  private static void logEmit(ValueKind kind, Record rec) {
+    LOG.debug("[LOCAL_RESOLVE] {}: {}", kind, rec);
+  }
+
+  /** Prune to get the final projected record then output as a Beam Row. */
+  private void emit(Record rec, ValueKind kind, OutputReceiver<Row> out) {
+    StructLike projected = checkStateNotNull(projector).wrap(rec);
+    out.builder(IcebergUtils.structToRow(projectedBeamSchema, projected))
+        .setValueKind(kind)
+        .output();
+  }
+
+  /** Two parallel lists of inserts/deletes that share a primary key. */
+  private static final class PkGroup {
+    final List<Record> inserts = new ArrayList<>();
+    final List<Record> deletes = new ArrayList<>();
+  }
+}
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/OverlapRange.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/OverlapRange.java
new file mode 100644
index 000000000000..efc200b4d04f
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/OverlapRange.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg.cdc;
+
+import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
+
+import java.util.Comparator;
+import org.apache.beam.sdk.io.iceberg.IcebergScanConfig;
+import org.apache.beam.sdk.io.iceberg.IcebergUtils;
+import org.apache.beam.sdk.io.iceberg.TableCache;
+import org.apache.beam.sdk.values.Row;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.data.Record;
+import org.apache.iceberg.util.StructProjection;
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+/**
+ * Helper that projects a record onto its Primary Key and tests that key against an overlap range.
+ *
+ * <p>{@link LocalResolveDoFn} and {@link ReadFromChangelogs} use it to decide whether a record's
+ * PK lies inside the overlap of two opposing tasks. If it does, the record has to be compared with
+ * others to determine whether it is part of an update pair.
+ *
+ * <p>The held {@link StructProjection} is re-pointed at a new record by every {@link #contains}
+ * call, so an instance carries per-call state.
+ */
+final class OverlapRange {
+  private final Schema recordIdSchema;
+  private final StructProjection recordIdProjection;
+  private final Comparator<StructLike> idComp;
+
+  private OverlapRange(
+      Schema recordIdSchema, StructProjection recordIdProjection, Comparator<StructLike> idComp) {
+    this.idComp = idComp;
+    this.recordIdProjection = recordIdProjection;
+    this.recordIdSchema = recordIdSchema;
+  }
+
+  /**
+   * Creates a helper for the table referenced by {@code scanConfig}: the cached table schema is
+   * projected onto the config's record-id schema, and the config's record-id comparator is used
+   * for range checks.
+   */
+  static OverlapRange forScanConfig(IcebergScanConfig scanConfig) {
+    Schema tableSchema = TableCache.get(scanConfig.getTableIdentifier()).schema();
+    StructProjection pkProjection =
+        StructProjection.create(tableSchema, scanConfig.recordIdSchema());
+    Schema pkSchema = scanConfig.recordIdSchema();
+    Comparator<StructLike> pkComparator = scanConfig.recordIdComparator();
+    return new OverlapRange(pkSchema, pkProjection, pkComparator);
+  }
+
+  /** Returns the shared projection; it reflects the record most recently wrapped by it. */
+  StructProjection recordIdProjection() {
+    return recordIdProjection;
+  }
+
+  /** Returns the Iceberg schema that describes the Primary Key columns. */
+  Schema recordIdSchema() {
+    return recordIdSchema;
+  }
+
+  /**
+   * Converts an overlap bound expressed as a Beam Row back to an Iceberg {@link StructLike}.
+   *
+   * @return the converted bound, or {@code null} when {@code beamBound} is {@code null}
+   */
+  @Nullable
+  StructLike toStructLike(@Nullable Row beamBound) {
+    return beamBound == null
+        ? null
+        : IcebergUtils.beamRowToIcebergRecord(recordIdSchema, beamBound);
+  }
+
+  /**
+   * Wraps the record to project its Primary Key, then checks whether the PK is within the overlap
+   * {@code [lower, upper]} (inclusive). Can be paired with a subsequent {@link
+   * #recordIdProjection()} call to fetch the PK value.
+   *
+   * <p>If either bound is null, we conservatively assume the record falls within the overlap.
+   */
+  boolean contains(Record rec, @Nullable StructLike lower, @Nullable StructLike upper) {
+    // Wrap before looking at the bounds so the projection points at rec even for open ranges.
+    checkStateNotNull(recordIdProjection).wrap(rec);
+
+    if (lower != null && upper != null) {
+      return idComp.compare(recordIdProjection, lower) >= 0
+          && idComp.compare(recordIdProjection, upper) <= 0;
+    }
+    return true;
+  }
+}
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/ReadFromChangelogs.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/ReadFromChangelogs.java
new file mode 100644
index 000000000000..bcf9522faf68
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/ReadFromChangelogs.java
@@ -0,0 +1,442 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg.cdc;
+
+import static org.apache.beam.sdk.io.iceberg.IcebergUtils.icebergRecordToBeamRow;
+import static org.apache.beam.sdk.io.iceberg.IcebergUtils.icebergSchemaToBeamSchema;
+import static org.apache.beam.sdk.io.iceberg.IcebergUtils.structToRow;
+import static org.apache.beam.sdk.io.iceberg.cdc.ChangelogScanner.LARGE_BIDIRECTIONAL_TASKS;
+import static org.apache.beam.sdk.io.iceberg.cdc.ChangelogScanner.UNIDIRECTIONAL_TASKS;
+import static org.apache.beam.sdk.io.iceberg.cdc.SerializableChangelogTask.Type.ADDED_ROWS;
+import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.VarLongCoder;
+import org.apache.beam.sdk.io.iceberg.IcebergScanConfig;
+import org.apache.beam.sdk.io.iceberg.IcebergUtils;
+import org.apache.beam.sdk.io.iceberg.TableCache;
+import org.apache.beam.sdk.io.range.OffsetRange;
+import org.apache.beam.sdk.metrics.Counter;
+import org.apache.beam.sdk.metrics.Metrics;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.schemas.SchemaCoder;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.Flatten;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.Redistribute;
+import org.apache.beam.sdk.transforms.Reify;
+import org.apache.beam.sdk.transforms.join.CoGroupByKey;
+import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionList;
+import org.apache.beam.sdk.values.PCollectionTuple;
+import org.apache.beam.sdk.values.PInput;
+import org.apache.beam.sdk.values.POutput;
+import org.apache.beam.sdk.values.PValue;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.TimestampedValue;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.beam.sdk.values.TupleTagList;
+import org.apache.beam.sdk.values.ValueKind;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.ChangelogScanTask;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.data.Record;
+import org.apache.iceberg.io.CloseableIterable;
+import org.apache.iceberg.util.StructProjection;
+import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+/**
+ * A {@link PTransform} that processes batches of {@link ChangelogScanTask}s and routes them
+ * accordingly:
+ *
+ * <ul>
+ *   <li>Records from Uni-directional batches are directly emitted, as INSERT or DELETE kind
+ *   <li>Records from Bi-directional batches are compared against the Primary Key overlap range:
+ *       <ul>
+ *         <li>if outside the overlap, emit directly as INSERT or DELETE kind
+ *         <li>if inside the overlap, key by (snapshot, pk) and route to downstream {@link
+ *             CoGroupByKey} and final resolution by {@link ResolveChanges}
+ *       </ul>
+ * </ul>
+ *
+ * <p>We first key bi-directional rows by (snapshot id, primary key) before sending to {@link
+ * CoGroupByKey} to ensure they stay isolated from other PKs or snapshots. Inserts are routed to
+ * {@code BIDIRECTIONAL_INSERTS} and deletes to {@code BIDIRECTIONAL_DELETES}.
+ *
+ * <p>A {@link ChangelogScanTask} comes in three types:
+ *
+ * <ol>
+ *   <li><b>AddedRowsScanTask</b>: Indicates records have been inserted by a new DataFile.
+ *   <li><b>DeletedRowsScanTask</b>: Indicates records have been deleted using a DeleteFile.
+ *   <li><b>DeletedDataFileScanTask</b>: Indicates a whole DataFile has been deleted.
+ * </ol>
+ *
+ * <p>Each of these types need to be processed differently. More details in {@link
+ * CdcReadUtils#changelogRecordsForTask}.
+ */
+class ReadFromChangelogs extends PTransform<PCollectionTuple, ReadFromChangelogs.CdcOutput> {
+  // Counters keyed by task type; each is incremented after a task of that type has been read.
+  private static final Counter numAddedRowsScanTasksCompleted =
+      Metrics.counter(ReadFromChangelogs.class, "numAddedRowsScanTasksCompleted");
+  private static final Counter numDeletedRowsScanTasksCompleted =
+      Metrics.counter(ReadFromChangelogs.class, "numDeletedRowsScanTasksCompleted");
+  private static final Counter numDeletedDataFileScanTasksCompleted =
+      Metrics.counter(ReadFromChangelogs.class, "numDeletedDataFileScanTasksCompleted");
+
+  // Output tags of the read step: rows that are emitted directly, and rows inside the overlap
+  // that are keyed by (snapshot ID, primary key), split into inserts and deletes.
+  private static final TupleTag<Row> UNIDIRECTIONAL_ROWS = new TupleTag<>();
+  private static final TupleTag<KV<KV<Long, Row>, Row>> BIDIRECTIONAL_INSERTS = new TupleTag<>();
+  private static final TupleTag<KV<KV<Long, Row>, Row>> BIDIRECTIONAL_DELETES = new TupleTag<>();
+
+  // Scan configuration shared with the reading DoFns.
+  private final IcebergScanConfig scanConfig;
+
+  /** Creates the transform for the table and projection described by {@code scanConfig}. */
+  ReadFromChangelogs(IcebergScanConfig scanConfig) {
+    this.scanConfig = scanConfig;
+  }
+
+  /**
+   * Reads the uni-directional and large bi-directional task groups of {@code input} and returns
+   * three collections: rows that can be emitted as-is (already in the projected schema), and
+   * overlap inserts / deletes keyed by (snapshot ID, primary key) that carry the full row schema.
+   */
+  @Override
+  public CdcOutput expand(PCollectionTuple input) {
+    Schema fullBeamSchema = scanConfig.getSchema();
+    Schema projectedBeamSchema =
+        IcebergUtils.icebergSchemaToBeamSchema(scanConfig.getProjectedSchema());
+
+    // --- Uni-directional task groups (only inserts, or only deletes) ---
+    // Nothing to resolve here, so the records are simply read and emitted.
+    PCollectionTuple uniDirectionalRead =
+        input
+            .get(UNIDIRECTIONAL_TASKS)
+            .apply(Redistribute.arbitrarily())
+            .apply(
+                "Read Uni-Directional Changes",
+                ParDo.of(ReadDoFn.unidirectional(scanConfig))
+                    .withOutputTags(UNIDIRECTIONAL_ROWS, TupleTagList.empty()));
+    PCollection<Row> directRows =
+        uniDirectionalRead.get(UNIDIRECTIONAL_ROWS).setRowSchema(projectedBeamSchema);
+
+    // --- Bi-directional task groups (a mix of inserts and deletes) ---
+    // Records are routed by Primary Key: outside the overlap they are emitted directly, inside
+    // the overlap they are keyed for downstream resolution (see class javadoc).
+    PCollectionTuple biDirectionalRead =
+        input
+            .get(LARGE_BIDIRECTIONAL_TASKS)
+            .apply(Redistribute.arbitrarily())
+            .apply(
+                "Read Bi-Directional Changes",
+                ParDo.of(ReadDoFn.bidirectional(scanConfig))
+                    .withOutputTags(
+                        BIDIRECTIONAL_INSERTS,
+                        TupleTagList.of(BIDIRECTIONAL_DELETES).and(UNIDIRECTIONAL_ROWS)));
+
+    // Rows of bi-directional groups whose PK fell outside the overlap
+    PCollection<Row> directRowsFromBiDirectionalRead =
+        biDirectionalRead.get(UNIDIRECTIONAL_ROWS).setRowSchema(projectedBeamSchema);
+
+    // Merge both sources of directly-emitted rows
+    PCollection<Row> allDirectRows =
+        PCollectionList.of(directRows)
+            .and(directRowsFromBiDirectionalRead)
+            .apply("Flatten Uni-Directional Rows", Flatten.pCollections());
+
+    // Keyed overlap rows: key = (snapshot ID, PK row), value = full (unprojected) row.
+    // NOTE(review): no Reify step is applied in this method, so element timestamps are not
+    // captured here; confirm they are preserved across the downstream CoGroupByKey.
+    KvCoder<Long, Row> snapshotAndPkCoder =
+        KvCoder.of(VarLongCoder.of(), SchemaCoder.of(scanConfig.rowIdBeamSchema()));
+    KvCoder<KV<Long, Row>, Row> keyedRowCoder =
+        KvCoder.of(snapshotAndPkCoder, SchemaCoder.of(fullBeamSchema));
+
+    PCollection<KV<KV<Long, Row>, Row>> keyedInserts =
+        biDirectionalRead.get(BIDIRECTIONAL_INSERTS).setCoder(keyedRowCoder);
+    PCollection<KV<KV<Long, Row>, Row>> keyedDeletes =
+        biDirectionalRead.get(BIDIRECTIONAL_DELETES).setCoder(keyedRowCoder);
+
+    return new CdcOutput(input.getPipeline(), allDirectRows, keyedInserts, keyedDeletes);
+  }
+
+  /**
+   * Output of {@link ReadFromChangelogs}: the rows that can be emitted directly plus the keyed
+   * inserts and deletes, exposed under the three output tags of this transform.
+   */
+  public static class CdcOutput implements POutput {
+    private final Pipeline pipeline;
+    private final PCollection<Row> uniDirectionalRows;
+    private final PCollection<KV<KV<Long, Row>, Row>> biDirectionalInserts;
+    private final PCollection<KV<KV<Long, Row>, Row>> biDirectionalDeletes;
+
+    CdcOutput(
+        Pipeline p,
+        PCollection<Row> uniDirectionalRows,
+        PCollection<KV<KV<Long, Row>, Row>> biDirectionalInserts,
+        PCollection<KV<KV<Long, Row>, Row>> biDirectionalDeletes) {
+      this.biDirectionalDeletes = biDirectionalDeletes;
+      this.biDirectionalInserts = biDirectionalInserts;
+      this.uniDirectionalRows = uniDirectionalRows;
+      this.pipeline = p;
+    }
+
+    /** Rows that need no further resolution. */
+    PCollection<Row> uniDirectionalRows() {
+      return uniDirectionalRows;
+    }
+
+    /** Inserted rows keyed by (snapshot ID, primary key). */
+    PCollection<KV<KV<Long, Row>, Row>> biDirectionalInserts() {
+      return biDirectionalInserts;
+    }
+
+    /** Deleted rows keyed by (snapshot ID, primary key). */
+    PCollection<KV<KV<Long, Row>, Row>> biDirectionalDeletes() {
+      return biDirectionalDeletes;
+    }
+
+    @Override
+    public Pipeline getPipeline() {
+      return pipeline;
+    }
+
+    /** Maps each output tag to the collection it identifies. */
+    @Override
+    public Map<TupleTag<?>, PValue> expand() {
+      return ImmutableMap.<TupleTag<?>, PValue>builder()
+          .put(UNIDIRECTIONAL_ROWS, uniDirectionalRows)
+          .put(BIDIRECTIONAL_INSERTS, biDirectionalInserts)
+          .put(BIDIRECTIONAL_DELETES, biDirectionalDeletes)
+          .build();
+    }
+
+    /** Intentionally empty: nothing is finalized here. */
+    @Override
+    public void finishSpecifyingOutput(
+        String transformName, PInput input, PTransform<?, ?> transform) {}
+  }
+
+  @DoFn.BoundedPerElement
+  private static class ReadDoFn<OutT>
+      extends DoFn<KV<ChangelogDescriptor, List<SerializableChangelogTask>>, OutT> {
+    private final IcebergScanConfig scanConfig;
+    private final boolean keyedOutput;
+    private final Schema projectedBeamRowSchema;
+    private final Schema fullBeamRowSchema;
+    private transient @MonotonicNonNull OverlapRange overlap;
+    private transient @MonotonicNonNull StructProjection outputProjector;
+    private transient @MonotonicNonNull StructProjection pkProjector;
+
+    /** Used for uni-directional changes. Records are output immediately as-is. */
+    static ReadDoFn<Row> unidirectional(IcebergScanConfig scanConfig) {
+      return new ReadDoFn<>(scanConfig, false);
+    }
+
+    /**
+     * Used for bi-directional changes. Records are keyed by (snapshot ID, primary key) and sent to
+     * a CoGBK.
+     */
+    static ReadDoFn<KV<KV<Long, Row>, Row>> bidirectional(IcebergScanConfig scanConfig) {
+      return new ReadDoFn<>(scanConfig, true);
+    }
+
+    private ReadDoFn(IcebergScanConfig scanConfig, boolean keyedOutput) {
+      this.scanConfig = scanConfig;
+      this.keyedOutput = keyedOutput;
+
+      this.projectedBeamRowSchema = icebergSchemaToBeamSchema(scanConfig.getProjectedSchema());
+      this.fullBeamRowSchema = scanConfig.getSchema();
+    }
+
+    @Setup
+    public void setup() {
+      TableCache.setup(scanConfig);
+      this.overlap = OverlapRange.forScanConfig(scanConfig);
+    }
+
+    @ProcessElement
+    public void process(
+        @Element KV<ChangelogDescriptor, List<SerializableChangelogTask>> element,
+        RestrictionTracker<OffsetRange, Long> tracker,
+        MultiOutputReceiver out)
+        throws IOException {
+      Table table = TableCache.get(scanConfig.getTableIdentifier());
+
+      List<SerializableChangelogTask> tasks = element.getValue();
+      @Nullable Row overlapLower = element.getKey().getOverlapLower();
+      @Nullable Row overlapUpper = element.getKey().getOverlapUpper();
+
+      for (long l = tracker.currentRestriction().getFrom();
+          l < tracker.currentRestriction().getTo();
+          l++) {
+        if (!tracker.tryClaim(l)) {
+          return;
+        }
+
+        SerializableChangelogTask task = tasks.get((int) l);
+        processTaskRecords(task, overlapLower, overlapUpper, table, out);
+      }
+    }
+
+    /**
+     * Processes a ChangelogScanTask and routes records accordingly:
+     *
+     * <p>If this DoFn is configured with {@link #unidirectional}, we simply read records and output
+     * directly to {@link #UNIDIRECTIONAL_ROWS}.
+     *
+     * <p>If this DoFn is configured with {@link #bidirectional}, we compare against the Primary Key
+     * overlap range. If within the overlap, we key by (snapshotId, PK) and out to either {@link
+     * #BIDIRECTIONAL_INSERTS} or {@link #BIDIRECTIONAL_DELETES}. Otherwise (not in overlap), we
+     * output the record directly to {@link #UNIDIRECTIONAL_ROWS}.
+     */
+    private void processTaskRecords(
+        SerializableChangelogTask task,
+        @Nullable Row overlapLowerRow,
+        @Nullable Row overlapUpperRow,
+        Table table,
+        MultiOutputReceiver outputReceiver)
+        throws IOException {
+      OverlapRange ovl = checkStateNotNull(overlap);
+      @Nullable StructLike overlapLower = ovl.toStructLike(overlapLowerRow);
+      @Nullable StructLike overlapUpper = ovl.toStructLike(overlapUpperRow);
+
+      boolean isInsert = task.getType() == ADDED_ROWS;
+      TupleTag<KV<KV<Long, Row>, Row>> taggedOutput =
+          isInsert ? BIDIRECTIONAL_INSERTS : BIDIRECTIONAL_DELETES;
+      ValueKind kind = isInsert ? ValueKind.INSERT : ValueKind.DELETE;
+
+      Schema outputSchema = keyedOutput ? fullBeamRowSchema : projectedBeamRowSchema;
+      try (CloseableIterable<Record> records =
+          CdcReadUtils.changelogRecordsForTask(task, table, scanConfig, !keyedOutput)) {
+        for (Record rec : records) {
+          // uni-directional -- just output records (they are already projected by read pushdown)
+          if (!keyedOutput) {
+            Row row = icebergRecordToBeamRow(projectedBeamRowSchema, rec);
+            outputReceiver.get(UNIDIRECTIONAL_ROWS).builder(row).setValueKind(kind).output();
+            continue;
+          }
+
+          // bi-directional -- compare overlap
+          if (ovl.contains(rec, overlapLower, overlapUpper)) {
+            // inside overlap -- read full row and output KV
+            Row row = icebergRecordToBeamRow(outputSchema, rec);
+            Row pk = structToRow(scanConfig.rowIdBeamSchema(), pkProjector().wrap(rec));
+            long snapshotId = task.getCommitSnapshotId();
+            outputReceiver
+                .get(taggedOutput)
+                .builder(KV.of(KV.of(snapshotId, pk), row))
+                .setValueKind(kind)
+                .output();
+
+            System.out.printf(
+                "[LARGE BIIDIRECTIONAL OVERLAP] -- %s(%s)%n%s%n",
+                getKind(task.getType()), snapshotId, row);
+          } else {
+            // outside overlap -- get projected record and output
+            StructLike projected = outputProjector().wrap(rec);
+            Row row = structToRow(projectedBeamRowSchema, projected);
+            System.out.printf(
+                "[LARGE BIIDIRECTIONAL NO-OVERLAP] -- %s(%s)%n%s%n",
+                getKind(task.getType()), task.getCommitSnapshotId(), row);
+            outputReceiver.get(UNIDIRECTIONAL_ROWS).builder(row).setValueKind(kind).output();
+          }
+        }
+      }
+
+      trackMetrics(task.getType());
+    }
+
+    private StructProjection outputProjector() {
+      if (outputProjector == null) {
+        outputProjector =
+            StructProjection.create(
+                TableCache.get(scanConfig.getTableIdentifier()).schema(),
+                scanConfig.getProjectedSchema());
+      }
+      return outputProjector;
+    }
+
+    private StructProjection pkProjector() {
+      if (pkProjector == null) {
+        pkProjector =
+            StructProjection.create(
+                TableCache.get(scanConfig.getTableIdentifier()).schema(),
+                scanConfig.recordIdSchema());
+      }
+      return pkProjector;
+    }
+
+    private void trackMetrics(SerializableChangelogTask.Type type) {
+      switch (type) {
+        case ADDED_ROWS:
+          numAddedRowsScanTasksCompleted.inc();
+          break;
+        case DELETED_ROWS:
+          numDeletedRowsScanTasksCompleted.inc();
+          break;
+        case DELETED_FILE:
+          numDeletedDataFileScanTasksCompleted.inc();
+          break;
+      }
+    }
+
+    private String getKind(SerializableChangelogTask.Type taskType) {
+      switch (taskType) {
+        case ADDED_ROWS:
+          return "INSERT";
+        case DELETED_ROWS:
+          return "DELETE";
+        case DELETED_FILE:
+        default:
+          return "DELETE-DF";
+      }
+    }
+
+    @GetSize
+    public double getSize(
+        @Element KV<ChangelogDescriptor, List<SerializableChangelogTask>> element,
+        @Restriction OffsetRange restriction) {
+      // TODO(ahmedabu98): this is just the compressed DataFile byte size. find a way to make a
+      // better byte size estimate
+      long size = 0;
+
+      for (long l = restriction.getFrom(); l < restriction.getTo(); l++) {
+        size += element.getValue().get((int) l).getDataFile().getFileSizeInBytes();
+      }
+
+      return size;
+    }
+
+    @GetInitialRestriction
+    public OffsetRange getInitialRange(
+        @Element KV<ChangelogDescriptor, List<SerializableChangelogTask>> element) {
+      return new OffsetRange(0, element.getValue().size());
+    }
+
+    // commenting out for now because i think doing max split will lead to OOMs,
+    // each thread will try to buffer its task in memory at the same time
+    //    @SplitRestriction
+    //    public void splitRestriction(
+    //        @Restriction OffsetRange restriction, OutputReceiver<OffsetRange> out) {
+    //      // Split into individual tasks for maximum initial parallelism
+    //      for (long i = restriction.getFrom(); i < restriction.getTo(); i++) {
+    //        out.output(new OffsetRange(i, i + 1));
+    //      }
+    //    }
+  }
+}
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/ResolveChanges.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/ResolveChanges.java
new file mode 100644
index 000000000000..2e233b8dfdf3
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/ResolveChanges.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg.cdc;
+
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Objects;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.beam.sdk.io.iceberg.IcebergScanConfig;
+import org.apache.beam.sdk.schemas.Schema;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.join.CoGbkResult;
+import org.apache.beam.sdk.util.RowFilter;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.Row;
+import org.apache.beam.sdk.values.TimestampedValue;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.beam.sdk.values.ValueKind;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists;
+import org.apache.iceberg.types.Types;
+import org.joda.time.Instant;
+
+/**
+ * Receives a {@link CoGbkResult} containing inserts and deletes sharing the same snapshot ID and
+ * Primary Key, and uses {@link CdcResolver} to identify logical updates.
+ */
+class ResolveChanges extends DoFn<KV<KV<Long, Row>, CoGbkResult>, Row> {
+  static final TupleTag<Row> DELETES = new TupleTag<>() {};
+  static final TupleTag<Row> INSERTS = new TupleTag<>() {};
+  private final RowFilter rowFilter;
+
+  ResolveChanges(IcebergScanConfig scanConfig) {
+    this.rowFilter =
+        new RowFilter(scanConfig.getSchema())
+            .keep(
+                scanConfig.getProjectedSchema().columns().stream()
+                    .map(Types.NestedField::name)
+                    .collect(Collectors.toList()));
+  }
+
+  @ProcessElement
+  public void processElement(
+    @Element KV<KV<Long, Row>, CoGbkResult> element, @Timestamp Instant timestamp, OutputReceiver<Row> out) {
+    Row primaryKey = element.getKey().getValue();
+    Set<String> pkFields = new HashSet<>(primaryKey.getSchema().getFieldNames());
+    CoGbkResult result = element.getValue();
+
+    // should be okay to materialize these lists. a PK collision will likely be a handful of records
+    // at most
+    List<Row> deletes = Lists.newArrayList(result.getAll(DELETES));
+    List<Row> inserts = Lists.newArrayList(result.getAll(INSERTS));
+
+    new RowResolver(pkFields)
+        .resolve(
+            deletes,
+            inserts,
+            (kind, row) -> {
+              Row projectedRow = rowFilter.filter(row);
+              out.builder(projectedRow).setValueKind(kind).setTimestamp(timestamp).output();
+              logEmit(kind, row);
+            });
+  }
+
+  private static final class RowResolver extends CdcResolver<Row> {
+    private final Set<String> pkFields;
+
+    RowResolver(Set<String> pkFields) {
+      this.pkFields = pkFields;
+    }
+
+    @Override
+    protected int nonPkHash(Row element) {
+      int hash = 1;
+      for (String field : element.getSchema().getFieldNames()) {
+        if (pkFields.contains(field)) {
+          continue;
+        }
+        hash = 31 * hash + Objects.hashCode(element.getValue(field));
+      }
+      return hash;
+    }
+
+    @Override
+    protected boolean nonPkEquals(Row delete, Row insert) {
+      Schema schema = insert.getSchema();
+      for (String field : schema.getFieldNames()) {
+        // we already know PK values are equal
+        if (pkFields.contains(field)) {
+          continue;
+        }
+        // return early if two values are not equal
+        if (!Row.Equals.deepEquals(
+            insert.getValue(field),
+            delete.getValue(field),
+            schema.getField(field).getType())) {
+          return false;
+        }
+      }
+      return true;
+    }
+  }
+
+  /** Debug-only logging hook so the existing CoW / update / extra prints survive the refactor. */
+  private static void logEmit(ValueKind kind, Row row) {
+    switch (kind) {
+      case UPDATE_BEFORE:
+        System.out.printf("[BIDIRECTIONAL] -- UpdateBefore:%n\t%s%n", row);
+        break;
+      case UPDATE_AFTER:
+        System.out.printf("[BIDIRECTIONAL] -- UpdateAfter%n\t%s%n", row);
+        break;
+      case DELETE:
+        System.out.printf("[BIDIRECTIONAL] -- Deleted%n%s%n", row);
+        break;
+      case INSERT:
+        System.out.printf("[BIDIRECTIONAL] -- Inserted%n%s%n", row);
+        break;
+    }
+  }
+}
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/SerializableChangelogTask.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/SerializableChangelogTask.java
new file mode 100644
index 000000000000..9b6955d9e4a5
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/SerializableChangelogTask.java
@@ -0,0 +1,282 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg.cdc;
+
+import static org.apache.beam.sdk.util.Preconditions.checkStateNotNull;
+import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState;
+
+import com.google.auto.value.AutoValue;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import org.apache.beam.sdk.io.iceberg.SerializableDataFile;
+import org.apache.beam.sdk.io.iceberg.SerializableDeleteFile;
+import org.apache.beam.sdk.schemas.AutoValueSchema;
+import org.apache.beam.sdk.schemas.NoSuchSchemaException;
+import org.apache.beam.sdk.schemas.SchemaCoder;
+import org.apache.beam.sdk.schemas.SchemaRegistry;
+import org.apache.beam.sdk.schemas.annotations.DefaultSchema;
+import org.apache.beam.sdk.schemas.annotations.SchemaFieldNumber;
+import org.apache.beam.sdk.schemas.annotations.SchemaIgnore;
+import org.apache.iceberg.AddedRowsScanTask;
+import org.apache.iceberg.ChangelogOperation;
+import org.apache.iceberg.ChangelogScanTask;
+import org.apache.iceberg.ContentScanTask;
+import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DeleteFile;
+import org.apache.iceberg.DeletedDataFileScanTask;
+import org.apache.iceberg.DeletedRowsScanTask;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.expressions.ExpressionParser;
+
+@DefaultSchema(AutoValueSchema.class)
+@AutoValue
+public abstract class SerializableChangelogTask {
+  public enum Type {
+    ADDED_ROWS,
+    DELETED_ROWS,
+    DELETED_FILE
+  }
+
+  public static SchemaCoder<SerializableChangelogTask> coder() {
+    try {
+      return SchemaRegistry.createDefault().getSchemaCoder(SerializableChangelogTask.class);
+    } catch (NoSuchSchemaException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  public static SerializableChangelogTask.Builder builder() {
+    return new AutoValue_SerializableChangelogTask.Builder()
+        .setExistingDeletes(Collections.emptyList())
+        .setAddedDeletes(Collections.emptyList());
+  }
+
+  @SchemaFieldNumber("0")
+  public abstract Type getType();
+
+  @SchemaFieldNumber("1")
+  public abstract SerializableDataFile getDataFile();
+
+  @SchemaFieldNumber("2")
+  public abstract List<SerializableDeleteFile> getExistingDeletes();
+
+  @SchemaFieldNumber("3")
+  public abstract List<SerializableDeleteFile> getAddedDeletes();
+
+  @SchemaFieldNumber("4")
+  public abstract int getSpecId();
+
+  @SchemaFieldNumber("5")
+  public abstract ChangelogOperation getOperation();
+
+  @SchemaFieldNumber("6")
+  public abstract int getOrdinal();
+
+  @SchemaFieldNumber("7")
+  public abstract long getCommitSnapshotId();
+
+  @SchemaFieldNumber("8")
+  public abstract long getStart();
+
+  @SchemaFieldNumber("9")
+  public abstract long getLength();
+
+  @SchemaFieldNumber("10")
+  public abstract String getJsonExpression();
+
+  @SchemaIgnore
+  public Expression getExpression(Schema schema) {
+    return ExpressionParser.fromJson(getJsonExpression(), schema);
+  }
+
+  public abstract Builder toBuilder();
+
+  @AutoValue.Builder
+  public abstract static class Builder {
+    abstract Builder setType(Type type);
+
+    abstract Builder setDataFile(SerializableDataFile dataFile);
+
+    @SchemaIgnore
+    public Builder setDataFile(DataFile df, String partitionPath, boolean includeMetrics) {
+      return setDataFile(SerializableDataFile.from(df, partitionPath, includeMetrics));
+    }
+
+    abstract Builder setExistingDeletes(List<SerializableDeleteFile> existingDeletes);
+
+    abstract Builder setAddedDeletes(List<SerializableDeleteFile> addedDeletes);
+
+    abstract Builder setSpecId(int specId);
+
+    abstract Builder setOperation(ChangelogOperation operation);
+
+    abstract Builder setOrdinal(int ordinal);
+
+    abstract Builder setCommitSnapshotId(long commitSnapshotId);
+
+    abstract Builder setStart(long start);
+
+    abstract Builder setLength(long length);
+
+    abstract Builder setJsonExpression(String expression);
+
+    abstract SerializableChangelogTask build();
+  }
+
+  public static SerializableChangelogTask from(
+      ChangelogScanTask task, Map<Integer, PartitionSpec> specs) {
+    return from(task, specs, false);
+  }
+
+  public static SerializableChangelogTask from(
+      ChangelogScanTask task, Map<Integer, PartitionSpec> specs, boolean includeMetrics) {
+    checkState(
+        task instanceof ContentScanTask, "Expected ChangelogScanTask to also be a ContentScanTask");
+    ContentScanTask<DataFile> contentScanTask = (ContentScanTask<DataFile>) task;
+    PartitionSpec spec = contentScanTask.spec();
+    SerializableChangelogTask.Builder builder =
+        SerializableChangelogTask.builder()
+            .setOperation(task.operation())
+            .setOrdinal(task.changeOrdinal())
+            .setCommitSnapshotId(task.commitSnapshotId())
+            .setDataFile(
+                contentScanTask.file(),
+                spec.partitionToPath(contentScanTask.partition()),
+                includeMetrics)
+            .setSpecId(spec.specId())
+            .setStart(contentScanTask.start())
+            .setLength(contentScanTask.length())
+            .setJsonExpression(ExpressionParser.toJson(contentScanTask.residual()));
+
+    if (task instanceof AddedRowsScanTask) {
+      AddedRowsScanTask addedRowsTask = (AddedRowsScanTask) task;
+      builder =
+          builder
+              .setType(Type.ADDED_ROWS)
+              .setAddedDeletes(
+                  toSerializableDeletes(addedRowsTask.deletes(), specs, includeMetrics));
+    } else if (task instanceof DeletedRowsScanTask) {
+      DeletedRowsScanTask deletedRowsTask = (DeletedRowsScanTask) task;
+      builder =
+          builder
+              .setType(Type.DELETED_ROWS)
+              .setAddedDeletes(
+                  toSerializableDeletes(deletedRowsTask.addedDeletes(), specs, includeMetrics))
+              .setExistingDeletes(
+                  toSerializableDeletes(deletedRowsTask.existingDeletes(), specs, includeMetrics));
+    } else if (task instanceof DeletedDataFileScanTask) {
+      DeletedDataFileScanTask deletedFileTask = (DeletedDataFileScanTask) task;
+      builder =
+          builder
+              .setType(Type.DELETED_FILE)
+              .setExistingDeletes(
+                  toSerializableDeletes(deletedFileTask.existingDeletes(), specs, includeMetrics));
+    } else {
+      throw new IllegalStateException("Unknown ChangelogScanTask type: " + task.getClass());
+    }
+    return builder.build();
+  }
+
+  static Type getType(ChangelogScanTask task) {
+    if (task instanceof AddedRowsScanTask) {
+      return Type.ADDED_ROWS;
+    } else if (task instanceof DeletedRowsScanTask) {
+      return Type.DELETED_ROWS;
+    } else if (task instanceof DeletedDataFileScanTask) {
+      return Type.DELETED_FILE;
+    } else {
+      throw new IllegalStateException("Unknown ChangelogScanTask type: " + task.getClass());
+    }
+  }
+
+  static long getTotalLength(List<ChangelogScanTask> tasks) {
+    return tasks.stream().mapToLong(SerializableChangelogTask::getLength).sum();
+  }
+
+  static long getLength(ChangelogScanTask task) {
+    if (task instanceof AddedRowsScanTask) {
+      return ((AddedRowsScanTask) task).length();
+    } else if (task instanceof DeletedRowsScanTask) {
+      return ((DeletedRowsScanTask) task).length();
+    } else if (task instanceof DeletedDataFileScanTask) {
+      return ((DeletedDataFileScanTask) task).length();
+    }
+    throw new IllegalStateException("Unknown ChangelogScanTask type: " + task.getClass());
+  }
+
+  static StructLike getPartition(ChangelogScanTask task) {
+    if (task instanceof AddedRowsScanTask) {
+      return ((AddedRowsScanTask) task).partition();
+    } else if (task instanceof DeletedRowsScanTask) {
+      return ((DeletedRowsScanTask) task).partition();
+    } else if (task instanceof DeletedDataFileScanTask) {
+      return ((DeletedDataFileScanTask) task).partition();
+    }
+    throw new IllegalStateException("Unknown ChangelogScanTask type: " + task.getClass());
+  }
+
+  static PartitionSpec getSpec(ChangelogScanTask task) {
+    if (task instanceof AddedRowsScanTask) {
+      return ((AddedRowsScanTask) task).spec();
+    } else if (task instanceof DeletedRowsScanTask) {
+      return ((DeletedRowsScanTask) task).spec();
+    } else if (task instanceof DeletedDataFileScanTask) {
+      return ((DeletedDataFileScanTask) task).spec();
+    }
+    throw new IllegalStateException("Unknown ChangelogScanTask type: " + task.getClass());
+  }
+
+  static DataFile getDataFile(ChangelogScanTask task) {
+    if (task instanceof AddedRowsScanTask) {
+      return ((AddedRowsScanTask) task).file();
+    } else if (task instanceof DeletedRowsScanTask) {
+      return ((DeletedRowsScanTask) task).file();
+    } else if (task instanceof DeletedDataFileScanTask) {
+      return ((DeletedDataFileScanTask) task).file();
+    }
+    throw new IllegalStateException("Unknown ChangelogScanTask type: " + task.getClass());
+  }
+
+  static List<DeleteFile> getAddedDeleteFiles(ChangelogScanTask task) {
+    if (task instanceof AddedRowsScanTask) {
+      return ((AddedRowsScanTask) task).deletes();
+    } else if (task instanceof DeletedRowsScanTask) {
+      return ((DeletedRowsScanTask) task).addedDeletes();
+    } else if (task instanceof DeletedDataFileScanTask) {
+      return Collections.emptyList();
+    }
+    throw new IllegalStateException("Unknown ChangelogScanTask type: " + task.getClass());
+  }
+
+  private static List<SerializableDeleteFile> toSerializableDeletes(
+      List<DeleteFile> dfs, Map<Integer, PartitionSpec> specs, boolean includeMetrics) {
+    return dfs.stream()
+        .map(
+            df ->
+                SerializableDeleteFile.from(
+                    df,
+                    checkStateNotNull(specs.get(df.specId())).partitionToPath(df.partition()),
+                    includeMetrics))
+        .collect(Collectors.toList());
+  }
+}
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/SnapshotWindowFn.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/SnapshotWindowFn.java
new file mode 100644
index 000000000000..f9985c136264
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/SnapshotWindowFn.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg.cdc;
+
+import java.util.Collection;
+import java.util.Collections;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
+import org.apache.beam.sdk.transforms.windowing.NonMergingWindowFn;
+import org.apache.beam.sdk.transforms.windowing.WindowFn;
+import org.apache.beam.sdk.transforms.windowing.WindowMappingFn;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+
+/**
+ * A {@link WindowFn} that assigns each element to a 1-millisecond {@link IntervalWindow} anchored
+ * at the element's event timestamp.
+ *
+ * <p>We set the element's timestamp as its snapshot commit timestamp. All tasks/records from the
+ * same snapshot land in the same window.
+ *
+ * <p>With the per-snapshot watermark from {@link WatchForSnapshotsSdf}, the CoGroupByKey fires when
+ * a snapshot is fully drained. The watermark advances past the snapshot's commit time only after
+ * every downstream stage has finished processing that snapshot's records.
+ *
+ * <p>Two snapshots committed within the same millisecond may collapse into the same window. But
+ * that's okay because {@link ReadFromChangelogs} includes snapshot id in the key before routing to
+ * the CoGBK, so it won't produce incorrect joins.
+ */
+public class SnapshotWindowFn extends NonMergingWindowFn<Object, IntervalWindow> {
+  private static final Duration WINDOW_LENGTH = Duration.millis(1);
+
+  @Override
+  public Collection<IntervalWindow> assignWindows(AssignContext c) {
+    Instant ts = c.timestamp();
+    return Collections.singletonList(new IntervalWindow(ts, ts.plus(WINDOW_LENGTH)));
+  }
+
+  @Override
+  public boolean isCompatible(WindowFn<?, ?> other) {
+    return other instanceof SnapshotWindowFn;
+  }
+
+  @Override
+  public Coder<IntervalWindow> windowCoder() {
+    return IntervalWindow.getCoder();
+  }
+
+  @Override
+  public WindowMappingFn<IntervalWindow> getDefaultWindowMappingFn() {
+    // Side-input window: WINDOW_LENGTH wide, starting at the main window's max timestamp.
+    return new WindowMappingFn<>() {
+      @Override
+      public IntervalWindow getSideInputWindow(BoundedWindow mainWindow) {
+        Instant end = mainWindow.maxTimestamp();
+        return new IntervalWindow(end, end.plus(WINDOW_LENGTH));
+      }
+    };
+  }
+
+  @Override
+  public boolean equals(@Nullable Object obj) {
+    return obj instanceof SnapshotWindowFn;
+  }
+
+  @Override
+  public int hashCode() {
+    return SnapshotWindowFn.class.hashCode();
+  }
+}
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/WatchForSnapshotsSdf.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/WatchForSnapshotsSdf.java
new file mode 100644
index 000000000000..8642a0f0b5fa
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/WatchForSnapshotsSdf.java
@@ -0,0 +1,286 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg.cdc;
+
+import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.io.iceberg.IcebergScanConfig;
+import org.apache.beam.sdk.io.iceberg.ReadUtils;
+import org.apache.beam.sdk.io.iceberg.SnapshotInfo;
+import org.apache.beam.sdk.io.iceberg.TableCache;
+import org.apache.beam.sdk.io.range.OffsetRange;
+import org.apache.beam.sdk.metrics.Counter;
+import org.apache.beam.sdk.metrics.Gauge;
+import org.apache.beam.sdk.metrics.Metrics;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.splittabledofn.GrowableOffsetRangeTracker;
+import org.apache.beam.sdk.transforms.splittabledofn.ManualWatermarkEstimator;
+import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker;
+import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
+import org.apache.beam.sdk.transforms.splittabledofn.WatermarkEstimators.Manual;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.util.Preconditions;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.MoreObjects;
+import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.util.SnapshotUtil;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * SplittableDoFn that watches an Iceberg table for new {@link Snapshot}s and emits them one at a
+ * time, advancing the watermark per snapshot. Each snapshot is later processed by {@link
+ * ChangelogScanner}.
+ *
+ * <p>The restriction tracks Snapshots via their <b>sequence numbers</b>, which are monotonic,
+ * unlike snapshot IDs. The initial range starts at the sequence number of the user-configured
+ * starting snapshot (or one if none configured) and runs to {@link Long#MAX_VALUE}. Each call to
+ * {@code @ProcessElement} claims the sequence numbers of newly discovered snapshots in
+ * chronological order.
+ *
+ * <p>Uses a {@link Manual} watermark estimator. After emitting a snapshot, the watermark advances
+ * to that snapshot's commit time. Before a poll pauses or ends, a lagging watermark is raised to
+ * {@code now()} minus the configured max snapshot discovery delay (default {@code
+ * MAX_SNAPSHOT_DISCOVERY_DELAY}), so downstream windows do not stall during quiet periods.
+ */
+@DoFn.UnboundedPerElement
+class WatchForSnapshotsSdf extends DoFn<String, Long> {
+  private static final Logger LOG = LoggerFactory.getLogger(WatchForSnapshotsSdf.class);
+  private static final Counter snapshotsEmitted =
+      Metrics.counter(WatchForSnapshotsSdf.class, "snapshotsEmitted");
+  private static final Gauge latestEmittedSnapshotId =
+      Metrics.gauge(WatchForSnapshotsSdf.class, "latestEmittedSnapshotId");
+  // Fallback delay used when scanConfig.getMaxSnapshotDiscoveryDelay() returns null.
+  private static final Duration MAX_SNAPSHOT_DISCOVERY_DELAY = Duration.standardMinutes(5);
+  private static final long POLL_FOREVER = Long.MAX_VALUE; // open-ended restriction end
+
+  private final IcebergScanConfig scanConfig;
+  private final Duration pollInterval;
+
+  WatchForSnapshotsSdf(IcebergScanConfig scanConfig, Duration pollInterval) {
+    this.scanConfig = scanConfig;
+    this.pollInterval = pollInterval;
+  }
+
+  @GetInitialRestriction
+  public OffsetRange initialRestriction() {
+    TableCache.setup(scanConfig);
+    Table table = TableCache.getRefreshed(scanConfig.getTableIdentifier());
+
+    long toSnapshotExclusiveSeq = POLL_FOREVER;
+    @Nullable Long toSnapshotId = ReadUtils.getToSnapshot(table, scanConfig);
+    if (toSnapshotId != null) {
+      toSnapshotExclusiveSeq =
+          Preconditions.checkStateNotNull(
+                      table.snapshot(toSnapshotId),
+                      "Configured end snapshot %s does not exist",
+                      toSnapshotId)
+                  .sequenceNumber()
+              + 1;
+    }
+
+    @Nullable Long fromSnapshotInclusiveId = ReadUtils.getFromSnapshotInclusive(table, scanConfig);
+    long fromSnapshotInclusiveSeq;
+    if (fromSnapshotInclusiveId == null) {
+      fromSnapshotInclusiveSeq = 1L; // sequence numbers start at 1
+    } else {
+      Snapshot fromSnapshotInclusive =
+          Preconditions.checkArgumentNotNull(
+              table.snapshot(fromSnapshotInclusiveId),
+              "The specified starting snapshot %s does not exist",
+              fromSnapshotInclusiveId);
+      fromSnapshotInclusiveSeq = fromSnapshotInclusive.sequenceNumber();
+
+      boolean sameLineage =
+          toSnapshotId == null
+              ? SnapshotUtil.isAncestorOf(table, fromSnapshotInclusiveId)
+              : SnapshotUtil.isAncestorOf(table, toSnapshotId, fromSnapshotInclusiveId);
+      checkArgument(
+          sameLineage,
+          "Configured starting snapshot %s is not an ancestor of %s",
+          fromSnapshotInclusiveId,
+          toSnapshotId == null ? "the current table" : "end snapshot " + toSnapshotId);
+    }
+
+    return new OffsetRange(
+        fromSnapshotInclusiveSeq, Math.max(fromSnapshotInclusiveSeq, toSnapshotExclusiveSeq));
+  }
+
+  @NewTracker
+  public RestrictionTracker<OffsetRange, Long> newTracker(@Restriction OffsetRange restriction) {
+    if (restriction.getTo() == POLL_FOREVER) {
+      return new GrowableOffsetRangeTracker(
+          restriction.getFrom(), this::estimateCurrentRangeEndExclusive);
+    }
+
+    return new OffsetRangeTracker(restriction);
+  }
+
+  private long estimateCurrentRangeEndExclusive() {
+    TableCache.setup(scanConfig);
+    Table table = TableCache.get(scanConfig.getTableIdentifier());
+
+    @Nullable Long toSnapshotId = ReadUtils.getToSnapshot(table, scanConfig);
+    if (toSnapshotId != null) {
+      @Nullable Snapshot toSnapshot = table.snapshot(toSnapshotId);
+      return toSnapshot == null ? Long.MIN_VALUE : toSnapshot.sequenceNumber() + 1;
+    }
+
+    @Nullable Snapshot current = table.currentSnapshot();
+    return current == null ? Long.MIN_VALUE : current.sequenceNumber() + 1;
+  }
+
+  @GetRestrictionCoder
+  public Coder<OffsetRange> restrictionCoder() {
+    return new OffsetRange.Coder();
+  }
+
+  @GetInitialWatermarkEstimatorState
+  public Instant initialWatermarkState() {
+    return BoundedWindow.TIMESTAMP_MIN_VALUE;
+  }
+
+  @NewWatermarkEstimator
+  public ManualWatermarkEstimator<Instant> newWatermarkEstimator(
+      @WatermarkEstimatorState Instant state) {
+    return new Manual(state);
+  }
+
+  @ProcessElement
+  public ProcessContinuation process(
+      RestrictionTracker<OffsetRange, Long> tracker,
+      ManualWatermarkEstimator<Instant> watermark,
+      OutputReceiver<Long> out) {
+    TableCache.setup(scanConfig);
+    Table table = TableCache.getRefreshed(scanConfig.getTableIdentifier());
+
+    @Nullable Long userToSnapshotId = ReadUtils.getToSnapshot(table, scanConfig);
+    boolean bounded = userToSnapshotId != null;
+
+    @Nullable Snapshot current = table.currentSnapshot();
+    if (current == null) {
+      // no snapshots yet.
+      LOG.info("Skipping scan: table is empty with no snapshots yet");
+      return pauseOrStop(watermark, bounded);
+    }
+
+    // Resolve the upper bound: user-specified bounded mode, or "current" for unbounded.
+    long toSnapshotId;
+    long toSnapshotSeq;
+    if (userToSnapshotId != null) {
+      toSnapshotId = userToSnapshotId;
+      toSnapshotSeq =
+          Preconditions.checkStateNotNull(
+                  table.snapshot(userToSnapshotId),
+                  "Configured toSnapshotId %s does not exist",
+                  userToSnapshotId)
+              .sequenceNumber();
+    } else {
+      toSnapshotId = current.snapshotId();
+      toSnapshotSeq = current.sequenceNumber();
+    }
+
+    long nextSeqInclusive = tracker.currentRestriction().getFrom();
+    if (toSnapshotSeq < nextSeqInclusive) {
+      // Nothing new since last poll.
+      LOG.info("Skipping scan: nothing new since last poll.");
+      return pauseOrStop(watermark, bounded);
+    }
+
+    // Collect snapshots in [nextSeqInclusive, toSnapshotSeq] chronologically
+    String tableId = scanConfig.getTableIdentifier();
+    List<SnapshotInfo> fresh = snapshotsAfter(table, tableId, nextSeqInclusive, toSnapshotId);
+    LOG.info("Collected snapshots: {}", fresh);
+
+    for (SnapshotInfo snap : fresh) {
+      if (!tracker.tryClaim(snap.getSequenceNumber())) {
+        return ProcessContinuation.stop();
+      }
+      Instant ts = Instant.ofEpochMilli(snap.getTimestampMillis());
+      out.outputWithTimestamp(snap.getSnapshotId(), ts);
+
+      if (watermark.currentWatermark().isBefore(ts)) {
+        watermark.setWatermark(ts);
+      }
+      snapshotsEmitted.inc();
+      latestEmittedSnapshotId.set(snap.getSnapshotId());
+      LOG.info(
+          "Emitted snapshot {} (sequence id: {}, commit ts: {})",
+          snap.getSnapshotId(),
+          snap.getSequenceNumber(),
+          ts);
+    }
+
+    return pauseOrStop(watermark, bounded);
+  }
+
+  /**
+   * Raises a lagging watermark to {@code now() - delay}, where delay is the configured max
+   * snapshot discovery delay or, if unset, {@code MAX_SNAPSHOT_DISCOVERY_DELAY}. Returns {@code
+   * stop()} when {@code bounded}, otherwise {@code resume()} after the poll interval.
+   */
+  private ProcessContinuation pauseOrStop(
+      ManualWatermarkEstimator<Instant> watermark, boolean bounded) {
+    Duration delay =
+        MoreObjects.firstNonNull(
+            scanConfig.getMaxSnapshotDiscoveryDelay(), MAX_SNAPSHOT_DISCOVERY_DELAY);
+    Instant idleWatermark = Instant.now().minus(delay);
+    if (watermark.currentWatermark().isBefore(idleWatermark)) {
+      LOG.info(
+          "Sitting idle for {} seconds. Bumping watermark to {}",
+          TimeUnit.MILLISECONDS.toSeconds(
+              Instant.now().getMillis() - watermark.currentWatermark().getMillis()),
+          idleWatermark);
+      watermark.setWatermark(idleWatermark);
+    }
+    return bounded
+        ? ProcessContinuation.stop()
+        : ProcessContinuation.resume().withResumeDelay(pollInterval);
+  }
+
+  /**
+   * Walks back from {@code toSnapshotId} along its ancestors and keeps those whose sequence
+   * number is at least {@code nextSeqInclusive}. The result is ordered oldest first.
+   */
+  @SuppressWarnings("return") // ancestorsOf accepts null returns as a "stop" signal
+  static List<SnapshotInfo> snapshotsAfter(
+      Table table, String tableIdentifier, long nextSeqInclusive, long toSnapshotId) {
+
+    List<SnapshotInfo> snapshots = new ArrayList<>();
+    // ancestorsOf returns an iterable of snapshots looking backwards.
+    // we'll need to reverse it to process snapshots chronologically.
+    for (Snapshot snapshot :
+        SnapshotUtil.ancestorsOf(
+            toSnapshotId, snapshotId -> snapshotId != null ? table.snapshot(snapshotId) : null)) {
+      if (snapshot.sequenceNumber() < nextSeqInclusive) {
+        break; // older ancestors are not visited
+      }
+      snapshots.add(SnapshotInfo.fromSnapshot(snapshot, tableIdentifier));
+    }
+    Collections.reverse(snapshots);
+    return snapshots;
+  }
+}
diff --git a/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/package-info.java b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/package-info.java
new file mode 100644
index 000000000000..8285d91689be
--- /dev/null
+++ b/sdks/java/io/iceberg/src/main/java/org/apache/beam/sdk/io/iceberg/cdc/package-info.java
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Iceberg CDC connectors. */
+package org.apache.beam.sdk.io.iceberg.cdc;
diff --git a/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/ReadUtilsTest.java b/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/ReadUtilsTest.java
index 73a0fd19e893..6287a6e06197 100644
--- a/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/ReadUtilsTest.java
+++ b/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/ReadUtilsTest.java
@@ -40,7 +40,6 @@
 import org.apache.iceberg.catalog.TableIdentifier;
 import org.apache.iceberg.data.Record;
 import org.apache.iceberg.io.CloseableIterable;
-import org.apache.iceberg.parquet.ParquetReader;
 import org.checkerframework.checker.nullness.qual.Nullable;
 import org.junit.ClassRule;
 import org.junit.Rule;
@@ -75,14 +74,25 @@ public void testCreateReader() throws IOException {
           .commit();
     }
 
+    IcebergScanConfig scanConfig =
+        IcebergScanConfig.builder()
+            .setCatalogConfig(
+                IcebergCatalogConfig.builder()
+                    .setCatalogProperties(
+                        ImmutableMap.of("type", "hadoop", "warehouse", warehouse.location))
+                    .build())
+            .setTableIdentifier(tableId)
+            .setSchema(IcebergUtils.icebergSchemaToBeamSchema(simpleTable.schema()))
+            .build();
+
     int numFiles = 0;
     try (CloseableIterable<CombinedScanTask> iterable = simpleTable.newScan().planTasks()) {
       for (CombinedScanTask combinedScanTask : iterable) {
         for (FileScanTask fileScanTask : combinedScanTask.tasks()) {
           String fileName = Iterables.getLast(Splitter.on("/").split(fileScanTask.file().path()));
           List<Record> recordsRead = new ArrayList<>();
-          try (ParquetReader<Record> reader =
-              ReadUtils.createReader(fileScanTask, simpleTable, simpleTable.schema())) {
+          try (CloseableIterable<Record> reader =
+              ReadUtils.createReader(fileScanTask, simpleTable, scanConfig)) {
             reader.forEach(recordsRead::add);
           }
 
diff --git a/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/SerializableDataFileTest.java b/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/SerializableDataFileTest.java
index d4e7793718d8..5126822c06f6 100644
--- a/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/SerializableDataFileTest.java
+++ b/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/SerializableDataFileTest.java
@@ -61,6 +61,9 @@ public class SerializableDataFileTest {
           .add("nanValueCounts")
           .add("lowerBounds")
           .add("upperBounds")
+          .add("dataSequenceNumber")
+          .add("fileSequenceNumber")
+          .add("firstRowId")
           .build();
 
   @Test
diff --git a/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/cdc/DeleteReaderTest.java b/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/cdc/DeleteReaderTest.java
new file mode 100644
index 000000000000..32d0893fa06f
--- /dev/null
+++ b/sdks/java/io/iceberg/src/test/java/org/apache/beam/sdk/io/iceberg/cdc/DeleteReaderTest.java
@@ -0,0 +1,302 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.iceberg.cdc;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.DeleteFile;
+import org.apache.iceberg.FileMetadata;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.data.DeleteLoader;
+import org.apache.iceberg.data.GenericRecord;
+import org.apache.iceberg.data.Record;
+import org.apache.iceberg.deletes.PositionDeleteIndex;
+import org.apache.iceberg.io.CloseableIterable;
+import org.apache.iceberg.io.InputFile;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.StructLikeSet;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Verifies that {@link DeleteReader#read} returns the <b>union</b> of records matched by position
+ * and equality deletes, and that preloaded deletes are reused instead of being loaded again.
+ *
+ * <p>The tests stub the {@link DeleteLoader} so the predicate-composition logic is exercised
+ * directly without writing real delete files. End-to-end behavior is covered by other tests.
+ */
+@RunWith(JUnit4.class)
+public class DeleteReaderTest {
+  private static final Schema TABLE_SCHEMA =
+      new Schema(
+          Types.NestedField.required(1, "id", Types.IntegerType.get()),
+          Types.NestedField.required(2, "name", Types.StringType.get()));
+
+  private static final DeleteFile POS_FILE = // metadata-only position-delete file
+      FileMetadata.deleteFileBuilder(PartitionSpec.unpartitioned())
+          .ofPositionDeletes()
+          .withPath("/test/pos.parquet")
+          .withFileSizeInBytes(100)
+          .withRecordCount(3)
+          .build();
+
+  private static final DeleteFile EQ_FILE_ID = // metadata-only equality-delete file
+      FileMetadata.deleteFileBuilder(PartitionSpec.unpartitioned())
+          .ofEqualityDeletes(1) // 1 is the field id of "id" in TABLE_SCHEMA
+          .withPath("/test/eq.parquet")
+          .withFileSizeInBytes(100)
+          .withRecordCount(2)
+          .build();
+
+  // ------------------------------------------------------------------------
+  // Test infrastructure
+  // ------------------------------------------------------------------------
+
+  /** {@link DeleteReader} for {@link Record}s that hands out a caller-supplied stub loader. */
+  private static class StubDeleteReader extends DeleteReader<Record> {
+    private final DeleteLoader stub; // returned as-is by newDeleteLoader()
+
+    StubDeleteReader(List<DeleteFile> deletes, DeleteLoader stub) {
+      super(
+          "/test/data.parquet",
+          deletes,
+          TABLE_SCHEMA,
+          TABLE_SCHEMA,
+          true,
+          PreloadedDeletes.empty());
+      this.stub = stub;
+    }
+
+    StubDeleteReader(
+        List<DeleteFile> deletes,
+        DeleteLoader stub,
+        DeleteReader.PreloadedDeletes preloadedDeletes) {
+      super("/test/data.parquet", deletes, TABLE_SCHEMA, TABLE_SCHEMA, true, preloadedDeletes);
+      this.stub = stub;
+    }
+
+    @Override
+    protected StructLike asStructLike(Record record) {
+      return record; // Record is already a StructLike, so no wrapping is needed
+    }
+
+    @Override
+    protected InputFile getInputFile(String location) {
+      throw new UnsupportedOperationException("not used with a stubbed DeleteLoader");
+    }
+
+    @Override
+    protected DeleteLoader newDeleteLoader() {
+      return stub;
+    }
+  }
+
+  /** {@link DeleteLoader} returning pre-built indexes and counting calls to each load method. */
+  private static class StubLoader implements DeleteLoader {
+    private final PositionDeleteIndex posIndex;
+    private final StructLikeSet eqSet;
+    private int posLoadCount = 0; // number of loadPositionDeletes calls
+    private int eqLoadCount = 0; // number of loadEqualityDeletes calls
+
+    StubLoader(PositionDeleteIndex posIndex, StructLikeSet eqSet) {
+      this.posIndex = posIndex;
+      this.eqSet = eqSet;
+    }
+
+    @Override
+    public PositionDeleteIndex loadPositionDeletes(Iterable<DeleteFile> files, CharSequence path) {
+      posLoadCount++;
+      return posIndex; // files and path are ignored
+    }
+
+    @Override
+    public StructLikeSet loadEqualityDeletes(Iterable<DeleteFile> files, Schema schema) {
+      eqLoadCount++;
+      return eqSet; // files and schema are ignored
+    }
+  }
+
+  /** Builds a minimal HashSet-backed {@link PositionDeleteIndex} seeded with {@code positions}. */
+  private static PositionDeleteIndex posIndexOf(long... positions) {
+    Set<Long> backing = new HashSet<>();
+    for (long p : positions) {
+      backing.add(p);
+    }
+    return new PositionDeleteIndex() {
+      @Override
+      public void delete(long pos) {
+        backing.add(pos);
+      }
+
+      @Override
+      public void delete(long from, long to) {
+        for (long p = from; p < to; p++) { // half-open range [from, to)
+          backing.add(p);
+        }
+      }
+
+      @Override
+      public boolean isDeleted(long pos) {
+        return backing.contains(pos);
+      }
+
+      @Override
+      public boolean isEmpty() {
+        return backing.isEmpty();
+      }
+
+      @Override
+      public long cardinality() {
+        return backing.size(); // distinct positions, since the backing store is a set
+      }
+    };
+  }
+
+  private static StructLikeSet eqSetOfIds(int... ids) { // one single-column row per id
+    Schema idSchema = TABLE_SCHEMA.select("id"); // keep only the "id" column
+    StructLikeSet set = StructLikeSet.create(idSchema.asStruct());
+    for (int id : ids) {
+      GenericRecord r = GenericRecord.create(idSchema);
+      r.setField("id", id);
+      set.add(r);
+    }
+    return set;
+  }
+
+  /** Builds N records (id=0..N-1, name="v0".."vN-1") matching {@code readSchema}. */
+  private static List<Record> records(Schema readSchema, int n) {
+    boolean hasPos = readSchema.findField("_pos") != null; // true if a "_pos" field exists
+    List<Record> recs = new ArrayList<>(n);
+    for (long i = 0; i < n; i++) {
+      GenericRecord r = GenericRecord.create(readSchema);
+      r.setField("id", (int) i);
+      r.setField("name", "v" + i);
+      if (hasPos) {
+        r.setField("_pos", i); // "_pos" gets the same value as the record's id
+      }
+      recs.add(r);
+    }
+    return recs;
+  }
+
+  /** Sorted list of "id" values from the output, for stable assertions. */
+  private static List<Integer> idsOf(CloseableIterable<Record> records) {
+    return ImmutableList.copyOf(records).stream() // NOTE(review): drained but never closed
+        .map(r -> (Integer) r.getField("id"))
+        .sorted() // natural ascending order
+        .collect(Collectors.toList());
+  }
+
+  /** With no delete files at all, {@code read()} emits nothing. */
+  @Test
+  public void noDeletesEmitsNothing() {
+    StubLoader loader = new StubLoader(posIndexOf(), eqSetOfIds()); // both arms empty
+    StubDeleteReader reader = new StubDeleteReader(Collections.emptyList(), loader);
+    List<Record> input = records(reader.requiredSchema(), 5); // ids 0..4
+
+    CloseableIterable<Record> output = reader.read(CloseableIterable.withNoopClose(input));
+
+    assertEquals(Collections.emptyList(), idsOf(output));
+  }
+
+  /** With only position deletes, {@code read()} emits exactly the position-deleted records. */
+  @Test
+  public void posOnlyEmitsPosDeletedRecords() {
+    StubLoader loader = new StubLoader(posIndexOf(1L, 3L), eqSetOfIds());
+    StubDeleteReader reader = new StubDeleteReader(ImmutableList.of(POS_FILE), loader);
+    List<Record> input = records(reader.requiredSchema(), 5); // ids 0..4
+
+    CloseableIterable<Record> output = reader.read(CloseableIterable.withNoopClose(input));
+
+    assertEquals(ImmutableList.of(1, 3), idsOf(output));
+  }
+
+  /** With only equality deletes, {@code read()} emits the records matching the eq set. */
+  @Test
+  public void eqOnlyEmitsEqDeletedRecords() {
+    StubLoader loader = new StubLoader(posIndexOf(), eqSetOfIds(2, 4));
+    StubDeleteReader reader = new StubDeleteReader(ImmutableList.of(EQ_FILE_ID), loader);
+    List<Record> input = records(reader.requiredSchema(), 5); // ids 0..4
+
+    CloseableIterable<Record> output = reader.read(CloseableIterable.withNoopClose(input));
+
+    assertEquals(ImmutableList.of(2, 4), idsOf(output));
+  }
+
+  /** With position and equality deletes, {@code read()} emits their union without duplicates. */
+  @Test
+  public void posAndEqEmitUnion() {
+    StubLoader loader = new StubLoader(posIndexOf(0L, 4L), eqSetOfIds(2, 4));
+    StubDeleteReader reader = new StubDeleteReader(ImmutableList.of(POS_FILE, EQ_FILE_ID), loader);
+    List<Record> input = records(reader.requiredSchema(), 6); // ids 0..5
+
+    CloseableIterable<Record> output = reader.read(CloseableIterable.withNoopClose(input));
+
+    // id 4 is in both sides; it must appear exactly once.
+    assertEquals(ImmutableList.of(0, 2, 4), idsOf(output));
+  }
+
+  /** Preloaded position deletes are reused instead of loading the same delete files again. */
+  @Test
+  public void preloadedPositionDeletesAvoidSecondLoad() {
+    StubLoader loader = new StubLoader(posIndexOf(), eqSetOfIds()); // loader itself is empty
+    PositionDeleteIndex preloadedPosIndex = posIndexOf(1L, 3L);
+    StubDeleteReader reader =
+        new StubDeleteReader(
+            ImmutableList.of(POS_FILE),
+            loader,
+            DeleteReader.PreloadedDeletes.of(preloadedPosIndex, Collections.emptyMap()));
+    List<Record> input = records(reader.requiredSchema(), 5); // ids 0..4
+
+    CloseableIterable<Record> output = reader.read(CloseableIterable.withNoopClose(input));
+
+    assertEquals(ImmutableList.of(1, 3), idsOf(output));
+    assertEquals(0, loader.posLoadCount); // loadPositionDeletes was never called
+  }
+
+  /** Preloaded equality delete sets are reused instead of loading the same delete files again. */
+  @Test
+  public void preloadedEqualityDeletesAvoidSecondLoad() {
+    StubLoader loader = new StubLoader(posIndexOf(), eqSetOfIds()); // loader itself is empty
+    Map<Set<Integer>, StructLikeSet> preloadedEqSets = new HashMap<>();
+    preloadedEqSets.put(Collections.singleton(1), eqSetOfIds(2, 4)); // 1 = EQ_FILE_ID's field id
+    StubDeleteReader reader =
+        new StubDeleteReader(
+            ImmutableList.of(EQ_FILE_ID),
+            loader,
+            DeleteReader.PreloadedDeletes.of(null, preloadedEqSets)); // no preloaded pos index
+    List<Record> input = records(reader.requiredSchema(), 5); // ids 0..4
+
+    CloseableIterable<Record> output = reader.read(CloseableIterable.withNoopClose(input));
+
+    assertEquals(ImmutableList.of(2, 4), idsOf(output));
+    assertEquals(0, loader.eqLoadCount); // loadEqualityDeletes was never called
+  }
+}