
Add last updated timestamp and snapshotId for partition table #7581

Merged: 10 commits, Jun 23, 2023
69 changes: 51 additions & 18 deletions core/src/main/java/org/apache/iceberg/PartitionsTable.java
@@ -49,6 +49,16 @@ public class PartitionsTable extends BaseMetadataTable {
new Schema(
Types.NestedField.required(1, "partition", Partitioning.partitionType(table)),
Types.NestedField.required(4, "spec_id", Types.IntegerType.get()),
Types.NestedField.required(
9,
"last_updated",
Types.TimestampType.withZone(),
"Partition last updated timestamp"),
Types.NestedField.required(
10,
"last_updated_snapshot_id",
Member commented:

a) Is it a good idea to keep the snapshot id? Because regularly running expire_snapshots can clean up the snapshots and we may not be able to map what operation these files were created from, even with the snapshot id.

b) There was also an ask for "latest sequence number" associated with that partition from the community users during partition stats discussion.

Do you think modified time is enough and no need for the sequence number?

Member commented:

also cc: @szehon-ho

szehon-ho (Collaborator) commented on May 23, 2023:

Hm, what do we think about having both? (And null if it's expired, as per the discussion here: #7581 (comment).) If we have snapshot_id, I feel it's more useful than last_update_time, but I agree we don't always have it.

I think the sequence number will be good too, but do you mean fileSequenceNumber or dataSequenceNumber? Maybe worth another PR if there's more discussion there?

Member commented:

I am just worried that most of the snapshots will be expired and we will end up not using that field much.
The main purpose of storing the snapshot id is to find what operation last updated this partition, right? In that case, maybe we can store the operation type itself directly.

> I think the sequence number will be good too, but do you mean fileSequenceNumber or dataSequenceNumber? Maybe worth another PR if there's more discussion there?

I guess it is fileSequenceNumber.

Yeah, we can have a separate discussion. I think data_file_size_in_bytes per partition could also be another good candidate for storing here.

Contributor Author commented:

> a) Is it a good idea to keep the snapshot id? Because regularly running expire_snapshots can clean up the snapshots and we may not be able to map what operation these files were created from, even with the snapshot id.
>
> b) There was also an ask for "latest sequence number" associated with that partition from the community users during partition stats discussion.
>
> Do you think modified time is enough and no need for the sequence number?

My initial thinking is that the last updated timestamp is helpful by itself, but if there's any doubt about the timestamp, it's better to provide a reference that allows further investigation. Since we derive the last updated timestamp from the snapshot, providing the snapshotId enables a way to look up further information about that snapshot (for example, whether it was a rewrite-data operation or an append of late-arriving data).

With respect to periodic snapshot expiration, I think a partition can report a null snapshot if the referenced snapshotId was already expired, but that only matters when your data outlives your snapshots. I.e., if you run data compaction alongside snapshot expiration, or if you also periodically delete partitions (say the table is daily-partitioned and the dataset has a retention period) together with snapshot expiration, it seems to be fine.

As for the file sequence number, I think it might be helpful, but by itself it seems harder to use compared to the timestamp and snapshotId.
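The expiration concern discussed above can be illustrated with a small, self-contained Java sketch (the class and method names here are hypothetical, not Iceberg API): once a snapshot id no longer resolves to a retained snapshot, there is no commit time to derive, so the derived value falls back to null.

```java
import java.util.HashMap;
import java.util.Map;

// Illustrative sketch, not actual Iceberg API: after expire_snapshots removes a
// snapshot, looking it up by id yields nothing, so the last_updated value derived
// from its commit time would surface as null for that partition.
public class ExpiredSnapshotLookup {

    // Returns the commit time in microseconds, or null when the snapshot is gone.
    static Long lastUpdatedMicros(Map<Long, Long> commitTimesMillis, long snapshotId) {
        Long millis = commitTimesMillis.get(snapshotId); // null if expired
        return millis == null ? null : millis * 1000;
    }

    public static void main(String[] args) {
        Map<Long, Long> live = new HashMap<>();
        live.put(10L, 1_684_800_000_000L); // snapshot 10 is still retained

        System.out.println(lastUpdatedMicros(live, 10L)); // retained: prints commit time in micros
        System.out.println(lastUpdatedMicros(live, 9L));  // expired: prints "null"
    }
}
```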

szehon-ho (Collaborator) commented on May 23, 2023:

@RussellSpitzer @aokolnychyi @flyrain any thoughts here on what would make more sense for the partitions table?

szehon-ho (Collaborator) commented on May 27, 2023:

After some thought, I think both will be null if the snapshot is expired. But we are no worse off than the alternative, which is for the user to join the entries and snapshots tables to find the last update time; that will also be null if the snapshot is expired.

Perhaps we could have more persistent storage of snapshot metadata, even after expiration?

I think for this case (lastUpdateTime, lastSnapshotId) it is OK to proceed: snapshots typically live at least several days, and I would imagine a user of this tool who is interested in recently updated partitions can query before the snapshot is expired.

Types.LongType.get(),
"Partition last updated snapshot id"),
Types.NestedField.required(
2, "record_count", Types.LongType.get(), "Count of records in data files"),
Types.NestedField.required(
@@ -85,6 +95,8 @@ public TableScan newScan() {
public Schema schema() {
if (unpartitionedTable) {
return schema.select(
"last_updated",
"last_updated_snapshot_id",
"record_count",
"file_count",
"position_delete_record_count",
@@ -111,6 +123,8 @@ private DataTask task(StaticTableScan scan) {
partitions,
root ->
StaticDataTask.Row.of(
root.lastUpdatedAt,
root.lastUpdatedSnapshotId,
root.dataRecordCount,
root.dataFileCount,
root.posDeleteRecordCount,
@@ -131,6 +145,8 @@ private static StaticDataTask.Row convertPartition(Partition partition) {
return StaticDataTask.Row.of(
partition.partitionData,
partition.specId,
partition.lastUpdatedAt,
partition.lastUpdatedSnapshotId,
partition.dataRecordCount,
partition.dataFileCount,
partition.posDeleteRecordCount,
@@ -142,13 +158,14 @@ private static Iterable<Partition> partitions(Table table, StaticTableScan scan) {
private static Iterable<Partition> partitions(Table table, StaticTableScan scan) {
Types.StructType partitionType = Partitioning.partitionType(table);
PartitionMap partitions = new PartitionMap(partitionType);

try (CloseableIterable<ContentFile<?>> files = planFiles(scan)) {
for (ContentFile<?> file : files) {
try (CloseableIterable<ManifestEntry<? extends ContentFile<?>>> entries = planEntries(scan)) {
for (ManifestEntry<? extends ContentFile<?>> entry : entries) {
Snapshot snapshot = table.snapshot(entry.snapshotId());
ContentFile<?> file = entry.file();
StructLike partition =
PartitionUtil.coercePartition(
partitionType, table.specs().get(file.specId()), file.partition());
partitions.get(partition).update(file);
partitions.get(partition).update(file, snapshot);
}
} catch (IOException e) {
throw new UncheckedIOException(e);
@@ -158,25 +175,32 @@ private static Iterable<Partition> partitions(Table table, StaticTableScan scan)
}

@VisibleForTesting
static CloseableIterable<ContentFile<?>> planFiles(StaticTableScan scan) {
static CloseableIterable<ManifestEntry<?>> planEntries(StaticTableScan scan) {
Table table = scan.table();

CloseableIterable<ManifestFile> filteredManifests =
filteredManifests(scan, table, scan.snapshot().allManifests(table.io()));

Iterable<CloseableIterable<ContentFile<?>>> tasks =
CloseableIterable.transform(
filteredManifests,
manifest ->
CloseableIterable.transform(
ManifestFiles.open(manifest, table.io(), table.specs())
.caseSensitive(scan.isCaseSensitive())
.select(scanColumns(manifest.content())), // don't select stats columns
t -> (ContentFile<?>) t));
Iterable<CloseableIterable<ManifestEntry<?>>> tasks =
CloseableIterable.transform(filteredManifests, manifest -> readEntries(manifest, scan));

return new ParallelIterable<>(tasks, scan.planExecutor());
}

private static CloseableIterable<ManifestEntry<?>> readEntries(
ManifestFile manifest, StaticTableScan scan) {
Table table = scan.table();
return CloseableIterable.transform(
ManifestFiles.open(manifest, table.io(), table.specs())
.caseSensitive(scan.isCaseSensitive())
.select(scanColumns(manifest.content())) // don't select stats columns
.entries(),
t ->
(ManifestEntry<? extends ContentFile<?>>)
// defensive copy of manifest entry without stats columns
t.copyWithoutStats());
}

private static List<String> scanColumns(ManifestContent content) {
switch (content) {
case DATA:
@@ -249,19 +273,28 @@ static class Partition {
private int posDeleteFileCount;
private long eqDeleteRecordCount;
private int eqDeleteFileCount;
private long lastUpdatedAt;
private long lastUpdatedSnapshotId;

Partition(StructLike key, Types.StructType keyType) {
this.partitionData = toPartitionData(key, keyType);
this.specId = 0;
this.dataRecordCount = 0;
this.dataRecordCount = 0L;
this.dataFileCount = 0;
this.posDeleteRecordCount = 0;
this.posDeleteRecordCount = 0L;
this.posDeleteFileCount = 0;
this.eqDeleteRecordCount = 0;
this.eqDeleteRecordCount = 0L;
this.eqDeleteFileCount = 0;
this.lastUpdatedAt = 0L;
this.lastUpdatedSnapshotId = 0L;
}

void update(ContentFile<?> file) {
void update(ContentFile<?> file, Snapshot snapshot) {
long snapshotCommitTime = snapshot == null ? 0 : snapshot.timestampMillis() * 1000;
if (snapshotCommitTime > this.lastUpdatedAt) {
this.lastUpdatedAt = snapshotCommitTime;
this.lastUpdatedSnapshotId = snapshot.snapshotId();
}
switch (file.content()) {
case DATA:
this.dataRecordCount += file.recordCount();
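The aggregation that the new Partition.update() performs above can be sketched in isolation (the class layout and the negative-timestamp sentinel for an expired snapshot are illustrative, not the actual Iceberg implementation): each manifest entry contributes its snapshot's commit time, and the partition keeps the maximum together with the matching snapshot id.

```java
// Minimal sketch of the last-updated tracking this PR adds to Partition.update().
// Timestamps are kept in microseconds, matching TimestampType.withZone().
public class Partition {
    long lastUpdatedAt = 0L;          // microseconds since epoch
    long lastUpdatedSnapshotId = 0L;

    // A negative timestamp models an expired snapshot with an unknown commit time.
    void update(long snapshotTimestampMillis, long snapshotId) {
        if (snapshotTimestampMillis < 0) {
            return; // expired snapshot: keep the values we already have
        }
        long micros = snapshotTimestampMillis * 1000;
        if (micros > lastUpdatedAt) {
            lastUpdatedAt = micros;
            lastUpdatedSnapshotId = snapshotId;
        }
    }

    public static void main(String[] args) {
        Partition p = new Partition();
        p.update(2_000L, 42L); // newer commit wins
        p.update(1_000L, 7L);  // older commit loses the max comparison
        System.out.println(p.lastUpdatedAt + " " + p.lastUpdatedSnapshotId); // prints "2000000 42"
    }
}
```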
@@ -81,18 +81,20 @@ protected void validateTaskScanResiduals(TableScan scan, boolean ignoreResiduals
}

protected void validateSingleFieldPartition(
CloseableIterable<ContentFile<?>> files, int partitionValue) {
CloseableIterable<ManifestEntry<? extends ContentFile<?>>> files, int partitionValue) {
validatePartition(files, 0, partitionValue);
}

protected void validatePartition(
CloseableIterable<ContentFile<?>> files, int position, int partitionValue) {
CloseableIterable<ManifestEntry<? extends ContentFile<?>>> entries,
int position,
int partitionValue) {
Assert.assertTrue(
"File scan tasks do not include correct file",
StreamSupport.stream(files.spliterator(), false)
StreamSupport.stream(entries.spliterator(), false)
.anyMatch(
file -> {
StructLike partition = file.partition();
entry -> {
StructLike partition = entry.file().partition();
if (position >= partition.size()) {
return false;
}