Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .palantir/revapi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1369,7 +1369,20 @@ acceptedBreaks:
old: "class org.apache.iceberg.encryption.EncryptingFileIO"
new: "class org.apache.iceberg.encryption.EncryptingFileIO"
justification: "New method for Manifest List reading"
- code: "java.method.addedToInterface"
new: "method org.apache.iceberg.UpdateSchema org.apache.iceberg.UpdateSchema::undeleteColumn(java.lang.String,\
\ boolean)"
justification: "Adds an expected new spark function for undeleting a column.\
\ This is backwards-compatible."
org.apache.iceberg:iceberg-core:
- code: "java.class.defaultSerializationChanged"
old: "class org.apache.iceberg.avro.SupportsIndexProjection"
new: "class org.apache.iceberg.avro.SupportsIndexProjection"
justification: "Serialization across versions is not guaranteed"
- code: "java.class.defaultSerializationChanged"
old: "class org.apache.iceberg.hadoop.SerializableConfiguration"
new: "class org.apache.iceberg.hadoop.SerializableConfiguration"
justification: "Serialization across versions is not guaranteed"
- code: "java.class.noLongerInheritsFromClass"
old: "class org.apache.iceberg.rest.auth.OAuth2Manager"
new: "class org.apache.iceberg.rest.auth.OAuth2Manager"
Expand Down
19 changes: 19 additions & 0 deletions api/src/main/java/org/apache/iceberg/UpdateSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,25 @@ default UpdateSchema updateColumnDefault(String name, Literal<?> newDefault) {
*/
UpdateSchema deleteColumn(String name);

/**
* Restore a previously deleted column from the schema history.
*
* <p>The name is used to search for the column in historical schemas. The column is restored with
* its original field ID and nullability, preserving data file compatibility.
*
* <p>If the original column was required, this method verifies that no data has been written to
* the table since the column was deleted. If data was written, the undelete fails because the
* restored required column cannot satisfy its non-null constraint for rows added after deletion.
* Pass {@code setNullable=true} to bypass this check and restore the column as optional instead.
*
* @param name name of the column to restore (supports dot notation for nested fields)
* @param setNullable if true, restore as optional even if the original was required
* @return this for method chaining
* @throws IllegalArgumentException if name already exists, was never deleted, parent struct does
* not exist, or the column was originally required and data was written after deletion
*/
UpdateSchema undeleteColumn(String name, boolean setNullable);

/**
* Move a column from its current position to the start of the schema or its parent struct.
*
Expand Down
148 changes: 136 additions & 12 deletions core/src/main/java/org/apache/iceberg/SchemaUpdate.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.iceberg;

import java.util.Collection;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
Expand Down Expand Up @@ -121,17 +122,7 @@ private void internalAddColumn(
if (parent != null) {
Types.NestedField parentField = findField(parent);
Preconditions.checkArgument(parentField != null, "Cannot find parent struct: %s", parent);
Type parentType = parentField.type();
if (parentType.isNestedType()) {
Type.NestedType nested = parentType.asNestedType();
if (nested.isMapType()) {
// fields are added to the map value type
parentField = nested.asMapType().fields().get(1);
} else if (nested.isListType()) {
// fields are added to the element type
parentField = nested.asListType().fields().get(0);
}
}
parentField = getNestedParentField(parentField);
Preconditions.checkArgument(
parentField.type().isNestedType() && parentField.type().asNestedType().isStructType(),
"Cannot add to non-struct column: %s: %s",
Expand Down Expand Up @@ -200,6 +191,140 @@ public UpdateSchema deleteColumn(String name) {
return this;
}

@Override
public UpdateSchema undeleteColumn(String name, boolean setNullable) {
Types.NestedField existingField = findField(name);
Preconditions.checkArgument(
existingField == null,
"Cannot undelete column '%s': a column with this name already exists in the current schema",
name);

Preconditions.checkArgument(
base != null,
"Cannot undelete column: table metadata is required to access historical schemas");

DeletedColumnInfo deletedInfo = findDeletedColumn(name);
Preconditions.checkArgument(
deletedInfo != null,
"Cannot undelete column '%s': column not found in any historical schema",
name);

int parentId = deletedInfo.parentId();
Types.NestedField originalField = deletedInfo.field();

Types.NestedField field;
if (setNullable || originalField.isOptional()) {
field = originalField.asOptional();
} else {
Preconditions.checkArgument(
!dataWrittenSinceDeletion(originalField.fieldId()),
"Cannot undelete required column '%s': data was written after the column was deleted. "
+ "Pass setNullable=true to restore the column as optional.",
name);
field = originalField;
}

if (parentId != TABLE_ROOT_ID) {
idToParent.put(field.fieldId(), parentId);
}

updates.put(field.fieldId(), field);
parentToAddedIds.put(parentId, field.fieldId());
addedNameToId.put(name, field.fieldId());

return this;
}

/**
* Returns true if any snapshot after the column's last presence added data files. Walks snapshots
* newest-first: once we hit a snapshot whose schema contains the field, no later snapshot wrote
* data without it.
*/
private boolean dataWrittenSinceDeletion(int fieldId) {
List<Snapshot> snapshots = Lists.newArrayList(base.snapshots());
snapshots.sort(Comparator.comparingLong(Snapshot::sequenceNumber).reversed());
Map<Integer, Schema> schemasById = base.schemasById();

for (Snapshot snapshot : snapshots) {
Integer schemaId = snapshot.schemaId();
if (schemaId != null && schemasById.get(schemaId).findField(fieldId) != null) {
return false;
}
Map<String, String> summary = snapshot.summary();
String added = summary != null ? summary.get(SnapshotSummary.ADDED_FILES_PROP) : null;
if (added != null && !"0".equals(added)) {
return true;
}
}
return false;
}

private record DeletedColumnInfo(int parentId, Types.NestedField field) {}

/** Find the first instance of the deleted column, from most recent to oldest. */
private DeletedColumnInfo findDeletedColumn(String name) {
List<Schema> schemas = base.schemas();

String[] parts = name.split("\\.");
String parentPath =
parts.length > 1
? String.join(".", java.util.Arrays.copyOf(parts, parts.length - 1))
: null;

if (parentPath != null) {
Types.NestedField currentParent = findField(parentPath);
Preconditions.checkArgument(
currentParent != null,
"Cannot undelete nested column '%s': parent struct '%s' does not exist in current schema. "
+ "Undelete the parent first.",
name,
parentPath);
}

for (int i = schemas.size() - 1; i >= 0; i--) {
Schema historicalSchema = schemas.get(i);

Types.NestedField field =
caseSensitive
? historicalSchema.findField(name)
: historicalSchema.caseInsensitiveFindField(name);

if (field != null) {
int parentId;
if (parentPath != null) {
Types.NestedField parentField =
caseSensitive
? historicalSchema.findField(parentPath)
: historicalSchema.caseInsensitiveFindField(parentPath);

parentField = getNestedParentField(parentField);
parentId = parentField.fieldId();
} else {
parentId = TABLE_ROOT_ID;
}

return new DeletedColumnInfo(parentId, field);
}
}

return null;
}

// Properly pull out the type of the parent field from a map or array
private Types.NestedField getNestedParentField(Types.NestedField parentField) {
Types.NestedField nestedParentField = parentField;
Type parentType = parentField.type();
if (parentType.isNestedType()) {
Type.NestedType nested = parentType.asNestedType();
if (nested.isMapType()) {
nestedParentField = nested.asMapType().fields().get(1);
} else if (nested.isListType()) {
nestedParentField = nested.asListType().fields().get(0);
}
}
return nestedParentField;
}

@Override
public UpdateSchema renameColumn(String name, String newName) {
Types.NestedField field = findField(name);
Expand Down Expand Up @@ -561,7 +686,6 @@ private static Schema applyChanges(
}
}

// apply schema changes
Types.StructType struct =
TypeUtil.visit(schema, new ApplyChanges(deletes, updates, parentToAddedIds, moves))
.asNestedType()
Expand Down
Loading
Loading