-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Spark 3.3: Support write to branch through table identifier #6965
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
080cf36
c15a7f9
a14d1d4
a51e1ba
57416c2
ae28f5c
e3a3698
b7050d6
784f2d8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -397,6 +397,53 @@ public static Schema schemaFor(Table table, Long snapshotId, Long timestampMilli | |
| return table.schema(); | ||
| } | ||
|
|
||
| /** | ||
| * Return the schema of the snapshot at a given branch. | ||
| * | ||
| * <p>If branch does not exist, the table schema is returned because it will be the schema when | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The logic here for finding both the schema and the latest snapshot allows the branch to be non-existent. The reason I decided to do it this way is that the core library will still allow auto-creation of a branch, so it makes more sense to support that case in these util methods. We only block writing to a non-existing branch through the table identifier in the Spark module, but we will support other cases, like the WAP branch, that leverage the core feature.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I agree. We should block at the write, not in the helper methods.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Makes sense to me too. |
||
| * the new branch is created. | ||
| * | ||
| * @param table a {@link Table} | ||
| * @param branch branch name of the table (nullable) | ||
| * @return schema of the specific snapshot at the given branch | ||
| */ | ||
| public static Schema schemaFor(Table table, String branch) { | ||
| if (branch == null || branch.equals(SnapshotRef.MAIN_BRANCH)) { | ||
| return table.schema(); | ||
| } | ||
|
|
||
| Snapshot ref = table.snapshot(branch); | ||
| if (ref == null) { | ||
| return table.schema(); | ||
| } | ||
|
|
||
| return schemaFor(table, ref.snapshotId()); | ||
| } | ||
|
|
||
| /** | ||
| * Return the schema of the snapshot at a given branch. | ||
| * | ||
| * <p>If branch does not exist, the table schema is returned because it will be the schema when | ||
| * the new branch is created. | ||
| * | ||
| * @param metadata a {@link TableMetadata} | ||
| * @param branch branch name of the table (nullable) | ||
| * @return schema of the specific snapshot at the given branch | ||
| */ | ||
| public static Schema schemaFor(TableMetadata metadata, String branch) { | ||
| if (branch == null || branch.equals(SnapshotRef.MAIN_BRANCH)) { | ||
| return metadata.schema(); | ||
| } | ||
|
|
||
| SnapshotRef ref = metadata.ref(branch); | ||
| if (ref == null) { | ||
| return metadata.schema(); | ||
| } | ||
|
|
||
| Snapshot snapshot = metadata.snapshot(ref.snapshotId()); | ||
| return metadata.schemas().get(snapshot.schemaId()); | ||
| } | ||
|
|
||
| /** | ||
| * Fetch the snapshot at the head of the given branch in the given table. | ||
| * | ||
|
|
@@ -405,11 +452,11 @@ public static Schema schemaFor(Table table, Long snapshotId, Long timestampMilli | |
| * code path to ensure backwards compatibility. | ||
| * | ||
| * @param table a {@link Table} | ||
| * @param branch branch name of the table | ||
| * @param branch branch name of the table (nullable) | ||
| * @return the latest snapshot for the given branch | ||
| */ | ||
| public static Snapshot latestSnapshot(Table table, String branch) { | ||
| if (branch.equals(SnapshotRef.MAIN_BRANCH)) { | ||
| if (branch == null || branch.equals(SnapshotRef.MAIN_BRANCH)) { | ||
| return table.currentSnapshot(); | ||
| } | ||
|
|
||
|
|
@@ -423,15 +470,23 @@ public static Snapshot latestSnapshot(Table table, String branch) { | |
| * TableMetadata#ref(String)}} for the main branch so that existing code still goes through the | ||
| * old code path to ensure backwards compatibility. | ||
| * | ||
| * <p>If branch does not exist, the table's latest snapshot is returned because the new branch | ||
| * will start from it when the branch is created. | ||
| * | ||
| * @param metadata a {@link TableMetadata} | ||
| * @param branch branch name of the table metadata | ||
| * @param branch branch name of the table metadata (nullable) | ||
| * @return the latest snapshot for the given branch | ||
| */ | ||
| public static Snapshot latestSnapshot(TableMetadata metadata, String branch) { | ||
| if (branch.equals(SnapshotRef.MAIN_BRANCH)) { | ||
| if (branch == null || branch.equals(SnapshotRef.MAIN_BRANCH)) { | ||
| return metadata.currentSnapshot(); | ||
| } | ||
|
|
||
| SnapshotRef ref = metadata.ref(branch); | ||
| if (ref == null) { | ||
| return metadata.currentSnapshot(); | ||
| } | ||
|
|
||
| return metadata.snapshot(metadata.ref(branch).snapshotId()); | ||
| return metadata.snapshot(ref.snapshotId()); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch with the schema.