@@ -165,6 +165,7 @@ type MetadataBuilder struct {
165
165
// update tracking
166
166
lastAddedSchemaID * int
167
167
lastAddedPartitionID * int
168
+ lastAddedSortOrderID * int
168
169
}
169
170
170
171
func NewMetadataBuilder () (* MetadataBuilder , error ) {
@@ -384,29 +385,37 @@ func (b *MetadataBuilder) RemoveSnapshots(snapshotIds []int64) error {
384
385
return nil
385
386
}
386
387
387
- func (b * MetadataBuilder ) AddSortOrder (sortOrder * SortOrder , initial bool ) error {
388
+ func (b * MetadataBuilder ) AddSortOrder (sortOrder * SortOrder ) error {
388
389
curSchema := b .CurrentSchema ()
389
390
if curSchema == nil {
390
391
return errors .New ("can't add sort order with no current schema" )
391
392
}
392
393
393
- if err := sortOrder .CheckCompatibility (curSchema ); err != nil {
394
- return fmt .Errorf ("sort order %s is not compatible with current schema: %w" , sortOrder , err )
394
+ newOrderID := b .reuseOrCreateNewSortOrderID (sortOrder )
395
+ if _ , err := b .GetSortOrderByID (newOrderID ); err == nil {
396
+ if b .lastAddedSortOrderID != & newOrderID {
397
+ b .lastAddedSortOrderID = & newOrderID
398
+ sortOrder .orderID = newOrderID
399
+ b .updates = append (b .updates , NewAddSortOrderUpdate (sortOrder ))
400
+ }
401
+
402
+ return nil
395
403
}
404
+ sortOrder .orderID = newOrderID
396
405
397
- var sortOrders [] SortOrder
398
- if ! initial {
399
- sortOrders = append ( sortOrders , b . sortOrderList ... )
406
+ sortOrders := b . sortOrderList
407
+ if err := sortOrder . CheckCompatibility ( curSchema ); err != nil {
408
+ return fmt . Errorf ( "sort order %s is not compatible with current schema: %w" , sortOrder , err )
400
409
}
401
410
402
411
for _ , s := range sortOrders {
403
- if s .OrderID == sortOrder .OrderID {
404
- return fmt .Errorf ("sort order with id %d already exists" , sortOrder .OrderID )
412
+ if s .OrderID () == sortOrder .OrderID () {
413
+ return fmt .Errorf ("sort order with id %d already exists" , sortOrder .orderID )
405
414
}
406
415
}
407
416
408
417
b .sortOrderList = append (sortOrders , * sortOrder )
409
- b .updates = append (b .updates , NewAddSortOrderUpdate (sortOrder , initial ))
418
+ b .updates = append (b .updates , NewAddSortOrderUpdate (sortOrder ))
410
419
411
420
return nil
412
421
}
@@ -459,10 +468,10 @@ func (b *MetadataBuilder) SetCurrentSchemaID(currentSchemaID int) error {
459
468
func (b * MetadataBuilder ) SetDefaultSortOrderID (defaultSortOrderID int ) error {
460
469
if defaultSortOrderID == - 1 {
461
470
defaultSortOrderID = maxBy (b .sortOrderList , func (s SortOrder ) int {
462
- return s .OrderID
471
+ return s .OrderID ()
463
472
})
464
473
if ! slices .ContainsFunc (b .updates , func (u Update ) bool {
465
- return u .Action () == UpdateAddSortOrder && u .(* addSortOrderUpdate ).SortOrder .OrderID == defaultSortOrderID
474
+ return u .Action () == UpdateAddSortOrder && u .(* addSortOrderUpdate ).SortOrder .OrderID () == defaultSortOrderID
466
475
}) {
467
476
return errors .New ("can't set default sort order to last added with no added sort orders" )
468
477
}
@@ -752,7 +761,7 @@ func (b *MetadataBuilder) GetSpecByID(id int) (*iceberg.PartitionSpec, error) {
752
761
753
762
func (b * MetadataBuilder ) GetSortOrderByID (id int ) (* SortOrder , error ) {
754
763
for _ , s := range b .sortOrderList {
755
- if s .OrderID == id {
764
+ if s .OrderID () == id {
756
765
return & s , nil
757
766
}
758
767
}
@@ -847,6 +856,24 @@ func (b *MetadataBuilder) Build() (Metadata, error) {
847
856
}
848
857
}
849
858
859
+ func (b * MetadataBuilder ) reuseOrCreateNewSortOrderID (newOrder * SortOrder ) int {
860
+ if newOrder .IsUnsorted () {
861
+ return UnsortedSortOrder .OrderID ()
862
+ }
863
+
864
+ newOrderID := UnsortedSortOrderID + 1
865
+ for _ , order := range b .sortOrderList {
866
+ if slices .Equal (order .fields , newOrder .fields ) {
867
+ return order .OrderID ()
868
+ }
869
+ if order .OrderID () >= newOrderID {
870
+ newOrderID = order .OrderID () + 1
871
+ }
872
+ }
873
+
874
+ return newOrderID
875
+ }
876
+
850
877
func (b * MetadataBuilder ) reuseOrCreateNewPartitionSpecID (newSpec iceberg.PartitionSpec ) int {
851
878
newSpecID := 0
852
879
for _ , spec := range b .specs {
@@ -1012,10 +1039,13 @@ type commonMetadata struct {
1012
1039
1013
1040
func initCommonMetadataForDeserialization () commonMetadata {
1014
1041
return commonMetadata {
1015
- LastUpdatedMS : - 1 ,
1016
- LastColumnId : - 1 ,
1017
- CurrentSchemaID : - 1 ,
1018
- DefaultSpecID : - 1 ,
1042
+ LastUpdatedMS : - 1 ,
1043
+ LastColumnId : - 1 ,
1044
+ CurrentSchemaID : - 1 ,
1045
+ DefaultSpecID : - 1 ,
1046
+ DefaultSortOrderID : - 1 ,
1047
+ SortOrderList : nil ,
1048
+ Specs : nil ,
1019
1049
}
1020
1050
}
1021
1051
@@ -1135,7 +1165,7 @@ func (c *commonMetadata) CurrentSnapshot() *Snapshot {
1135
1165
// SortOrders returns the metadata's list of sort orders. The slice is
// returned as stored, not copied.
func (c *commonMetadata) SortOrders() []SortOrder {
	return c.SortOrderList
}
1136
1166
func (c * commonMetadata ) SortOrder () SortOrder {
1137
1167
for _ , s := range c .SortOrderList {
1138
- if s .OrderID == c .DefaultSortOrderID {
1168
+ if s .OrderID () == c .DefaultSortOrderID {
1139
1169
return s
1140
1170
}
1141
1171
}
@@ -1217,14 +1247,14 @@ func (c *commonMetadata) checkSortOrders() error {
1217
1247
}
1218
1248
1219
1249
for _ , o := range c .SortOrderList {
1220
- if o .OrderID == c .DefaultSortOrderID {
1250
+ if o .OrderID () == c .DefaultSortOrderID {
1221
1251
if err := o .CheckCompatibility (c .CurrentSchema ()); err != nil {
1222
- return fmt .Errorf ("default sort order %d is not compatible with current schema: %w" , o .OrderID , err )
1252
+ return fmt .Errorf ("default sort order %d is not compatible with current schema: %w" , o .OrderID () , err )
1223
1253
}
1224
1254
1225
1255
return nil
1226
1256
}
1227
- if o .OrderID == UnsortedSortOrderID && len ( o . Fields ) != 0 {
1257
+ if o .OrderID () == UnsortedSortOrderID && o . Len ( ) != 0 {
1228
1258
return fmt .Errorf ("sort order ID %d is reserved for unsorted order" , UnsortedSortOrderID )
1229
1259
}
1230
1260
}
@@ -1246,20 +1276,6 @@ func (c *commonMetadata) constructRefs() {
1246
1276
}
1247
1277
1248
1278
func (c * commonMetadata ) validate () error {
1249
- if err := c .checkSchemas (); err != nil {
1250
- return err
1251
- }
1252
-
1253
- if err := c .checkPartitionSpecs (); err != nil {
1254
- return err
1255
- }
1256
-
1257
- if err := c .checkSortOrders (); err != nil {
1258
- return err
1259
- }
1260
-
1261
- c .constructRefs ()
1262
-
1263
1279
switch {
1264
1280
case c .LastUpdatedMS == 0 :
1265
1281
// last-updated-ms is required
@@ -1269,12 +1285,34 @@ func (c *commonMetadata) validate() error {
1269
1285
return fmt .Errorf ("%w: missing last-column-id" , ErrInvalidMetadata )
1270
1286
case c .CurrentSchemaID < 0 :
1271
1287
return fmt .Errorf ("%w: no valid schema configuration found in table metadata" , ErrInvalidMetadata )
1288
+ case c .SortOrderList == nil && c .FormatVersion > 1 :
1289
+ return fmt .Errorf ("%w: missing sort-orders" , ErrInvalidMetadata )
1290
+ case c .Specs == nil && c .FormatVersion > 1 :
1291
+ return fmt .Errorf ("%w: missing partition-specs" , ErrInvalidMetadata )
1292
+ case c .DefaultSortOrderID < 0 && c .FormatVersion > 1 :
1293
+ return fmt .Errorf ("%w: default-sort-order-id must be set for FormatVersion > 1" , ErrInvalidMetadata )
1294
+ case c .DefaultPartitionSpec () < 0 && c .FormatVersion > 1 :
1295
+ return fmt .Errorf ("%w: default-partition-spec-id must be set for FormatVersion > 1" , ErrInvalidMetadata )
1272
1296
case c .LastPartitionID == nil :
1273
1297
if c .FormatVersion > 1 {
1274
1298
return fmt .Errorf ("%w: last-partition-id must be set for FormatVersion > 1" , ErrInvalidMetadata )
1275
1299
}
1276
1300
}
1277
1301
1302
+ if err := c .checkSchemas (); err != nil {
1303
+ return err
1304
+ }
1305
+
1306
+ if err := c .checkPartitionSpecs (); err != nil {
1307
+ return err
1308
+ }
1309
+
1310
+ if err := c .checkSortOrders (); err != nil {
1311
+ return err
1312
+ }
1313
+
1314
+ c .constructRefs ()
1315
+
1278
1316
return nil
1279
1317
}
1280
1318
@@ -1299,9 +1337,12 @@ type metadataV1 struct {
1299
1337
}
1300
1338
1301
1339
func initMetadataV1Deser () * metadataV1 {
1302
- return & metadataV1 {
1340
+ meta := metadataV1 {
1303
1341
commonMetadata : initCommonMetadataForDeserialization (),
1304
1342
}
1343
+ meta .commonMetadata .DefaultSortOrderID = 0
1344
+
1345
+ return & meta
1305
1346
}
1306
1347
1307
1348
func (m * metadataV1 ) LastSequenceNumber () int64 { return 0 }
@@ -1488,7 +1529,7 @@ func NewMetadataWithUUID(sc *iceberg.Schema, partitions *iceberg.PartitionSpec,
1488
1529
return nil , err
1489
1530
}
1490
1531
1491
- if err = builder .AddSortOrder (& reassignedIds .sortOrder , true ); err != nil {
1532
+ if err = builder .AddSortOrder (& reassignedIds .sortOrder ); err != nil {
1492
1533
return nil , err
1493
1534
}
1494
1535
0 commit comments