Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 2 additions & 14 deletions src/create.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { validateSchemaForVersion } from './schema.js'
import { maxFieldId, validateSchemaForVersion } from './schema.js'
import { uuid4 } from './utils.js'

/**
Expand Down Expand Up @@ -95,19 +95,7 @@ export async function icebergCreate({
}

/**
* @import {Field, PartitionField, PartitionSpec, Schema, SortOrder, TableMetadata} from '../src/types.js'
* @param {Field[]} fields
* @returns {number}
*/
function maxFieldId (fields = []) {
let max = 0
for (const f of fields) {
if (max < f.id) max = f.id
}
return max
}

/**
* @import {PartitionField, PartitionSpec, Schema, SortOrder, TableMetadata} from '../src/types.js'
* @param {PartitionField[]} partitionFields
* @returns {number}
*/
Expand Down
36 changes: 34 additions & 2 deletions src/schema.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* @import {IcebergType, Schema} from '../src/types.js'
* @import {Field, IcebergType, Schema} from '../src/types.js'
*/

/**
Expand All @@ -18,6 +18,39 @@ export function validateSchemaForVersion(schema, formatVersion) {
}
}

/**
* @param {Field[]} fields
* @returns {number}
*/
export function maxFieldId(fields = []) {
let max = 0
for (const field of fields) {
if (max < field.id) max = field.id
const nested = maxNestedFieldId(field.type)
if (max < nested) max = nested
}
return max
}

/**
* @param {IcebergType} type
* @returns {number}
*/
function maxNestedFieldId(type) {
if (typeof type === 'string') return 0
if (type.type === 'list') {
const elementId = type['element-id'] ?? 0
return Math.max(elementId, maxNestedFieldId(type.element))
}
if (type.type === 'map') {
const keyId = type['key-id'] ?? 0
const valueId = type['value-id'] ?? 0
return Math.max(keyId, valueId, maxNestedFieldId(type.key), maxNestedFieldId(type.value))
}
if (type.type === 'struct') return maxFieldId(type.fields)
return 0
}

/**
* Spec v3 §"Reserved Field IDs": user schemas must not use field ids
* greater than 2147483447 (`Integer.MAX_VALUE - 200`). The reserved range
Expand All @@ -30,7 +63,6 @@ export function validateSchemaForVersion(schema, formatVersion) {
const MAX_USER_FIELD_ID = 2147483447

/**
* @import {Field} from '../src/types.js'
* @param {Field} field
* @param {number} formatVersion
* @param {string} path
Expand Down
159 changes: 132 additions & 27 deletions src/write/commit.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { validateSchemaForVersion } from '../schema.js'
import { maxFieldId, validateSchemaForVersion } from '../schema.js'
import { parseDecimalType } from './conversions.js'

/**
Expand Down Expand Up @@ -240,17 +240,10 @@ export function applyUpdates(metadata, updates) {
const newSchema = { ...up.schema, 'schema-id': schemaId }
validateSchemaForVersion(newSchema, next['format-version'])
const priorLastColumnId = next['last-column-id'] ?? 0
const priorAssignedIds = currentAssignedIdIndex(schemas, next['current-schema-id'])
validateAssignedFieldIds(newSchema, priorAssignedIds, priorLastColumnId)
validateSchemaEvolution(schemas, newSchema, priorLastColumnId, next['format-version'])
for (const field of newSchema.fields) {
if (field.id > priorLastColumnId && field.required) {
if (field['initial-default'] == null) {
throw new Error(`add-schema: required field ${field.name} (id ${field.id}) needs a non-null initial-default`)
}
if (field['write-default'] == null) {
throw new Error(`add-schema: required field ${field.name} (id ${field.id}) needs a non-null write-default`)
}
}
}
validateNewRequiredFields(newSchema, priorLastColumnId)
next = {
...next,
schemas: [...schemas, newSchema],
Expand Down Expand Up @@ -347,6 +340,134 @@ export function applyUpdates(metadata, updates) {
return next
}

/**
* @typedef {{ kind: string, path: string }} AssignedFieldId
*/

/**
* @param {Schema[]} schemas
* @param {number} currentSchemaId
* @returns {Map<number, AssignedFieldId>}
*/
function currentAssignedIdIndex(schemas, currentSchemaId) {
const currentSchema = schemas.find(s => s['schema-id'] === currentSchemaId) ?? schemas[schemas.length - 1]
const assignedIds = new Map()
if (currentSchema) indexAssignedFieldIds(currentSchema.fields, '', assignedIds)
return assignedIds
}

/**
* @param {Field[]} fields
* @param {string} prefix
* @param {Map<number, AssignedFieldId>} assignedIds
*/
function indexAssignedFieldIds(fields, prefix, assignedIds) {
for (const field of fields) {
const path = prefix ? `${prefix}.${field.name}` : field.name
assignedIds.set(field.id, { kind: 'field', path })
indexAssignedTypeIds(field.type, path, assignedIds)
}
}

/**
* @param {IcebergType} type
* @param {string} path
* @param {Map<number, AssignedFieldId>} assignedIds
*/
function indexAssignedTypeIds(type, path, assignedIds) {
if (typeof type === 'string') return
if (type.type === 'struct') {
indexAssignedFieldIds(type.fields, path, assignedIds)
} else if (type.type === 'list') {
assignedIds.set(type['element-id'], { kind: 'list element', path: `${path}.element` })
indexAssignedTypeIds(type.element, `${path}.element`, assignedIds)
} else if (type.type === 'map') {
assignedIds.set(type['key-id'], { kind: 'map key', path: `${path}.key` })
assignedIds.set(type['value-id'], { kind: 'map value', path: `${path}.value` })
indexAssignedTypeIds(type.key, `${path}.key`, assignedIds)
indexAssignedTypeIds(type.value, `${path}.value`, assignedIds)
}
}

/**
* @param {Schema} schema
* @param {Map<number, AssignedFieldId>} priorAssignedIds
* @param {number} priorLastColumnId
*/
function validateAssignedFieldIds(schema, priorAssignedIds, priorLastColumnId) {
validateAssignedFields(schema.fields, '', priorAssignedIds, priorLastColumnId)
}

/**
* @param {Field[]} fields
* @param {string} prefix
* @param {Map<number, AssignedFieldId>} priorAssignedIds
* @param {number} priorLastColumnId
*/
function validateAssignedFields(fields, prefix, priorAssignedIds, priorLastColumnId) {
for (const field of fields) {
const path = prefix ? `${prefix}.${field.name}` : field.name
validateAssignedId(field.id, 'field', path, priorAssignedIds, priorLastColumnId)
validateAssignedTypeIds(field.type, path, priorAssignedIds, priorLastColumnId)
}
}

/**
* @param {IcebergType} type
* @param {string} path
* @param {Map<number, AssignedFieldId>} priorAssignedIds
* @param {number} priorLastColumnId
*/
function validateAssignedTypeIds(type, path, priorAssignedIds, priorLastColumnId) {
if (typeof type === 'string') return
if (type.type === 'struct') {
validateAssignedFields(type.fields, path, priorAssignedIds, priorLastColumnId)
} else if (type.type === 'list') {
validateAssignedId(type['element-id'], 'list element', `${path}.element`, priorAssignedIds, priorLastColumnId)
validateAssignedTypeIds(type.element, `${path}.element`, priorAssignedIds, priorLastColumnId)
} else if (type.type === 'map') {
validateAssignedId(type['key-id'], 'map key', `${path}.key`, priorAssignedIds, priorLastColumnId)
validateAssignedId(type['value-id'], 'map value', `${path}.value`, priorAssignedIds, priorLastColumnId)
validateAssignedTypeIds(type.key, `${path}.key`, priorAssignedIds, priorLastColumnId)
validateAssignedTypeIds(type.value, `${path}.value`, priorAssignedIds, priorLastColumnId)
}
}

/**
* @param {number} id
* @param {string} kind
* @param {string} path
* @param {Map<number, AssignedFieldId>} priorAssignedIds
* @param {number} priorLastColumnId
*/
function validateAssignedId(id, kind, path, priorAssignedIds, priorLastColumnId) {
if (id > priorLastColumnId) return
const prior = priorAssignedIds.get(id)
if (!prior) {
throw new Error(`add-schema: ${kind} ${path} uses unassigned id ${id} (last-column-id ${priorLastColumnId})`)
}
if (prior.kind !== kind) {
throw new Error(`add-schema: ${kind} ${path} uses id ${id} previously assigned to ${prior.kind} ${prior.path}`)
}
}

/**
* @param {Schema} schema
* @param {number} priorLastColumnId
*/
function validateNewRequiredFields(schema, priorLastColumnId) {
for (const field of schema.fields) {
if (field.id > priorLastColumnId && field.required) {
if (field['initial-default'] == null) {
throw new Error(`add-schema: required field ${field.name} (id ${field.id}) needs a non-null initial-default`)
}
if (field['write-default'] == null) {
throw new Error(`add-schema: required field ${field.name} (id ${field.id}) needs a non-null write-default`)
}
}
}
}

/**
* Validate schema evolution rules that require comparing the new schema with
* prior schemas: existing field ids may be renamed/reordered, but their
Expand Down Expand Up @@ -546,19 +667,3 @@ function idsListEquivalent(a, b) {
}
return true
}

/**
* Highest field id at the top level of a schema. Mirrors `create.js` and is
* intentionally non-recursive; nested-type ids are not yet supported on
* schema add.
*
* @param {Field[]} fields
* @returns {number}
*/
function maxFieldId(fields = []) {
let max = 0
for (const f of fields) {
if (f.id > max) max = f.id
}
return max
}
Loading