Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
220 changes: 220 additions & 0 deletions open-api/rest-catalog-open-api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

from __future__ import annotations

from datetime import date
from typing import Any, Dict, List, Literal, Optional, Union
from uuid import UUID

from pydantic import BaseModel, Extra, Field

Expand Down Expand Up @@ -629,6 +631,183 @@ class PartitionStatisticsFile(BaseModel):
file_size_in_bytes: int = Field(..., alias='file-size-in-bytes')


class BooleanTypeValue(BaseModel):
    # Custom-root model: the payload is a bare boolean, not a JSON object.
    __root__: bool = Field(
        ...,
        example=True,
    )


class IntegerTypeValue(BaseModel):
    # Custom-root model wrapping a single integer value.
    __root__: int = Field(
        ...,
        example=42,
    )


class LongTypeValue(BaseModel):
    # Custom-root model wrapping a single integer value.
    # The example is 2**63 - 1; the annotation is a plain `int`, so no
    # 64-bit range check is declared on this field.
    __root__: int = Field(
        ...,
        example=9223372036854775807,
    )


class FloatTypeValue(BaseModel):
    # Custom-root model wrapping a single floating-point value.
    __root__: float = Field(
        ...,
        example=3.14,
    )


class DoubleTypeValue(BaseModel):
    # Custom-root model wrapping a single floating-point value; it uses the
    # same Python `float` annotation as FloatTypeValue.
    __root__: float = Field(
        ...,
        example=123.456,
    )


class DecimalTypeValue(BaseModel):
    # Decimals travel as strings. The textual format is stated in the
    # description only; no pattern constraint is declared on the field.
    __root__: str = Field(
        ...,
        description=(
            'Decimal type values are serialized as strings. '
            'Decimals with a positive scale serialize as numeric plain text, '
            'while decimals with a negative scale use scientific notation '
            'and the exponent will be equal to the negated scale. '
            "For instance, a decimal with a positive scale is '123.4500', "
            "with zero scale is '2', and with a negative scale is '2E+20'"
        ),
        example='123.4500',
    )


class StringTypeValue(BaseModel):
    # Custom-root model wrapping a single string value.
    __root__: str = Field(
        ...,
        example='hello',
    )


class UUIDTypeValue(BaseModel):
    # Custom-root model wrapping a single UUID value.
    #
    # The field previously also declared max_length=36, min_length=36 and a
    # lowercase-hex `regex`. Those are string constraints; pydantic v1 cannot
    # apply them to a `UUID`-typed field and raises
    # "field constraints are set but not enforced" when the class is created.
    # They are dropped here so the model can be defined; parsing into `UUID`
    # already rejects malformed input. The canonical wire form (36-character
    # lowercase, RFC-4122) remains documented in the description below.
    # NOTE(review): if this module is generated from an OpenAPI document,
    # the generator input/config needs the same adjustment or the
    # constraints will come back on regeneration.
    __root__: UUID = Field(
        ...,
        description='UUID type values are serialized as a 36-character lowercase string in standard UUID format as specified by RFC-4122',
        example='eb26bdb1-a1d8-4aa6-990e-da940875492c',
    )


class DateTypeValue(BaseModel):
    # Custom-root model wrapping a single calendar date (`datetime.date`).
    __root__: date = Field(
        ...,
        example='2007-12-03',
        description=(
            "Date type values follow the 'YYYY-MM-DD' "
            'ISO-8601 standard date format'
        ),
    )


class TimeTypeValue(BaseModel):
    # Time-of-day carried as a plain string. The format is described, not
    # validated: no pattern constraint is declared on the field.
    __root__: str = Field(
        ...,
        example='22:31:08.123456',
        description=(
            "Time type values follow the 'HH:MM:SS.ssssss' "
            'ISO-8601 format with microsecond precision'
        ),
    )


class TimestampTypeValue(BaseModel):
    # Zone-less timestamp carried as a plain string. The format is described,
    # not validated: no pattern constraint is declared on the field.
    __root__: str = Field(
        ...,
        example='2007-12-03T10:15:30.123456',
        description=(
            "Timestamp type values follow the 'YYYY-MM-DDTHH:MM:SS.ssssss' "
            'ISO-8601 format with microsecond precision'
        ),
    )


class TimestampTzTypeValue(BaseModel):
    # Timestamp with a UTC offset, carried as a plain string. The format is
    # described, not validated: no pattern constraint is declared.
    __root__: str = Field(
        ...,
        example='2007-12-03T10:15:30.123456+00:00',
        description=(
            "TimestampTz type values follow the 'YYYY-MM-DDTHH:MM:SS.ssssss+00:00' "
            'ISO-8601 format with microsecond precision, '
            'and a timezone offset (+00:00 for UTC)'
        ),
    )


class TimestampNanoTypeValue(BaseModel):
    # Zone-less nanosecond timestamp carried as a plain string. The format is
    # described, not validated: no pattern constraint is declared.
    __root__: str = Field(
        ...,
        example='2007-12-03T10:15:30.123456789',
        description=(
            "Timestamp_ns type values follow the 'YYYY-MM-DDTHH:MM:SS.sssssssss' "
            'ISO-8601 format with nanosecond precision'
        ),
    )


class TimestampTzNanoTypeValue(BaseModel):
    # Nanosecond timestamp with a UTC offset, carried as a plain string. The
    # format is described, not validated: no pattern constraint is declared.
    #
    # The description used to start with "Timestamp_ns", the label of the
    # zone-less sibling model (TimestampNanoTypeValue), even though this
    # model's format and example include an offset. It now names the
    # zone-aware type so the two models can be told apart in generated docs.
    __root__: str = Field(
        ...,
        description="Timestamptz_ns type values follow the 'YYYY-MM-DDTHH:MM:SS.sssssssss+00:00' ISO-8601 format with nanosecond precision, and a timezone offset (+00:00 for UTC)",
        example='2007-12-03T10:15:30.123456789+00:00',
    )


class FixedTypeValue(BaseModel):
    # Fixed-length binary carried as a string. The uppercase-hex convention
    # is stated in the description only; no pattern or length constraint is
    # declared on the field.
    __root__: str = Field(
        ...,
        example='78797A',
        description=(
            'Fixed length type values are stored and serialized as an '
            'uppercase hexadecimal string preserving the fixed length'
        ),
    )


class BinaryTypeValue(BaseModel):
    # Variable-length binary carried as a string. The uppercase-hex
    # convention is stated in the description only; no pattern constraint is
    # declared on the field.
    __root__: str = Field(
        ...,
        example='78797A',
        description=(
            'Binary type values are stored and serialized as an '
            'uppercase hexadecimal string'
        ),
    )


class CountMap(BaseModel):
    # A map encoded as two parallel lists: keys[i] pairs with values[i].
    # Both lists are optional and default to None; nothing in this model
    # checks that they have the same length.

    # Column ids.
    keys: Optional[List[IntegerTypeValue]] = Field(
        default=None,
        description='List of integer column ids for each corresponding value',
    )

    # Long counts, positionally aligned with `keys`.
    values: Optional[List[LongTypeValue]] = Field(
        default=None,
        description="List of Long values, matched to 'keys' by index",
    )


class PrimitiveTypeValue(BaseModel):
    # Custom-root model accepting any one of the primitive value models.
    # The member order is kept exactly as declared; do not re-sort it.
    __root__: Union[
        # boolean and numeric values
        BooleanTypeValue,
        IntegerTypeValue,
        LongTypeValue,
        FloatTypeValue,
        DoubleTypeValue,
        DecimalTypeValue,
        # textual values
        StringTypeValue,
        UUIDTypeValue,
        # date and time values
        DateTypeValue,
        TimeTypeValue,
        TimestampTypeValue,
        TimestampTzTypeValue,
        TimestampNanoTypeValue,
        TimestampTzNanoTypeValue,
        # hex-encoded byte values
        FixedTypeValue,
        BinaryTypeValue,
    ]


class FileFormat(BaseModel):
    # Custom-root model restricted to the three listed format names.
    __root__: Literal[
        'avro',
        'orc',
        'parquet',
    ]


class ContentFile(BaseModel):
    # Fields common to every content file entry. Hyphenated wire names are
    # mapped onto snake_case attributes through `alias`.

    # Free-form string at this level.
    content: str

    # Required identification fields.
    file_path: str = Field(..., alias='file-path')
    file_format: FileFormat = Field(..., alias='file-format')
    spec_id: int = Field(..., alias='spec-id')

    # Optional partition tuple; its ordering is described relative to the
    # partition spec referenced by `spec-id`.
    partition: Optional[List[PrimitiveTypeValue]] = Field(
        default=None,
        description='A list of partition field values ordered based on the fields of the partition spec specified by the `spec-id`',
        example=[1, 'bar'],
    )

    # Required size metrics.
    file_size_in_bytes: int = Field(
        ...,
        alias='file-size-in-bytes',
        description='Total file size in bytes',
    )
    record_count: int = Field(
        ...,
        alias='record-count',
        description='Number of records in the file',
    )

    # Optional extras; each defaults to None when absent.
    key_metadata: Optional[BinaryTypeValue] = Field(
        default=None,
        alias='key-metadata',
        description='Encryption key metadata blob',
    )
    split_offsets: Optional[List[int]] = Field(
        default=None,
        alias='split-offsets',
        description='List of splittable offsets',
    )
    sort_order_id: Optional[int] = Field(
        default=None,
        alias='sort-order-id',
    )


class PositionDeleteFile(ContentFile):
    # Narrows the inherited `content` field to one fixed tag; every other
    # field comes from ContentFile unchanged.
    content: Literal[
        'position-deletes'
    ]


class EqualityDeleteFile(ContentFile):
    # Narrows the inherited `content` field to one fixed tag and adds one
    # optional field on top of what ContentFile declares.
    content: Literal[
        'equality-deletes'
    ]

    # Optional; defaults to None when absent.
    equality_ids: Optional[List[int]] = Field(
        default=None,
        alias='equality-ids',
        description='List of equality field IDs',
    )


class CreateNamespaceRequest(BaseModel):
namespace: Namespace
properties: Optional[Dict[str, str]] = Field(
Expand Down Expand Up @@ -668,6 +847,47 @@ class StatisticsFile(BaseModel):
blob_metadata: List[BlobMetadata] = Field(..., alias='blob-metadata')


class ValueMap(BaseModel):
    # A map encoded as two parallel lists: keys[i] pairs with values[i].
    # Both lists are optional and default to None; nothing in this model
    # checks that they have the same length.

    # Column ids.
    keys: Optional[List[IntegerTypeValue]] = Field(
        default=None,
        description='List of integer column ids for each corresponding value',
    )

    # Primitive values, positionally aligned with `keys`.
    values: Optional[List[PrimitiveTypeValue]] = Field(
        default=None,
        description="List of primitive type values, matched to 'keys' by index",
    )


class DataFile(ContentFile):
    # Content file tagged 'data'. On top of the ContentFile fields it carries
    # optional per-column metrics, each encoded as a parallel-list map
    # (CountMap for counts and sizes, ValueMap for bounds).
    #
    # Two descriptions were shifted by one field: `column_sizes` carried the
    # text for value counts, and `value_counts` carried the text for null
    # value counts (duplicating `null_value_counts`). They now describe their
    # own fields, following the Iceberg table spec's data_file definitions.
    # NOTE(review): if this module is generated from an OpenAPI document,
    # apply the same wording fix there so regeneration does not revert it.
    content: Literal['data']

    column_sizes: Optional[CountMap] = Field(
        None,
        alias='column-sizes',
        description='Map of column id to total size on disk',
    )
    value_counts: Optional[CountMap] = Field(
        None,
        alias='value-counts',
        description='Map of column id to total count, including null and NaN',
    )
    null_value_counts: Optional[CountMap] = Field(
        None,
        alias='null-value-counts',
        description='Map of column id to null value count',
    )
    nan_value_counts: Optional[CountMap] = Field(
        None,
        alias='nan-value-counts',
        description='Map of column id to number of NaN values in the column',
    )

    lower_bounds: Optional[ValueMap] = Field(
        None,
        alias='lower-bounds',
        description='Map of column id to lower bound primitive type values',
    )
    upper_bounds: Optional[ValueMap] = Field(
        None,
        alias='upper-bounds',
        description='Map of column id to upper bound primitive type values',
    )


class Term(BaseModel):
__root__: Union[Reference, TransformTerm]

Expand Down
Loading