In [1]:
from pyiceberg.catalog import load_catalog
from pyiceberg.schema import Schema
from pyiceberg.types import NestedField, StringType, DoubleType, LongType, TimestampType
from pyiceberg.partitioning import PartitionSpec, PartitionField
from pyiceberg.table.sorting import SortOrder, SortField
from pyiceberg.transforms import MonthTransform, DayTransform

In [6]:
# Print the PyIceberg configuration file from the home directory so the
# catalog settings used by this notebook are visible in the cell output.
!cat ~/.pyiceberg.yaml

catalog:
   default:
     type: glue
     profile_name: default
     region_name: us-west-2


In [2]:
# Load the catalog without naming one explicitly.
# NOTE(review): presumably this resolves to the `default` entry of
# ~/.pyiceberg.yaml (a Glue catalog in us-west-2) — confirm against the
# installed pyiceberg version's default-catalog lookup.
catalog = load_catalog()

In [7]:
# Iceberg schema for the trip table. Columns are listed in order and receive
# consecutive field ids starting at 1, so "pickup_datetime" ends up with id 6.
# NOTE(review): `required` is never passed, so every column's nullability
# follows the NestedField default of the installed pyiceberg version —
# confirm that matches the data being loaded.
schema = Schema(
    *(
        NestedField(field_id=column_id, name=column_name, field_type=column_type())
        for column_id, (column_name, column_type) in enumerate(
            (
                ("hvfhs_license_num", StringType),
                ("dispatching_base_num", StringType),
                ("originating_base_num", StringType),
                ("request_datetime", TimestampType),
                ("on_scene_datetime", TimestampType),
                ("pickup_datetime", TimestampType),
                ("dropoff_datetime", TimestampType),
                ("PULocationID", LongType),
                ("DOLocationID", LongType),
                ("trip_miles", DoubleType),
                ("trip_time", LongType),
                ("base_passenger_fare", DoubleType),
                ("tolls", DoubleType),
                ("bcf", DoubleType),
                ("sales_tax", DoubleType),
                ("congestion_surcharge", DoubleType),
                ("airport_fee", DoubleType),
                ("tips", DoubleType),
                ("driver_pay", DoubleType),
                ("shared_request_flag", StringType),
                ("shared_match_flag", StringType),
                ("access_a_ride_flag", StringType),
                ("wav_request_flag", StringType),
                ("wav_match_flag", StringType),
            ),
            start=1,
        )
    )
)

In [8]:
# Resolve the source column by name instead of repeating its numeric field id,
# so the partition spec and sort order cannot drift from the schema (or from
# each other) if columns are ever renumbered.
pickup_field_id = schema.find_field("pickup_datetime").field_id

# Partition rows by the month of the pickup timestamp. `field_id=1000` is the
# partition field's own id, distinct from the schema column ids.
partition_spec = PartitionSpec(
    PartitionField(
        source_id=pickup_field_id,
        field_id=1000,
        transform=MonthTransform(),
        name="pickup_datetime_month",
    )
)

# Sort on the day of the same pickup timestamp.
sort_order = SortOrder(SortField(source_id=pickup_field_id, transform=DayTransform()))

In [None]:
# Create the Iceberg table in the catalog using the schema, partition spec and
# sort order built in the cells above; table files go under the S3 location.
# NOTE(review): the identifier ends in "fvhfv" while the location ends in
# "hvfhv" — confirm which spelling is intended before running this cell.
catalog.create_table(
    identifier="nyc_taxi.fvhfv",
    schema=schema,
    location="s3://daft-public-datasets/nyc-taxi-iceberg/hvfhv",
    sort_order=sort_order,
    partition_spec=partition_spec,
)