In [1]:
import pyarrow as pa

In [2]:
# Three equal-length columns, each with an explicit narrow integer type.
days = pa.array([1, 12, 17, 23, 28], pa.int8())
months = pa.array([1, 3, 5, 7, 1], pa.int8())
years = pa.array([1990, 2000, 1995, 2000, 1995], pa.int16())

# Assemble the columns into a single table; names are given in column order.
birthdays_table = pa.Table.from_arrays(
    [days, months, years],
    names=["days", "months", "years"],
)

In [4]:
# Show the table: its schema first, then the values of each column.
print(birthdays_table)

pyarrow.Table
days: int8
months: int8
years: int16
----
days: [[1,12,17,23,28]]
months: [[1,3,5,7,1]]
years: [[1990,2000,1995,2000,1995]]


In [6]:
import pyarrow.parquet as pq

# Persist the whole table as a single Parquet file at a relative path.
pq.write_table(birthdays_table, where='birth.parquet')

In [7]:
# Load the Parquet file written above back into memory and display it, so the
# schema and values can be compared with the table that was saved.
birth = pq.read_table(source='birth.parquet')
print(birth)

pyarrow.Table
days: int8
months: int8
years: int16
----
days: [[1,12,17,23,28]]
months: [[1,3,5,7,1]]
years: [[1990,2000,1995,2000,1995]]


In [8]:
import pyarrow.dataset as ds

# Write the table as a partitioned Parquet dataset under "savedir", with one
# sub-directory per distinct value of the "years" column. The partition schema
# reuses the table's own "years" field so its declared type matches the column.
#
# By default write_dataset raises if the destination already contains data,
# which makes this cell fail when it is executed a second time (for example
# after Restart & Run All once "savedir" exists). "overwrite_or_ignore" lets
# the cell be re-run: part files with the same name are replaced.
# NOTE(review): `existing_data_behavior` needs a pyarrow release that supports
# it (believed to be 6.0+) — confirm against the environment in use.
ds.write_dataset(
    birthdays_table,
    "savedir",
    format="parquet",
    partitioning=ds.partitioning(
        pa.schema([birthdays_table.schema.field("years")])
    ),
    existing_data_behavior="overwrite_or_ignore",
)

In [9]:
# Open the partitioned directory as one logical dataset. The directory level
# directly under "savedir" is exposed as a column named "years".
birthdays_dataset = ds.dataset(
    "savedir",
    format="parquet",
    partitioning=["years"],
)

print(birthdays_dataset)

<pyarrow._dataset.FileSystemDataset object at 0x12745cca0>


In [13]:
# List the Parquet files that make up the dataset (one per "years" directory).
birthdays_dataset.files

['savedir/1990/part-0.parquet',
 'savedir/1995/part-0.parquet',
 'savedir/2000/part-0.parquet']

In [12]:
# Materialise the whole dataset as one in-memory table; each column comes back
# as one chunk per file. In the recorded output "years" is int32 although the
# column was created as int16 — presumably because the partition type is
# inferred when only the field name is given; verify against the pyarrow docs.
birthdays_dataset.to_table()

pyarrow.Table
days: int8
months: int8
years: int32
----
days: [[1],[17,28],[12,23]]
months: [[1],[5,1],[3,7]]
years: [[1990],[1995,1995],[2000,2000]]