In [139]:
import pyarrow as pa
import pyarrow.flight

# Open a client connection to the Flight server over gRPC on port 8815.
# NOTE(review): 0.0.0.0 is normally a bind-all address; using it as a connect
# target is platform-dependent — consider "localhost" if this fails elsewhere.
flight_server_uri = "grpc://0.0.0.0:8815"
client = pa.flight.connect(flight_server_uri)


In [140]:
# Upload a new dataset: build a one-column table and stream it to the Flight
# server under the "characters/" path descriptor.
# NOTE(review): the recorded read-back in this notebook shows the three names
# repeated, which suggests the server appends on every upload — re-running
# this cell is presumably not idempotent; confirm against the server.
data_table = pa.table(
    [["Mario", "Luigi", "Peach"]],
    names=["Character"]
)
upload_descriptor = pa.flight.FlightDescriptor.for_path("characters/")
# do_put hands back a (writer, metadata reader) pair; only the writer is used.
writer, _ = client.do_put(upload_descriptor, data_table.schema)
try:
    writer.write_table(data_table)
finally:
    # Close the stream even when the write raises, so the put call is not
    # left dangling on the connection.
    writer.close()

In [141]:
# Display the descriptor's repr to check the path it carries.
upload_descriptor

<FlightDescriptor path: [b'characters/']>

In [142]:
# Ask the server for the flight info registered under the uploaded descriptor.
info = client.get_flight_info(upload_descriptor)

In [143]:
# Show the ticket of the first endpoint listed in the flight info.
info.endpoints[0].ticket

<Ticket b'./storage/characters/'>

In [145]:
# Download the dataset behind the first endpoint's ticket, materialise the
# whole stream as an Arrow table, and display it as a pandas DataFrame.
characters_ticket = info.endpoints[0].ticket
reader = client.do_get(characters_ticket)
table = reader.read_all()
table.to_pandas()

Unnamed: 0,Character
0,Mario
1,Luigi
2,Peach
3,Mario
4,Luigi
5,Peach
6,Mario
7,Luigi
8,Peach
9,Mario


## A more serious test with more data

In [146]:
import pyarrow.parquet as pq
# Load the cleaned Pararius housing export from Parquet into an Arrow table
# and preview its first rows.
housing_parquet_path = "../housing_market/clean_export_pararius_24122021.parquet"
housing = pq.read_table(housing_parquet_path)
housing.to_pandas().head()

Unnamed: 0,link,price,house_size,interior,number_of_rooms,construction_period,postal_code,neighbourhood,housing_type,street,for_rent
28,https://www.pararius.com/apartment-for-rent/am...,2250.0,140,Furnished,5,,1019 AX,Oostelijk Havengebied,Apartment,Panamakade,True
26,https://www.pararius.com/apartment-for-rent/am...,2150.0,110,Furnished,3,,1058 DS,Westindische Buurt,Apartment,Postjeskade,True
7,https://www.pararius.com/apartment-for-rent/am...,1695.0,88,Upholstered,4,,1078 MS,Scheldebuurt,Apartment,Biesboschstraat,True
26,https://www.pararius.com/apartment-for-rent/am...,1500.0,45,Upholstered,2,,1051 BX,Staatsliedenbuurt,Apartment,Van Hogendorpstraat 166 II,True
29,https://www.pararius.com/apartment-for-rent/am...,1500.0,80,,2,1930.0,1052 KC,Frederik Hendrikbuurt,Apartment,Van Oldenbarneveldtstraat,True


In [148]:
# Stream the housing table to the Flight server under the "housing" path.
upload_descriptor = pa.flight.FlightDescriptor.for_path("housing")
# do_put hands back a (writer, metadata reader) pair; only the writer is used.
writer, _ = client.do_put(upload_descriptor, housing.schema)
try:
    writer.write_table(housing)
finally:
    # Close the stream even when the write raises, so the put call is not
    # left dangling on the connection.
    writer.close()

In [149]:
# Round-trip check: look up the endpoints for the housing descriptor, fetch
# the data behind the first endpoint's ticket, and preview the first rows.
housing_info = client.get_flight_info(upload_descriptor)
endpoints = housing_info.endpoints
housing_ticket = endpoints[0].ticket
reader = client.do_get(housing_ticket)
table = reader.read_all()
table.to_pandas().head()

Unnamed: 0,link,price,house_size,interior,number_of_rooms,construction_period,postal_code,neighbourhood,housing_type,street,for_rent
28,https://www.pararius.com/apartment-for-rent/am...,2250.0,140,Furnished,5,,1019 AX,Oostelijk Havengebied,Apartment,Panamakade,True
26,https://www.pararius.com/apartment-for-rent/am...,2150.0,110,Furnished,3,,1058 DS,Westindische Buurt,Apartment,Postjeskade,True
7,https://www.pararius.com/apartment-for-rent/am...,1695.0,88,Upholstered,4,,1078 MS,Scheldebuurt,Apartment,Biesboschstraat,True
26,https://www.pararius.com/apartment-for-rent/am...,1500.0,45,Upholstered,2,,1051 BX,Staatsliedenbuurt,Apartment,Van Hogendorpstraat 166 II,True
29,https://www.pararius.com/apartment-for-rent/am...,1500.0,80,,2,1930.0,1052 KC,Frederik Hendrikbuurt,Apartment,Van Oldenbarneveldtstraat,True


In [157]:
# Show the ticket of the first endpoint returned for the housing descriptor.
endpoints[0].ticket

<Ticket b'./storage/housing'>

In [150]:
# Print a short report (path, row count, byte size, schema) for every flight
# the server advertises.
for flight in client.list_flights():
    descriptor = flight.descriptor
    dataset_path = descriptor.path[0].decode('utf-8')
    print(f"Path: {dataset_path} Rows: {flight.total_records} Size: {flight.total_bytes}")
    # One print call with a newline separator emits the same four lines:
    # header, schema, footer, and a trailing blank line.
    print("=== Schema ===", flight.schema, "==============", "", sep="\n")

Path: ./storage/housing Rows: 1208 Size: 6644
=== Schema ===
link: string
price: float
house_size: int32
interior: string
number_of_rooms: int32
construction_period: int64
postal_code: string
neighbourhood: string
housing_type: string
street: string
for_rent: bool
__index_level_0__: int64
-- schema metadata --
pandas: '{"index_columns": ["__index_level_0__"], "column_indexes": [{"na' + 1637

Path: ./storage/characters Rows: 18 Size: 2178
=== Schema ===
Character: string



## DeltaServer test

In [154]:
# Open a new client connection for the DeltaServer test (same address and port).
delta_server_uri = "grpc://0.0.0.0:8815"
client = pa.flight.connect(delta_server_uri)

In [155]:
# List all Delta Tables
for flight in client.list_flights():
    descriptor = flight.descriptor
    print("Path:", descriptor.path[0].decode('utf-8'), "Rows:", flight.total_records, "Size:", flight.total_bytes)
    print("=== Schema ===")
    print(flight.schema)
    print("==============")
    print("")

Path: ./delta_storage/housing Rows: 1208 Size: 6645
=== Schema ===
link: string
price: float
house_size: int32
interior: string
number_of_rooms: int32
construction_period: int64
postal_code: string
neighbourhood: string
housing_type: string
street: string
for_rent: bool
__index_level_0__: int64



In [None]:
# Build a path descriptor for the housing dataset.
# NOTE(review): this uses "housing/" with a trailing slash, while the earlier
# upload used "housing" — confirm the server resolves both to the same dataset.
flight_descriptor = pa.flight.FlightDescriptor.for_path("housing/")