# Guida PyArrow

Riferimenti:

* https://arrow.apache.org/docs/python/getstarted.html
* https://arrow.apache.org/cookbook/py/data.html#applying-arithmetic-functions-to-arrays

In [1]:
import pyarrow as pa
import pyarrow.parquet as pq
import pyarrow.compute as pc

## Inizializzazione dati

In [12]:
# Data type: array — a one-dimensional sequence of values sharing one type.
# Passing `type=` pins the element type to int8 rather than letting
# PyArrow infer one from the Python integers.
pippo = pa.array(
    [1, 2, 3],
    type=pa.int8(),
)
pippo

<pyarrow.lib.Int8Array object at 0x00000203F93F4D00>
[
  1,
  2,
  3
]

In [10]:
# Data type: Table — a set of named columns, built here from a list of
# row dictionaries (one dict per row, one key per column).
rows = [
    {"pippo": 1, "anni": 10},
    {"pippo": 2, "anni": 20},
]
listone = pa.Table.from_pylist(rows)
listone

pyarrow.Table
pippo: int64
anni: int64
----
pippo: [[1,2]]
anni: [[10,20]]

In [24]:
# Round-trip the table through a Parquet file: write it to a path relative
# to the current working directory, then read it back into a second table.
parquet_path = "listone.parquet"
pq.write_table(listone, parquet_path)
listone2 = pq.read_table(parquet_path)
listone2

pyarrow.Table
pippo: int64
anni: int64
----
pippo: [[1,2]]
anni: [[10,20]]

In [25]:
# Stack the in-memory table and the copy read back from Parquet into a
# single table.
# NOTE: `listone` is rebound to the result, so re-running this cell
# concatenates `listone2` onto the already-extended table again.
tables_to_stack = [listone, listone2]
listone = pa.concat_tables(tables_to_stack)
listone

pyarrow.Table
pippo: int64
anni: int64
----
pippo: [[1,2],[1,2]]
anni: [[10,20],[10,20]]

## Calcoli

In [30]:
# Add a derived column "boooo" holding the "pippo" values multiplied by 2.
# NOTE: `listone` is rebound to the extended table, so re-running this
# cell calls append_column on a table that already contains "boooo".
pippo_doubled = pc.multiply(listone["pippo"], 2)
listone = listone.append_column("boooo", pippo_doubled)
listone

pyarrow.Table
pippo: int64
anni: int64
boooo: int64
----
pippo: [[1,2],[1,2]]
anni: [[10,20],[10,20]]
boooo: [[2,4,2,4]]

In [37]:
# Group the rows by "pippo" and sum "anni" within each group.
# NOTE: `listone` is rebound to the aggregated table, so the row-level
# table is no longer reachable under this name; re-running this cell would
# then operate on the aggregated table, not the original rows.
grouped = listone.group_by("pippo")
listone = grouped.aggregate([("anni", "sum")])
listone

pyarrow.Table
anni_sum: int64
pippo: int64
----
anni_sum: [[20,40]]
pippo: [[1,2]]