### Convert a pandas DataFrame to an Arrow table



In [None]:
import pandas as pd
import pyarrow as pa
import json

In [None]:
# Path of the JSON input file that is loaded into a pandas DataFrame.
filename = "data-25k.json"

In [None]:
# Load the JSON file into a DataFrame. orient="records" tells pandas to expect
# a list of row objects, i.e. [{column: value, ...}, ...].
data =  pd.read_json(filename, orient="records")

In [None]:
# Show the loaded DataFrame as the cell output for a quick visual check.
data

In [None]:
# Convert the DataFrame to an Arrow table, letting pyarrow infer column types.
data_table = pa.Table.from_pandas(data)
# Arrow schema inferred from the same DataFrame.
# NOTE(review): not referenced by the later cells visible here, which build
# their own schema from `data_table` — confirm whether it is still needed.
data_schema = pa.Schema.from_pandas(data)

In [None]:
# Build a schema in which every variable-length list column is re-declared as
# a fixed-size list column, with the tensor shape recorded in the field
# metadata under the "tensor_type" key (as a JSON string).
#
# Columns that are already fixed-size lists, columns that are not lists at all
# (e.g. plain ints or strings) and columns of an empty table are passed through
# with their inferred type, so mixed tables no longer raise here.
#
# Assumes every row of a list column has the same length as its first row —
# the size is taken from row 0 only.
fields = []
for column_name in data_table.column_names:
    column = data_table[column_name]
    column_type = column.type

    is_variable_list = (
        pa.types.is_list(column_type) or pa.types.is_large_list(column_type)
    )
    if not is_variable_list or len(column) == 0:
        # Nothing to convert, or no row to read the list length from.
        fields.append(pa.field(column_name, column_type))
        continue

    inner_size = len(column[0])
    tensor_type = {"shape": [inner_size]}
    tensor_meta_type = {"tensor_type": json.dumps(tensor_type)}
    # Take the element type from the column type itself rather than from the
    # first element of the first row, which does not exist for an empty row.
    tensor_arrow_type = pa.list_(column_type.value_type, inner_size)
    fields.append(
        pa.field(column_name, tensor_arrow_type, metadata=tensor_meta_type)
    )

schema = pa.schema(fields)

In [None]:
# Convert the DataFrame again, this time with the explicit `schema` built from
# `fields` instead of the column types pyarrow would infer on its own.
final_table = pa.Table.from_pandas(data, schema=schema)

In [None]:
# Show the converted table as the cell output to check the resulting schema.
final_table

### Write the Arrow table to an Arrow file

In [None]:
# Output path of the Arrow IPC file that `final_table` is written to.
arrow_file_name = "data-25k.arrow"

In [None]:
# Write the table to disk in the Arrow IPC file format. Both context managers
# close their resource on exit (the writer first, then the underlying OS
# file), so no explicit close() call is needed inside the block.
with pa.OSFile(arrow_file_name, 'wb') as sink:
    with pa.ipc.new_file(sink, final_table.schema) as arrow_ipc:
        arrow_ipc.write(final_table)

### To send it to the infer function, you can read the Arrow file like so:

In [None]:
# Read back the Arrow file stored at `arrow_file_name` (the path this notebook
# writes to) rather than an unrelated hard-coded file name.
with pa.ipc.open_file(arrow_file_name) as source:
    table = source.read_all()  # to get pyarrow table
    table_df = source.read_pandas()  # to get pandas dataframe


# pipeline.infer(table)