Skip to content

[Parquet] Cannot write LARGE_LIST #834

@lidavidm

Description

@lidavidm

Describe the bug, including details regarding any error messages, version, and platform.

go.mod has github.com/apache/arrow-go/v18 v18.6.0

// TestParquetLargeList demonstrates that upstream is broken: handing
// writeParquet a batch whose single column is a nullable large_list<int32>
// is expected to come back with a type-mismatch error.
func TestParquetLargeList(t *testing.T) {
	alloc := memory.NewCheckedAllocator(memory.DefaultAllocator)
	// Deferred so the size-0 assertion runs after everything else in the test.
	defer alloc.AssertSize(t, 0)

	largeListField := arrow.Field{
		Name:     "values",
		Type:     arrow.LargeListOf(arrow.PrimitiveTypes.Int32),
		Nullable: true,
	}
	schema := arrow.NewSchema([]arrow.Field{largeListField}, nil)

	// Three rows: a populated list, a null entry, and a second populated list.
	const rowsJSON = `[{"values": [1, 2, 3]}, {"values": null}, {"values": [4, 5]}]`
	rec := testutil.RecordFromJSON(t, alloc, schema, rowsJSON)

	// Capacity 1 lets the one batch be queued without a concurrent receiver.
	batches := make(chan arrow.RecordBatch, 1)
	batches <- rec

	parquetProps, arrowProps := newWriterProps(alloc, new(DefaultIngestOptions()))

	var out bytes.Buffer
	err := writeParquet(rec.Schema(), &out, batches, -1, parquetProps, arrowProps)
	require.ErrorContains(t, err, "type mismatch, column is int32 writer, arrow array is large_list, and not a compatible type")
}

Component(s)

Parquet

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type: bug — Something isn't working

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions