Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions src/Apache.Arrow/Arrays/StringArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -124,22 +124,33 @@ public string GetString(int index, Encoding encoding = default)
return materializedStrings[index];
}

ReadOnlySpan<byte> bytes = GetBytes(index, out bool isNull);
if (index < 0 || index >= Length)
{
throw new ArgumentOutOfRangeException(nameof(index));
}

if (isNull)
if (IsNull(index))
{
return null;
}

if (bytes.Length == 0)
ReadOnlySpan<int> offsets = ValueOffsets;
int valueOffset = offsets[index];
int valueLength = offsets[index + 1] - valueOffset;

if (valueLength == 0)
{
return string.Empty;
}

ReadOnlySpan<byte> values = Values;

// Decode directly from the shared value buffer so the hot path only pays one
// bounds/null/offset pass before handing off to the requested encoding.
unsafe
{
fixed (byte* data = &MemoryMarshal.GetReference(bytes))
return encoding.GetString(data, bytes.Length);
fixed (byte* data = &MemoryMarshal.GetReference(values))
return encoding.GetString(data + valueOffset, valueLength);
}
}

Expand Down
133 changes: 133 additions & 0 deletions test/Apache.Arrow.Benchmarks/StringArrayGetStringBenchmark.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using System;
using System.Text;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Jobs;

namespace Apache.Arrow.Benchmarks
{
    /// <summary>
    /// Compares <c>StringArray.GetString</c> with a legacy decode path built on
    /// <c>GetBytes</c> followed by <c>Encoding.UTF8.GetString</c>, for both a
    /// full array and a slice of it.
    /// </summary>
    [MemoryDiagnoser]
    [ShortRunJob]
    public class StringArrayGetStringBenchmark
    {
        private StringArray _array;
        private StringArray _slice;

        /// <summary>Number of generated elements placed between the "prefix" and "suffix" entries.</summary>
        [Params(1_024)]
        public int Count { get; set; }

        /// <summary>
        /// Builds the array under test as "prefix", then <see cref="Count"/> generated
        /// entries, then "suffix", and takes a slice covering the generated entries only.
        /// </summary>
        [GlobalSetup]
        public void GlobalSetup()
        {
            var builder = new StringArray.Builder();
            builder.Append("prefix");

            for (int i = 0; i < Count; i++)
            {
                // The low three bits of the index pick the element kind:
                // 0 -> null, 1 -> empty string, anything else -> formatted payload.
                switch (i & 7)
                {
                    case 0:
                        builder.AppendNull();
                        break;

                    case 1:
                        builder.Append(string.Empty);
                        break;

                    default:
                        builder.Append($"value-{i:0000}-payload");
                        break;
                }
            }

            builder.Append("suffix");

            _array = builder.Build();

            // Offset 1 skips "prefix"; length Count stops just before "suffix".
            _slice = (StringArray)_array.Slice(1, Count);
        }

        /// <summary>Disposes the slice first, then the array it was taken from.</summary>
        [GlobalCleanup]
        public void GlobalCleanup()
        {
            _slice.Dispose();
            _array.Dispose();
        }

        /// <summary>Baseline: legacy decode over every element of the full array.</summary>
        /// <returns>Sum of the lengths of all non-null decoded strings.</returns>
        [Benchmark(Baseline = true)]
        public int LegacyGetString()
        {
            int sum = 0;
            for (int row = 0; row < _array.Length; row++)
            {
                string decoded = GetStringLegacy(_array, row);
                if (decoded != null)
                {
                    sum += decoded.Length;
                }
            }

            return sum;
        }

        /// <summary><c>GetString</c> over every element of the full array.</summary>
        /// <returns>Sum of the lengths of all non-null decoded strings.</returns>
        [Benchmark]
        public int GetString()
        {
            int sum = 0;
            for (int row = 0; row < _array.Length; row++)
            {
                string decoded = _array.GetString(row);
                if (decoded != null)
                {
                    sum += decoded.Length;
                }
            }

            return sum;
        }

        /// <summary>Legacy decode over every element of the slice.</summary>
        /// <returns>Sum of the lengths of all non-null decoded strings.</returns>
        [Benchmark]
        public int LegacyGetStringFromSlice()
        {
            int sum = 0;
            for (int row = 0; row < _slice.Length; row++)
            {
                string decoded = GetStringLegacy(_slice, row);
                if (decoded != null)
                {
                    sum += decoded.Length;
                }
            }

            return sum;
        }

        /// <summary><c>GetString</c> over every element of the slice.</summary>
        /// <returns>Sum of the lengths of all non-null decoded strings.</returns>
        [Benchmark]
        public int GetStringFromSlice()
        {
            int sum = 0;
            for (int row = 0; row < _slice.Length; row++)
            {
                string decoded = _slice.GetString(row);
                if (decoded != null)
                {
                    sum += decoded.Length;
                }
            }

            return sum;
        }

        /// <summary>
        /// Reference implementation used as the comparison point: fetches the raw
        /// bytes via <c>GetBytes</c> and decodes them as UTF-8.
        /// </summary>
        /// <param name="array">Array to read from.</param>
        /// <param name="index">Element index passed through to <c>GetBytes</c>.</param>
        /// <returns>
        /// <c>null</c> when <c>GetBytes</c> reports a null element, <see cref="string.Empty"/>
        /// for a zero-length value, otherwise the UTF-8 decoded string.
        /// </returns>
        private static string GetStringLegacy(StringArray array, int index)
        {
            ReadOnlySpan<byte> raw = array.GetBytes(index, out bool missing);

            return missing
                ? null
                : raw.Length == 0
                    ? string.Empty
                    : Encoding.UTF8.GetString(raw);
        }
    }
}
99 changes: 99 additions & 0 deletions test/Apache.Arrow.Tests/StringArrayTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,105 @@ public void ReturnsAppendedValueMaterialize(string firstValue, string secondValu
Assert.True(array.IsMaterialized());
Assert.Equal(firstValue, retrievedValue);
}

[Fact]
public void ReturnsAppendedValueForSlice()
{
    // Arrange: four entries; the slice below starts at index 1, so "prefix" is excluded.
    var builder = new StringArray.Builder();
    builder.Append("prefix");
    builder.Append("value");
    builder.AppendNull();
    builder.Append(string.Empty);
    StringArray source = builder.Build();

    var sliced = (StringArray)source.Slice(1, 3);

    // Act: slice-relative indices 0..2 correspond to "value", null and the empty string.
    string first = sliced.GetString(0);
    string second = sliced.GetString(1);
    string third = sliced.GetString(2);

    // Assert
    Assert.Equal("value", first);
    Assert.Null(second);
    Assert.Equal(string.Empty, third);
}

[Fact]
public void ReturnsAppendedValueForSliceAfterMaterialize()
{
    // Arrange: four entries; the slice below starts at index 1, so "prefix" is excluded.
    var builder = new StringArray.Builder();
    builder.Append("prefix");
    builder.Append("value");
    builder.AppendNull();
    builder.Append(string.Empty);
    StringArray source = builder.Build();

    var sliced = (StringArray)source.Slice(1, 3);

    // Act: materialize the slice itself before reading from it.
    sliced.Materialize();

    // Assert: the slice reports itself as materialized and still returns slice-relative values.
    bool materialized = sliced.IsMaterialized();
    Assert.True(materialized);

    string first = sliced.GetString(0);
    string second = sliced.GetString(1);
    string third = sliced.GetString(2);

    Assert.Equal("value", first);
    Assert.Null(second);
    Assert.Equal(string.Empty, third);
}

[Fact]
public void ReturnsAppendedValueWithCustomEncoding()
{
    // Arrange: a non-ASCII value appended with Encoding.Unicode.
    const string text = "héllø";
    var builder = new StringArray.Builder();
    builder.Append(text, Encoding.Unicode);
    StringArray source = builder.Build();

    // Act: read it back with the same encoding it was appended with.
    string roundTripped = source.GetString(0, Encoding.Unicode);

    // Assert
    Assert.Equal(text, roundTripped);
}

[Fact]
public void ReturnsAppendedValueWithCustomEncodingAfterMaterialize()
{
    // Arrange: a non-ASCII value appended with Encoding.Unicode.
    const string text = "héllø";
    var builder = new StringArray.Builder();
    builder.Append(text, Encoding.Unicode);
    StringArray source = builder.Build();

    // Act: materialize with the same encoding, then read the value back.
    source.Materialize(Encoding.Unicode);
    string roundTripped = source.GetString(0, Encoding.Unicode);

    // Assert: materialization is reported for that encoding and the value round-trips.
    bool materialized = source.IsMaterialized(Encoding.Unicode);
    Assert.True(materialized);
    Assert.Equal(text, roundTripped);
}

[Fact]
public void ReturnsAppendedValueForCustomEncodingSliceAfterMaterialize()
{
    // Arrange: four entries appended with Encoding.Unicode; the slice below
    // starts at index 1, so "prefix" is excluded.
    var builder = new StringArray.Builder();
    builder.Append("prefix", Encoding.Unicode);
    builder.Append("héllø", Encoding.Unicode);
    builder.AppendNull();
    builder.Append(string.Empty, Encoding.Unicode);
    StringArray source = builder.Build();

    var sliced = (StringArray)source.Slice(1, 3);

    // Act: materialize the slice with the encoding the values were appended with.
    sliced.Materialize(Encoding.Unicode);

    // Assert: materialization is reported for that encoding and reads stay slice-relative.
    bool materialized = sliced.IsMaterialized(Encoding.Unicode);
    Assert.True(materialized);

    string first = sliced.GetString(0, Encoding.Unicode);
    string second = sliced.GetString(1, Encoding.Unicode);
    string third = sliced.GetString(2, Encoding.Unicode);

    Assert.Equal("héllø", first);
    Assert.Null(second);
    Assert.Equal(string.Empty, third);
}
}

public class Builder
Expand Down