Skip to content

Commit

Permalink
GH-36120: [C#] Support schema metadata through the C API (#36122)
Browse files Browse the repository at this point in the history
### What changes are included in this PR?

Import and export of field- and schema-level metadata via the C API.

### Are these changes tested?

Yes

* Closes: #36120

Authored-by: Curt Hagenlocher <curt@hagenlocher.org>
Signed-off-by: Will Jones <willjones127@gmail.com>
  • Loading branch information
CurtHagenlocher committed Jun 16, 2023
1 parent e39386b commit 14f2e4e
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 10 deletions.
52 changes: 49 additions & 3 deletions csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
Expand Up @@ -16,8 +16,10 @@

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using Apache.Arrow.Types;

namespace Apache.Arrow.C
Expand Down Expand Up @@ -83,8 +85,7 @@ public static unsafe void ExportField(Field field, CArrowSchema* schema)
{
ExportType(field.DataType, schema);
schema->name = StringUtil.ToCStringUtf8(field.Name);
// TODO: field metadata
schema->metadata = null;
schema->metadata = ConstructMetadata(field.Metadata);
schema->flags = GetFlags(field.DataType, field.IsNullable);
}

Expand All @@ -104,8 +105,8 @@ public static unsafe void ExportField(Field field, CArrowSchema* schema)
public static unsafe void ExportSchema(Schema schema, CArrowSchema* out_schema)
{
    // A schema is exported as a struct type whose children are the schema's fields.
    ExportType(new StructType(schema.FieldsList), out_schema);

    // Attach the schema-level key/value metadata to the exported top-level struct.
    // NOTE(review): kept after ExportType on purpose -- presumably ExportType
    // initializes the members of *out_schema; confirm before reordering.
    out_schema->metadata = ConstructMetadata(schema.Metadata);
}

private static char FormatTimeUnit(TimeUnit unit) => unit switch
Expand Down Expand Up @@ -239,6 +240,51 @@ private static long GetFlags(IArrowType datatype, bool nullable = true)
}
}

/// <summary>
/// Serializes key/value metadata into a newly allocated unmanaged buffer.
/// </summary>
/// <remarks>
/// Layout written by this method: an int32 pair count, then for every pair an
/// int32 key byte length, the UTF-8 key bytes, an int32 value byte length and
/// the UTF-8 value bytes. Strings are not null-terminated.
/// The buffer comes from <see cref="Marshal.AllocHGlobal(int)"/>; whoever ends up
/// owning the returned pointer must release it with <see cref="Marshal.FreeHGlobal(IntPtr)"/>
/// (presumably the schema release callback -- confirm).
/// </remarks>
/// <param name="metadata">The metadata to serialize; may be null or empty.</param>
/// <returns>A pointer to the serialized metadata, or null when there is nothing to write.</returns>
/// <exception cref="OverflowException">The serialized form would exceed <see cref="int.MaxValue"/> bytes.</exception>
private static unsafe byte* ConstructMetadata(IReadOnlyDictionary<string, string> metadata)
{
    if (metadata == null || metadata.Count == 0)
    {
        return null;
    }

    int size = 4; // leading int32 pair count
    int[] lengths;
    int i = 0;

    // First pass: measure every string once so the buffer can be sized exactly.
    // Checked arithmetic turns a (pathological) > 2 GiB total into an
    // OverflowException instead of a silently undersized allocation.
    checked
    {
        lengths = new int[metadata.Count * 2];
        foreach (KeyValuePair<string, string> pair in metadata)
        {
            int keyLength = Encoding.UTF8.GetByteCount(pair.Key);
            lengths[i++] = keyLength;
            int valueLength = Encoding.UTF8.GetByteCount(pair.Value);
            lengths[i++] = valueLength;

            // Two int32 length prefixes plus the two payloads.
            size += 8;
            size += keyLength;
            size += valueLength;
        }
    }

    IntPtr result = Marshal.AllocHGlobal(size);
    try
    {
        Marshal.WriteInt32(result, metadata.Count);
        byte* ptr = (byte*)result + 4;

        // Second pass: emit the pairs in the same enumeration order, reusing the measured lengths.
        i = 0;
        foreach (KeyValuePair<string, string> pair in metadata)
        {
            WriteMetadataString(ref ptr, lengths[i++], pair.Key);
            WriteMetadataString(ref ptr, lengths[i++], pair.Value);
        }

        Debug.Assert((long)(IntPtr)ptr - (long)result == size);
    }
    catch
    {
        // Nothing has been handed to the caller yet, so the buffer would otherwise leak.
        Marshal.FreeHGlobal(result);
        throw;
    }

    return (byte*)result;
}

/// <summary>
/// Writes one length-prefixed UTF-8 string at <paramref name="ptr"/> and advances
/// <paramref name="ptr"/> past it.
/// </summary>
/// <param name="ptr">Write cursor; on return it points just after the bytes written.</param>
/// <param name="length">UTF-8 byte count of <paramref name="str"/>, computed by the caller.</param>
/// <param name="str">The string to encode.</param>
private unsafe static void WriteMetadataString(ref byte* ptr, int length, string str)
{
    // int32 length prefix. Marshal.WriteInt32 is used rather than a direct pointer
    // store; the cursor is not necessarily 4-byte aligned after earlier strings.
    Marshal.WriteInt32((IntPtr)ptr, length);
    ptr += sizeof(int);

    // Encode straight into the destination buffer; no terminator is written.
    fixed (char* chars = str)
    {
        Encoding.UTF8.GetBytes(chars, str.Length, ptr, length);
    }

    ptr += length;
}

private static unsafe void ReleaseCArrowSchema(CArrowSchema* schema)
{
if (schema == null) return;
Expand Down
43 changes: 41 additions & 2 deletions csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
Expand Up @@ -18,6 +18,8 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using Apache.Arrow.Types;

namespace Apache.Arrow.C
Expand Down Expand Up @@ -281,21 +283,58 @@ public Field GetAsField()

bool nullable = _cSchema->GetFlag(CArrowSchema.ArrowFlagNullable);

return new Field(fieldName, GetAsType(), nullable);
return new Field(fieldName, GetAsType(), nullable, GetMetadata(_cSchema->metadata));
}

/// <summary>
/// Converts the imported C schema into a <see cref="Schema"/>, including its metadata.
/// </summary>
/// <returns>A schema built from the fields of the imported struct type.</returns>
/// <exception cref="ArgumentException">The imported type is not a struct type.</exception>
public Schema GetAsSchema()
{
    ArrowType fullType = GetAsType();
    if (fullType is StructType structType)
    {
        // Carry the schema-level metadata across; a leftover
        // "return new Schema(structType.Fields, default)" ahead of this line
        // previously made it unreachable and dropped the metadata.
        return new Schema(structType.Fields, GetMetadata(_cSchema->metadata));
    }
    else
    {
        throw new ArgumentException("Imported type is not a struct type, so it cannot be converted to a schema.");
    }
}

/// <summary>
/// Decodes a serialized metadata buffer into a dictionary.
/// </summary>
/// <remarks>
/// Expected layout, as read here: an int32 pair count followed by that many
/// key/value pairs, each string stored as an int32 byte length plus UTF-8 bytes.
/// When a key occurs more than once the last value wins.
/// </remarks>
/// <param name="metadata">Pointer to the serialized metadata; may be null.</param>
/// <returns>
/// The decoded pairs, or null when <paramref name="metadata"/> is null or its
/// pair count is zero or negative.
/// </returns>
private unsafe static IReadOnlyDictionary<string, string> GetMetadata(byte* metadata)
{
    if (metadata == null)
    {
        return null;
    }

    IntPtr cursor = (IntPtr)metadata;
    int pairCount = Marshal.ReadInt32(cursor);
    if (pairCount <= 0)
    {
        // NOTE(review): a negative count is treated the same as "no metadata"
        // rather than as an error -- confirm that leniency is intended.
        return null;
    }
    cursor = IntPtr.Add(cursor, sizeof(int));

    var pairs = new Dictionary<string, string>(pairCount);
    for (int remaining = pairCount; remaining > 0; remaining--)
    {
        // The key precedes its value in the buffer, so it must be read first.
        string key = ReadMetadataString(ref cursor);
        string value = ReadMetadataString(ref cursor);
        pairs[key] = value;
    }
    return pairs;
}

/// <summary>
/// Reads one length-prefixed UTF-8 string at <paramref name="ptr"/> and advances
/// <paramref name="ptr"/> past it.
/// </summary>
/// <param name="ptr">Read cursor; on success it points just after the string's bytes.</param>
/// <returns>The decoded string (empty when the stored length is zero).</returns>
/// <exception cref="InvalidOperationException">The stored length is negative.</exception>
private unsafe static string ReadMetadataString(ref IntPtr ptr)
{
    // int32 byte-length prefix; the cursor is left untouched if it is invalid.
    int byteCount = Marshal.ReadInt32(ptr);
    if (byteCount < 0)
    {
        throw new InvalidOperationException("unexpected negative length for metadata string");
    }

    // Step over the prefix, decode the payload in place, then step over the payload.
    ptr = IntPtr.Add(ptr, sizeof(int));
    string text = Encoding.UTF8.GetString((byte*)ptr, byteCount);
    ptr = IntPtr.Add(ptr, byteCount);
    return text;
}
}
}
}
16 changes: 11 additions & 5 deletions csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
Expand Up @@ -61,7 +61,7 @@ private static Schema GetTestSchema()
using (Py.GIL())
{
var schema = new Schema.Builder()
.Field(f => f.Name("null").DataType(NullType.Default).Nullable(true))
.Field(f => f.Name("null").DataType(NullType.Default).Nullable(true).Metadata("k0", "v0"))
.Field(f => f.Name("bool").DataType(BooleanType.Default).Nullable(true))
.Field(f => f.Name("i8").DataType(Int8Type.Default).Nullable(true))
.Field(f => f.Name("u8").DataType(UInt8Type.Default).Nullable(true))
Expand All @@ -72,7 +72,7 @@ private static Schema GetTestSchema()
.Field(f => f.Name("i64").DataType(Int64Type.Default).Nullable(true))
.Field(f => f.Name("u64").DataType(UInt64Type.Default).Nullable(true))

.Field(f => f.Name("f16").DataType(HalfFloatType.Default).Nullable(true))
.Field(f => f.Name("f16").DataType(HalfFloatType.Default).Nullable(true).Metadata("k1a", "").Metadata("k1b", "断箭"))
.Field(f => f.Name("f32").DataType(FloatType.Default).Nullable(true))
.Field(f => f.Name("f64").DataType(DoubleType.Default).Nullable(true))

Expand Down Expand Up @@ -105,6 +105,7 @@ private static Schema GetTestSchema()
// Checking wider characters.
.Field(f => f.Name("hello 你好 😄").DataType(BooleanType.Default).Nullable(true))

.Metadata("k2a", "v2abc").Metadata("k2b", "v2abc").Metadata("k2c", "v2abc")
.Build();
return schema;
}
Expand All @@ -114,8 +115,11 @@ private static IEnumerable<dynamic> GetPythonFields()
{
using (Py.GIL())
{
Dictionary<string, string> metadata0 = new Dictionary<string, string> { { "k0", "v0" } };
Dictionary<string, string> metadata1 = new Dictionary<string, string> { { "k1a", "" }, { "k1b", "断箭" } };

dynamic pa = Py.Import("pyarrow");
yield return pa.field("null", pa.GetAttr("null").Invoke(), true);
yield return pa.field("null", pa.GetAttr("null").Invoke(), true).with_metadata(metadata0);
yield return pa.field("bool", pa.bool_(), true);
yield return pa.field("i8", pa.int8(), true);
yield return pa.field("u8", pa.uint8(), true);
Expand All @@ -126,7 +130,7 @@ private static IEnumerable<dynamic> GetPythonFields()
yield return pa.field("i64", pa.int64(), true);
yield return pa.field("u64", pa.uint64(), true);

yield return pa.field("f16", pa.float16(), true);
yield return pa.field("f16", pa.float16(), true).with_metadata(metadata1);
yield return pa.field("f32", pa.float32(), true);
yield return pa.field("f64", pa.float64(), true);

Expand Down Expand Up @@ -164,8 +168,10 @@ private static dynamic GetPythonSchema()
{
using (Py.GIL())
{
Dictionary<string, string> metadata = new Dictionary<string, string> { { "k2a", "v2abc" }, { "k2b", "v2abc" }, { "k2c", "v2abc" } };

dynamic pa = Py.Import("pyarrow");
return pa.schema(GetPythonFields().ToList());
return pa.schema(GetPythonFields().ToList()).with_metadata(metadata);
}
}

Expand Down

0 comments on commit 14f2e4e

Please sign in to comment.