Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 35 additions & 11 deletions src/Microsoft.ML.Core/Data/MetadataBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ namespace Microsoft.ML.Data
/// </summary>
public sealed class MetadataBuilder
{
private readonly List<(string Name, ColumnType Type, Delegate Getter)> _items;
private readonly List<(string Name, ColumnType Type, Delegate Getter, Schema.Metadata Metadata)> _items;

public MetadataBuilder()
{
_items = new List<(string Name, ColumnType Type, Delegate Getter)>();
_items = new List<(string Name, ColumnType Type, Delegate Getter, Schema.Metadata Metadata)>();
}

/// <summary>
Expand All @@ -40,7 +40,7 @@ public void Add(Schema.Metadata metadata, Func<string, bool> selector)
foreach (var column in metadata.Schema)
{
if (selector(column.Name))
_items.Add((column.Name, column.Type, metadata.Getters[column.Index]));
_items.Add((column.Name, column.Type, metadata.Getters[column.Index], column.Metadata));
}
}

Expand All @@ -51,13 +51,17 @@ public void Add(Schema.Metadata metadata, Func<string, bool> selector)
/// <param name="name">The metadata name.</param>
/// <param name="type">The metadata type.</param>
/// <param name="getter">The getter delegate.</param>
public void Add<TValue>(string name, ColumnType type, ValueGetter<TValue> getter)
/// <param name="metadata">Metadata of the input column. Note that metadata on a metadata column is somewhat rare
Copy link
Member

@wschin wschin Dec 3, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does "input column" mean here? Is it the metadata column being added (specified by name, type, and getter), or the column to which the metadata will be attached? Maybe change "input column" to "the metadata column being added"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah. I'll think about this and handle in next PR after this.

/// except for certain types (for example, slot names for a vector, key values for something of key type).</param>
public void Add<TValue>(string name, ColumnType type, ValueGetter<TValue> getter, Schema.Metadata metadata = null)
{
Contracts.CheckNonEmpty(name, nameof(name));
Contracts.CheckValue(type, nameof(type));
Contracts.CheckValue(getter, nameof(getter));
Contracts.CheckParam(type.RawType == typeof(TValue), nameof(getter));
_items.Add((name, type, getter));
Contracts.CheckParam(type.RawType == typeof(TValue), nameof(type));
Contracts.CheckValueOrNull(metadata);

_items.Add((name, type, getter, metadata));
}

/// <summary>
Expand All @@ -67,11 +71,31 @@ public void Add<TValue>(string name, ColumnType type, ValueGetter<TValue> getter
/// <param name="type">The metadata type.</param>
/// <param name="getter">The getter delegate that provides the value. Note that the type of the getter is still checked
/// inside this method.</param>
public void Add(string name, ColumnType type, Delegate getter)
/// <param name="metadata">Metadata of the metadata column being added (may be null). Note that metadata on a metadata column is somewhat rare
/// except for certain types (for example, slot names for a vector, key values for something of key type).</param>
public void Add(string name, ColumnType type, Delegate getter, Schema.Metadata metadata = null)
{
Contracts.CheckNonEmpty(name, nameof(name));
Contracts.CheckValue(type, nameof(type));
Utils.MarshalActionInvoke(AddDelegate<int>, type.RawType, name, type, getter);
Contracts.CheckValueOrNull(metadata);
Utils.MarshalActionInvoke(AddDelegate<int>, type.RawType, name, type, getter, metadata);
}

/// <summary>
/// Adds a single metadata column whose value is a fixed primitive, supplied directly instead of through a getter.
/// </summary>
/// <param name="name">The metadata name. Checked to be non-empty.</param>
/// <param name="type">The primitive metadata type. Its raw type must be <typeparamref name="TValue"/>.</param>
/// <param name="value">The value that the generated getter assigns on every call.</param>
/// <param name="metadata">Optional metadata for the metadata column being added; may be null. Metadata on a
/// metadata column is somewhat rare except for certain types (for example, slot names for a vector, key values
/// for something of key type).</param>
public void AddPrimitiveValue<TValue>(string name, PrimitiveType type, TValue value, Schema.Metadata metadata = null)
{
    Contracts.CheckNonEmpty(name, nameof(name));
    Contracts.CheckValue(type, nameof(type));
    Contracts.CheckParam(type.RawType == typeof(TValue), nameof(type));
    Contracts.CheckValueOrNull(metadata);

    // Wrap the constant in a getter so it is registered through the same typed Add overload as any other entry.
    ValueGetter<TValue> constantGetter = (ref TValue dst) => { dst = value; };
    Add<TValue>(name, type, constantGetter, metadata);
}

/// <summary>
Expand Down Expand Up @@ -100,19 +124,19 @@ public Schema.Metadata GetMetadata()
{
var builder = new SchemaBuilder();
foreach (var item in _items)
builder.AddColumn(item.Name, item.Type, null);
builder.AddColumn(item.Name, item.Type, item.Metadata);
return new Schema.Metadata(builder.GetSchema(), _items.Select(x => x.Getter).ToArray());
}

private void AddDelegate<TValue>(string name, ColumnType type, Delegate getter)
private void AddDelegate<TValue>(string name, ColumnType type, Delegate getter, Schema.Metadata metadata)
{
Contracts.AssertNonEmpty(name);
Contracts.AssertValue(type);
Contracts.AssertValue(getter);

var typedGetter = getter as ValueGetter<TValue>;
Contracts.CheckParam(typedGetter != null, nameof(getter));
_items.Add((name, type, typedGetter));
_items.Add((name, type, typedGetter, metadata));
}
}
}
29 changes: 29 additions & 0 deletions src/Microsoft.ML.Core/Data/MetadataUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -494,5 +494,34 @@ public static bool TryGetCategoricalFeatureIndices(Schema schema, int colIndex,
cols.AddRange(GetTrainerOutputMetadata());
return cols;
}

/// <summary>
/// An <see cref="IRow"/> view over a <see cref="Schema.Metadata"/> instance. The schema and the value getters
/// are forwarded to the wrapped metadata, and every column is reported as active.
/// </summary>
private sealed class MetadataRow : IRow
{
    private readonly Schema.Metadata _metadata;

    /// <summary>
    /// Creates a row over <paramref name="metadata"/>, which is asserted to be non-null.
    /// </summary>
    public MetadataRow(Schema.Metadata metadata)
    {
        Contracts.AssertValue(metadata);
        _metadata = metadata;
    }

    /// <summary>The schema of the wrapped metadata.</summary>
    public Schema Schema
    {
        get { return _metadata.Schema; }
    }

    // Position and batch are fixed at zero.
    public long Position
    {
        get { return 0; }
    }

    public long Batch
    {
        get { return 0; }
    }

    /// <summary>Forwards the getter request to the wrapped metadata.</summary>
    public ValueGetter<TValue> GetGetter<TValue>(int col)
    {
        return _metadata.GetGetter<TValue>(col);
    }

    /// <summary>Returns a getter that always assigns the default ID value.</summary>
    public ValueGetter<UInt128> GetIdGetter()
    {
        return (ref UInt128 dst) => { dst = default; };
    }

    /// <summary>Reports every column as active, regardless of <paramref name="col"/>.</summary>
    public bool IsColumnActive(int col)
    {
        return true;
    }
}

/// <summary>
/// Wraps a <see cref="Schema.Metadata"/> so that it can be consumed through the <see cref="IRow"/> interface.
/// </summary>
/// <param name="metadata">The metadata to expose as a row. Checked to be non-null.</param>
/// <returns>An <see cref="IRow"/> backed by <paramref name="metadata"/>.</returns>
public static IRow MetadataAsRow(Schema.Metadata metadata)
{
    Contracts.CheckValue(metadata, nameof(metadata));

    IRow row = new MetadataRow(metadata);
    return row;
}
}
}
7 changes: 4 additions & 3 deletions src/Microsoft.ML.Core/Data/Schema.cs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ public sealed class Metadata
/// </summary>
public Schema Schema { get; }

public static Metadata Empty { get; } = new Metadata(new Schema(Enumerable.Empty<Column>()), new Delegate[0]);
public static Metadata Empty { get; } = new Metadata(new Schema(new Column[0]), new Delegate[0]);

/// <summary>
/// Create a metadata row by supplying the schema columns and the getter delegates for all the values.
Expand Down Expand Up @@ -256,11 +256,12 @@ public void GetValue<TValue>(string kind, ref TValue value)
/// <summary>
/// This constructor should only be called by <see cref="SchemaBuilder"/>.
/// </summary>
internal Schema(IEnumerable<Column> columns)
/// <param name="columns">The input columns. The constructed instance takes ownership of the array.</param>
internal Schema(Column[] columns)
{
Contracts.CheckValue(columns, nameof(columns));

_columns = columns.ToArray();
_columns = columns;
_nameMap = new Dictionary<string, int>();
for (int i = 0; i < _columns.Length; i++)
{
Expand Down
4 changes: 4 additions & 0 deletions src/Microsoft.ML.Core/Data/SchemaBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using System;
using System.Collections.Generic;
Expand Down Expand Up @@ -32,6 +33,9 @@ public SchemaBuilder()
/// <param name="metadata">The column metadata.</param>
public void AddColumn(string name, ColumnType type, Schema.Metadata metadata)
{
    // Validate before recording anything: the name must be non-empty and the type non-null,
    // while the metadata is optional and may be null.
    Contracts.CheckNonEmpty(name, nameof(name));
    Contracts.CheckValue(type, nameof(type));
    Contracts.CheckValueOrNull(metadata);

    // Queue the column description as a single tuple entry.
    var column = (name, type, metadata);
    _items.Add(column);
}

Expand Down
9 changes: 9 additions & 0 deletions src/Microsoft.ML.Core/Utilities/Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1076,6 +1076,15 @@ public static void MarshalActionInvoke<TArg1, TArg2, TArg3>(Action<TArg1, TArg2,
meth.Invoke(act.Target, new object[] { arg1, arg2, arg3 });
}

/// <summary>
/// Four-argument counterpart of <see cref="MarshalActionInvoke(Action, Type)"/>: obtains the method for
/// <paramref name="genArg"/> from <c>MarshalActionInvokeCheckAndCreate</c> and invokes it on the target of
/// <paramref name="act"/> with the four supplied arguments.
/// </summary>
public static void MarshalActionInvoke<TArg1, TArg2, TArg3, TArg4>(Action<TArg1, TArg2, TArg3, TArg4> act, Type genArg, TArg1 arg1, TArg2 arg2, TArg3 arg3, TArg4 arg4)
{
    var method = MarshalActionInvokeCheckAndCreate(genArg, act);

    // Reflection-based invocation takes the arguments as an object array, in declaration order.
    var target = act.Target;
    object[] invokeArgs = { arg1, arg2, arg3, arg4 };
    method.Invoke(target, invokeArgs);
}

public static string GetDescription(this Enum value)
{
Type type = value.GetType();
Expand Down
Loading