From 36ffe1c812521047442ddc831c457b189c3952ae Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Wed, 21 Nov 2018 14:50:51 -0800 Subject: [PATCH 01/16] Addition of the ValueMappingEstimator and ValueMappingTransform. This will be replacing the TermLookupTransform and provide a way to specify the mapping between two values (note this is specified and not trained). A user can specify the mapping by providing a keys list and values list that must be equal in size. The Estimator will then generate a 1-1 mapping based on the two lists. The PR references #754 which covers the conversion of Transformer to use the new Estimator API. --- .../Transforms/ExtensionsCatalog.cs | 14 + .../Transforms/ValueMappingTransform.cs | 512 ++++++++++++++++++ test/Microsoft.ML.Tests/CSharpCodeGen.cs | 3 +- .../Transformers/ValueMappingTests.cs | 107 ++++ 4 files changed, 635 insertions(+), 1 deletion(-) create mode 100644 src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs create mode 100644 test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs diff --git a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs index 96393fdbaa..440efaba72 100644 --- a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs @@ -5,6 +5,7 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Transforms; +using System.Collections.Generic; namespace Microsoft.ML { @@ -90,4 +91,17 @@ public static ColumnSelectingEstimator SelectColumns(this TransformsCatalog cata => new ColumnSelectingEstimator(CatalogUtils.GetEnvironment(catalog), keepColumns, dropColumns, keepHidden, ignoreMissing); } + + /* + + public static class ValueMappingCatalog + { + public static ValueMappingEstimator ValueMap( + this TransformsCatalog catalog, + IEnumerable keys, + IEnumerable values, + params (string source, string name)[] columns) + => new 
ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, columns); + } + */ } diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs new file mode 100644 index 0000000000..a842eae446 --- /dev/null +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs @@ -0,0 +1,512 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Core.Data; +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.CommandLine; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.Data.IO; +using Microsoft.ML.Runtime.Internal.Utilities; +using Microsoft.ML.Runtime.Model; +using Microsoft.ML.Transforms; +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; + +[assembly: LoadableClass(ValueMappingTransform.Summary, typeof(IDataTransform), typeof(ValueMappingTransform), null, typeof(SignatureLoadDataTransform), + "Value Mapping Transform", ValueMappingTransform.LoaderSignature)] + +[assembly: LoadableClass(ValueMappingTransform.Summary, typeof(ValueMappingTransform), null, typeof(SignatureLoadModel), + "Value Mapping Transform", ValueMappingTransform.LoaderSignature)] + +namespace Microsoft.ML.Transforms +{ + public sealed class ValueMappingEstimator : TrivialEstimator> + { + private (string input, string output)[] _columns; + + public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), + new ValueMappingTransform(env, keys, values, columns)) + { + _columns = columns; + } + + public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] 
columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), + new ValueMappingTransform(env, keys, values, columns)) + { + _columns = columns; + } + + public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), + new ValueMappingTransform(env, keys, values, columns)) + { + _columns = columns; + } + + public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), + new ValueMappingTransform(env, keys, values, columns)) + { + _columns = columns; + } + + public override SchemaShape GetOutputSchema(SchemaShape inputSchema) + { + Host.CheckValue(inputSchema, nameof(inputSchema)); + + var resultDic = inputSchema.Columns.ToDictionary(x => x.Name); + + var outputType = typeof(TValueType); + ColumnType outputColumnType = default; + if (outputType.IsGenericEx(typeof(VBuffer<>))) + { + Type vBufferType = outputType.GetGenericArguments()[0]; + vBufferType.TryGetDataKind(out DataKind kind); + outputColumnType = new VectorType(PrimitiveType.FromKind(kind)); + } + else + { + outputType.TryGetDataKind(out DataKind kind); + outputColumnType = PrimitiveType.FromKind(kind); + } + + foreach (var (Input, Output) in _columns) + { + if (!inputSchema.TryFindColumn(Input, out var originalColumn)) + throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", Input); + + // Get the type from TOutputType + var col = new SchemaShape.Column(Output, originalColumn.Kind, outputColumnType, originalColumn.IsKey, originalColumn.Metadata); + resultDic[Output] = col; + } + return new SchemaShape(resultDic.Values); + } + } + + public sealed class ValueMappingTransform : ValueMappingTransform + { + public ValueMappingTransform(IHostEnvironment 
env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), + ConvertToDataView(env, keys, values), columns) + { } + + public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), + ConvertToDataView(env, keys, values), columns) + { } + + public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), + ConvertToDataView(env, keys, values), columns) + { } + + public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), + ConvertToDataView(env, keys, values), columns) + { } + + private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values) + { + // Build DataView from the mapping + var keyType = ValueMappingTransform.GetPrimitiveType(typeof(TKeyType), out bool isKeyVectorType); + var valueType = ValueMappingTransform.GetPrimitiveType(typeof(TValueType), out bool isValueVectorType); + var dataViewBuilder = new ArrayDataViewBuilder(env); + dataViewBuilder.AddColumn(ValueMappingTransform.KeyColumnName, keyType, keys.ToArray()); + dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName, valueType, values.ToArray()); + return dataViewBuilder.GetDataView(); + } + + private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values) + { + // Build DataView from the mapping + var keyType = ValueMappingTransform.GetPrimitiveType(typeof(TKeyType), out bool isKeyVectorType); + var valueType = 
ValueMappingTransform.GetPrimitiveType(typeof(TValueType), out bool isValueVectorType); + var dataViewBuilder = new ArrayDataViewBuilder(env); + dataViewBuilder.AddColumn(ValueMappingTransform.KeyColumnName, keyType, keys.ToArray()); + dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName, valueType, values.ToArray()); + return dataViewBuilder.GetDataView(); + } + + private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values) + { + // Build DataView from the mapping + var keyType = ValueMappingTransform.GetPrimitiveType(typeof(TKeyType), out bool isKeyVectorType); + var valueType = ValueMappingTransform.GetPrimitiveType(typeof(TValueType), out bool isValueVectorType); + var dataViewBuilder = new ArrayDataViewBuilder(env); + dataViewBuilder.AddColumn(ValueMappingTransform.KeyColumnName, keyType, keys.ToArray()); + dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName, valueType, values.ToArray()); + return dataViewBuilder.GetDataView(); + } + + private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values) + { + // Build DataView from the mapping + var keyType = ValueMappingTransform.GetPrimitiveType(typeof(TKeyType), out bool isKeyVectorType); + var valueType = ValueMappingTransform.GetPrimitiveType(typeof(TValueType), out bool isValueVectorType); + var dataViewBuilder = new ArrayDataViewBuilder(env); + dataViewBuilder.AddColumn(ValueMappingTransform.KeyColumnName, keyType, keys.ToArray()); + dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName, valueType, values.ToArray()); + return dataViewBuilder.GetDataView(); + } + } + + public class ValueMappingTransform : OneToOneTransformerBase + { + internal const string Summary = "Maps text values columns to new columns using a map dataset."; + internal const string LoaderSignature = "ValueMappingTransform"; + + // Stream names for the binary idv streams. 
+ private const string DefaultMapName = "DefaultMap.idv"; + protected static string KeyColumnName = "Key"; + protected static string ValueColumnName = "Value"; + private ValueMap _valueMap; + + private static VersionInfo GetVersionInfo() + { + return new VersionInfo( + modelSignature: "VALUMAPG", + verWrittenCur: 0x00010001, // Initial. + verReadableCur: 0x00010001, + verWeCanReadBack: 0x00010001, + loaderSignature: LoaderSignature, + loaderAssemblyName: typeof(ValueMappingTransform).Assembly.FullName); + } + + public sealed class Column : OneToOneColumn + { + public static Column Parse(string str) + { + var res = new Column(); + if (res.TryParse(str)) + return res; + return null; + } + + public bool TryUnparse(StringBuilder sb) + { + Contracts.AssertValue(sb); + return TryUnparseCore(sb); + } + } + + public sealed class Arguments + { + [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)] + public Column[] Column; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the text column containing the terms", ShortName = "term")] + public string TermColumn; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the column containing the values", ShortName = "value")] + public string ValueColumn; + + [Argument(ArgumentType.Multiple, HelpText = "The data loader", NullName = "", SignatureType = typeof(SignatureDataLoader))] + public IComponentFactory Loader; + + [Argument(ArgumentType.AtMostOnce, + HelpText = "If term and value columns are unspecified, specifies whether the values are key values or numeric.", ShortName = "key")] + public bool KeyValues = true; + + [Argument(ArgumentType.AtMostOnce, IsInputFileName = true, HelpText = "The data file containing the terms", ShortName = "data", SortOrder = 2)] + public string DataFile; + } + + protected ValueMappingTransform(IHostEnvironment env, IDataView lookupMap, (string Input, string Output)[] columns) 
+ : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), columns) + { + _valueMap = CreateValueMapFromDataView(lookupMap); + } + + private ValueMap CreateValueMapFromDataView(IDataView dataView) + { + Contracts.Check(dataView.Schema.GetColumns().Count() == 2); + Contracts.Check(dataView.GetRowCount() > 0); + var keyType = dataView.Schema.GetColumnType(0); + var valueType = dataView.Schema.GetColumnType(1); + var valueMap = ValueMap.Create(keyType, valueType); + using (var cursor = dataView.GetRowCursor(c=> true)) + valueMap.Train(Host, cursor); + return valueMap; + } + + protected static ValueMappingTransform Create(IHostEnvironment env, ModelLoadContext ctx) + { + Contracts.CheckValue(env, nameof(env)); + env.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(GetVersionInfo()); + + // *** Binary format *** + // int: number of added columns + // for each added column + // string: output column name + // string: input column name + // Binary stream of mapping + + var length = ctx.Reader.ReadInt32(); + var columns = new (string Source, string Name)[length]; + for (int i = 0; i < length; i++) + { + columns[i].Name = ctx.LoadNonEmptyString(); + columns[i].Source = ctx.LoadNonEmptyString(); + } + + byte[] rgb = null; + Action fn = r => rgb = ReadAllBytes(env, r); + + if (!ctx.TryLoadBinaryStream(DefaultMapName, fn)) + throw env.ExceptDecode(); + + var binaryLoader = GetLoader(env, rgb); + return new ValueMappingTransform(env, binaryLoader, columns); + } + + private static byte[] ReadAllBytes(IExceptionContext ectx, BinaryReader rdr) + { + Contracts.AssertValue(ectx); + ectx.AssertValue(rdr); + ectx.Assert(rdr.BaseStream.CanSeek); + + long size = rdr.BaseStream.Length; + ectx.CheckDecode(size <= int.MaxValue); + + var rgb = new byte[(int)size]; + int cb = rdr.Read(rgb, 0, rgb.Length); + ectx.CheckDecode(cb == rgb.Length); + + return rgb; + } + + protected static IDataTransform Create(IHostEnvironment env, ModelLoadContext ctx, IDataView input) 
+ => Create(env, ctx).MakeDataTransform(input); + + protected static PrimitiveType GetPrimitiveType(Type rawType, out bool isVectorType) + { + Type type = rawType; + isVectorType = false; + if (type.IsArray) + { + type = rawType.GetElementType(); + isVectorType = true; + } + + type.TryGetDataKind(out DataKind kind); + return PrimitiveType.FromKind(kind); + } + + public override void Save(ModelSaveContext ctx) + { + Host.CheckValue(ctx, nameof(ctx)); + ctx.SetVersionInfo(GetVersionInfo()); + SaveColumns(ctx); + + // convert value map to a dataview and serialize as bytes + var bytes = GetBytesFromDataView(Host, _valueMap.GetDataView(Host), KeyColumnName, ValueColumnName); + ctx.SaveBinaryStream(DefaultMapName, w => w.Write(bytes)); + } + + /// + /// Holds the values that the terms map to. + /// + protected abstract class ValueMap + { + public readonly ColumnType KeyType; + public readonly ColumnType ValueType; + + public ValueMap(ColumnType keyType, ColumnType valueType) + { + KeyType = keyType; + ValueType = valueType; + } + + public static ValueMap Create(ColumnType keyType, ColumnType valueType) + { + Func del = CreateValueMapInvoke; + var meth = del.Method.GetGenericMethodDefinition().MakeGenericMethod(keyType.RawType, valueType.RawType); + return (ValueMap)meth.Invoke(null, new object[] { keyType, valueType }); + } + + private static ValueMap CreateValueMapInvoke(ColumnType keyType, ColumnType valueType) + { + return new ValueMap(keyType, valueType); + } + + public abstract void Train(IHostEnvironment env, IRowCursor cursor); + + public abstract Delegate GetGetter(IRow input, int index); + + public abstract IDataView GetDataView(IHostEnvironment env); + } + + private class ValueMap : ValueMap + { + private Dictionary _mapping; + + public ValueMap(ColumnType keyType, ColumnType valueType) + : base(keyType, valueType) + { + _mapping = new Dictionary(); + } + + public override void Train(IHostEnvironment env, IRowCursor cursor) + { + while(cursor.MoveNext()) + { + 
var keyGetter = cursor.GetGetter(0); + var valueGetter = cursor.GetGetter(1); + TKeyType key = default; + TValueType value = default; + keyGetter(ref key); + valueGetter(ref value); + if (_mapping.ContainsKey(key)) + { + throw env.Except($"Duplicate keys in data '{key}'"); + } + _mapping.Add(key, value); + } + } + + public override Delegate GetGetter(IRow input, int index) + { + var src = default(TKeyType); + ValueGetter getSrc = input.GetGetter(index);; + ValueGetter retVal = + (ref TValueType dst) => + { + getSrc(ref src); + if (_mapping.ContainsKey(src)) + { + if (ValueType.IsVector) + dst = Utils.MarshalInvoke(GetVector, ValueType.ItemType.RawType, _mapping[src]); + else + dst = Utils.MarshalInvoke(GetValue, ValueType.RawType, _mapping[src]); + } + else + dst = default; + }; + return retVal; + } + + public override IDataView GetDataView(IHostEnvironment env) + { + var dataViewBuilder = new ArrayDataViewBuilder(env); + var keyType = ValueMappingTransform.GetPrimitiveType(typeof(TKeyType), out bool isKeyVectorType); + var valueType = ValueMappingTransform.GetPrimitiveType(typeof(TValueType), out bool isValueVectorType); + dataViewBuilder.AddColumn(ValueMappingTransform.KeyColumnName, keyType, _mapping.Keys.ToArray()); + dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName,valueType, _mapping.Values.ToArray()); + return dataViewBuilder.GetDataView(); + } + + private static TValueType GetVector(TValueType value) + { + if (value is VBuffer valueRef) + { + VBuffer dest = default; + valueRef.CopyTo(ref dest); + if (dest is TValueType destRef) + return destRef; + } + + return default; + } + + private static TValueType GetValue(TValueType value) + => value; + } + + private static byte[] GetBytesFromDataView(IHost host, IDataView lookup, string termColumn, string valueColumn) + { + Contracts.AssertValue(host); + host.AssertValue(lookup); + host.AssertNonEmpty(termColumn); + host.AssertNonEmpty(valueColumn); + + int colTerm; + int colValue; + var schema = 
lookup.Schema; + + if (!schema.TryGetColumnIndex(termColumn, out colTerm)) + throw host.ExceptUserArg(nameof(Arguments.TermColumn), "column not found: '{0}'", termColumn); + if (!schema.TryGetColumnIndex(valueColumn, out colValue)) + throw host.ExceptUserArg(nameof(Arguments.ValueColumn), "column not found: '{0}'", valueColumn); + + // REVIEW: Should we allow term to be a vector of text (each term in the vector + // would map to the same value)? + var typeTerm = schema.GetColumnType(colTerm); + host.CheckUserArg(typeTerm.IsText, nameof(Arguments.TermColumn), "term column must contain text"); + var typeValue = schema.GetColumnType(colValue); + var cols = new List<(string Source, string Name)>() + { + (termColumn, KeyColumnName), + (valueColumn, ValueColumnName) + }; + + var view = new ColumnsCopyingTransformer(host, cols.ToArray()).Transform(lookup); + view = ColumnSelectingTransformer.CreateKeep(host, view, cols.Select(x=>x.Name).ToArray()); + + var saver = new BinarySaver(host, new BinarySaver.Arguments()); + using (var strm = new MemoryStream()) + { + saver.SaveData(strm, view, 0, 1); + return strm.ToArray(); + } + } + + private static BinaryLoader GetLoader(IHostEnvironment env, byte[] bytes) + { + env.AssertValue(env); + env.AssertValue(bytes); + + var strm = new MemoryStream(bytes, writable: false); + return new BinaryLoader(env, new BinaryLoader.Arguments(), strm); + } + + protected override IRowMapper MakeRowMapper(Schema schema) + => new Mapper(this, Schema.Create(schema), _valueMap, ColumnPairs); + + private sealed class Mapper : OneToOneMapperBase + { + private readonly Schema _inputSchema; + private readonly ValueMap _valueMap; + private readonly (string Source, string Name)[] _columns; + private readonly ValueMappingTransform _parent; + + internal Mapper(ValueMappingTransform transform, + Schema inputSchema, + ValueMap valueMap, + (string input, string output)[] columns) + : base(transform.Host.Register(nameof(Mapper)), transform, inputSchema) + { + 
_inputSchema = inputSchema; + _valueMap = valueMap; + _columns = columns; + _parent = transform; + } + + protected override Delegate MakeGetter(IRow input, int iinfo, Func activeOutput, out Action disposer) + { + Host.AssertValue(input); + Host.Assert(0 <= iinfo && iinfo < _columns.Length); + disposer = null; + + return _valueMap.GetGetter(input, ColMapNewToOld[iinfo]); + } + + protected override Schema.Column[] GetOutputColumnsCore() + { + var result = new Schema.Column[_columns.Length]; + for (int i = 0; i < _columns.Length; i++) + { + var srcCol = _inputSchema[_columns[i].Source]; + result[i] = new Schema.Column(_columns[i].Name, _valueMap.ValueType, srcCol.Metadata); + } + return result; + } + } + } +} diff --git a/test/Microsoft.ML.Tests/CSharpCodeGen.cs b/test/Microsoft.ML.Tests/CSharpCodeGen.cs index d16a9924f1..9942ebdac3 100644 --- a/test/Microsoft.ML.Tests/CSharpCodeGen.cs +++ b/test/Microsoft.ML.Tests/CSharpCodeGen.cs @@ -15,7 +15,8 @@ public CSharpCodeGen(ITestOutputHelper output) : base(output) { } - [Fact(Skip = "Execute this test if you want to regenerate CSharpApi file")] + //[Fact(Skip = "Execute this test if you want to regenerate CSharpApi file")] + [Fact] public void RegenerateCSharpApi() { var basePath = GetDataPath("../../src/Microsoft.ML.Legacy/CSharpApi.cs"); diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs new file mode 100644 index 0000000000..c5be035324 --- /dev/null +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -0,0 +1,107 @@ +// +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using Microsoft.ML.Runtime.Api; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.RunTests; +using Microsoft.ML.Transforms; +using System; +using System.Collections.Generic; +using Xunit; +using Xunit.Abstractions; + +namespace Microsoft.ML.Tests.Transformers +{ + public class ValueMappingTests : TestDataPipeBase + { + public ValueMappingTests(ITestOutputHelper output) : base(output) + { + } + + class TestClass + { + public string A; + public string B; + public string C; + } + + [Fact] + public void ValueMapOneValueTest() + { + var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } }; + var dataView = ComponentCreation.CreateDataView(Env, data); + + IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory()}; + IEnumerable values = new List() { 1, 2, 3, 4 }; + + var estimator = new ValueMappingEstimator, int>(Env, keys, values, new [] { ("A", "D"), ("B", "E"), ("C", "F") }); + var t = estimator.Fit(dataView); + + var result = t.Transform(dataView); + var cursor = result.GetRowCursor((col) => true); + var getterD = cursor.GetGetter(3); + var getterE = cursor.GetGetter(4); + var getterF = cursor.GetGetter(5); + cursor.MoveNext(); + + int dValue = 0; + getterD(ref dValue); + Assert.Equal(2, dValue); + int eValue = 0; + getterE(ref eValue); + Assert.Equal(3, eValue); + int fValue = 0; + getterF(ref fValue); + Assert.Equal(1, fValue); + } + + [Fact] + public void ValueMapVectorValueTest() + { + var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } }; + var dataView = ComponentCreation.CreateDataView(Env, data); + + IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory()}; + List values = new List() { + new int[] {2, 3, 4 }, + new int[] {100, 200 }, + new int[] {400, 500, 600, 700 }}; + + var estimator = new ValueMappingEstimator, int>(Env, keys, values, new [] { ("A", "D"), ("B", "E"),("C", "F") }); + var t = estimator.Fit(dataView); + + var 
result = t.Transform(dataView); + var cursor = result.GetRowCursor((col) => true); + var getterD = cursor.GetGetter>(3); + var getterE = cursor.GetGetter>(4); + var getterF = cursor.GetGetter>(5); + cursor.MoveNext(); + + var valuesArray = values.ToArray(); + VBuffer dValue = default; + getterD(ref dValue); + Assert.Equal(values[1].Length, dValue.Length); + VBuffer eValue = default; + getterE(ref eValue); + Assert.Equal(values[2].Length, eValue.Length); + VBuffer fValue = default; + getterF(ref fValue); + Assert.Equal(values[0].Length, fValue.Length); + } + + [Fact] + public void ValueMappingWorkout() + { + var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } }; + var dataView = ComponentCreation.CreateDataView(Env, data); + IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory()}; + IEnumerable values = new List() { 1, 2, 3, 4 }; + + // Workout on value mapping + //var est = ML.Transforms.ValueMap, int>(keys, values, new [] { ("A", "D"), ("B", "E"),("C", "F") }); + //TestEstimatorCore(est, validFitInput: dataView); + } + } +} From 7335dd429c369887aace0f0da637d6994775c267 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Wed, 21 Nov 2018 15:59:29 -0800 Subject: [PATCH 02/16] Re-enabled workout test, fixed build errors --- src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs | 3 --- .../Transforms/ValueMappingTransform.cs | 11 +++++++---- .../Transformers/ValueMappingTests.cs | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs index 440efaba72..403c380f2a 100644 --- a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs @@ -92,8 +92,6 @@ public static ColumnSelectingEstimator SelectColumns(this TransformsCatalog cata keepColumns, dropColumns, keepHidden, ignoreMissing); } - /* - public static class 
ValueMappingCatalog { public static ValueMappingEstimator ValueMap( @@ -103,5 +101,4 @@ public static ValueMappingEstimator ValueMap new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, columns); } - */ } diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs index a842eae446..8433d5f75b 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using Microsoft.ML.Core.Data; +using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; @@ -467,7 +468,9 @@ private static BinaryLoader GetLoader(IHostEnvironment env, byte[] bytes) } protected override IRowMapper MakeRowMapper(Schema schema) - => new Mapper(this, Schema.Create(schema), _valueMap, ColumnPairs); + { + return new Mapper(this, Schema.Create(schema), _valueMap, ColumnPairs); + } private sealed class Mapper : OneToOneMapperBase { @@ -497,13 +500,13 @@ protected override Delegate MakeGetter(IRow input, int iinfo, Func ac return _valueMap.GetGetter(input, ColMapNewToOld[iinfo]); } - protected override Schema.Column[] GetOutputColumnsCore() + protected override Schema.DetachedColumn[] GetOutputColumnsCore() { - var result = new Schema.Column[_columns.Length]; + var result = new Schema.DetachedColumn[_columns.Length]; for (int i = 0; i < _columns.Length; i++) { var srcCol = _inputSchema[_columns[i].Source]; - result[i] = new Schema.Column(_columns[i].Name, _valueMap.ValueType, srcCol.Metadata); + result[i] = new Schema.DetachedColumn(_columns[i].Name, _valueMap.ValueType, srcCol.Metadata); } return result; } diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs index c5be035324..64bf8f119f 100644 --- 
a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -100,8 +100,8 @@ public void ValueMappingWorkout() IEnumerable values = new List() { 1, 2, 3, 4 }; // Workout on value mapping - //var est = ML.Transforms.ValueMap, int>(keys, values, new [] { ("A", "D"), ("B", "E"),("C", "F") }); - //TestEstimatorCore(est, validFitInput: dataView); + var est = ML.Transforms.ValueMap, int>(keys, values, new [] { ("A", "D"), ("B", "E"),("C", "F") }); + TestEstimatorCore(est, validFitInput: dataView); } } } From 03950c84528a39d0c8e48a92467f62cc11027195 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Tue, 27 Nov 2018 15:38:22 -0800 Subject: [PATCH 03/16] - Added command line bindings - Added commentary --- .../Transforms/ConversionsCatalog.cs | 14 +- .../Transforms/ExtensionsCatalog.cs | 9 -- .../Transforms/ValueMappingTransform.cs | 140 +++++++++++++++--- test/Microsoft.ML.Tests/CSharpCodeGen.cs | 3 +- .../Transformers/ValueMappingTests.cs | 29 +++- 5 files changed, 157 insertions(+), 38 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsCatalog.cs index b5185393c2..cf8a6fe72b 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsCatalog.cs @@ -4,12 +4,14 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Conversions; +using System.Collections.Generic; namespace Microsoft.ML { - using HashDefaults = HashingEstimator.Defaults; using ConvertDefaults = TypeConvertingEstimator.Defaults; + using HashDefaults = HashingEstimator.Defaults; /// /// Extensions for the HashEstimator. @@ -101,4 +103,14 @@ public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog. 
string inputColumn, string outputColumn = null, bool bag = KeyToVectorMappingEstimator.Defaults.Bag) => new KeyToVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, bag); } + + public static class ToMappedValueCatalog + { + public static ValueMappingEstimator ValueMap( + this TransformsCatalog catalog, + IEnumerable keys, + IEnumerable values, + params (string source, string name)[] columns) + => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, columns); + } } diff --git a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs index 403c380f2a..7f6c31e871 100644 --- a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs @@ -92,13 +92,4 @@ public static ColumnSelectingEstimator SelectColumns(this TransformsCatalog cata keepColumns, dropColumns, keepHidden, ignoreMissing); } - public static class ValueMappingCatalog - { - public static ValueMappingEstimator ValueMap( - this TransformsCatalog catalog, - IEnumerable keys, - IEnumerable values, - params (string source, string name)[] columns) - => new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, columns); - } } diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs index 8433d5f75b..b867b1144a 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs @@ -17,6 +17,11 @@ using System.Linq; using System.Text; +[assembly: LoadableClass(ValueMappingTransform.Summary, typeof(IDataTransform), typeof(ValueMappingTransform), + typeof(ValueMappingTransform.Arguments), typeof(SignatureDataTransform), + ValueMappingTransform.UserName, "ValueMapping", "ValueMappingTransform", ValueMappingTransform.ShortName, + DocName = "transform/ValueMappingTransform.md")] + [assembly: 
LoadableClass(ValueMappingTransform.Summary, typeof(IDataTransform), typeof(ValueMappingTransform), null, typeof(SignatureLoadDataTransform), "Value Mapping Transform", ValueMappingTransform.LoaderSignature)] @@ -25,10 +30,25 @@ namespace Microsoft.ML.Transforms { + + /// + /// The ValueMappingEstimator is a 1-1 mapping from a key to value. The key type and value type are specified + /// through TKeyType and TValueType. Arrays are supported for vector types which can be used as either a key or a value + /// or both. The mapping is specified, not trained by providiing a list of keys and a list of values. + /// + /// Specifies the key type + /// Specifies the value type public sealed class ValueMappingEstimator : TrivialEstimator> { private (string input, string output)[] _columns; + /// + /// Constructs the ValueMappingEstimator, key type -> value type mapping + /// + /// Instance of the host environment + /// The list of keys of TKeyType + /// The list of values of TValueType + /// The list of columns to apply public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), new ValueMappingTransform(env, keys, values, columns)) @@ -36,6 +56,13 @@ public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, I _columns = columns; } + /// + /// Constructs the ValueMappingEstimator, key array type -> value type mapping + /// + /// Instance of the host environment + /// The list of keys of TKeyType + /// The list of values of TValueType + /// The list of columns to apply public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), new ValueMappingTransform(env, keys, values, columns)) @@ -43,6 +70,13 @@ public ValueMappingEstimator(IHostEnvironment 
env, IEnumerable keys, _columns = columns; } + /// + /// Constructs the ValueMappingEstimator, key type -> value array type mapping + /// + /// Instance of the host environment + /// The list of keys of TKeyType + /// The list of values of TValueType[] + /// The list of columns to apply public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), new ValueMappingTransform(env, keys, values, columns)) @@ -50,6 +84,13 @@ public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, I _columns = columns; } + /// + /// Constructs the ValueMappingEstimator, key array type -> value array type mapping + /// + /// Instance of the host environment + /// The list of keys of TKeyType[] + /// The list of values of TValueType[] + /// The list of columns to apply public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), new ValueMappingTransform(env, keys, values, columns)) @@ -57,6 +98,11 @@ public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, _columns = columns; } + /// + /// Retrieves the output schema given the input schema + /// + /// Input schema + /// Returns the generated output schema public override SchemaShape GetOutputSchema(SchemaShape inputSchema) { Host.CheckValue(inputSchema, nameof(inputSchema)); @@ -90,23 +136,58 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) } } + /// + /// The ValueMappingTransform is a 1-1 mapping from a key to value. The key type and value type are specified + /// through TKeyType and TValueType. Arrays are supported for vector types which can be used as either a key or a value + /// or both. 
The mapping is specified, not trained by providing a list of keys and a list of values. + /// + /// Specifies the key type + /// Specifies the value type public sealed class ValueMappingTransform : ValueMappingTransform { + /// + /// Constructs a ValueMappingTransform with a key type to value type + /// + /// Instance of the host environment + /// The list of keys that are TKeyType + /// The list of values that are TValueType + /// The specified columns to apply public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), ConvertToDataView(env, keys, values), columns) { } + /// + /// Constructs a ValueMappingTransform with a key array type to value type + /// + /// Instance of the host environment + /// The list of keys that are TKeyType[] + /// The list of values that are TValueType + /// The specified columns to apply public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), ConvertToDataView(env, keys, values), columns) { } + /// + /// Constructs a ValueMappingTransform with a key type to value array type + /// + /// Instance of the host environment + /// The list of keys that are TKeyType + /// The list of values that are TValueType[] + /// The specified columns to apply public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), ConvertToDataView(env, keys, values), columns) { } + /// + /// Constructs a ValueMappingTransform with a key array type to value array type + /// + /// Instance of the host environment + /// The list of keys that are TKeyType[] + /// The list of values that are TValueType[] +
/// The specified columns to apply public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), ConvertToDataView(env, keys, values), columns) @@ -161,6 +242,8 @@ public class ValueMappingTransform : OneToOneTransformerBase { internal const string Summary = "Maps text values columns to new columns using a map dataset."; internal const string LoaderSignature = "ValueMappingTransform"; + internal const string UserName = "Value Mapping Transform"; + internal const string ShortName = "ValueMap"; // Stream names for the binary idv streams. private const string DefaultMapName = "DefaultMap.idv"; @@ -201,8 +284,8 @@ public sealed class Arguments [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)] public Column[] Column; - [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the text column containing the terms", ShortName = "term")] - public string TermColumn; + [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the column containing the keys", ShortName = "key")] + public string KeyColumn; [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the column containing the values", ShortName = "value")] public string ValueColumn; @@ -210,10 +293,6 @@ public sealed class Arguments [Argument(ArgumentType.Multiple, HelpText = "The data loader", NullName = "", SignatureType = typeof(SignatureDataLoader))] public IComponentFactory Loader; - [Argument(ArgumentType.AtMostOnce, - HelpText = "If term and value columns are unspecified, specifies whether the values are key values or numeric.", ShortName = "key")] - public bool KeyValues = true; - [Argument(ArgumentType.AtMostOnce, IsInputFileName = true, HelpText = "The data file containing the terms", ShortName = "data", SortOrder = 2)] public string DataFile; } 
@@ -226,8 +305,9 @@ protected ValueMappingTransform(IHostEnvironment env, IDataView lookupMap, (stri private ValueMap CreateValueMapFromDataView(IDataView dataView) { + // The IDataView is expected to have 2 columns, key and value. Contracts.Check(dataView.Schema.GetColumns().Count() == 2); - Contracts.Check(dataView.GetRowCount() > 0); + //Contracts.Check(dataView.GetRowCount() > 0); var keyType = dataView.Schema.GetColumnType(0); var valueType = dataView.Schema.GetColumnType(1); var valueMap = ValueMap.Create(keyType, valueType); @@ -236,6 +316,29 @@ private ValueMap CreateValueMapFromDataView(IDataView dataView) return valueMap; } + private static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input) + { + Contracts.CheckValue(env, nameof(env)); + env.CheckValue(args, nameof(args)); + env.Assert(!string.IsNullOrWhiteSpace(args.DataFile)); + env.AssertNonEmpty(args.KeyColumn); + env.AssertNonEmpty(args.ValueColumn); + + IMultiStreamSource fileSource = new MultiFileSource(args.DataFile); + IDataView loader; + if (args.Loader != null) + { + loader = args.Loader.CreateComponent(env, fileSource); + } + else + { + loader = new BinaryLoader(env, new BinaryLoader.Arguments(), fileSource); + } + + var transformer = new ValueMappingTransform(env, loader, args.Column.Select(x => (x.Source, x.Name)).ToArray()); + return transformer.MakeDataTransform(input); + } + protected static ValueMappingTransform Create(IHostEnvironment env, ModelLoadContext ctx) { Contracts.CheckValue(env, nameof(env)); @@ -356,10 +459,10 @@ public ValueMap(ColumnType keyType, ColumnType valueType) public override void Train(IHostEnvironment env, IRowCursor cursor) { + var keyGetter = cursor.GetGetter(0); + var valueGetter = cursor.GetGetter(1); while(cursor.MoveNext()) { - var keyGetter = cursor.GetGetter(0); - var valueGetter = cursor.GetGetter(1); TKeyType key = default; TValueType value = default; keyGetter(ref key); @@ -420,30 +523,23 @@ private static TValueType 
GetValue(TValueType value) => value; } - private static byte[] GetBytesFromDataView(IHost host, IDataView lookup, string termColumn, string valueColumn) + private static byte[] GetBytesFromDataView(IHost host, IDataView lookup, string keyColumn, string valueColumn) { Contracts.AssertValue(host); host.AssertValue(lookup); - host.AssertNonEmpty(termColumn); + host.AssertNonEmpty(keyColumn); host.AssertNonEmpty(valueColumn); - int colTerm; - int colValue; var schema = lookup.Schema; - if (!schema.TryGetColumnIndex(termColumn, out colTerm)) - throw host.ExceptUserArg(nameof(Arguments.TermColumn), "column not found: '{0}'", termColumn); - if (!schema.TryGetColumnIndex(valueColumn, out colValue)) + if (!schema.TryGetColumnIndex(keyColumn, out int colKey)) + throw host.ExceptUserArg(nameof(Arguments.KeyColumn), "column not found: '{0}'", keyColumn); + if (!schema.TryGetColumnIndex(valueColumn, out int colValue)) throw host.ExceptUserArg(nameof(Arguments.ValueColumn), "column not found: '{0}'", valueColumn); - // REVIEW: Should we allow term to be a vector of text (each term in the vector - // would map to the same value)? 
- var typeTerm = schema.GetColumnType(colTerm); - host.CheckUserArg(typeTerm.IsText, nameof(Arguments.TermColumn), "term column must contain text"); - var typeValue = schema.GetColumnType(colValue); var cols = new List<(string Source, string Name)>() { - (termColumn, KeyColumnName), + (keyColumn, KeyColumnName), (valueColumn, ValueColumnName) }; diff --git a/test/Microsoft.ML.Tests/CSharpCodeGen.cs b/test/Microsoft.ML.Tests/CSharpCodeGen.cs index 9942ebdac3..d16a9924f1 100644 --- a/test/Microsoft.ML.Tests/CSharpCodeGen.cs +++ b/test/Microsoft.ML.Tests/CSharpCodeGen.cs @@ -15,8 +15,7 @@ public CSharpCodeGen(ITestOutputHelper output) : base(output) { } - //[Fact(Skip = "Execute this test if you want to regenerate CSharpApi file")] - [Fact] + [Fact(Skip = "Execute this test if you want to regenerate CSharpApi file")] public void RegenerateCSharpApi() { var basePath = GetDataPath("../../src/Microsoft.ML.Legacy/CSharpApi.cs"); diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs index 64bf8f119f..e0c3032c55 100644 --- a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -6,6 +6,7 @@ using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.RunTests; +using Microsoft.ML.Runtime.Tools; using Microsoft.ML.Transforms; using System; using System.Collections.Generic; @@ -27,6 +28,12 @@ class TestClass public string C; } + class TestWrong + { + public string A; + public float B; + } + [Fact] public void ValueMapOneValueTest() { @@ -63,6 +70,8 @@ public void ValueMapVectorValueTest() var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } }; var dataView = ComponentCreation.CreateDataView(Env, data); + + IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory()}; List values = new List() { new int[] {2, 3, 4 }, @@ -78,7 +87,7 @@ public void 
ValueMapVectorValueTest() var getterE = cursor.GetGetter>(4); var getterF = cursor.GetGetter>(5); cursor.MoveNext(); - + var valuesArray = values.ToArray(); VBuffer dValue = default; getterD(ref dValue); @@ -96,12 +105,24 @@ public void ValueMappingWorkout() { var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } }; var dataView = ComponentCreation.CreateDataView(Env, data); - IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory()}; + var badData = new[] { new TestWrong() { A = "bar", B = 1.2f } }; + var badDataView = ComponentCreation.CreateDataView(Env, badData); + + IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() }; IEnumerable values = new List() { 1, 2, 3, 4 }; // Workout on value mapping - var est = ML.Transforms.ValueMap, int>(keys, values, new [] { ("A", "D"), ("B", "E"),("C", "F") }); - TestEstimatorCore(est, validFitInput: dataView); + var est = ML.Transforms.ValueMap(keys, values, new[] { ("A", "D"), ("B", "E"), ("C", "F") }); + TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView); + } + + [Fact] + void TestCommandLine() + { + var dataFile = GetDataPath("QuotingData.csv"); + Assert.Equal(Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0 col=B:R4:1 col=C:R4:2} xf=valuemap{key=ID value=Text data=" + + dataFile + + @" col=A:B loader=Text{col=ID:R4:0 col=Text:TX:1 sep=, header=+} } in=f:\1.txt" }), (int)0); } } } From ec593b641af199b4574cd630d8e1f2c53439e123 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Tue, 27 Nov 2018 16:00:06 -0800 Subject: [PATCH 04/16] - Fix for ColumnCopyingTransformer --- src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs index b867b1144a..591c92f4e3 100644 --- 
a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs @@ -543,7 +543,7 @@ private static byte[] GetBytesFromDataView(IHost host, IDataView lookup, string (valueColumn, ValueColumnName) }; - var view = new ColumnsCopyingTransformer(host, cols.ToArray()).Transform(lookup); + var view = new ColumnCopyingTransformer(host, cols.ToArray()).Transform(lookup); view = ColumnSelectingTransformer.CreateKeep(host, view, cols.Select(x=>x.Name).ToArray()); var saver = new BinarySaver(host, new BinarySaver.Arguments()); From 74c700cf810a23821e3fc11cecd98628a26ad5bd Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Wed, 28 Nov 2018 07:22:35 -0800 Subject: [PATCH 05/16] - Adding support for missing value. --- .../Transforms/ValueMappingTransform.cs | 25 ++++++++-- .../Transformers/ValueMappingTests.cs | 49 +++++++++++++++---- 2 files changed, 61 insertions(+), 13 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs index 591c92f4e3..992a5d9aeb 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs @@ -30,7 +30,6 @@ namespace Microsoft.ML.Transforms { - /// /// The ValueMappingEstimator is a 1-1 mapping from a key to value. The key type and value type are specified /// through TKeyType and TValueType. 
Arrays are supported for vector types which can be used as either a key or a value @@ -478,7 +477,27 @@ public override void Train(IHostEnvironment env, IRowCursor cursor) public override Delegate GetGetter(IRow input, int index) { var src = default(TKeyType); - ValueGetter getSrc = input.GetGetter(index);; + ValueGetter getSrc = input.GetGetter(index); + TValueType missingValue = default; + + // Get the default value if the key is missing + if (!ValueType.IsVector) + { + bool identity; + ValueMapper conv; + if (Runtime.Data.Conversion.Conversions.Instance.TryGetStandardConversion( + KeyType, + ValueType, + out conv, + out identity)) + { + TKeyType key = default; + TValueType value = default; + conv(key, ref value); + missingValue = value; + } + } + ValueGetter retVal = (ref TValueType dst) => { @@ -491,7 +510,7 @@ public override Delegate GetGetter(IRow input, int index) dst = Utils.MarshalInvoke(GetValue, ValueType.RawType, _mapping[src]); } else - dst = default; + dst = missingValue; }; return retVal; } diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs index e0c3032c55..3c1562bbad 100644 --- a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -40,10 +40,10 @@ public void ValueMapOneValueTest() var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } }; var dataView = ComponentCreation.CreateDataView(Env, data); - IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory()}; + IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() }; IEnumerable values = new List() { 1, 2, 3, 4 }; - var estimator = new ValueMappingEstimator, int>(Env, keys, values, new [] { ("A", "D"), ("B", "E"), ("C", "F") }); + var estimator = new ValueMappingEstimator, int>(Env, keys, values, new[] { ("A", "D"), ("B", "E"), 
("C", "F") }); var t = estimator.Fit(dataView); var result = t.Transform(dataView); @@ -70,15 +70,13 @@ public void ValueMapVectorValueTest() var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } }; var dataView = ComponentCreation.CreateDataView(Env, data); - - - IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory()}; - List values = new List() { + IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory() }; + List values = new List() { new int[] {2, 3, 4 }, new int[] {100, 200 }, new int[] {400, 500, 600, 700 }}; - var estimator = new ValueMappingEstimator, int>(Env, keys, values, new [] { ("A", "D"), ("B", "E"),("C", "F") }); + var estimator = new ValueMappingEstimator, int>(Env, keys, values, new[] { ("A", "D"), ("B", "E"), ("C", "F") }); var t = estimator.Fit(dataView); var result = t.Transform(dataView); @@ -87,7 +85,7 @@ public void ValueMapVectorValueTest() var getterE = cursor.GetGetter>(4); var getterF = cursor.GetGetter>(5); cursor.MoveNext(); - + var valuesArray = values.ToArray(); VBuffer dValue = default; getterD(ref dValue); @@ -100,6 +98,37 @@ public void ValueMapVectorValueTest() Assert.Equal(values[0].Length, fValue.Length); } + [Fact] + public void ValueMappingMissingKey() + { + var data = new[] { new TestClass() { A = "barTest", B = "test", C = "foo" } }; + var dataView = ComponentCreation.CreateDataView(Env, data); + + IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() }; + IEnumerable values = new List() { 1, 2, 3, 4 }; + + var estimator = new ValueMappingEstimator, int>(Env, keys, values, new[] { ("A", "D"), ("B", "E"), ("C", "F") }); + var t = estimator.Fit(dataView); + + var result = t.Transform(dataView); + var cursor = result.GetRowCursor((col) => true); + var getterD = cursor.GetGetter(3); + var getterE = cursor.GetGetter(4); + var getterF = cursor.GetGetter(5); + cursor.MoveNext(); + + int dValue = 
1; + getterD(ref dValue); + Assert.Equal(0, dValue); + int eValue = 0; + getterE(ref eValue); + Assert.Equal(3, eValue); + int fValue = 0; + getterF(ref fValue); + Assert.Equal(1, fValue); + } + + [Fact] public void ValueMappingWorkout() { @@ -120,8 +149,8 @@ public void ValueMappingWorkout() void TestCommandLine() { var dataFile = GetDataPath("QuotingData.csv"); - Assert.Equal(Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0 col=B:R4:1 col=C:R4:2} xf=valuemap{key=ID value=Text data=" - + dataFile + Assert.Equal(Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0 col=B:R4:1 col=C:R4:2} xf=valuemap{key=ID value=Text data=" + + dataFile + @" col=A:B loader=Text{col=ID:R4:0 col=Text:TX:1 sep=, header=+} } in=f:\1.txt" }), (int)0); } } From 430e5ac5262faee28e4f66e0aaa987ff8f80d140 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Thu, 29 Nov 2018 15:27:55 -0800 Subject: [PATCH 06/16] - Removed array types for keys - Transform takes key and column name - Check cast is now back to a try convert. 
` --- .../Transforms/ValueMappingTransform.cs | 156 ++++++------------ 1 file changed, 48 insertions(+), 108 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs index 992a5d9aeb..c35c7d3e7d 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs @@ -55,20 +55,6 @@ public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, I _columns = columns; } - /// - /// Constructs the ValueMappingEstimator, key array type -> value type mapping - /// - /// Instance of the host environment - /// The list of keys of TKeyType - /// The list of values of TValueType - /// The list of columns to apply - public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), - new ValueMappingTransform(env, keys, values, columns)) - { - _columns = columns; - } - /// /// Constructs the ValueMappingEstimator, key type -> value array type mapping /// @@ -83,20 +69,6 @@ public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, I _columns = columns; } - /// - /// Constructs the ValueMappingEstimator, key array type -> value array type mapping - /// - /// Instance of the host environment - /// The list of keys of TKeyType[] - /// The list of values of TValueType[] - /// The list of columns to apply - public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), - new ValueMappingTransform(env, keys, values, columns)) - { - _columns = columns; - } - /// /// Retrieves the output schema given the input schema /// @@ -153,19 +125,7 @@ public sealed class ValueMappingTransform : ValueMappingTr /// 
The specified columns to apply public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), - ConvertToDataView(env, keys, values), columns) - { } - - /// - /// Constructs a ValueMappingTransform with a key array type to value type - /// - /// Instance of the host environment - /// The list of keys that are TKeyType[] - /// The list of values that are TValueType - /// The specified columns to apply - public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), - ConvertToDataView(env, keys, values), columns) + ConvertToDataView(env, keys, values), KeyColumnName, ValueColumnName, columns) { } /// @@ -177,19 +137,7 @@ public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, /// The specified columns to apply public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), - ConvertToDataView(env, keys, values), columns) - { } - - /// - /// Constructs a ValueMappingTransform with a key array type to value array type - /// - /// Instance of the host environment - /// The list of keys that are TKeyType[] - /// The list of values that are TValueType[] - /// The specified columns to apply - public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), - ConvertToDataView(env, keys, values), columns) + ConvertToDataView(env, keys, values), KeyColumnName, ValueColumnName, columns) { } private static IDataView ConvertToDataView(IHostEnvironment env, 
IEnumerable keys, IEnumerable values) @@ -203,17 +151,6 @@ private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values) - { - // Build DataView from the mapping - var keyType = ValueMappingTransform.GetPrimitiveType(typeof(TKeyType), out bool isKeyVectorType); - var valueType = ValueMappingTransform.GetPrimitiveType(typeof(TValueType), out bool isValueVectorType); - var dataViewBuilder = new ArrayDataViewBuilder(env); - dataViewBuilder.AddColumn(ValueMappingTransform.KeyColumnName, keyType, keys.ToArray()); - dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName, valueType, values.ToArray()); - return dataViewBuilder.GetDataView(); - } - private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values) { // Build DataView from the mapping @@ -224,17 +161,6 @@ private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values) - { - // Build DataView from the mapping - var keyType = ValueMappingTransform.GetPrimitiveType(typeof(TKeyType), out bool isKeyVectorType); - var valueType = ValueMappingTransform.GetPrimitiveType(typeof(TValueType), out bool isValueVectorType); - var dataViewBuilder = new ArrayDataViewBuilder(env); - dataViewBuilder.AddColumn(ValueMappingTransform.KeyColumnName, keyType, keys.ToArray()); - dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName, valueType, values.ToArray()); - return dataViewBuilder.GetDataView(); - } } public class ValueMappingTransform : OneToOneTransformerBase @@ -261,6 +187,18 @@ private static VersionInfo GetVersionInfo() loaderAssemblyName: typeof(ValueMappingTransform).Assembly.FullName); } + private static VersionInfo GetTermLookupVersionInfo() + { + return new VersionInfo( + modelSignature: "TXTLOOKT", + // verWrittenCur: 0x00010001, // Initial. + verWrittenCur: 0x00010002, // Dropped sizeof(Float). 
+ verReadableCur: 0x00010002, + verWeCanReadBack: 0x00010002, + loaderSignature: LoaderSignature, + loaderAssemblyName: typeof(ValueMappingTransform).Assembly.FullName); + } + public sealed class Column : OneToOneColumn { public static Column Parse(string str) @@ -296,21 +234,23 @@ public sealed class Arguments public string DataFile; } - protected ValueMappingTransform(IHostEnvironment env, IDataView lookupMap, (string Input, string Output)[] columns) + protected ValueMappingTransform(IHostEnvironment env, IDataView lookupMap, string keyColumn, string valueColumn, (string Input, string Output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), columns) { - _valueMap = CreateValueMapFromDataView(lookupMap); + env.CheckNonEmpty(keyColumn, nameof(keyColumn), "A key column must be specified when passing in an IDataView for the value mapping"); + env.CheckNonEmpty(valueColumn, nameof(valueColumn), "A value column must be specified when passing in an IDataView for the value mapping"); + _valueMap = CreateValueMapFromDataView(lookupMap, keyColumn, valueColumn); } - private ValueMap CreateValueMapFromDataView(IDataView dataView) + private ValueMap CreateValueMapFromDataView(IDataView dataView, string keyColumn, string valueColumn) { - // The IDataView is expected to have 2 columns, key and value. 
- Contracts.Check(dataView.Schema.GetColumns().Count() == 2); - //Contracts.Check(dataView.GetRowCount() > 0); - var keyType = dataView.Schema.GetColumnType(0); - var valueType = dataView.Schema.GetColumnType(1); + // Confirm that the key and value columns exist in the dataView + Host.Check(dataView.Schema.TryGetColumnIndex(keyColumn, out int keyIdx), "Key column " + keyColumn + " does not exist in the given dataview"); + Host.Check(dataView.Schema.TryGetColumnIndex(valueColumn, out int valueIdx), "Value column " + valueColumn + " does not exist in the given dataview"); + var keyType = dataView.Schema.GetColumnType(keyIdx); + var valueType = dataView.Schema.GetColumnType(valueIdx); var valueMap = ValueMap.Create(keyType, valueType); - using (var cursor = dataView.GetRowCursor(c=> true)) + using (var cursor = dataView.GetRowCursor(c=> c == keyIdx || c == valueIdx)) valueMap.Train(Host, cursor); return valueMap; } @@ -334,7 +274,7 @@ private static IDataTransform Create(IHostEnvironment env, Arguments args, IData loader = new BinaryLoader(env, new BinaryLoader.Arguments(), fileSource); } - var transformer = new ValueMappingTransform(env, loader, args.Column.Select(x => (x.Source, x.Name)).ToArray()); + var transformer = new ValueMappingTransform(env, loader, args.KeyColumn, args.ValueColumn, args.Column.Select(x => (x.Source, x.Name)).ToArray()); return transformer.MakeDataTransform(input); } @@ -366,7 +306,7 @@ protected static ValueMappingTransform Create(IHostEnvironment env, ModelLoadCon throw env.ExceptDecode(); var binaryLoader = GetLoader(env, rgb); - return new ValueMappingTransform(env, binaryLoader, columns); + return new ValueMappingTransform(env, binaryLoader, KeyColumnName, ValueColumnName, columns); } private static byte[] ReadAllBytes(IExceptionContext ectx, BinaryReader rdr) @@ -449,6 +389,7 @@ private static ValueMap CreateValueMapInvoke(ColumnType ke private class ValueMap : ValueMap { private Dictionary _mapping; + private TValueType _missingValue; 
public ValueMap(ColumnType keyType, ColumnType valueType) : base(keyType, valueType) @@ -458,6 +399,25 @@ public ValueMap(ColumnType keyType, ColumnType valueType) public override void Train(IHostEnvironment env, IRowCursor cursor) { + // Validate that the conversion is supported for non-vector types + bool identity; + ValueMapper conv; + _missingValue = default; + if (!ValueType.IsVector) + { + if (Runtime.Data.Conversion.Conversions.Instance.TryGetStandardConversion( + KeyType, + ValueType, + out conv, + out identity)) + { + TKeyType key = default; + TValueType value = default; + conv(key, ref value); + _missingValue = value; + } + } + var keyGetter = cursor.GetGetter(0); var valueGetter = cursor.GetGetter(1); while(cursor.MoveNext()) @@ -478,26 +438,6 @@ public override Delegate GetGetter(IRow input, int index) { var src = default(TKeyType); ValueGetter getSrc = input.GetGetter(index); - TValueType missingValue = default; - - // Get the default value if the key is missing - if (!ValueType.IsVector) - { - bool identity; - ValueMapper conv; - if (Runtime.Data.Conversion.Conversions.Instance.TryGetStandardConversion( - KeyType, - ValueType, - out conv, - out identity)) - { - TKeyType key = default; - TValueType value = default; - conv(key, ref value); - missingValue = value; - } - } - ValueGetter retVal = (ref TValueType dst) => { @@ -510,7 +450,7 @@ public override Delegate GetGetter(IRow input, int index) dst = Utils.MarshalInvoke(GetValue, ValueType.RawType, _mapping[src]); } else - dst = missingValue; + dst = _missingValue; }; return retVal; } From 03c0143e0acbbad7e8202fba4d59a21f289d4f92 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Sun, 2 Dec 2018 18:10:14 -0800 Subject: [PATCH 07/16] - Support for treat values as key types, and support for back-compat with term lookup. 
--- .../Transforms/ValueMappingTransform.cs | 121 ++++++++++++++---- .../TermLookupTransformer.cs | 12 +- .../Transformers/ValueMappingTests.cs | 89 ++++++++++++- 3 files changed, 187 insertions(+), 35 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs index c35c7d3e7d..a84897fb58 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs @@ -28,6 +28,9 @@ [assembly: LoadableClass(ValueMappingTransform.Summary, typeof(ValueMappingTransform), null, typeof(SignatureLoadModel), "Value Mapping Transform", ValueMappingTransform.LoaderSignature)] +[assembly: LoadableClass("", typeof(IDataTransform), typeof(ValueMappingTransform), null, typeof(SignatureLoadDataTransform), + "", ValueMappingTransform.TermLookupLoaderSignature)] + namespace Microsoft.ML.Transforms { /// @@ -50,7 +53,22 @@ public sealed class ValueMappingEstimator : TrivialEstimat /// The list of columns to apply public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), - new ValueMappingTransform(env, keys, values, columns)) + new ValueMappingTransform(env, keys, values, false, columns)) + { + _columns = columns; + } + + /// + /// Constructs the ValueMappingEstimator, key type -> value type mapping + /// + /// Instance of the host environment + /// The list of keys of TKeyType + /// The list of values of TValueType + /// Specifies to treat the values as a + /// The list of columns to apply + public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool treatValuesAsKeyType, params (string input, string output)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), + new ValueMappingTransform(env, keys, 
values, treatValuesAsKeyType, columns)) { _columns = columns; } @@ -79,28 +97,17 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) Host.CheckValue(inputSchema, nameof(inputSchema)); var resultDic = inputSchema.Columns.ToDictionary(x => x.Name); - - var outputType = typeof(TValueType); - ColumnType outputColumnType = default; - if (outputType.IsGenericEx(typeof(VBuffer<>))) - { - Type vBufferType = outputType.GetGenericArguments()[0]; - vBufferType.TryGetDataKind(out DataKind kind); - outputColumnType = new VectorType(PrimitiveType.FromKind(kind)); - } - else - { - outputType.TryGetDataKind(out DataKind kind); - outputColumnType = PrimitiveType.FromKind(kind); - } - + var vectorKind = Transformer.ValueColumnType.IsVector ? SchemaShape.Column.VectorKind.Vector : SchemaShape.Column.VectorKind.Scalar; + var isKey = Transformer.ValueColumnType.IsKey; + var columnType = (isKey) ? Transformer.ValueColumnType.ItemType : + Transformer.ValueColumnType; foreach (var (Input, Output) in _columns) { if (!inputSchema.TryFindColumn(Input, out var originalColumn)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", Input); // Get the type from TOutputType - var col = new SchemaShape.Column(Output, originalColumn.Kind, outputColumnType, originalColumn.IsKey, originalColumn.Metadata); + var col = new SchemaShape.Column(Output, vectorKind, columnType, isKey, originalColumn.Metadata); resultDic[Output] = col; } return new SchemaShape(resultDic.Values); @@ -122,10 +129,11 @@ public sealed class ValueMappingTransform : ValueMappingTr /// Instance of the host environment /// The list of keys that are TKeyType /// The list of values that are TValueType + /// Specifies to treat the values as a /// The specified columns to apply - public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) + public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool 
treatValuesAsKeyTypes, (string Input, string Output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), - ConvertToDataView(env, keys, values), KeyColumnName, ValueColumnName, columns) + ConvertToDataView(env, keys, values, treatValuesAsKeyTypes), KeyColumnName, ValueColumnName, columns) { } /// @@ -140,14 +148,40 @@ public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, I ConvertToDataView(env, keys, values), KeyColumnName, ValueColumnName, columns) { } - private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values) + private static ValueGetter>> GetKeyValueGetter(TValueType[] values) + { + return + (ref VBuffer> dst) => + { + var editor = VBufferEditor.Create(ref dst, values.Length); + for (int i = 0; i < values.Length; i++) + editor.Values[i] = values[i].ToString().AsMemory(); + dst = editor.Commit(); + }; + } + + private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool treatValuesAsKeyValue) { // Build DataView from the mapping var keyType = ValueMappingTransform.GetPrimitiveType(typeof(TKeyType), out bool isKeyVectorType); var valueType = ValueMappingTransform.GetPrimitiveType(typeof(TValueType), out bool isValueVectorType); + + // If treatValuesAsKeyValues can only be used with non-vector types + env.Check(!(treatValuesAsKeyValue && valueType.IsVector), "Treating values as key value types can only be used on non-vector types."); + var dataViewBuilder = new ArrayDataViewBuilder(env); dataViewBuilder.AddColumn(ValueMappingTransform.KeyColumnName, keyType, keys.ToArray()); - dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName, valueType, values.ToArray()); + var valuesArr = values.ToArray(); + if (treatValuesAsKeyValue) + { + uint[] indices = Enumerable.Range(0, count: values.Count()).Select(i => (uint)i).ToArray(); + dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName, 
GetKeyValueGetter(valuesArr), 0, indices.Length, indices); + } + else + { + dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName, valueType, values.ToArray()); + } + return dataViewBuilder.GetDataView(); } @@ -170,12 +204,16 @@ public class ValueMappingTransform : OneToOneTransformerBase internal const string UserName = "Value Mapping Transform"; internal const string ShortName = "ValueMap"; + internal const string TermLookupLoaderSignature = "TermLookupTransform"; + // Stream names for the binary idv streams. private const string DefaultMapName = "DefaultMap.idv"; protected static string KeyColumnName = "Key"; protected static string ValueColumnName = "Value"; private ValueMap _valueMap; + public ColumnType ValueColumnType => _valueMap.ValueType; + private static VersionInfo GetVersionInfo() { return new VersionInfo( @@ -278,11 +316,29 @@ private static IDataTransform Create(IHostEnvironment env, Arguments args, IData return transformer.MakeDataTransform(input); } + /// + /// Helper function to determine the model version that is being loaded. 
+ /// + private static bool CheckModelVersion(ModelLoadContext ctx, VersionInfo versionInfo) + { + try + { + ctx.CheckVersionInfo(versionInfo); + return true; + } + catch (Exception) + { + //consume + return false; + } + } + protected static ValueMappingTransform Create(IHostEnvironment env, ModelLoadContext ctx) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(ctx, nameof(ctx)); - ctx.CheckAtModel(GetVersionInfo()); + env.Check(CheckModelVersion(ctx, GetVersionInfo()) || + CheckModelVersion(ctx, GetTermLookupVersionInfo())); // *** Binary format *** // int: number of added columns @@ -338,7 +394,11 @@ protected static PrimitiveType GetPrimitiveType(Type rawType, out bool isVectorT isVectorType = true; } - type.TryGetDataKind(out DataKind kind); + if (!type.TryGetDataKind(out DataKind kind)) + { + throw new InvalidOperationException($"Unsupported type {type} used in mapping."); + } + return PrimitiveType.FromKind(kind); } @@ -401,19 +461,24 @@ public override void Train(IHostEnvironment env, IRowCursor cursor) { // Validate that the conversion is supported for non-vector types bool identity; - ValueMapper conv; + ValueMapper, TValueType> conv; + + // For keys that are not in the mapping, the missingValue will be returned. _missingValue = default; if (!ValueType.IsVector) { - if (Runtime.Data.Conversion.Conversions.Instance.TryGetStandardConversion( - KeyType, + // For handling missing values, this follows how a missing value is handled when loading from a text source. + // First check if there is a String->ValueType conversion method. If so, call the conversion method with an + // empty string, the returned value will be the new missing value. + // NOTE this will return NA for R4 and R8 types. 
+ if (Runtime.Data.Conversion.Conversions.Instance.TryGetStandardConversion, TValueType>( + TextType.Instance, ValueType, out conv, out identity)) { - TKeyType key = default; TValueType value = default; - conv(key, ref value); + conv(string.Empty.AsMemory(), ref value); _missingValue = value; } } diff --git a/src/Microsoft.ML.Transforms/TermLookupTransformer.cs b/src/Microsoft.ML.Transforms/TermLookupTransformer.cs index 70c90a64d1..63be541419 100644 --- a/src/Microsoft.ML.Transforms/TermLookupTransformer.cs +++ b/src/Microsoft.ML.Transforms/TermLookupTransformer.cs @@ -19,9 +19,6 @@ [assembly: LoadableClass(TermLookupTransformer.Summary, typeof(TermLookupTransformer), typeof(TermLookupTransformer.Arguments), typeof(SignatureDataTransform), "Term Lookup Transform", "TermLookup", "Lookup", "LookupTransform", "TermLookupTransform")] -[assembly: LoadableClass(TermLookupTransformer.Summary, typeof(TermLookupTransformer), null, typeof(SignatureLoadDataTransform), - "Term Lookup Transform", TermLookupTransformer.LoaderSignature)] - namespace Microsoft.ML.Transforms.Categorical { using Conditional = System.Diagnostics.ConditionalAttribute; @@ -269,7 +266,7 @@ protected override void CopyValue(in VBuffer src, ref VBuffer dst) public const string LoaderSignature = "TermLookupTransform"; internal const string Summary = "Maps text values columns to new columns using a map dataset."; - +/* private static VersionInfo GetVersionInfo() { return new VersionInfo( @@ -281,7 +278,7 @@ private static VersionInfo GetVersionInfo() loaderSignature: LoaderSignature, loaderAssemblyName: typeof(TermLookupTransformer).Assembly.FullName); } - +*/ // This is the byte array containing the binary .idv file contents for the lookup data. // This is persisted; the _termMap and _valueMap are constructed from it. 
private readonly byte[] _bytes; @@ -629,7 +626,7 @@ private static byte[] ReadAllBytes(IExceptionContext ectx, BinaryReader rdr) return rgb; } - + /* public static TermLookupTransformer Create(IHostEnvironment env, ModelLoadContext ctx, IDataView input) { Contracts.CheckValue(env, nameof(env)); @@ -639,9 +636,11 @@ public static TermLookupTransformer Create(IHostEnvironment env, ModelLoadContex h.CheckValue(input, nameof(input)); return h.Apply("Loading Model", ch => new TermLookupTransformer(ch, ctx, h, input)); } + */ public override void Save(ModelSaveContext ctx) { + /* Host.CheckValue(ctx, nameof(ctx)); ctx.CheckAtModel(); ctx.SetVersionInfo(GetVersionInfo()); @@ -656,6 +655,7 @@ public override void Save(ModelSaveContext ctx) Host.AssertValue(_bytes); DebugValidateLoader(_ldr); ctx.SaveBinaryStream(DefaultMapName, w => w.Write(_bytes)); + */ } [Conditional("DEBUG")] diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs index 3c1562bbad..5604d1c59a 100644 --- a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -3,6 +3,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using Microsoft.ML.Core.Data; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.RunTests; @@ -128,6 +129,92 @@ public void ValueMappingMissingKey() Assert.Equal(1, fValue); } + [Fact] + public void ValueMappingOutputSchema() + { + var data = new[] { new TestClass() { A = "barTest", B = "test", C = "foo" } }; + var dataView = ComponentCreation.CreateDataView(Env, data); + + IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() }; + IEnumerable values = new List() { 1, 2, 3, 4 }; + + var estimator = new ValueMappingEstimator, int>(Env, keys, values, new[] { ("A", "D"), ("B", "E"), ("C", "F") }); + var outputSchema = estimator.GetOutputSchema(SchemaShape.Create(dataView.Schema)); + Assert.Equal(6, outputSchema.Columns.Length); + Assert.True(outputSchema.TryFindColumn("D", out SchemaShape.Column dColumn)); + Assert.True(outputSchema.TryFindColumn("E", out SchemaShape.Column eColumn)); + Assert.True(outputSchema.TryFindColumn("F", out SchemaShape.Column fColumn)); + + Assert.Equal(typeof(int), dColumn.ItemType.RawType); + Assert.False(dColumn.IsKey); + + Assert.Equal(typeof(int), eColumn.ItemType.RawType); + Assert.False(eColumn.IsKey); + + Assert.Equal(typeof(int), fColumn.ItemType.RawType); + Assert.False(fColumn.IsKey); + } +/* + [Fact] + public void ValueMappingWithValuesAsKeyTypesOutputSchema() + { + var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } }; + var dataView = ComponentCreation.CreateDataView(Env, data); + + IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() }; + IEnumerable> values = new List>() { "t".AsMemory(), "s".AsMemory(), "u".AsMemory(), "v".AsMemory() }; + + var estimator = new ValueMappingEstimator, ReadOnlyMemory>(Env, keys, values, true, new[] { ("A", "D"), ("B", "E"), ("C", "F") }); + var outputSchema = estimator.GetOutputSchema(SchemaShape.Create(dataView.Schema)); + 
Assert.Equal(6, outputSchema.Columns.Length); + Assert.True(outputSchema.TryFindColumn("D", out SchemaShape.Column dColumn)); + Assert.True(outputSchema.TryFindColumn("E", out SchemaShape.Column eColumn)); + Assert.True(outputSchema.TryFindColumn("F", out SchemaShape.Column fColumn)); + + Assert.Equal(typeof(int), dColumn.ItemType.RawType); + Assert.True(dColumn.IsKey); + + Assert.Equal(typeof(int), eColumn.ItemType.RawType); + Assert.True(eColumn.IsKey); + + Assert.Equal(typeof(int), fColumn.ItemType.RawType); + Assert.True(fColumn.IsKey); + + var t = estimator.Fit(dataView); + } + */ + + [Fact] + public void ValueMappingValuesAsKeyTypes() + { + var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } }; + var dataView = ComponentCreation.CreateDataView(Env, data); + + IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() }; + IEnumerable values = new List() { 51, 25, 42, 61 }; + + var estimator = new ValueMappingEstimator, uint>(Env, keys, values, true, new[] { ("A", "D"), ("B", "E"), ("C", "F") }); + + var t = estimator.Fit(dataView); + + var result = t.Transform(dataView); + var cursor = result.GetRowCursor((col) => true); + var getterD = cursor.GetGetter(3); + var getterE = cursor.GetGetter(4); + var getterF = cursor.GetGetter(5); + cursor.MoveNext(); + + uint dValue = 1; + getterD(ref dValue); + Assert.Equal(1, dValue); + uint eValue = 0; + getterE(ref eValue); + Assert.Equal(2, eValue); + uint fValue = 0; + getterF(ref fValue); + Assert.Equal(0, fValue); + } + [Fact] public void ValueMappingWorkout() @@ -151,7 +238,7 @@ void TestCommandLine() var dataFile = GetDataPath("QuotingData.csv"); Assert.Equal(Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0 col=B:R4:1 col=C:R4:2} xf=valuemap{key=ID value=Text data=" + dataFile - + @" col=A:B loader=Text{col=ID:R4:0 col=Text:TX:1 sep=, header=+} } in=f:\1.txt" }), (int)0); + + @" col=A:B loader=Text{col=ID:U8:0 col=Text:TX:1 sep=, header=+} } 
in=f:\1.txt" }), (int)0); } } } From 68ec5bfc7f7bfe61419848ecf3ae6bb0242cebff Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Mon, 3 Dec 2018 18:29:31 -0800 Subject: [PATCH 08/16] - TermLookup compatibility with tests --- .../Transforms/ValueMappingTransform.cs | 74 ++++++++-- .../Transformers/ValueMappingTests.cs | 130 ++++++++++++++++-- test/data/backcompat/termlookup.zip | Bin 0 -> 38180 bytes test/data/backcompat/termlookup_with_key.zip | Bin 0 -> 38197 bytes 4 files changed, 181 insertions(+), 23 deletions(-) create mode 100644 test/data/backcompat/termlookup.zip create mode 100644 test/data/backcompat/termlookup_with_key.zip diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs index a84897fb58..452792018d 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs @@ -99,7 +99,7 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) var resultDic = inputSchema.Columns.ToDictionary(x => x.Name); var vectorKind = Transformer.ValueColumnType.IsVector ? SchemaShape.Column.VectorKind.Vector : SchemaShape.Column.VectorKind.Scalar; var isKey = Transformer.ValueColumnType.IsKey; - var columnType = (isKey) ? Transformer.ValueColumnType.ItemType : + var columnType = (isKey) ? PrimitiveType.FromKind(DataKind.U4) : Transformer.ValueColumnType; foreach (var (Input, Output) in _columns) { @@ -171,11 +171,42 @@ private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable (uint)i).ToArray(); - dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName, GetKeyValueGetter(valuesArr), 0, indices.Length, indices); + // If the values are key values, there are two different ways in which they are handled: + // 1) If the values are of type uint, then it is assumed that these values are the + // key values. In this case, the values are used for the key values. 
+ // 2) If the values are not of type uint. Then key type values are generated as a number range starting at 0. + if (valueType.RawKind == DataKind.U4) + { + IEnumerable indices = values.Select((x) => Convert.ToUInt32(x)); + var min = indices.Min(); + dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName, GetKeyValueGetter(valuesArray), min, indices.Count(), indices.ToArray()); + } + else + { + // When generating the indices, treat each value as being unique, i.e. two values that are the same will + // be assigned the same index. The dictionary is used to maintain uniqueness, indices will contain + // the full list of indices (equal to the same length of values). + Dictionary keyTypeValueMapping = new Dictionary(); + uint[] indices = new uint[values.Count()]; + uint index = 0; + for(int i = 0; i < values.Count(); ++i) + { + TValueType value = values.ElementAt(i); + if(!keyTypeValueMapping.ContainsKey(value)) + { + keyTypeValueMapping.Add(value, index); + index++; + } + + var keyValue = keyTypeValueMapping[value]; + indices[i] = keyValue; + } + + dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName, GetKeyValueGetter(valuesArray), 0, indices.Count(), indices); + } } else { @@ -337,8 +368,10 @@ protected static ValueMappingTransform Create(IHostEnvironment env, ModelLoadCon { Contracts.CheckValue(env, nameof(env)); env.CheckValue(ctx, nameof(ctx)); - env.Check(CheckModelVersion(ctx, GetVersionInfo()) || - CheckModelVersion(ctx, GetTermLookupVersionInfo())); + + // Checks for both the TermLookup for backwards compatibility + var termLookupModel = CheckModelVersion(ctx, GetTermLookupVersionInfo()); + env.Check(termLookupModel || CheckModelVersion(ctx, GetVersionInfo())); // *** Binary format *** // int: number of added columns @@ -362,7 +395,8 @@ protected static ValueMappingTransform Create(IHostEnvironment env, ModelLoadCon throw env.ExceptDecode(); var binaryLoader = GetLoader(env, rgb); - return new ValueMappingTransform(env, binaryLoader, 
KeyColumnName, ValueColumnName, columns); + var keyColumnName = (termLookupModel) ? "Term" : KeyColumnName; + return new ValueMappingTransform(env, binaryLoader, keyColumnName, ValueColumnName, columns); } private static byte[] ReadAllBytes(IExceptionContext ectx, BinaryReader rdr) @@ -414,9 +448,9 @@ public override void Save(ModelSaveContext ctx) } /// - /// Holds the values that the terms map to. + /// Base class that contains the mapping of keys to values. /// - protected abstract class ValueMap + private abstract class ValueMap { public readonly ColumnType KeyType; public readonly ColumnType ValueType; @@ -446,15 +480,25 @@ private static ValueMap CreateValueMapInvoke(ColumnType ke public abstract IDataView GetDataView(IHostEnvironment env); } + /// + /// Implementation mapping class that maps a key of TKeyType to a specified value of TValueType. + /// private class ValueMap : ValueMap { private Dictionary _mapping; private TValueType _missingValue; + private Dictionary CreateDictionary() + { + if (typeof(TKeyType) == typeof(ReadOnlyMemory)) + return new Dictionary, TValueType>(new ReadOnlyMemoryUtils.ReadonlyMemoryCharComparer()) as Dictionary; + return new Dictionary(); + } + public ValueMap(ColumnType keyType, ColumnType valueType) : base(keyType, valueType) { - _mapping = new Dictionary(); + _mapping = CreateDictionary(); } public override void Train(IHostEnvironment env, IRowCursor cursor) @@ -476,11 +520,11 @@ public override void Train(IHostEnvironment env, IRowCursor cursor) ValueType, out conv, out identity)) - { - TValueType value = default; - conv(string.Empty.AsMemory(), ref value); - _missingValue = value; - } + { + TValueType value = default; + conv(string.Empty.AsMemory(), ref value); + _missingValue = value; + } } var keyGetter = cursor.GetGetter(0); diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs index 5604d1c59a..9cc85c1a0e 100644 --- 
a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -4,13 +4,17 @@ // See the LICENSE file in the project root for more information. using Microsoft.ML.Core.Data; +using Microsoft.ML.Data; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.Model; using Microsoft.ML.Runtime.RunTests; using Microsoft.ML.Runtime.Tools; using Microsoft.ML.Transforms; using System; using System.Collections.Generic; +using System.IO; +using System.Linq; using Xunit; using Xunit.Abstractions; @@ -35,6 +39,16 @@ class TestWrong public float B; } + public class TestTermLookup + { + public string Label; + public int GroupId; + + [VectorType(2107)] + public float[] Features; + }; + + [Fact] public void ValueMapOneValueTest() { @@ -154,7 +168,7 @@ public void ValueMappingOutputSchema() Assert.Equal(typeof(int), fColumn.ItemType.RawType); Assert.False(fColumn.IsKey); } -/* + [Fact] public void ValueMappingWithValuesAsKeyTypesOutputSchema() { @@ -171,32 +185,67 @@ public void ValueMappingWithValuesAsKeyTypesOutputSchema() Assert.True(outputSchema.TryFindColumn("E", out SchemaShape.Column eColumn)); Assert.True(outputSchema.TryFindColumn("F", out SchemaShape.Column fColumn)); - Assert.Equal(typeof(int), dColumn.ItemType.RawType); + Assert.Equal(typeof(uint), dColumn.ItemType.RawType); Assert.True(dColumn.IsKey); - Assert.Equal(typeof(int), eColumn.ItemType.RawType); + Assert.Equal(typeof(uint), eColumn.ItemType.RawType); Assert.True(eColumn.IsKey); - Assert.Equal(typeof(int), fColumn.ItemType.RawType); + Assert.Equal(typeof(uint), fColumn.ItemType.RawType); Assert.True(fColumn.IsKey); var t = estimator.Fit(dataView); } - */ [Fact] - public void ValueMappingValuesAsKeyTypes() + public void ValueMappingValuesAsUintKeyTypes() { var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } }; var dataView = ComponentCreation.CreateDataView(Env, data); IEnumerable> keys = new 
List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() }; + + // These are the expected key type values IEnumerable values = new List() { 51, 25, 42, 61 }; var estimator = new ValueMappingEstimator, uint>(Env, keys, values, true, new[] { ("A", "D"), ("B", "E"), ("C", "F") }); var t = estimator.Fit(dataView); + var result = t.Transform(dataView); + var cursor = result.GetRowCursor((col) => true); + var getterD = cursor.GetGetter(3); + var getterE = cursor.GetGetter(4); + var getterF = cursor.GetGetter(5); + cursor.MoveNext(); + + // The expected values will contain the actual uints and are not generated. + uint dValue = 1; + getterD(ref dValue); + Assert.Equal(25, dValue); + uint eValue = 0; + getterE(ref eValue); + Assert.Equal(42, eValue); + uint fValue = 0; + getterF(ref fValue); + Assert.Equal(51, fValue); + } + + + [Fact] + public void ValueMappingValuesAsStringKeyTypes() + { + var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } }; + var dataView = ComponentCreation.CreateDataView(Env, data); + + IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() }; + + // Generating the list of strings for the key type values, note that foo1 is duplicated as intended to test that the same index value is returned + IEnumerable> values = new List>() { "foo1".AsMemory(), "foo2".AsMemory(), "foo1".AsMemory(), "foo3".AsMemory() }; + + var estimator = new ValueMappingEstimator, ReadOnlyMemory>(Env, keys, values, true, new[] { ("A", "D"), ("B", "E"), ("C", "F") }); + var t = estimator.Fit(dataView); + var result = t.Transform(dataView); var cursor = result.GetRowCursor((col) => true); var getterD = cursor.GetGetter(3); @@ -204,18 +253,18 @@ public void ValueMappingValuesAsKeyTypes() var getterF = cursor.GetGetter(5); cursor.MoveNext(); + // The expected values will contain the generated key type values. 
uint dValue = 1; getterD(ref dValue); Assert.Equal(1, dValue); uint eValue = 0; getterE(ref eValue); - Assert.Equal(2, eValue); + Assert.Equal(0, eValue); uint fValue = 0; getterF(ref fValue); Assert.Equal(0, fValue); } - [Fact] public void ValueMappingWorkout() { @@ -240,5 +289,70 @@ void TestCommandLine() + dataFile + @" col=A:B loader=Text{col=ID:U8:0 col=Text:TX:1 sep=, header=+} } in=f:\1.txt" }), (int)0); } + + [Fact] + void TestSavingAndLoading() + { + var data = new[] { new TestClass() { A = "bar", B = "foo", C = "test", } }; + var dataView = ComponentCreation.CreateDataView(Env, data); + var est = new ValueMappingEstimator, int>(Env, + new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory() }, + new List() { 2, 43, 56 }, + new [] {("A","D"), ("B", "E")}); + var transformer = est.Fit(dataView); + using (var ms = new MemoryStream()) + { + transformer.SaveTo(Env, ms); + ms.Position = 0; + var loadedTransformer = TransformerChain.LoadFrom(Env, ms); + var result = loadedTransformer.Transform(dataView); + Assert.Equal(5, result.Schema.ColumnCount); + Assert.True(result.Schema.TryGetColumnIndex("D", out int col)); + Assert.True(result.Schema.TryGetColumnIndex("E", out col)); + } + } + + [Fact] + void TestValueMapBackCompatTermLookup() + { + // Model generated with: xf=drop{col=A} + // Expected output: Features Label B C + var data = new[] { new TestTermLookup() { Label = "good", GroupId=1 } }; + var dataView = ComponentCreation.CreateDataView(Env, data); + string termLookupModelPath = GetDataPath("backcompat/termlookup.zip"); + using (FileStream fs = File.OpenRead(termLookupModelPath)) + { + var result = ModelFileUtils.LoadTransforms(Env, dataView, fs); + Assert.True(result.Schema.TryGetColumnIndex("Features", out int featureIdx)); + Assert.True(result.Schema.TryGetColumnIndex("Label", out int labelIdx)); + Assert.True(result.Schema.TryGetColumnIndex("GroupId", out int groupIdx)); + } + } + + [Fact] + void 
TestValueMapBackCompatTermLookupKeyTypeValue() + { + // Model generated with: xf=drop{col=A} + // Expected output: Features Label B C + var data = new[] { new TestTermLookup() { Label = "Good", GroupId=1 } }; + var dataView = ComponentCreation.CreateDataView(Env, data); + string termLookupModelPath = GetDataPath("backcompat/termlookup_with_key.zip"); + using (FileStream fs = File.OpenRead(termLookupModelPath)) + { + var result = ModelFileUtils.LoadTransforms(Env, dataView, fs); + Assert.True(result.Schema.TryGetColumnIndex("Features", out int featureIdx)); + Assert.True(result.Schema.TryGetColumnIndex("Label", out int labelIdx)); + Assert.True(result.Schema.TryGetColumnIndex("GroupId", out int groupIdx)); + + Assert.True(result.Schema[labelIdx].Type.IsKey); + var keyType = result.Schema[labelIdx].Type.AsKey; + Assert.Equal((ulong)0, keyType.Min); + Assert.Equal(5, keyType.KeyCount); + + var t = result.GetColumn(Env, "Label"); + uint s = t.First(); + Assert.Equal((uint)3, s); + } + } } } diff --git a/test/data/backcompat/termlookup.zip b/test/data/backcompat/termlookup.zip new file mode 100644 index 0000000000000000000000000000000000000000..242d5782ede106b49bfa3e157210e1a12b1398b0 GIT binary patch literal 38180 zcmV)rK$*W#O9KQH000080I*|&O$TG9HV6O!00{s902lxO090~eX>Mt5XGv~mZ(LSo za&u{KZZ33qbTia506|_Z08mQ<1QY-O00;oEV}nhL0wNH$X8-`wjR61?0000`a%E&` zV{~tFTupCeWo#~MWqIt~2|QKn`Z(}SrjW=GDnsU>GG)wqkSSB4q(PCXGOLgVWDcc5 z14*W&!4OhpiOgb~qREtmD3X%GfA4ebavJWvC#>H+_w!$$v(C16wXD7N{;ubFpZ8tw z($~Z3uUFU5Bgc{9h%50w;zASp=MOYtQ;&bXjANX7PCNBnbEA=l2F^&8lJE~H2&<;W zzWp0rt*wnL9PO-KH9V{>r~cwEmddH^)lWTgPCcgo>wh+D3k6}dBK|O+uvUT33Ja4L z@aG>50yE+N#N8n$tn&yf6=9v2oSdX4JQ5xz#|Z1+`DM&gzi^!J@4gV$&xDnd@ca|u zag?xr{Eh9=PyJj**wMDB?GqO|!rJ?HHa2aW#P_^->QQ;>*zgio@u_3*cNXFoC&j2z z2Zsw>i^GMh;JQ*YaJjl#IG%axIP)XwxR(7I#J|TvSf3way*Q@b27msJcJ$9Jw4-_w zLyE;|?)hskjtgfsv;pzhPfoU>e|}ZkQS())Zzc-cram{)znlGqcx)K>#V1*xEK@0> z?{h*z=c&kNobfFQ83hUZ-UU}bKL?4uOHiuQbs&+1Z;cwo1Ae1)K?SZF%rnF8n|oEk 
zT<%+Z(Yc9B3H{2_NhJdO9%_)Xk<0&u+cBU_uMvqBs{oPacMqj%Dq#D)iI2tHcv!|( zk(o}}0C}88)Yiu~&*{FAmSNUL)8>g9c;W;>UP~TKa*bjyW1o#N>li$rcN%zeJ1X&%5T80 zwA0eVu_tTbO4>DXxE@v6ZhFFrfQPWD>#BwRL0TmEE<>^w!srt;Na5)DGk(pWlj>tz zz+VSRLCP2X`kp{yaB*{KXFc?sEPrpbpc(DZr-+1mPf(fz9|MaBk1py9PAL)|f8%Zc z$H#`|L|!XT^>UTNn_jI*IQ1oL0e{Ypv@&>Cq*leP?X`^umymQ&teOnfV zG5kbiyHp5>a8gM|E`NjydD8Qr^KU_G%TU^;Q;+8kKJg2>xDTm3`|{@zpJECtaUn(i zzxz8#ClcaGgm^L`owXYge@QEif+$=KKrI*WX#F{}S;F z3CFfu4TpDZ}d@s%3+&p(*jA_S3LRNb5dM*x?2;aNASAG5n5A=hp zn@+xm4Z1RCno~N!)%b0S@aD&$Vt+yT!m}*!K64U3799mx?Aw_L_8uCGX62Iom2x|e zxPjaQegj#bGjKHp_m-@!5>y@UsV*)`1Usi*O~j}f*f{4!v$DR#WZy{3=$hii(T>%S zX}RTikW~X18aNAlVSEmEKfU8^m%azpg4_$67uTb&c8Y}#+-#Wh&-9Nk=cBM&pSmtR z`(>I>1W);!;D43olhFU}_P$T^X9E6b=vU7GJaM7=KFvROQR92?uiw%>i7sdaJtikP z)s`moCC|#2j#iJ*dchjy<2a8sH!ewEjM?7^s<)d}nX8&m@pB2Z98a3&{!20Km^>fr zb@+0bQN>Y6b7xk?TZl95de2SS3p@-97g3J2LEKX+rz`K$0jC>(HAN-}u-vy}r?QuP zl+Uo%)hL(f{CSL#%wGoMrpYz~+dD{to+eR3OU4|!L*Ny4Ek(Q(6BUatZ z%i!K~8Sg9Mwa~<4SD&TR2rS7*yw|usM89O(Ma~z-WA>T;0e-o0SI|bs=D&camX~YzKgrPw8CUq*6L_2d$8U2^w8W^m4y|5idqpF7N-I@S5 zeEs3U>}2r%G7m9b?TslakFB8;$4~M0+Vq7~|HPM^c24+Ly$`vCVXJP?&#G-H%e*lxzx!3Cnb+#WFA{KLD37wQT7@5Z$ zA8B_1X5ii@ZgLiy^z53;^UGmN5xwY=@kZFwqAqyr*xcivj^3-v=-4xQ6jog|>uKzI z0IY3FC-yPbzzI!_upqPh@N7i4%&e#wrPO#LC7D(-_xOoOs&jj`mde%rGOZ_~C$;6N zeSw*F%Duwh+1O9gdgZD2PFks^j>B(P?}?TBKd?Ki{_#kx z+~*=+tM@x3-ZswbsD~{_DGskKtVcPmq>tY-Yd}vdD+-H|ss|xUkIme7Zlg+pTZcLL zYG9MWc3;ihdh`tMd!w@KdX%-+Og65zeol*cUk13jW0*$ZuGgZlGfGX+v+9_rYG?zz z5Z_7Oc(DTBkF?iZWW5F6nvE=>7taB1!Sge@;pTv&aXYcf=1+$$TPZPHb#jIkFen&TEyYV=`J z#G9wg?!ifW7xFZ?5~#cGL;Xiup<5<7P@icm{P<`QB-t-)VLV-k=D&Y+c&%O$8WPmC z*k+_?Zi{$h9ZS@M1gn3V-cL+nCBpr0i}u7q8*$;EA$rm_iHpwE`&5_#`oBbb^H_Ca zmTKZN=-($tkt@nC9~c)Hgl zgAnEUIu(<`0hbo6A)~PwaID(uE4CV85>aDKhV~X7kwAunVyPqI{J|lGM$aL3!Lq7HpobWM%i1MsdDO#r_3|GfBI-2$sU(1ipbw_e$ZzI$2fAz*NBPr9Ha% zP$1xVCc=fc6R@*B*RKuM!30(edVS*yV^t(TQ~SkB86Q}awqRR&@eYt%sj;;_pKI;RMX$ZvQkm9z2b&bX>f}Yqy^Q5QP4OqcYuC>-#s3-l-%as<)~=svivP2A 
z{mfGQpS0`0gMYp^k-OIZ7Wzr**#$GtI`j@3Etk>ldeouI_LYD`JKG0l$0;+HKDkYhffuQZl z-MvjE5I!g};@fr`$^+c#Wm5>zK92)w@*@=VK5j02^iU6QeOIp3+)$p=H2PrTGO9(JOIri`+?y66cb+9Lj;grUmI(T6{bXh5{8j9XiMZe&w zLzm}O-Ha#v{qFg6yPs>!E&MljsqtTBB3k zZj_nO5fXZS$F94;?le&T#U}$EUTEL(I`Af%=^x2=wJ#kF7qY&baV2AJ;ip$#6PL;& z4kG#Ym3-2$+}TX?e|KcwbogYm03Y`qeEJ!}|DlpUE!Ce6KW>x$=CuQO^c`a}|H6lv zgfMXPW^Xng9a&z%9C)-48flYg^iI`*9Os%h6-iC7Z9y(5zs95f$Yno=Aw256a#4=v zg1Lo1Uy4>sjH?`I$cOeuFy{e_nnNnobT{Wf{aRQ1G*~w8|e_&Dx%DqCK4^HDQe7ako z0d?#|K;QZ%5X~;L={(j0t0e8~_MF28{^L7M$}+bSpuBHK+fwG6z@-yK|AICL@az*c zd)ME9ZgangON*|eI>lwGZKc=HcDd_m(PTH~7QXP+rH5qO^M4AT)Mx*Bmf(LsBzGq8 ziS7H%Q}`8T3BJzMzAhrHRI>s9kM{irRh(I&CeEo-^JnnsndVgm7UI#4ST`EuZB^*9 zk|medB-F#(v2jJ7&_?wAJ@yC8ugXB}qSjho$y!ioR*l|vACH#noeich$Dpp;&6PH}k~F+F0EYDJ#7tyWq^fWP7L+1m-W`+&wIcxASwA0bnc ze$5$~2YK3OQ|go-!D;D4lliB5U`Ll>)*WhW;O|?>X})S*95~4~B(k%G!D}&|XS}N} zgPe6vUy`)~Wl}H}PTf-v!@abM+G!2w#$C=swpOK(pSg_I^xi|rk{jI58HYz- zCW!@lU&W(+6)witislx6ifyUX@;f>3f~LJ<;>LBbpLb4Dt2zm)55$GNwTp+E13?by zM^1yeQ1iB<54Qtue{1PebOqoz;uSfPh2|6&d5l)<3W@IfhYft?3k_>KXrn>*h|zxk z&Om51$?`qCJqGxwS2R5BO92FZ!R)d6GODs&Wcfqp1oYy^eZ2=hB+M;*#)qqXniRAC zQK|m2Dfq(F-+QcEbH9uD%Jiq1P%3IhYH_^;imheq2if{PNX(DrkxM_>8l(U=kS(Y{l7bXUN$ zBMpQTz;C?m|M=LP7+9&zbt(o~YkcYFHjN`|jhXmA^?Sgs%?-{BB3=;dU{|`d z>JYfrC(#UsdV{o5#7Ccs&+M72I%32f=XHp>cpyBW>|L#P5Y41p>{QHZs8}>Xo+w?AdIR0hZXl` z8$Q`A!~dhW_aDG_0mr@zNww(S9cN> z(2Tt)6=-za)?}PB9^I6(hX1%19^H`96%ZOdxA1qYtmg|JPlB`Sw)pS584WmF-r7$J zXCVuBl!^UzBxomTFqxG4L*d?+LrFLTzy(c|25(*txMX(UG)L|^#WiA)YS)-e4#(FC zbenb@2IiMl_Lgnlu*`{O(#Y)yEUgw*p1}EnNt;wtxor!$BUibP`1OF=62Uw^3T)uN zIZ+xFUwayopEEGICwhamY|aK5g9zx2ttjTtK;Z6YX3z1#vuOI~O}lIoE})sN8eChb zW9AmV==EgQDqPw>4nC>4M_fo_^NCfW?I$9)vMu_#!QXe z8&SheqM9W5@(I@U6JM^P8S!?3>m%@}^x2J1d@oj^v}~4f9Jw(-`~LFn*i{Yi{>1Rs zt?qcBRp-7zvlEZrdfFc@XoE*8Rk~xLwp3Ah?cFo5{}8uLomLoh z$)zw$Q~H8Rjulq}QwYQfJU)ESLleA3qCa=4@`1O}%aJV~mdz z_dbVVgI`;Q(z!?|vTptyoSp-fOK`b`VjF?i;zsScrWVN98|(gY=PS@My9B*4y_mqK zRESi67IYH6P=&d>Tsj9g*GdXAj$MZFNgwm=ap!<@uf$eOrU>+ll<}8xn<%t$r7msH 
zz}dMSv3L63;g9bMQvM_O|4ox zSB*yP*v8pFz+2!emv#1d9h`b?^Dt)312DQE6Jh(Z4w{;{@0{9ShDt;f%-?T_M|CVe zki92F{oj1Mf9tr=dM6$C?~R1f96#EUl_%jwt$xm^U=S#+mRvO&69}8wKNN_cI{S#(+ys+amHY4_)mgBj4V-o-6Ww)Pa3H{Fsdow|QZYtuF%_4eIVUKp! z&=Vp)v9L#s_}`(|7h}D><_3x$w5N>SYK%v7H+wU-8r6Z&Wbc9HQFZ9{0^gxim*OF$ za;W9zNy6)&`eVtp@aVmQN2Bh0u|a=bqlBzh<|Gt)EF9x>^aqRE%PNkR`v6Bna_4Gy zcaYV}>>HwW0iIm*F#kAt@J`T_VOoa`dZsJ%)C%=%upl)0Qt*CHK&Z+$tPb#nO+I_> zAE=Loi!YTO>~>@Ub1Zvb;)y4)RJKN^*|P^z^ih-alF7RRASi0Vq-u^U$X+elc5YP= zJP)c_wBu?7NcZ?fOZbPP0@Tlpsy>~>2K~{`ZKKU@@jsv^VkcdcGd>IGNi*xrR-`BH zBYrBS@0mjXJMDVXHp!;2QcXRdUd{jg2{s5(9v7~P6T78}d(-?!HQ#&lm9MkvNJpry zV|`tOs&Cl4$wG$EXHS1iManl1y|nSo<%PZ(C`EP9{Pr`1Nbe@Em!5$KwWQK!5g_#4 z4;aMG--iu)&!oFqY0d%Q?9w|roOl#mn|cB+N4SH!l0H9E=YF`RRh_eV(gmE!_KtaP zlLYU5!`Jv+7GMI)WZNOLe6eL9;L2N4>V6pFPMdl!;_`=;hmCLSNDc;?o4Jc$-rA~Y zcvSnDqSZ6-7k;16br4(hue1x<5(u>g+Lx)OC0Qq6rLLz!*I*dNFU6g9Q0U zF+z77TB8~5QjVq{8MvOYunw+#X1@MmT`>eLdx7)ZR0l&Gd(=k#i&37I!x0?vc=WDt z#Ump(Y|vZp`n0-Yg*QlCQhs)Lz#U9qEthPncLh43SZh`>S2%jPL|o6v3~=|@qn_Or z1)ON4uYe{t=yBOWksOYEfLr8K$W?U!_KZ+1G9?Ru3>(JX*)(?WfVFjqR_qL%3W^UZ zVtx*059Zs4+9`u8W8QX-anLuy)cmsj8(@NiRjQ7PsFi!VlK9B4w&!(Hk+oWXwR z`D>AfOIQN2L4PCR^K(xpLS^#Es6|FxJ`poSBV)c77f%aF>$ zHhw%RZW}!R4S5jy@b&#B$rTl7Nb6;rjN=h#=#yKN#|fkKPOf#b2p7Y{jys=rsY~Ng zB~LZFdW_Hu4!5mzJ>(9V+w8Bu>T-ts*U!Y~RqldwBTJt%j99_xO5F?S{9O>P#U>=z z$`6I|^3hLA=`n%zp>kbWjGO?Wi>^-mgR~ob^66jhrFsz@*R>=sIT;Brk2&yh+KM9j zvLhAldC%d>L*pWl9Fz_we%|U3|FIP4Y|SVu`#2l|rQ z9P?M=QOef1HSY-Be82g2|JHG-5_e4bFz5`$le^4srP;yQZS%$`IctzOCaW>;yb0Kx zQXZ@u-vr)W@fCX7T;RR!#mK_(d6>ZZBsoMDmADE%M6Lg{Z<7O*FMMxJZ{`Oqy0^*V zHpD}R9`8ZU%5&grme)zKwhbN~_G9q5fi3zm*V3zJwz$KSN2*soQ(1tD?FQw|IUcZ) zb}@ftu^)U~Cbqia_7U{P)0esCrQX<}=L{Y5ZQFI`-$we$-<$&LSCO805}7Ib>FNFE zX*1^j&NhFR-m^^|hnb=8`#9dOPj~b-H1Xk6c&{48QQm|w# zTTey1BgDV&jZ2dYfJHK4ZCu&9;O}JD;oN@-==%0j_6xNGx1pHOvi;ajnkh)PWQc}r zKhSLzdPMix41&$N^A0E-fI5}@eqU!LRLgF;&dmb{(UU4h8;j!hV}qVDuB%05bNIil z-cQT`{m-iRGeJ*3Ez*-sfuAY*>D7ChX?^tMGp^nfB0Vlu?T@LlzeB$~YExloNCLWj 
zw0G?~Lce>9_(7i&*YRNR<+W?f1w2ac%(=+0`!*bU@l=of4IZA|U-I-72Obr=9WpS) zj1BtIyh9C$=`L8r_wj<)`E3wgS@iCUsRI0AOgu$ac7LX4y z?UPtVh6${rmO+oZPD{aL*J52(S4Vj2J4nCB${*-EvLhCLAT?P>y%;|4Oh0Pb2In$Q zl(2PUi#}w$VzI?a7wAyUIEyxH1qovTJ_9#bkQ%yOa46s?WT@r6ef#_X>Lfj!BBAGk z4fpC0KK&kFi!k>2;=U(mnbt!T1$ycm5pn7k+1 z43ECo>sZ$rhlk|Lt61w^7J~F=36`<~LZr7p{A`$^09C!c{7S|mJnGTSTW7_D4f?@E ziz}szwn5IQgiD{`MhK=}x~8~%J#E&09{)RQm^aEfVszoeHmPfU~o~4$in5r z2p0SH%0jioLTxofUk=)N0>2mZQr+LU1O-BV!GI9gBPzJ~B-1nnmkN!aAE_kWYlX?z zPo?+W#x}h#SR}6!;^zQ0N-Md@*K7vd!Tr_&E>3{!e6r9oPyt<9>YbHc?urUNJ#cp5 zqCGa~i`E~NGPx1>k4ArF4$za$GWtKTe}~?B6SA{j>H!*5eq-<|0sSXOg?)mdcz7Sl zsLmFj1!r2!_+3-*KqXwy{!FzBO%OUJR=*IB2F16w*)U*({?7R$v8*y1;p5shPV)}x zL8-7r;*r(b;5m7hnS~JP(HfT^mylIpZZ>?`b|oXk$-jHPAiWkG35%Iz5+Efjs>$kG*@C)%qJ>M`@xM|%<=k}fZ(d*V5$ozb;)V#l`2&Wlf&zSL_E z*lZsWQSI3X^x}?T+4t>0b0cR<91lPGY(-M)d_`weiT@c{{Ddtw=qt^3rVMKM{bSJ! zOhZq6oDK9W-_4OfSLn}<*9zZhDncz;DS{_cFhzhdD~>g&b?{jo1u0_jI(V92K~x^VwC;MT5xMP}{; zEfji8Yu8#S-z>xH3RY*srxSxTiceZ$@6)w4pLMWB?=rMWn0m|_Y}+T6h^rXEPEJXa zy*F%OU4{qiI!*+oSZX3|GiryXWjXFU?_h-udW%37(^wOqe_OqujY8gRwdto{YD=0+ zZ?+0~WK&&sGfnURa;YuOb}bIKRPB58S4*^QcsAhC?GG0zaNMteiuI$)7W?qfC=XJG z26*%kQ|1oDvj`ZB>q_nQ(ol}Uq7yZAcr?b;+1pS`kC0Gmsk9vpYe7Vq+TrB zd+61AxO;S`&4EQ$uy=i@6`ExLS`@05l^na#h&`znHB~LJLC-2K)B8r}=s!Br<7PR% zCxTos>*@W?AJB`P3^-?HdLQ+36wnUSz@wJWjmH)`;vq&SefdrPbg;X7<@pfdd8>v{;C3lEf$J!(xU(7* zcT3swSu?{1H$Bg@JUECmWGJ+udyEPC!F#VumfQnSi@xce=YqhMUO6wtcCW7mJlcc! zp3{yM(EadwQ3=ux*@9bIZi!(#_TKe5=y(d7!)hm$J~EsR;F8aq)(BY;MiG)T#SsSQ z?9w(&j1IR#gBDI49$00D4fE2^WZ#1hsR0z_$3Ixku8v1_ z)}%(YS>d67pB%H7RSKMW?eM9?91na=+;k~x@MwOFo8WB%b}PR7^AT!n&|6hYKS@fG zhZ9T9?a59^f)RIS_|a7&AiT&VSE+6#G^*XnJL*dh8>IFycfb9Nw0Pv^oM@@X1bytP z#VbV*BVf5%kHLsyF?g|Q-#-;e3SgsN4jyH?rmw5Oz4c`4iq-9K^TDP8y&!DSzdau! 
zGa<4a&XgW7eA%lBF$VYcjdPkoz&$sX7#;=GCtTHbMZX0arFE#6CeIWb^rD-V?TL$X z`$whsKO3Ago9X?csqN1M{f~olx_^lDOU9aTX@E!1&dYv#vN!`x4!^nR`F1?)6q4Mk zD1k@Mos-c$+ZqjTcEqczZNfv7+^Wf70(Sa+k+@Y<*r4~ww|f_QO$v-1QrpG4M1XZs z$?of=ykM;ULCPj>8T88xen{EK2D{&XQ1RFJh!n24bVwwt8WZ$r&2`S6Qf0{LX7|=O zS&h)eZardTL;5a{dXXMqc4?TOb!>;z&R+(uJjE8hWpHY`xUVVjBz7MT+o%pg1M-(X zkDCHh!@H*Y+%{nQoNF{vYA2eL!FnTe!6t0bFZJ!zy?&T9LITP7)mq%NE~5 zy_du;(ji3hm&5PX`3Sg|bFEG?FG+?)E8Y&67~nydp=H%!VLX}~xnSfh0sGQOKW}YH zY|uOJGStZ4FAnbMG(0-we89E0@B_|%33#V08M{fz16M4aq_$UnM*5pXgvf1%5$!rH z+?LV@n4mA^7ivgw1-PMUF_2Hmi})*3GsfapbIdr!^h0P|o!|*nfHzxavm-Vzm>jpA2&N-YJcly4Oma9ciIQC^5!AUAw&RR(o zI8QfTvZ&7pdIM}K@F^VV)eHFItF@caGZ|xh$}sk55ntnC>C(x(S;9h4&HAs zfvr`}>1$VILGvlA`c49N=~vl1s^?*Yp6Tr^JH31!*q)cYztM*UvV_+p^C0w4px#{+ zXh#Euyz&=H@MwsMty)}r4q9g>N+T?|tVHx+)$mNZYeqsFwPv2p&Pn9j zwyXDz&BqYaq!W_|s$L<5UW&ms3(_$`f2lHBKdWZl56DT6mF*OzkH7`Qm3EEYrs{wP zTb{Zn+0WfwZ>H}XX-O`OuxIzv1AevGa;Y*CXz$)QuDC`ELRT1;&1W1!+D?)6_zD=I zA6NK{*i~p?gMMvvr+e1p-M^efgM@yj>HTb?r~d`|nWpzYLqDVR{@J^xlV@it$w`kK5YXWvJ{WrnM@&_} ztFK^!zA(Knj4f5?m*^8eklXOx(*sTKY|~}$+ToG#!=ewju|==g9nIp|rVZAs9{Eb1 z*$8X)lxCLdYr?5D-s(J?$C0JS`0g!CS&y#TGPFT}MGYJD^8!R{l!L7Q(dcor)MYmv z{j7|=pN{?~gL72WISduz?x0OOq|X|yBwk8ee(b~nIfCpnDLtLa>(A4Z&M zjg(#(yhij+46xiR?LanY1jh#AE@OiJ5Yxno1LMDr^xc8i?y@A{5x_Glafh>a6zfUv#MDQ_GC6Yi@1`*jftmL2P1V+nv0x|_bf)R zLGREsdD5@J;$L26H(P}~ia99c(USdngUi|KrzdWkG+XfORPTwmxgw?%J*dXHvX z1(T{;)Ow@w!KLDaVYZs~?b}!2;fs&n!YGwfFmfS#-DxBf*4_%Y`qEkrhUfQZ8WXUG zAHI#NBf|#$yEx%tI#nuY4ePnHV?7R*@3P)?>e5H#=~FbFsSO7T=COC}7rsZ{S3NUy z2!D>`s%y5AA3lo-`YJDN50xGIKcLSD=v%${4C(glqh3!RY11?7bU?0?=5l@0eoWDe zR(4#p?^lDaFmC7N0xMzRDU&*@4~nS!K=Fe0S{uM>2eFaET8Pam93S<;$^I@}#pkQ2hA^`{fh9~Z^LL#5F|#f!zz zVsAd)#fOJ0%dSU1xtWDJ5&B(ja?;|>I2cSbD=A$4896+8%}PXN z5INp@T7US+5aO?MR(x$uf_&_0DX$GV$HUHbfl-6#V$Kiq@TSmyQxTzGgbW_y>?{KxY)T`G_~)6 zQ=R}G4ep%`uk{1^*y7JzLcw)kU$+z*HN45t2{o~TW;X}1ML*mo@V0?p84RwJKlpfF z5YPif2bT{>p|l@!>U|Tnz+Un|T6KsT%C~S;=Qa~5Y|uA-WN%Ow+47G@FE9t`J?m_z 
z_k?KwJ$iKS_;c62wdkJ4`z2?1@#y7tE=zJkWKULLe@K7cALP%jQY+e#2J0wWD0r9Q zp)f~g=okTeG~FILDU8s&`+Fw)u)jzA5;xz~#1A4v?4qB&9=9Xm4Bp0j*m@CNWlo-c z+2_daE_IRo>ov%^fs0*bPrWfgZ+9h>JTzG2NA!u#w2KcketkVuwI`VNK_}d}OnpjP zaR5{F6+=qRrj80A>XOyQC9)i>bVsZ6C?(K6CvSQ0KeQG~*7)9jw^9YQ*-?x3R<6PZ z{U`m#(*nsR{}Mf^ubznY@BR+vY13#EPaeb*01^Cb%zIC|%8m%$bQWjMVVD9>nh|&A zGw0Z@{R4bCXiP`%AA|(g`IbOFRo=_~Wq33rZzE$G4<5q99JFlb<59Pc@%B1OJnY%I zj%yPEd6ZD|&3D&g(5DR?%#0YJ7mmC^JHYuC*&fE&)0+AkaS$jy5*^=+c%GQY{YbhI z@fhdrRxv0??!PGy+c{o@44OpxbPu{?f_^hyeYgpY+Aq_4vnR)5lefJ1`sW-=<6bl# z!4`ee$J44?cFMxLd{!1EQ7+J0m0D(dSsGR;9WdT{a1Hu_+v@{212Y7#*ibGWCx#9B zWwolW?Oz-JPel47a}ept<}lK4?Uyb-uUm_rWb}Cz#fe9?cUCC9AVl=rNrcK)T;v8mvrR>mC_!K=W;c&W}bE_13;q)3a3>xW6#Q zirTTY(LTIMsBR~ct7}cay08OT zcJBESSExe9=sXRU*VZAz_u3@y7?vTzr-~IWaiB^dK6CV)y`95RT| zk_RfwxQ%9yR->n!kpq$P{Meu`wWWBtF-rF@=t(o-lF*YkI8HpAzb3t@IlTx?m#cBpB~jaPIf!eW@Z!kq~b!nM8Y6CDYEo#G#qUGpPBmJY{egbggNcAA~h$MQDu|19Jxjfpt zw0mo?6fZXD)mV43eGu0Fmt*f|YYObyiuBBLkly2FIlccj_WswA{?_@sPXn3p=#I~w z=WAESj z)|4An`Q;HJR_Zi#v$PqxxoMr~r)U((=Z;heVJJqtE3EysC+;HhUEy-bu3HFRQA~Y6 zdkZG$F%Ooae@cE*@k*qZjk~vDWjZu>m|`91;nw3g9jV*EE3jbEtHx+jr2tPM5v@2 zU3~d3xx>x$NKcCLGg-cm^rR?HL{Hr3nMQh2lqW`f94=9nggyd7e0Rr}f*hp?Us|6q zbl7_^K4!(ke)s-W13Qb*4aaV@dJahEZ|h-_iRG+DZRSl1-y=l$97*0K`52*>truDw z!Bvk8%LwBNx0WDjUA9*xneHKaZysx-ORN9A%b`dXYGGmi|EVgmkWhv zA}a#Cb7d>D5Hx&Ma_hWwBqmTBpXi)|n6LP;Cg=k_Tf_WbbBB7G) z@t>rHh%A_==v)6U#@$%1C3NG=^}p@ziW1q>Qof+#)3N9lHuLRWk4QGLh6 zj1Bs;?xzJTJs_p)RD#n}7VNbkR&IcMhS{dY6xOpEk7R_oU7r6*wKDpX)|yp2Z5 zG^@O3z{4Fq?n|4ZO2Hz)afq4<4?X$=4vzOT(f(JP^PLFTi#YtB@5TtdcL4*FuWvq5 zafl6V6}gWzIAk8l(~3t_YO0*ZixQBs_{Q6DDX~aBn~L|HqVvc(>R@jETdJ6#4@(is z;JEYa!8wA(QCTlS+kkByn`QO=R}k`v`8fGyY|(dw)3dzS;ewbaWkG9mKOV@~ zV0*uc^tf4w^b*q|{qID1;zBho(o-}~i}cf?yvE(dcFy*B=rcCPu^ro8!A!7`WecG~ zzu(=E#*_|^N_1ww;cKdZo*nshW(43$>@E+tVgUUw*Q;}sWQOzKM(QW8IXuFpAP+c% zK6$lWM&7BMWP0Thhmb#HOgu$(9+6o8@KEAsIRwjnqgkzWGW^E3)V|C5d7)hTv#-ya z6usqga$W&hZp*F7s=0?gecwpSVeQum`J&=*g&wgz6{|ApT_@0}=N8T^WMORBYy&4X1h7$G-oXYfgpx-NT 
zXBRDDWS$6l!dc~VNbAm8Y)XLMYR>Cogf;kEDg_5cGm(I-`s5n=1SF=7e(MGG7^E*} z=jkVJqmbP52kIOt&mcTImQ`(eEQ=|44su-gX-1W=z>`8=#`ns$etpjT0&FY{dSKw* zde5aL|2N<%ILbZ!-V1|-nBEPVx&>&1%gdpX$j^v!$RiIHW*$`Be^?>io)$as#B0T$ zdL=mDl92u-c;c6r)Rv$8jY$8q2v0q&E&uU1wnvQcbW;&Ncv^%fMf>0VkYuLX@>J8> z^7Ov}pJoa?P9j+S&$j%a%Gb6$5&ty!UnK8-R7*MwN1j>fJgD9SXRBMDjnLvjR9)ED ziM1RpxaU=nEmVpI)r|ZB-b&l_B`Y-^b=|2`5Jo8GiTjLp>&)SafOu@UR;5Iqi@A=( zv!CzGFONkSjPi~i_liP}IrxoAu8u%h_D#HdiVH@r*WTpqWeP$NwrJ!Pr?!24e$ye*mtkoy+*wO;IzXIsW!zGd(o(SzaTnhc?DOJ#sMQP zi1IKq&D@;`h+j~_Hv>9--LgNe+*3@Q07&48X#ei-Aa0K|`wsEsK{}DlR^@&=_}Qx5 zFP?U*>RGAW|1ltE>Q>d$+VhSI@xhPxN264nOj}kEigWHw&N^oa-EuAQYJTH{zWCcu z&)2OZpuV(zq6U965#-$pl-%aw;nqSzc8yVczN5p9Ph!Cp#Qby4BIRXiNU8XZ4wV~W zh+qiwv15Y%gzmcB!ShMS5Th*D#L|=Ah^h4vrMq!+hkg3KMMn$o-roHUJ?ZhGXP@O# z!`>fW4_znJ#hr|8d;am7g}(dMm?0!*aP?!Aek6|iQa`sZJF2uH=1PP-EiB{AZb|7~ z3}>XC+H_o_#HNTRoxR@oJ;?l{(NDkG#cZJ`-Rxr4(35U{!9@o^AV(PpJlc{Ih@0Gv= zy_0H4s=A5h54ejKEd1!@`6GI}k)V^Ux!9sNu>8Pc!OjE*c8ML0mwJ)r^~?L;-e*PW z*QaLit)&8*59sjPYb-D%&b>d!iX6ox#n+`XI9@m+_R|!fh<^H*`^n#c|9$122x9tZ zyx&dnNkzQP)6#sk8K(H8?z+X(-ZyRrDgM{)x<4X4UgggezYwOU_=fWN7PoPDuyYYS zY|do}XX%*Q9SHCfpU_G9cUPh5cSQ~53BU!NFW>LGT8uujqd2ZbiAVjsPc1MJDg+gs zGfvWToZ_Q5qK-s`UPPLeM`SABg(J^a9NC+3EC@-A_kY{-`3R!Q|AvuG`Ut|v-KBJ3 z^e|$6NS^Av*EWP^H+|NTD4{t`@%f$0kM7L;R=xKO{m9`I)B(aC(f2v-euJ+j$skLQ z|CZuY9j*McpUy4KL6#if)P0Lck5YYdmpZ@bKwC#zQW}|KYv+6x*2A!G0C(VyX z^_zY5KfuQoeCwyj$*WF>-(JAa+^bxS($HSAWhplSmcr z?zW)(LTKtcd zTPqWRlRl;*^2j}SNvqxuqE)E=>b9B{Wku*>(I%r+1mLA(mkv1*;JX*zV*2s`g=-fE zE{M)C{N7`<2kTo;BQ*6IVtpC`$S2DxG?x(RlP-s&VR22SKbsH zx)x3cH%=U1%D!rB?%)%@pbV*1j!ehJ|0gEMoQ?MV_k(l!U0NWa}%dkl$_vvdh%7%Ujl70SDCn+}bzT6fFB^W)rMCoRM;_R&4X z{HjR;|FV5g8mRN%GBR&^7aj>E|5WxelMe}|uD0Wu0#7=@{O)jEoO6WwAJ^3r=fEdk zX}7SL0Dkw^tL>ofbf<-I1%zhJ57i|1hZcb^8{G)#U03QJp5IfAPFCEZxw^X+mD$lw zn?OK*+EY$H^5PxHRX9tTjuHC6y%sk=7zH7ZhEEG+FdsuS7ir3thVDZc?vH+%$F&#v zfUocO*s%w(_ZIg&m}!Mj?_5jX61pA}^g#kbk_-n(Q@)LQB?N@)CTVoR8*aV=y8dr% zd#ek%O>aK-0_A8Ji-Rz>x3|)`vYkxbjs-+FP}qlCj~09hvvJAHr9xGngIfhR6hWPcX+ 
zW_pP&@nont1)sPOkHhb_?f;xiW2qWWbidZ0m)H`|ErgVxbTdm`lP9U-4SC+?fpr%O9SXi3w)je2F$_FeGee+kVAd5dZ{_W|Rs z+(Fj6-->xljIQ6=j_U*I7mS+Co3Tmp<%3U-;erlL;`zvKZ%LRpw?)0og=yaN$GHCGwe_T~xBsSUJ}KIh zQhQPxpK%rj?2y{^lvB~(Xr=>pz83Td;0bMe*Yn?I(pfHo+pbM$+p}*fN-}HnK$Fia zxh){TU-ss{$k4?~)at5|Fc$&(=DHd!mU(!Tu{KF4wz3Gl?7Y&F@(>F9RZY}6=D2My z!lGqKWABaZ@NehV33W&2CmE*E-L*wxwp+bXBh3A!e4mu8si1y^lf8%swWR{;7=8+ruvq;qJ!7 z0ZSZ=m|1JxRv4VywteB6mxl&Dxc?{8eB2zQ`SWHy&Hpy3hVytWtF?U@c${0ZCaft4 zT8};JdrTO8$7B?|&~igHI^cV6A*XdM>eOlIY)!x&t#u&m%#s2)cX3l4XLSW8Y5teo zVTIHfPejdgn0>9kD-yP6MN)XGC1UV_`+bkvcI1iuZNmUj6C}uZ^s?)kjmS9qv_uC0 zlQbWf78T}JX!I3z(81>ORifRHp&f5Zo<~aON4=K0s@U?8y@qSL36va~*xu%fdvNzI zBN+w|Jm$x@p6*?N-H@oK^kE!&UWdPDcybVt4S93LKy$d@b!On=dBH7npXL+4oN&w7 z=eRbUo~V+{Vqpye_-xsU=Lv+Uo*b2|z)XNHgkRvq|GEr}_vz6t z!FW8r?0geujm!fGJ`uNLRk{b~hPl7$_b>)pxaOt|ai|RkyH`0=z_sXqE zgAQ(&CO&(uGg#nQI8ooUmJB^>-LN%-?J2U8L(VwPzHjcK|K)N}=sNV}I@>=EJl!0C zmzibo#Pt4G;Q2&q4H{Lm&|r7_{8L9g(bD6F^v#57ebpns}jX4QjC6Rln9X^!-GXv)PltrRqsD=BwsERu}6YM)~w6l zXoVzcb!f@n+K60Nx+S1+$pCqKQ)~TvN?l|H$4RHxvFeC99%VnNKdOvLcOpmH{^v5A zzxBVXX0K2=@d_lGNAh6 zt>bgSP(p!VAMYcCW*==z+@WD4A+m3mb=ga7y6l33<}YTUV*a<6*%A@V6g{anFdOKJ zQ64u1Uu~*#&ppEriCD4cm}<-8ezS7_m$rQR33Z&xiM3ZJOZEO3ic4rksq|d@iaw*Q zV9UPvGH{N&PPevH8&&Dp%(RYx{=-2wh6F-IUN<^{hJiRJ$EL!cSfM{Vq+lnTnGL>1 z=@kALO@+PDNV;yGV8LO{%uUq;&{4iOY=7Tt z&@fhUq7cQlpFZMo;Lhjy^T6O(vz0{TtAcCC`TOp>tS$I|?VSlU6@UN7uU$e4Wobbo zOUW*o@7PNyL|RmeNQCSXNw!p!Eu}1xq-=?b%-EGIN!ILJmMj%Xss1zli08LF&r>n| zJLkFg-1BwkUUTovxvzPDw(sYQY?4T9+WZB1D09BQ=5RMcEA;$ysNZ{HpkMS_R9!i1 z(SJ1UKg^=i|Lo+Osj57>8K(WJSx|bsDvx7Q>1igx{28UktMW*l0h2ATCyR7uD1BVI zN;o?PZ&{sO=^4W-uq;Q@;se|dVPlI(rUVS~MJ95E^ceIVks5KXqyA`D;6wdlEXwa@ zFSmq+sN$TR%2Yl%2SD)>k7Gyn?E*t$V%eNP9kjPwHED&agJwP6E%&okzybQI8+RmC z0R1D^p-tH%geX0Eb`jKUfUB*$;hG+|mbnP1~c%DRHTOlGuQN=UGbp};n%&KWzc&U86J}6 zRA{LJ$>uVtE?M(>cV`m@~}V#UW9erxkye-6xhhS@gn|A75p%=>Vy7AH#6 zVwgIpDM`%Q4h2I^YM~hPtZJ!2c^LHPoUaGZ#}xhW<(tQMVUUmJjb@A}GPwo` zQhFA((?#AFcY=gN^4qs8(*ryA-5J}*s03Cx)s)ki05Ghqm8Z=h4J`dh7FK)$z@y*7 
zxHp}f5a`9j-e&VQ>rE^DF|+u9kskPx>5xVB`5ZX?LXZL1>jAjJGvG#Z9O>IN;yz-A3E^=`rprlzID>f#~-r=J;h`*4Tc$0ZGQ%OO!nnTuP`&;>8y&o z{*7LaKOVXN(-D?*hJzEg+OcGV(P`Gq`+zud(IPi zV&HIaa^>=uYk-H}!unFHg@izVU+gZq*p=!0d@@kn%jD7vzc0*KJTle*=USC@me3Bu zx%)g7*yLWp9jR5O)rG`D|0b^6&w1hla8<0C$Gf5x5qj+9({w5nkwl-=EPOVCFiN|7 z3wG8bPl70tUk|?|272dMX_1xtX@5JPe*PTz^s=+<)8o+p8=szBFl$c`2645Xze?a< zM|5Y?o(5J7dJ$WBsxrKbj=R!p8tiqQN9R00sRyD5#JJ}j#x4S4k11E-0lptR-0wOs zo{XoNs&uqLQ0|K__I#SaJH$Hc!k85B2$X2sQobH6b)p%ezqrF2>IO@)EQov4DV z>FtF0^krJLUW`f8Ir=br+Q{Rtd*P1fJ&OnW{_4{^AxC+%4-)Is2XVP&UfcT~EHOE) zF|w=)NeN7LR;(C8q#lMn5fOd>-ZvFAG@fb#0`sX;fkO>3&?`l<+*aj8Y7;-XCOJbRQcUQoLeOUX^7g=|@kL4B4!+eQY1ym^^5T8Lc)S~yRgU4(6a zYD!_1o?Al9lGVCrmZ%D902j8`)f==nf;awpO%boeK+^%XW!^i5K>VeeJQiyKaKoVd zec=Qb=(H69;?Wd@D8240*@0spwZ6gKOh-T7Bi;uGCJZj>c5Z{4E$@>Hi4DR+jRJ0_ z&tAdG0*ggGMu=7VqN7gpo8ONDH&LG62>Av?@6$xZHQP&wY6UlKSivxIWUIg3wq;dF zv=?W+^rd!Upg-pkP{ET#@!RF;=g&c&UUtSro7u?IgYaLVL^ae%j&<@+SgvlU%J_L>ye@c~y8Gw!Kf;7&%7}^xa1M?Ctjz z!PYpyCh!^pt3!>~M;5IH$k-Kbc5iMVD_zQSVFNe#FzW7M9yJf7w4LiT;T|Fc`gv<9 z=*}2SPqc|KIcq@n1cSa;U(k5_AT09sPNaT#2ONE}FLukP0hl5yuG(@xvCy-%=W&wv zy#{un8bb$6s}aA2y783K{YcJ|FIT!HFCwK5C)Ag5Rw8yMCBO6uwh{wR1l4JQ3bIR7`6EhOcbK}nWat)*mEEC7zbK{pkK-0?`T&b0rs-^yR$^E1qW>(b=L-PfH=Syf~c$j zDhqvAzk9+6QdC;C_idyE5)Wix%adh^@o-(s0$p^cjEyM+27SXJwm1D4)VsC1 zZ&BcoZ+KuYx1bQ^HI=fJ!pwV%>_f@8;{V{`e%E=?Pz<$x(7O(#$7Y>ATC*BFyR0pu zAI%D6dJM0$d9nkm(yK>{s;B`=#6oJ>rDT9j)?%5EJa$Xa}x}?S`aBtpDXT$abSay zC&$r`Pe9OyDg z@>)O#wR}#YGlD_AiXxu12HU<*BYh7J{l3uaC(nu9MvM25?H0gZuZr4$6*mzBJrK0t zQ0*@ST+f}UIw`;l=zUE!?Wz|73qw{d?lczQl$~4P@_`IE74k1drN}{;m-P6;z9K@P zuQH1kVtl{(8{9>O<;r5?{jf~T*>~cv{tCUg&lXWmH)5f0;lBOyWmXp$wp(`f(b!|e ztWsdHvq&RSay-KO$#5)iPwsa*Qd10`oHk2x^vEX$`ojlv(~IA}`LC4z|69i#(j@p( zRe2uYyR#Yl{IgF_F`1{IEua2tt{%(NlSnDw`s4XzIZOoac>pWYFU?HV(L#eH9etWG zs2_T^#o<$GS1~QEF2_bDpT*W#B_6yr)VpIF_WTLAjf3y~M}$Bhp)aMw=*S0L8JT?w z4Y@#~1X8JB!~i1p3dJzwQi1sw<*Rb!$borS&B8#+caZ!%#Q~a|&k2Fv({M+4cI~uz zf9PR;e=grEII{R9Ll-g#8`YiImSp}CZj3*oTTD3!Z&!}CyL^aP=r0PnfisaEV0^bQ 
z?P{B1MBPKKk14$gsm^G9ve5brGNEQky_Gr#S!AQc{vx=Xn8ujLHtty^p7!_9Pc^cf zs=NOY@Ml6lbpgSl$HDm7-uL7)`rq?TPO+u@$ys*fGZwX6Jh^>p{E^=GBvRpI|9i~5 z?`-+H+Ks!$wAv@+^HE`{4KrMIIlPGXGR%7{mb2bH3l@D-da?`yTzcKx?%;tk)ZF~O zL|u>bqiSX^nxbwofFelc}9>#^T+C`!kqUf-dti^b#o3Q`7^;YY#(o6Ee5`@xn{! zSFB+D_a^>S#2p8Jw$%P-XWB8(F!5Wk#}e#ACa(uSHPeoB(!~GTxpMNeesrc?iTOs2 zuS$=b_=gacKU(m0D}D>+(YBMP=@g?8%IbV4FnDh(Y}ZS|E&>kQ$(~-sVg3kF<52Hh z%&btWTMiph4yPCFy^UJZ@Mklgs~|+R`WrV?c{?lztp$vmz4q|~j|IF}!ZDNn`ZiIs z^ByGN<5XPs+;|LnDs|Ft-YpVHG-!IdbLS`_YM<87SGlKd=Qo%KoGdMlo_+=Qz74G>dhNzmlbHo-o44ebG8QNcf({B!%=rfzJHi|>_jwE^~0vKSqyJY-yYmnrY!I>gV+ z=v6idK@Q|NjJ{OJL^9aVU)6mcG55=V;pVWRf{Ce_9@Mo@e z^Rp&C`J``;AM?_m?R}4%_?X&%_3d%B=W6?7rJG6U=eZA=ZcV@PkQEiyZdC9=*ON&d zO~#;KvuA#1EC#!capUei*vRtk36ph00qUiGf)|hIpIf^+HAp3&5a>g~s4ayumH;CU zqcO$SCE$5b!1D7iK0!6Xi@06a^g-9kwjX`BxF1>%8Cb4dyFbG8yWNUcu$dEe@X7nuh$6F`MhUZwp-z3q5VTuU@%FBk+&W-n)%97l~*7 zWV-y;9poCk=I$EdFi-;(omBIFj6`sM7Q0}H5Ci?0t0x&p>iYg;FmEdIK3lo^*_>WG zm8<`e_SaMU{JW|=`WY6$>{R7_QVQ0eZhvj;_A-D0OWTcEs9$Cq_Cz(ppJ_*6(5tQ* zm%|Hijt|7zoyNuzw*3c&1Q5?8+2!6HOWx~T@5SsUD$f$HjNXZuf!fL2@s z)%=0?kjRKb>f7|YE^MvC~w^uj#e(#6JggRrd3JsB;C*g5q*ey&pNFY3VyolB>DI}xNH zEw@MCstT!Q?xbwV@I*#0U%vQ?`5tm_l-ICH#+ewuo=hWR#D%}-AC(^e0aN{i+W%SU zXKTTWDW%8veE@sVO)g_iISWdUUks;Qg%v-rd^PQ#l^$QKp2UR*@n8~7hpKz(8@XF?5h=wVl9pLHUg_pb^Y{qFGysplX6w2A zr`2A=czIM>!XO-|pE9M;i- zwH<5JJRS{0zA7i**(NkX@d+)Q=W80F;Y7ENV#ij9{yLM{!i9~5s6G20?*|H@(`~c1 zs23QKi4VZttK(jQzbVq;QVQ+a#Yt=wU${jtE>Ni&s4q;@BR}*239mZrRQ%u;!UI<8 zg{E8pm&84TCs@)!sqclGy1jYCr0yCvy?H?!-0?rZ{ulW5F(T}z^ka*p#hAklVz#-V zDYsy6I}B>G9(#9dyf~+%BwQXFjW;%Yyp&yljxuN0;}QJZ6jpb|a)|NkY2T4zZzoUz zi$fOC>4p^G7<`WBT6{mWX=PgL3W{22(8GNdwO%bG*XK22Z&Cvp>1I^0iIo!qJ^%du zZFgL!n`ta|-6PAVI{*vDmF#w@nuh+_8ve1c)kD9;um2(?$>6L~1s01|e>_cn57{xi zA)bxxCerLCkZ+#i51xPgz}`-li_B~Dd!<^NMvPyd;+-4j8PoQU&`ZgrO22f5+RqB~Ulo2*?I)oZj@>*TZ*q--B z`{o!4Us>F*kF*WLVPBBPd+G*DDr&LpvDpIbeLr}(-*sMmHQo=MEg=Ijj2RtS9ABUd zy8C6Bi@TxwT3Z9v`kp~&wUnL>L_dQ#j7{2@3oD_^$HhJ*mJpM#S1z#OJ)=1dy^(*g 
z&9LnN9Keez1q=+pvh4mE;i7+roml84;hH7B4wXPM|2ga7rn|^!ZhfqHMjW!%ts*?8 z5<$jKlB~NwoB?h8+a!d8Qiy?GP2Ok^Pif2ll>QeeJ;j3U-TfLR(&ifU5;Vb1=$?vV zj(r%+#uxqW7~reha7y2k#6~Lkg?s$t0#u#%`EDG3Hy5qvzJbIjy;mV`#Tv;^kosNG zM=vPGp$v<<9inJEq!_`J_jrl9~LNhBtEtR1T>Ds?s|C#*`FvX^XyzK z;={?>ETZTOA~w^row%2Us3d(>uE|O!2KrA%izK$+YW!^@_;hm+!RMUi2>$eJyMK+~ z7liOR+_Wr$?{5HzJG%qgy;62q4}-otaOq||N3RvWwon5bZl+9f+&b$;? zOo-K)Ry*xPDk0DZEa7^8IA-UABYFbs94xP4WObva4CI;ydLB;qszY^e4Ce zTJyf_$~M)^vBzLvYMNFybu#i;U~Q&KO%$@XuG%BzTm~|JWWs^_tp|8kAZNm}{U$L= zpLoKYTvM<1xA*DsTjBTaY{owS&Zj4zb)Vi^fkZm=ZPi*kM`xS6osVHMhR6Oddyb;F zK3o*XqV;fZ-mNuwDsSV>T|9WtJk|T^ML6^p#^o7>sZlVeb@(o#X4|nRyuZ7PXBa9S zvIaVeeURru0cq{p=a9aYeS+@Q9O!tA&TvX}4z%Jtc`pMw3R&niUeQ^fMhNt#cP$iU z`Zd2nT|%;n;q4xP9bU+C`{@kA4n!w4gVqk z>>Bm6ovWuOo6gna<~`X@u2C;LxsRz_J$~Hy_Z9i6T)n*fw_JVBStSxlLh*hTUX%X^ z_zU)!3F7Iu2{NwTlAP8kGljVWjDg?2lkx$;wtslLb`=IZ98OiNqMwlu`v_B5^EEONp_o+y{F6}Ckg31{ucBdm}Ka!z3Z*1mH_37Dr5M*3wnBx%6;+2R_Mg`%=l9BQfPv+o#r~ofOHxC+TT@YK<4Y% ze4C`wAQth0C-3&$Cq(VJINroj)=i=W`o`$4Ug=g&h2~CjRx$ zUrzU!=7X~kcTtka4J0IF?NYr>mk`sT9W+@jDab($A%4j$7a-bzDn>Fz6Jz2JtQI?H zD^&SEzy25a^;t!wT0@6|QMKDNTVC!pMzsX*cZy>$TgvNrU?aM{qOu)tmRYfDtpN_b z-m{f{ACDEF7aADX5(RyDg8wr$Qaf~uf#bAy&J<%7J2>B;{$t3DDs)k!s?n{62hsz)@W~+BUn`29D89nk{IZ- zE|kjeqpJ8Vm40S0Z?^n;q1pE9{{{Mg_3OPfTz6W&ehg=cCkz9<1LzTd=Haz?fllm> zo7jk%`+k+2*cegADu+X#YQEKd-NH!t8L2E4pMdj&hx=XU<*53G^g*{KNVSP`hqhS_ zR6u#|YH&jyR2tNGg6(!9w0B=G)Mk(baj2zyt~9*~<%k=|D{@5;(=X5QYL@=WY3NyK z5^SGu9E2kkv`>b|48n(fUMXeSO+$ZPiCfU|2(f;BqIuLik5UxOPYO>E_;L+#b9#EL zAte|QOp|k68FCBBNEti3%h>^J;MX}8%oIXQ1fMxiBa)e>^tXiGmSXnJd(y0%_s5j} z$-F-qkBH*wxAxZS6-(U>(Q*ztdqE8PtID60uo1lxRdpOs(+N^SD{$z^A2XzOc%-7~ zksV*>K`(vQdMjr~6;y6N}w_BGi-Lz>M`l^c4Z$}IU;ngI&T^}D{qd{_%+3{bi^xaV` zeAJH~f{na78xlpMk%@IS7klpoBI-28%O%p{kypU>oa|#85G^;)la?ZE?vS74Hf0B*!R=?KTt~p z1QY-O00;oEV}nhoQxZGK9RL7EGywo10000)VRT_kZ((F*a!qe!Wo%qbZ((F*a$HDd zVPs`;E_8Twon2jXTQ`z@)>iF*2`OokdCb`BDOV-2ckJvwrf7?fSu0Tub^D;vXf(bXjh{B3wvRV|KRmyDd;4(nkL~B}{_~$VKR*1i*?--IsTtKffFfXYls;+neq7;rRaRVRPKBZvL^^|KD!^ 
z`;o730n_s8AoaeMRXd4JmOPw!toJ?@TQUvEzRr=NHGm-ELTf8V~{55JE$uXo4K z&wp$W+lS_Jo3J7Myg7aTdZvHbJ(NNMjeLhR1-`Gr_wOO2?`~mbz%(nA{c5$o`R|wQ z;m@D8e{3ITp}t?=yt-88X;A0zd(`9j-^blkrJczN#+teCJm-1&_m}6>n{$Oe{PyGi z^VjFY`SXQbDtaQl-G4Y8wwtF3^7{Gn%hNfNH;3&%Uy1>q6>Gn(E{5=Gvwzq&`!~biU!D(77Y-LG2SWSr1@Ys<@nRky|NOR{iZB%D zr|tgt)7RMoG3 zhR4!)UmcSh(f)UCa6{6--UdFQ8Tq_`RA<=x?O{;CrGp*x(0(i0oK#pD=H<)Ot9|&k z*$+(HPZxW6{?=07?%y3A&RN(rHN{Q(;oO~`yv*kBI2q2?kQ*lFQFr(B@>B>PcK;p= zezB$UX-w9Ab%0A1^n@>8j@whu_IMG>eEqL~Z9bod$zEHAiqMs=rnuU9{d}H*^{K{> ztODJ1EzM0n41V*3(_vDQMFf|1`N*WRbeGM?!^_r9UlEvQV0ufBHAIDhn(BzKn32YO zOvBE0$KEn!>>nX%<&oAT*+{DgvZZ;QlenMHGm_Kku={kLM=xDv_o;M?=Ye7Sd+B`_ zRQIW%W+e?n;s~`vYftxw=9yeuYsA{a;6dADF?>H4ZSECavgtd0CT;JGF`V7o^Rn=~ zc0HZfu|ulK3M!2wOkFDovlcM70_IM@+zS|`>?;AY7BII0=1#!e`s5gTH zb!Tv({tOP(p}~QAbjqvg1=OcgUJVE8)hVxryLY-8)-F|Ssb>{i>RQE?`c|=}&Q)xw zcU=#<&Px63deE`O4s<=}*uWm#cAvoqcH#B|8GK+LZoiPh2X^B26FL3l@PW;^{YOqe zIsCn|7j)UUE=%zh-4JSE)dIBo4(3}6g3P))5Sd?bb{GiDz1GUT?zbQoinkHbP}z<` z4GcF7I#!!)N=>ZgnsC`+pj#8u(Mebr6VOxRNQ3pU(8C&;p<12Pa9b;r%&cA} z4nfV-u&8bbO9n-!Fn5r0Uu`x{o4(ZsVzb@cZ8uId1VOFVgo2xj13@R7EeoP zfx^0lmec};l?yGg1&VXnrQ|YLoXsy9kip`7e$j*sPR!IKjrcZ^N|2b$O4{*lC6%CP zi49K9K}ScI$GP~h!obmJ@PpHzt=j#`YS!p~UpwEYQPexVzk{rb+(gXH=stNZLh`nTY=EZsqibcE(rFVBF(E-+}2z)k(dt38(ES@e49v42a^dBn|G3ie4FVvPin^-chlqo z5L{ZK!Fq<-8Nxc6LU>c_-Z^g_AXaBY;3^mZ3J1lPP(eA;_^d*ZCspIVN6A<#Q({aF z^nr5kAY2nb3WrBO#Wee^(`PV^1V-3mME}3pUah-NJ4a~zeK5(NVE9uMIwzr%8)Z`C zdi~frQb?iVVGh{yhKR`o+AY(h{%atjm$D_|Ij^6(qJ-0B%t4W%Xatpgri4`#=pbM; zO{K9Mp+36VIOnA3#ZKARY|6d{Q}(qpJ-6T16>~UU?+73T83UG-6oAKpg#n|N^kn@( zh`}g=<###7p3X6*E=%iKh3coJ*UJY#J?HP`Z?_9#4=*f@%T3DE)lIh%5_y#oEm=-N zc2Fi!X$T7?6xE0MN~bddNa9rYGh}HdfbJB~y$|x1U?LEd(pz$gKu}t5i6sI{sT0fb z1la2zXduuAbJYBKG+dy+nw%njBH^efmKX{W`Sm5a`)&$< zalighZGJlb8%OLW**Vj7kRUN~tqZWVhg}C{j&=>S^S2|ki`!k7&GG%?%VG0)*D(oqwl3)B&A$|06^yd0ceNDR3XaS7@_5?4-5*cI73bsg zY4cb#ws}0e3mga%fWXzj7_#m${mxoPUOHdnRzy~)S7QS=NYgW@*uW{W!3MsO4OVcG 
zRCEFHg2SYu6NnewCl%d5yy#46bd*gax>g!pWz&d`mPTjUGzquMjP9tNU6U^1iCXI4;|~ZWln2WIith<8o2~3)wA~KF4dCj-HG7oq#yy;O+rd%#t#g!aw_fV=obI@H$+bJ<1q9Mg7M8(LdH}$! zWyyh`wR3vmPtM{AvDS(q22Bxb$IT`L@dgPs1iLe5WXM#3l*yxqn#iMJyF#YIy zPCDO-(*y@}(>m^?D5@Y#FXUf)#v%eQH%N~iYu!+PP=-hkz^hikxa{hMz-aN11eORR ztzRLG+}zK%eK^3fBiMbqwc!8@s{2%LMfa{H-goPbIN{BTW8PHqzLSN>%#Zu%a5`Rv z%ia#T>k!VA~4iQh@rX1K;XOt8kob#L;DG1>x; zt3a~{$W?}F3rwya%|@UAbdhF?jxd2YxhX8d1is{B!wH)MtX`e+YB+Ic0Qxzz_NijUivV+d zt61?Gz+CUT9(0|F`y+7j$J)292OXQ(8&Pb+ZT}g3;wpqr? zA^}30YXlQ8yhJbny({Jf`t@^K1eE@W=lkx=YyZz&z%^RX`kM-yCY(KqqXfqb*8}SVZE$sL}QC(S?q#NG5)?M}vvRc+oDu zF=(_;t3x9?g=*p>26UppDf%BK#dbh=_<<80pc_5W!4(!7nPgV^=LEcD`RhoEoE+;j z;DsYSMtT*54D<`)g%f0~XC66RUtfZ_5Dmh;J3SHI#iw(n+Afm3{Vn&IQ-GL{vAZrT zj${`x^txCv)Eh_;Ewoi+5HP)2V5-OczF`HN{KDnJElz7wI=IuN)!8KHpupdz^pqd5SZ0mWjLp3wuUI%tA%n(%T z%{&DV6+@zY$V{h);(2k8G&45>pjo0>Ta2S@&E%5+6b*;`f_XC4v?Oe(aW-2u_o+ zR+(KdVPABtI>Qch0rBcAJJ1Qlt26CDHxQqhZ8MJr#cuK;neu5ev+i1F`H)Pxxu&Q) zYG>DlWV$Y0b_Oo$lG>r?6S0m@0(sa#bN+%s$^o8y`XqV*0Wf*z;$eJEvy@;!)rx~ zF33bDhZn9_DS9CjnjBudeq(f$pHS!1h}V;hzVZ|5e44~{l#K4Eon1GfzAjzjI!{KI z)DB%Ive18soMaZF>pBEZGBa*I>gk@g2QdxGDpEb&({>*wHyQohOW%2*I)JW28Fc1! 
zPv3D^bdova%I!8Ry2+);p6=!L85TX|Qe1Pom)~KKbVlqdpX!>^z5LF?qC*?E(!S(g zWO7~FYG0BeB+o+ZNs9XoH4=ELR;uiIX)qB}G>X%RV$y4AEVW z1fH_~fa?)k7)=P`dbIXkkJ^w+9f7$XnJ`PuMgv|)+4MdFN5ScKGz9-9iBk|@Cdd_g zHD&nH5HC@=fg6_2QwgM^2$9i6kVCv4wX7qT9~G&rvL`nc6{)PSCwCNWy-CXtU3fBR zb!AO;0bO-6=**gW(M#4PESn;;nqG92bqUL+$gZO$eGz-Q#wF~URN3{jq(5SB?yS!` zOV~jjx{44!WRBo@#!y~p9z$GgLVgMP%*>4dR975LcN$xFHW+|1&NUL>j6Pk5lz6{7 znEiPE3)HtolG96=OL+8GxGGOsXh(k$w>bcl z6y;bxXp1>AFpu$(I%q3!tK++<63VT-@U@`$DWJOmao$@A5a)q3K-``Y!DB%Iphyus z78C%A3?v8$(T7EOImk-`>o^p?)0eHDRqbF%$v~rmwRfEvwg6XDg>2cg`iTd z5G2WTr-1H#P`$XPCA2_LN(%%fwLnl>3j`&$z*1^qYX~__mTtP_*PNy_}(a{2s2_qJe#k)hfMn6A$Y%H+SS7Wd;M{xO(Jt zGUASor<{(*+c|ssz6T}a!FJA@jBl{~Jti1Tp&oh+A4-uvJZZvbIYJ1Ti#5Iz3X0fB z;~Sx%u)TC*2{C^K5VoLBCQ`S7zInkaGU&5y!_JQ@` zM`(X#*GoYu7GDl_6qI1`gIGe$VgV#r{2-HJbH2WiBZG=mE#B1~7E=s?ECNM~A7li# z1?p$`8iJXXbX2~(Dzvc0o0iX3T5)rMoa5Px;VxaCxYc0pZbr{3gn&Jfm`(4t%&%(L zXXU!o;3h}wx@XkS56#SrZ{48?r`yModj59-e@?HLk{&&p!PIgnxTsIB~S*}oWI!SOY0xKlGFsfsNcNwAX7QEL8^q}i3IKhI|7wbXC2FF^R z9>N15#U>zLc-K*}4Tu-sc~oo!;)QoT6D_~LB(!}esXy6oY2@%{z7^_jrdNa zv8Vin^n4oeJ<)Ler1VF5z2(!0XRpTA@_LgGB{Mch`>*RR@?V!O@$O{C25J9xoj$Lt zhr~H|#wKb1cbz`3OIPeu4ge(V7B4rU6S0C4FVh&#p0hT8jA`=(UcZy07%WTr=pjI8bryL__x(km!jP;Ikk-_;=~{mrYSZ5oOz7Hbog%vh6RM zqP)I!P5ZkhRiHE2{;o+C=nl5OYf=>*+IiG{G|`$KKBITG!s*)d;sk}*d&~WOq5JS! 
zSA;n0A-vWVA@0tI;Nn*RC~^iDzXCv!k^+j5%FQqZ<*C)Xgr%T7^;(ppVqbXS{+IxA z)a^?if;_c1H=HP$*za(+iIR!s7BV*5Niuc^jZv|+PGVtdV5dlF=3ZC`3Ct5Iu!C^J z1d4QojV`f82yTp%r0$~l9$jh!V?m3NF15iCq2r}peKUCi+?~u1IFe3k2yUZhOkODo=Y$HQ~Jp5lGCP8$>dd%)lSUfPb&-~Udg zWpn=gjufYpE`64Z(Q$`9i%0KJQ8Mn$XW0n3h~_w48nf^&F(Ddo;xr~=^TV>#regEM zwAy5PS}=}5)A3}Bub0J`U<^a1+&J zzVNN}$ujSG?|m;>vof>R%{1tt$k>xHuNE(>f`g!)$UX&X!C7uWp$0@b zZGn3uqVR$R;fb0)3`n)7o?0kRKf^kj1cCaQDO8!l@=7R8OX%IPR;%a83{k|qd$3nE zA`(sL_`$5E3xOp?(9>%yoqR1HQKkZu{5r4w_hY$HG)EVfY7EP>Pi9P)1Pr&d!@4dG zrF+UQ8>2^?#;Qw5qCCd~h%+|U^c`EikYCvgJxRCZb-otguMK7$!HfsZZzZ(Q9W)(B zR&J>&ecdoRCzAef>i%)nSh0?E_TlVJ0+H@|ME%m-s43D{+S#3(_uml;m_-m}U3Z ze0|&`&>~4A%p;fiIa_JM!g}LtIuO0vRZl@iw=LFHPfkX+CE*EsUd2Hvxa_@p1t^@0 zwHUl#cH^X_kjNrQF2aBulFC#94<`+{^a5%qxT`c)|7vc#8WUwnHtKMxKqZz!P%rue za!P9b&KxPnlgw8&E{8_?fwtGZZo|V$BhAg{`>cmp7a3pQeDdn!gAb(?e!zaT`im$mF7Jm9{;@B}Mxn^@1xJ&0|9RaI1T zvU_bz{q+jy?jU8ibmpQ0B)K7vO3Yk)MwgQ|UV?aD_|T_vTDD$>X7j=1+chugSV-MMb0YS0M_zIA?5XJiT*ygTyY39> z^J5`0!Ao(S@CR{4qn3^8kGld=r2?lYZl`2J!npkxZ~G0!-HvY5AG-5pe+`^=?DjIi zVL;<#0#$;0F)S4T!Inv5oU*=0F05s62=qEwWgW}?+MT?3PpqsZEX4>!!c~{c+;9?s zBz#R`bTxEvdeZPtB$w2djb;Of+4FtH%AH5nY$)aGs|uTH*nymc3Q%9O#`RU1LEic< z4#n$*+P+WPf{#Z;1+SSRki99q^;ysvMS+kWxfR)E?pnmhypGc~zSFg;MK!F>o!0b{ zTV;0=R#SQF;YXMJr~;U7p!g0X=w04r zHwt3q$}&eUr&2+GEgF_(deI#^G3vdKS-F2;ES8J%-h+~9VVC9VgcP5zIPq6aQn*lt z!Hu_+yGD5Ru~#Ig()WVj6Sl|l!y6YoH@lf;u$zITL@A9vA^4@K~eF7f>7`sl?BIpf_nxuGNV zgP;tr!)Y%qCeni<2QB3)rRymZ>*CmnuKWQP&w4cHhIz3{ot>NJForD%&|u6)-J zP)OM>nSwN@rrIR3`Y|o?k~Lz)sYO)^TK&}+_gDvhLu;5+Xc5b6oB0Q6kUfH51(cIb zHcrC{mx~io@IMG>;AY|MZi9Anx+5$sY^sE`vG8>C(6n%Y+F5(K?`&JRTyVEUSRu~C z>6u$P0L1QDS0SO6h~Q5xL|pkiTwG<`N>a>XZh_?T@PtBRY0CQHc#FKRDg$*E*9V z-AmR}23(MRk0IKj^pB|M{}?T#VYyNzG2sIk0Fg18v567%>BwE9cnE;0tJ>>ARaEV- ztGOsxvYg|5NoGI*DhuacOw4;lgj&p9dQP|y&^ynN)@(}e@Ba+XO79e6@DR?rF`SjW ze{1DWZzft!UvbywI2;u*dHLivC&Pr^JN0eGnK#W$1!PCB+E$8GBLK4ecQDb86o zzxXVD;3%#q-~KSywkgE(;MVsHf`-pd?`mVbnT!~@=Opw>1Cvq}ZJ!PEA|Gw0+38Cx 
zc-L0@c+>eo&4av-KGu;R`X=D~K#z&twGmAG2g)~`6!$J(bem)FqaPTWkYL+;azpDP ztt2+;p}+#>`bmMs0?Fz*rvkDUHzHr!OqJ^vbyT)`dAF{HjbC-+kam41VkO%cS}XEO zw%x-ouRCgiWAWO1l3yL5WnG~n1t$qVP8!L-kp^ap8+jL`o2en%%EHpq(ZbEg)Bx$` zX0WHvQ^5ggA~5nYcBKG*94e4cA^pO&vmHbz?f$8l%%M8iRV^!x9+ za^vN1Wdlo#J%+O&Pnsoj((m1lCm^@!&H1=28;)tR>ZX^?!*s$wqGGwNz8Q-QiRlof zD(5)C#V~M*BDhpHTxK0E$8?X-g&=tL{L^*f@nUBkmdQ++8U-JZkj-V~1EI-GXdYwY zA)dX%X;M?WI6DP#5#xWc_os8@_9$pK3)8oMd%&LERFIiocX#G_D=^e$6EOh}zZr3> zPMfJcf|Fq#(M&Q2iHbUgbkIUR^*CNWzcR2 zpO2^olic;r+q+#o{w0Eo-?ERbKkT14RhaU4Q^l@|GQSSImXx>G$lD;?BJQCKWwI{6 z&mp1J_qHQWShzE`^QR}Y)0QrRqDcQs!0e3O7*LJIrQej zp=GCyDdc9xVPahbCK<-9bvNteX&-wa!fEYKi2(gZ2ce?%hi1MB-wRg{ug5OyijGVn zhIc2#PLP3sU%jW1$YqXK zu0mrh;s5dVnWwI4Lby}kMmjx@^1N*Shv>zu2$ zh7?Qa5(LQYJ9N19i0oL90YACRdG0+GBl zivc^x8PO5kx+j)8!uFgeUkn~~iD{=6ZQGU{1KTF@WT)XD4mRy$htfbcFYoR*NY>J+ z)-YH1$4JMpAAtA$uPark#P(V>7GkT_8`Nd+#3>~RA#KoOR)+Dc8^JvhlA(qwN=v}QBwthe!Wo%-oA8l=b(7tV(ZG@b!wl~n7QSwd-h`(yxL9#c3BY&t39r$CkC3F_F0f);$KA{w&dX* z#)++t6P_OTi_6+}&UVfyHD?>NDID$OWZ`Um8l$6QwY6J>L2X!7{|6KKH^)c9R3*v8 zZm!xXMZlqXqJs9vou0aY?b_EiaMSA1k2_ddzm1Y+j3U(tTk-%4@lfd9(BAP4sA(uz z&8~YQakDmBOLQYjYn85=(%mmpBCjT4+py$enaWJOOWeES2|@9-!FmY`g3CO81R-N; z{j(4HKJ>`k*9Mb(_eZhxaKh=E%kr-fL9YjmaD2Aa3VaxEll`g z=%F2vr%4Pa&Z!{!UOGUvV!m&k+8S!O=PNGgdFklrbf^-2dy`c+H$XW4sFjOPNb=e{ zF}`cOg92R5?*VGvlGpODij#BM18IvF76|!2hemvNtIp+loJgDNrnq8h4&BW@{6wfF zypqz~wUCgUnA&oI$EO+>8wd!g0o1q-0Rd%JA(|vsd164^)&<;hI%dCqei=bSq?@}P z+WEA@_>_l)926BO0N_(J0f6ckgdA>5#|>$1XXSx*JBu#(0Q?&%&ludc|KPStd>np3 zeufha|1IukOu(n%SEpw*rw9K|`>R?NpNwA-o{{w<{*L_H?&8-{XHe(JzlQ#0Wrg2& z{1WD@8S|+BzM0=zgP+^ankjnt51aWd8$UIku{$w;%l`jFiLVQO>N$fpME^VVw|&D8 sre_$D*x!d%{LbJ_O$TG9HV6O!00{s902lxO090~eX>Mt5XGv~mZ(LSo za&u{KZZ33qbTia506|_Z08mQ<1QY-O00;oRV}nhL0wNH$X8-`wjR61?0000`a%E&` zV{~tFTupCeWo#~MWqIt~2|QKn`Z(}SrjW=GDnsU>GG)wqkSSB4q(PCXGOLgVWDcc5 z14*W&!4OhpiOgb~qREtmD3X%GfA4ebavJWvC#>H+_w!$$v(C16wXD7N{;ubFpZ8tw z($~Z3uUFU5Bgc{9h%50w;zASp=MOYtQ;&bXjANX7PCNBnbEA=l2F^&8lJE~H2&<;W 
zzWp0rt*wnL9PO-KH9V{>r~cwEmddH^)lWTgPCcgo>wh+D3k6}dBK|O+uvUT33Ja4L z@aG>50yE+N#N8n$tn&yf6=9v2oSdX4JQ5xz#|Z1+`DM&gzi^!J@4gV$&xDnd@ca|u zag?xr{Eh9=PyJj**wMDB?GqO|!rJ?HHa2aW#P_^->QQ;>*zgio@u_3*cNXFoC&j2z z2Zsw>i^GMh;JQ*YaJjl#IG%axIP)XwxR(7I#J|TvSf3way*Q@b27msJcJ$9Jw4-_w zLyE;|?)hskjtgfsv;pzhPfoU>e|}ZkQS())Zzc-cram{)znlGqcx)K>#V1*xEK@0> z?{h*z=c&kNobfFQ83hUZ-UU}bKL?4uOHiuQbs&+1Z;cwo1Ae1)K?SZF%rnF8n|oEk zT<%+Z(Yc9B3H{2_NhJdO9%_)Xk<0&u+cBU_uMvqBs{oPacMqj%Dq#D)iI2tHcv!|( zk(o}}0C}88)Yiu~&*{FAmSNUL)8>g9c;W;>UP~TKa*bjyW1o#N>li$rcN%zeJ1X&%5T80 zwA0eVu_tTbO4>DXxE@v6ZhFFrfQPWD>#BwRL0TmEE<>^w!srt;Na5)DGk(pWlj>tz zz+VSRLCP2X`kp{yaB*{KXFc?sEPrpbpc(DZr-+1mPf(fz9|MaBk1py9PAL)|f8%Zc z$H#`|L|!XT^>UTNn_jI*IQ1oL0e{Ypv@&>Cq*leP?X`^umymQ&teOnfV zG5kbiyHp5>a8gM|E`NjydD8Qr^KU_G%TU^;Q;+8kKJg2>xDTm3`|{@zpJECtaUn(i zzxz8#ClcaGgm^L`owXYge@QEif+$=KKrI*WX#F{}S;F z3CFfu4TpDZ}d@s%3+&p(*jA_S3LRNb5dM*x?2;aNASAG5n5A=hp zn@+xm4Z1RCno~N!)%b0S@aD&$Vt+yT!m}*!K64U3799mx?Aw_L_8uCGX62Iom2x|e zxPjaQegj#bGjKHp_m-@!5>y@UsV*)`1Usi*O~j}f*f{4!v$DR#WZy{3=$hii(T>%S zX}RTikW~X18aNAlVSEmEKfU8^m%azpg4_$67uTb&c8Y}#+-#Wh&-9Nk=cBM&pSmtR z`(>I>1W);!;D43olhFU}_P$T^X9E6b=vU7GJaM7=KFvROQR92?uiw%>i7sdaJtikP z)s`moCC|#2j#iJ*dchjy<2a8sH!ewEjM?7^s<)d}nX8&m@pB2Z98a3&{!20Km^>fr zb@+0bQN>Y6b7xk?TZl95de2SS3p@-97g3J2LEKX+rz`K$0jC>(HAN-}u-vy}r?QuP zl+Uo%)hL(f{CSL#%wGoMrpYz~+dD{to+eR3OU4|!L*Ny4Ek(Q(6BUatZ z%i!K~8Sg9Mwa~<4SD&TR2rS7*yw|usM89O(Ma~z-WA>T;0e-o0SI|bs=D&camX~YzKgrPw8CUq*6L_2d$8U2^w8W^m4y|5idqpF7N-I@S5 zeEs3U>}2r%G7m9b?TslakFB8;$4~M0+Vq7~|HPM^c24+Ly$`vCVXJP?&#G-H%e*lxzx!3Cnb+#WFA{KLD37wQT7@5Z$ zA8B_1X5ii@ZgLiy^z53;^UGmN5xwY=@kZFwqAqyr*xcivj^3-v=-4xQ6jog|>uKzI z0IY3FC-yPbzzI!_upqPh@N7i4%&e#wrPO#LC7D(-_xOoOs&jj`mde%rGOZ_~C$;6N zeSw*F%Duwh+1O9gdgZD2PFks^j>B(P?}?TBKd?Ki{_#kx z+~*=+tM@x3-ZswbsD~{_DGskKtVcPmq>tY-Yd}vdD+-H|ss|xUkIme7Zlg+pTZcLL zYG9MWc3;ihdh`tMd!w@KdX%-+Og65zeol*cUk13jW0*$ZuGgZlGfGX+v+9_rYG?zz z5Z_7Oc(DTBkF?iZWW5F6nvE=>7taB1!Sge@;pTv&aXYcf=1+$$TPZPHb#jIkFen&TEyYV=`J 
z#G9wg?!ifW7xFZ?5~#cGL;Xiup<5<7P@icm{P<`QB-t-)VLV-k=D&Y+c&%O$8WPmC z*k+_?Zi{$h9ZS@M1gn3V-cL+nCBpr0i}u7q8*$;EA$rm_iHpwE`&5_#`oBbb^H_Ca zmTKZN=-($tkt@nC9~c)Hgl zgAnEUIu(<`0hbo6A)~PwaID(uE4CV85>aDKhV~X7kwAunVyPqI{J|lGM$aL3!Lq7HpobWM%i1MsdDO#r_3|GfBI-2$sU(1ipbw_e$ZzI$2fAz*NBPr9Ha% zP$1xVCc=fc6R@*B*RKuM!30(edVS*yV^t(TQ~SkB86Q}awqRR&@eYt%sj;;_pKI;RMX$ZvQkm9z2b&bX>f}Yqy^Q5QP4OqcYuC>-#s3-l-%as<)~=svivP2A z{mfGQpS0`0gMYp^k-OIZ7Wzr**#$GtI`j@3Etk>ldeouI_LYD`JKG0l$0;+HKDkYhffuQZl z-MvjE5I!g};@fr`$^+c#Wm5>zK92)w@*@=VK5j02^iU6QeOIp3+)$p=H2PrTGO9(JOIri`+?y66cb+9Lj;grUmI(T6{bXh5{8j9XiMZe&w zLzm}O-Ha#v{qFg6yPs>!E&MljsqtTBB3k zZj_nO5fXZS$F94;?le&T#U}$EUTEL(I`Af%=^x2=wJ#kF7qY&baV2AJ;ip$#6PL;& z4kG#Ym3-2$+}TX?e|KcwbogYm03Y`qeEJ!}|DlpUE!Ce6KW>x$=CuQO^c`a}|H6lv zgfMXPW^Xng9a&z%9C)-48flYg^iI`*9Os%h6-iC7Z9y(5zs95f$Yno=Aw256a#4=v zg1Lo1Uy4>sjH?`I$cOeuFy{e_nnNnobT{Wf{aRQ1G*~w8|e_&Dx%DqCK4^HDQe7ako z0d?#|K;QZ%5X~;L={(j0t0e8~_MF28{^L7M$}+bSpuBHK+fwG6z@-yK|AICL@az*c zd)ME9ZgangON*|eI>lwGZKc=HcDd_m(PTH~7QXP+rH5qO^M4AT)Mx*Bmf(LsBzGq8 ziS7H%Q}`8T3BJzMzAhrHRI>s9kM{irRh(I&CeEo-^JnnsndVgm7UI#4ST`EuZB^*9 zk|medB-F#(v2jJ7&_?wAJ@yC8ugXB}qSjho$y!ioR*l|vACH#noeich$Dpp;&6PH}k~F+F0EYDJ#7tyWq^fWP7L+1m-W`+&wIcxASwA0bnc ze$5$~2YK3OQ|go-!D;D4lliB5U`Ll>)*WhW;O|?>X})S*95~4~B(k%G!D}&|XS}N} zgPe6vUy`)~Wl}H}PTf-v!@abM+G!2w#$C=swpOK(pSg_I^xi|rk{jI58HYz- zCW!@lU&W(+6)witislx6ifyUX@;f>3f~LJ<;>LBbpLb4Dt2zm)55$GNwTp+E13?by zM^1yeQ1iB<54Qtue{1PebOqoz;uSfPh2|6&d5l)<3W@IfhYft?3k_>KXrn>*h|zxk z&Om51$?`qCJqGxwS2R5BO92FZ!R)d6GODs&Wcfqp1oYy^eZ2=hB+M;*#)qqXniRAC zQK|m2Dfq(F-+QcEbH9uD%Jiq1P%3IhYH_^;imheq2if{PNX(DrkxM_>8l(U=kS(Y{l7bXUN$ zBMpQTz;C?m|M=LP7+9&zbt(o~YkcYFHjN`|jhXmA^?Sgs%?-{BB3=;dU{|`d z>JYfrC(#UsdV{o5#7Ccs&+M72I%32f=XHp>cpyBW>|L#P5Y41p>{QHZs8}>Xo+w?AdIR0hZXl` z8$Q`A!~dhW_aDG_0mr@zNww(S9cN> z(2Tt)6=-za)?}PB9^I6(hX1%19^H`96%ZOdxA1qYtmg|JPlB`Sw)pS584WmF-r7$J zXCVuBl!^UzBxomTFqxG4L*d?+LrFLTzy(c|25(*txMX(UG)L|^#WiA)YS)-e4#(FC zbenb@2IiMl_Lgnlu*`{O(#Y)yEUgw*p1}EnNt;wtxor!$BUibP`1OF=62Uw^3T)uN 
zIZ+xFUwayopEEGICwhamY|aK5g9zx2ttjTtK;Z6YX3z1#vuOI~O}lIoE})sN8eChb zW9AmV==EgQDqPw>4nC>4M_fo_^NCfW?I$9)vMu_#!QXe z8&SheqM9W5@(I@U6JM^P8S!?3>m%@}^x2J1d@oj^v}~4f9Jw(-`~LFn*i{Yi{>1Rs zt?qcBRp-7zvlEZrdfFc@XoE*8Rk~xLwp3Ah?cFo5{}8uLomLoh z$)zw$Q~H8Rjulq}QwYQfJU)ESLleA3qCa=4@`1O}%aJV~mdz z_dbVVgI`;Q(z!?|vTptyoSp-fOK`b`VjF?i;zsScrWVN98|(gY=PS@My9B*4y_mqK zRESi67IYH6P=&d>Tsj9g*GdXAj$MZFNgwm=ap!<@uf$eOrU>+ll<}8xn<%t$r7msH zz}dMSv3L63;g9bMQvM_O|4ox zSB*yP*v8pFz+2!emv#1d9h`b?^Dt)312DQE6Jh(Z4w{;{@0{9ShDt;f%-?T_M|CVe zki92F{oj1Mf9tr=dM6$C?~R1f96#EUl_%jwt$xm^U=S#+mRvO&69}8wKNN_cI{S#(+ys+amHY4_)mgBj4V-o-6Ww)Pa3H{Fsdow|QZYtuF%_4eIVUKp! z&=Vp)v9L#s_}`(|7h}D><_3x$w5N>SYK%v7H+wU-8r6Z&Wbc9HQFZ9{0^gxim*OF$ za;W9zNy6)&`eVtp@aVmQN2Bh0u|a=bqlBzh<|Gt)EF9x>^aqRE%PNkR`v6Bna_4Gy zcaYV}>>HwW0iIm*F#kAt@J`T_VOoa`dZsJ%)C%=%upl)0Qt*CHK&Z+$tPb#nO+I_> zAE=Loi!YTO>~>@Ub1Zvb;)y4)RJKN^*|P^z^ih-alF7RRASi0Vq-u^U$X+elc5YP= zJP)c_wBu?7NcZ?fOZbPP0@Tlpsy>~>2K~{`ZKKU@@jsv^VkcdcGd>IGNi*xrR-`BH zBYrBS@0mjXJMDVXHp!;2QcXRdUd{jg2{s5(9v7~P6T78}d(-?!HQ#&lm9MkvNJpry zV|`tOs&Cl4$wG$EXHS1iManl1y|nSo<%PZ(C`EP9{Pr`1Nbe@Em!5$KwWQK!5g_#4 z4;aMG--iu)&!oFqY0d%Q?9w|roOl#mn|cB+N4SH!l0H9E=YF`RRh_eV(gmE!_KtaP zlLYU5!`Jv+7GMI)WZNOLe6eL9;L2N4>V6pFPMdl!;_`=;hmCLSNDc;?o4Jc$-rA~Y zcvSnDqSZ6-7k;16br4(hue1x<5(u>g+Lx)OC0Qq6rLLz!*I*dNFU6g9Q0U zF+z77TB8~5QjVq{8MvOYunw+#X1@MmT`>eLdx7)ZR0l&Gd(=k#i&37I!x0?vc=WDt z#Ump(Y|vZp`n0-Yg*QlCQhs)Lz#U9qEthPncLh43SZh`>S2%jPL|o6v3~=|@qn_Or z1)ON4uYe{t=yBOWksOYEfLr8K$W?U!_KZ+1G9?Ru3>(JX*)(?WfVFjqR_qL%3W^UZ zVtx*059Zs4+9`u8W8QX-anLuy)cmsj8(@NiRjQ7PsFi!VlK9B4w&!(Hk+oWXwR z`D>AfOIQN2L4PCR^K(xpLS^#Es6|FxJ`poSBV)c77f%aF>$ zHhw%RZW}!R4S5jy@b&#B$rTl7Nb6;rjN=h#=#yKN#|fkKPOf#b2p7Y{jys=rsY~Ng zB~LZFdW_Hu4!5mzJ>(9V+w8Bu>T-ts*U!Y~RqldwBTJt%j99_xO5F?S{9O>P#U>=z z$`6I|^3hLA=`n%zp>kbWjGO?Wi>^-mgR~ob^66jhrFsz@*R>=sIT;Brk2&yh+KM9j zvLhAldC%d>L*pWl9Fz_we%|U3|FIP4Y|SVu`#2l|rQ z9P?M=QOef1HSY-Be82g2|JHG-5_e4bFz5`$le^4srP;yQZS%$`IctzOCaW>;yb0Kx zQXZ@u-vr)W@fCX7T;RR!#mK_(d6>ZZBsoMDmADE%M6Lg{Z<7O*FMMxJZ{`Oqy0^*V 
zHpD}R9`8ZU%5&grme)zKwhbN~_G9q5fi3zm*V3zJwz$KSN2*soQ(1tD?FQw|IUcZ) zb}@ftu^)U~Cbqia_7U{P)0esCrQX<}=L{Y5ZQFI`-$we$-<$&LSCO805}7Ib>FNFE zX*1^j&NhFR-m^^|hnb=8`#9dOPj~b-H1Xk6c&{48QQm|w# zTTey1BgDV&jZ2dYfJHK4ZCu&9;O}JD;oN@-==%0j_6xNGx1pHOvi;ajnkh)PWQc}r zKhSLzdPMix41&$N^A0E-fI5}@eqU!LRLgF;&dmb{(UU4h8;j!hV}qVDuB%05bNIil z-cQT`{m-iRGeJ*3Ez*-sfuAY*>D7ChX?^tMGp^nfB0Vlu?T@LlzeB$~YExloNCLWj zw0G?~Lce>9_(7i&*YRNR<+W?f1w2ac%(=+0`!*bU@l=of4IZA|U-I-72Obr=9WpS) zj1BtIyh9C$=`L8r_wj<)`E3wgS@iCUsRI0AOgu$ac7LX4y z?UPtVh6${rmO+oZPD{aL*J52(S4Vj2J4nCB${*-EvLhCLAT?P>y%;|4Oh0Pb2In$Q zl(2PUi#}w$VzI?a7wAyUIEyxH1qovTJ_9#bkQ%yOa46s?WT@r6ef#_X>Lfj!BBAGk z4fpC0KK&kFi!k>2;=U(mnbt!T1$ycm5pn7k+1 z43ECo>sZ$rhlk|Lt61w^7J~F=36`<~LZr7p{A`$^09C!c{7S|mJnGTSTW7_D4f?@E ziz}szwn5IQgiD{`MhK=}x~8~%J#E&09{)RQm^aEfVszoeHmPfU~o~4$in5r z2p0SH%0jioLTxofUk=)N0>2mZQr+LU1O-BV!GI9gBPzJ~B-1nnmkN!aAE_kWYlX?z zPo?+W#x}h#SR}6!;^zQ0N-Md@*K7vd!Tr_&E>3{!e6r9oPyt<9>YbHc?urUNJ#cp5 zqCGa~i`E~NGPx1>k4ArF4$za$GWtKTe}~?B6SA{j>H!*5eq-<|0sSXOg?)mdcz7Sl zsLmFj1!r2!_+3-*KqXwy{!FzBO%OUJR=*IB2F16w*)U*({?7R$v8*y1;p5shPV)}x zL8-7r;*r(b;5m7hnS~JP(HfT^mylIpZZ>?`b|oXk$-jHPAiWkG35%Iz5+Efjs>$kG*@C)%qJ>M`@xM|%<=k}fZ(d*V5$ozb;)V#l`2&Wlf&zSL_E z*lZsWQSI3X^x}?T+4t>0b0cR<91lPGY(-M)d_`weiT@c{{Ddtw=qt^3rVMKM{bSJ! zOhZq6oDK9W-_4OfSLn}<*9zZhDncz;DS{_cFhzhdD~>g&b?{jo1u0_jI(V92K~x^VwC;MT5xMP}{; zEfji8Yu8#S-z>xH3RY*srxSxTiceZ$@6)w4pLMWB?=rMWn0m|_Y}+T6h^rXEPEJXa zy*F%OU4{qiI!*+oSZX3|GiryXWjXFU?_h-udW%37(^wOqe_OqujY8gRwdto{YD=0+ zZ?+0~WK&&sGfnURa;YuOb}bIKRPB58S4*^QcsAhC?GG0zaNMteiuI$)7W?qfC=XJG z26*%kQ|1oDvj`ZB>q_nQ(ol}Uq7yZAcr?b;+1pS`kC0Gmsk9vpYe7Vq+TrB zd+61AxO;S`&4EQ$uy=i@6`ExLS`@05l^na#h&`znHB~LJLC-2K)B8r}=s!Br<7PR% zCxTos>*@W?AJB`P3^-?HdLQ+36wnUSz@wJWjmH)`;vq&SefdrPbg;X7<@pfdd8>v{;C3lEf$J!(xU(7* zcT3swSu?{1H$Bg@JUECmWGJ+udyEPC!F#VumfQnSi@xce=YqhMUO6wtcCW7mJlcc! 
zp3{yM(EadwQ3=ux*@9bIZi!(#_TKe5=y(d7!)hm$J~EsR;F8aq)(BY;MiG)T#SsSQ z?9w(&j1IR#gBDI49$00D4fE2^WZ#1hsR0z_$3Ixku8v1_ z)}%(YS>d67pB%H7RSKMW?eM9?91na=+;k~x@MwOFo8WB%b}PR7^AT!n&|6hYKS@fG zhZ9T9?a59^f)RIS_|a7&AiT&VSE+6#G^*XnJL*dh8>IFycfb9Nw0Pv^oM@@X1bytP z#VbV*BVf5%kHLsyF?g|Q-#-;e3SgsN4jyH?rmw5Oz4c`4iq-9K^TDP8y&!DSzdau! zGa<4a&XgW7eA%lBF$VYcjdPkoz&$sX7#;=GCtTHbMZX0arFE#6CeIWb^rD-V?TL$X z`$whsKO3Ago9X?csqN1M{f~olx_^lDOU9aTX@E!1&dYv#vN!`x4!^nR`F1?)6q4Mk zD1k@Mos-c$+ZqjTcEqczZNfv7+^Wf70(Sa+k+@Y<*r4~ww|f_QO$v-1QrpG4M1XZs z$?of=ykM;ULCPj>8T88xen{EK2D{&XQ1RFJh!n24bVwwt8WZ$r&2`S6Qf0{LX7|=O zS&h)eZardTL;5a{dXXMqc4?TOb!>;z&R+(uJjE8hWpHY`xUVVjBz7MT+o%pg1M-(X zkDCHh!@H*Y+%{nQoNF{vYA2eL!FnTe!6t0bFZJ!zy?&T9LITP7)mq%NE~5 zy_du;(ji3hm&5PX`3Sg|bFEG?FG+?)E8Y&67~nydp=H%!VLX}~xnSfh0sGQOKW}YH zY|uOJGStZ4FAnbMG(0-we89E0@B_|%33#V08M{fz16M4aq_$UnM*5pXgvf1%5$!rH z+?LV@n4mA^7ivgw1-PMUF_2Hmi})*3GsfapbIdr!^h0P|o!|*nfHzxavm-Vzm>jpA2&N-YJcly4Oma9ciIQC^5!AUAw&RR(o zI8QfTvZ&7pdIM}K@F^VV)eHFItF@caGZ|xh$}sk55ntnC>C(x(S;9h4&HAs zfvr`}>1$VILGvlA`c49N=~vl1s^?*Yp6Tr^JH31!*q)cYztM*UvV_+p^C0w4px#{+ zXh#Euyz&=H@MwsMty)}r4q9g>N+T?|tVHx+)$mNZYeqsFwPv2p&Pn9j zwyXDz&BqYaq!W_|s$L<5UW&ms3(_$`f2lHBKdWZl56DT6mF*OzkH7`Qm3EEYrs{wP zTb{Zn+0WfwZ>H}XX-O`OuxIzv1AevGa;Y*CXz$)QuDC`ELRT1;&1W1!+D?)6_zD=I zA6NK{*i~p?gMMvvr+e1p-M^efgM@yj>HTb?r~d`|nWpzYLqDVR{@J^xlV@it$w`kK5YXWvJ{WrnM@&_} ztFK^!zA(Knj4f5?m*^8eklXOx(*sTKY|~}$+ToG#!=ewju|==g9nIp|rVZAs9{Eb1 z*$8X)lxCLdYr?5D-s(J?$C0JS`0g!CS&y#TGPFT}MGYJD^8!R{l!L7Q(dcor)MYmv z{j7|=pN{?~gL72WISduz?x0OOq|X|yBwk8ee(b~nIfCpnDLtLa>(A4Z&M zjg(#(yhij+46xiR?LanY1jh#AE@OiJ5Yxno1LMDr^xc8i?y@A{5x_Glafh>a6zfUv#MDQ_GC6Yi@1`*jftmL2P1V+nv0x|_bf)R zLGREsdD5@J;$L26H(P}~ia99c(USdngUi|KrzdWkG+XfORPTwmxgw?%J*dXHvX z1(T{;)Ow@w!KLDaVYZs~?b}!2;fs&n!YGwfFmfS#-DxBf*4_%Y`qEkrhUfQZ8WXUG zAHI#NBf|#$yEx%tI#nuY4ePnHV?7R*@3P)?>e5H#=~FbFsSO7T=COC}7rsZ{S3NUy z2!D>`s%y5AA3lo-`YJDN50xGIKcLSD=v%${4C(glqh3!RY11?7bU?0?=5l@0eoWDe zR(4#p?^lDaFmC7N0xMzRDU&*@4~nS!K=Fe0S{uM>2eFaET8Pam93S<;$^I@}#pkQ2hA^`{fh9~Z^LL#5F|#f!zz 
zVsAd)#fOJ0%dSU1xtWDJ5&B(ja?;|>I2cSbD=A$4896+8%}PXN z5INp@T7US+5aO?MR(x$uf_&_0DX$GV$HUHbfl-6#V$Kiq@TSmyQxTzGgbW_y>?{KxY)T`G_~)6 zQ=R}G4ep%`uk{1^*y7JzLcw)kU$+z*HN45t2{o~TW;X}1ML*mo@V0?p84RwJKlpfF z5YPif2bT{>p|l@!>U|Tnz+Un|T6KsT%C~S;=Qa~5Y|uA-WN%Ow+47G@FE9t`J?m_z z_k?KwJ$iKS_;c62wdkJ4`z2?1@#y7tE=zJkWKULLe@K7cALP%jQY+e#2J0wWD0r9Q zp)f~g=okTeG~FILDU8s&`+Fw)u)jzA5;xz~#1A4v?4qB&9=9Xm4Bp0j*m@CNWlo-c z+2_daE_IRo>ov%^fs0*bPrWfgZ+9h>JTzG2NA!u#w2KcketkVuwI`VNK_}d}OnpjP zaR5{F6+=qRrj80A>XOyQC9)i>bVsZ6C?(K6CvSQ0KeQG~*7)9jw^9YQ*-?x3R<6PZ z{U`m#(*nsR{}Mf^ubznY@BR+vY13#EPaeb*01^Cb%zIC|%8m%$bQWjMVVD9>nh|&A zGw0Z@{R4bCXiP`%AA|(g`IbOFRo=_~Wq33rZzE$G4<5q99JFlb<59Pc@%B1OJnY%I zj%yPEd6ZD|&3D&g(5DR?%#0YJ7mmC^JHYuC*&fE&)0+AkaS$jy5*^=+c%GQY{YbhI z@fhdrRxv0??!PGy+c{o@44OpxbPu{?f_^hyeYgpY+Aq_4vnR)5lefJ1`sW-=<6bl# z!4`ee$J44?cFMxLd{!1EQ7+J0m0D(dSsGR;9WdT{a1Hu_+v@{212Y7#*ibGWCx#9B zWwolW?Oz-JPel47a}ept<}lK4?Uyb-uUm_rWb}Cz#fe9?cUCC9AVl=rNrcK)T;v8mvrR>mC_!K=W;c&W}bE_13;q)3a3>xW6#Q zirTTY(LTIMsBR~ct7}cay08OT zcJBESSExe9=sXRU*VZAz_u3@y7?vTzr-~IWaiB^dK6CV)y`95RT| zk_RfwxQ%9yR->n!kpq$P{Meu`wWWBtF-rF@=t(o-lF*YkI8HpAzb3t@IlTx?m#cBpB~jaPIf!eW@Z!kq~b!nM8Y6CDYEo#G#qUGpPBmJY{egbggNcAA~h$MQDu|19Jxjfpt zw0mo?6fZXD)mV43eGu0Fmt*f|YYObyiuBBLkly2FIlccj_WswA{?_@sPXn3p=#I~w z=WAESj z)|4An`Q;HJR_Zi#v$PqxxoMr~r)U((=Z;heVJJqtE3EysC+;HhUEy-bu3HFRQA~Y6 zdkZG$F%Ooae@cE*@k*qZjk~vDWjZu>m|`91;nw3g9jV*EE3jbEtHx+jr2tPM5v@2 zU3~d3xx>x$NKcCLGg-cm^rR?HL{Hr3nMQh2lqW`f94=9nggyd7e0Rr}f*hp?Us|6q zbl7_^K4!(ke)s-W13Qb*4aaV@dJahEZ|h-_iRG+DZRSl1-y=l$97*0K`52*>truDw z!Bvk8%LwBNx0WDjUA9*xneHKaZysx-ORN9A%b`dXYGGmi|EVgmkWhv zA}a#Cb7d>D5Hx&Ma_hWwBqmTBpXi)|n6LP;Cg=k_Tf_WbbBB7G) z@t>rHh%A_==v)6U#@$%1C3NG=^}p@ziW1q>Qof+#)3N9lHuLRWk4QGLh6 zj1Bs;?xzJTJs_p)RD#n}7VNbkR&IcMhS{dY6xOpEk7R_oU7r6*wKDpX)|yp2Z5 zG^@O3z{4Fq?n|4ZO2Hz)afq4<4?X$=4vzOT(f(JP^PLFTi#YtB@5TtdcL4*FuWvq5 zafl6V6}gWzIAk8l(~3t_YO0*ZixQBs_{Q6DDX~aBn~L|HqVvc(>R@jETdJ6#4@(is z;JEYa!8wA(QCTlS+kkByn`QO=R}k`v`8fGyY|(dw)3dzS;ewbaWkG9mKOV@~ 
zV0*uc^tf4w^b*q|{qID1;zBho(o-}~i}cf?yvE(dcFy*B=rcCPu^ro8!A!7`WecG~ zzu(=E#*_|^N_1ww;cKdZo*nshW(43$>@E+tVgUUw*Q;}sWQOzKM(QW8IXuFpAP+c% zK6$lWM&7BMWP0Thhmb#HOgu$(9+6o8@KEAsIRwjnqgkzWGW^E3)V|C5d7)hTv#-ya z6usqga$W&hZp*F7s=0?gecwpSVeQum`J&=*g&wgz6{|ApT_@0}=N8T^WMORBYy&4X1h7$G-oXYfgpx-NT zXBRDDWS$6l!dc~VNbAm8Y)XLMYR>Cogf;kEDg_5cGm(I-`s5n=1SF=7e(MGG7^E*} z=jkVJqmbP52kIOt&mcTImQ`(eEQ=|44su-gX-1W=z>`8=#`ns$etpjT0&FY{dSKw* zde5aL|2N<%ILbZ!-V1|-nBEPVx&>&1%gdpX$j^v!$RiIHW*$`Be^?>io)$as#B0T$ zdL=mDl92u-c;c6r)Rv$8jY$8q2v0q&E&uU1wnvQcbW;&Ncv^%fMf>0VkYuLX@>J8> z^7Ov}pJoa?P9j+S&$j%a%Gb6$5&ty!UnK8-R7*MwN1j>fJgD9SXRBMDjnLvjR9)ED ziM1RpxaU=nEmVpI)r|ZB-b&l_B`Y-^b=|2`5Jo8GiTjLp>&)SafOu@UR;5Iqi@A=( zv!CzGFONkSjPi~i_liP}IrxoAu8u%h_D#HdiVH@r*WTpqWeP$NwrJ!Pr?!24e$ye*mtkoy+*wO;IzXIsW!zGd(o(SzaTnhc?DOJ#sMQP zi1IKq&D@;`h+j~_Hv>9--LgNe+*3@Q07&48X#ei-Aa0K|`wsEsK{}DlR^@&=_}Qx5 zFP?U*>RGAW|1ltE>Q>d$+VhSI@xhPxN264nOj}kEigWHw&N^oa-EuAQYJTH{zWCcu z&)2OZpuV(zq6U965#-$pl-%aw;nqSzc8yVczN5p9Ph!Cp#Qby4BIRXiNU8XZ4wV~W zh+qiwv15Y%gzmcB!ShMS5Th*D#L|=Ah^h4vrMq!+hkg3KMMn$o-roHUJ?ZhGXP@O# z!`>fW4_znJ#hr|8d;am7g}(dMm?0!*aP?!Aek6|iQa`sZJF2uH=1PP-EiB{AZb|7~ z3}>XC+H_o_#HNTRoxR@oJ;?l{(NDkG#cZJ`-Rxr4(35U{!9@o^AV(PpJlc{Ih@0Gv= zy_0H4s=A5h54ejKEd1!@`6GI}k)V^Ux!9sNu>8Pc!OjE*c8ML0mwJ)r^~?L;-e*PW z*QaLit)&8*59sjPYb-D%&b>d!iX6ox#n+`XI9@m+_R|!fh<^H*`^n#c|9$122x9tZ zyx&dnNkzQP)6#sk8K(H8?z+X(-ZyRrDgM{)x<4X4UgggezYwOU_=fWN7PoPDuyYYS zY|do}XX%*Q9SHCfpU_G9cUPh5cSQ~53BU!NFW>LGT8uujqd2ZbiAVjsPc1MJDg+gs zGfvWToZ_Q5qK-s`UPPLeM`SABg(J^a9NC+3EC@-A_kY{-`3R!Q|AvuG`Ut|v-KBJ3 z^e|$6NS^Av*EWP^H+|NTD4{t`@%f$0kM7L;R=xKO{m9`I)B(aC(f2v-euJ+j$skLQ z|CZuY9j*McpUy4KL6#if)P0Lck5YYdmpZ@bKwC#zQW}|KYv+6x*2A!G0C(VyX z^_zY5KfuQoeCwyj$*WF>-(JAa+^bxS($HSAWhplSmcr z?zW)(LTKtcd zTPqWRlRl;*^2j}SNvqxuqE)E=>b9B{Wku*>(I%r+1mLA(mkv1*;JX*zV*2s`g=-fE zE{M)C{N7`<2kTo;BQ*6IVtpC`$S2DxG?x(RlP-s&VR22SKbsH zx)x3cH%=U1%D!rB?%)%@pbV*1j!ehJ|0gEMoQ?MV_k(l!U0NWa}%dkl$_vvdh%7%Ujl70SDCn+}bzT6fFB^W)rMCoRM;_R&4X 
z{HjR;|FV5g8mRN%GBR&^7aj>E|5WxelMe}|uD0Wu0#7=@{O)jEoO6WwAJ^3r=fEdk zX}7SL0Dkw^tL>ofbf<-I1%zhJ57i|1hZcb^8{G)#U03QJp5IfAPFCEZxw^X+mD$lw zn?OK*+EY$H^5PxHRX9tTjuHC6y%sk=7zH7ZhEEG+FdsuS7ir3thVDZc?vH+%$F&#v zfUocO*s%w(_ZIg&m}!Mj?_5jX61pA}^g#kbk_-n(Q@)LQB?N@)CTVoR8*aV=y8dr% zd#ek%O>aK-0_A8Ji-Rz>x3|)`vYkxbjs-+FP}qlCj~09hvvJAHr9xGngIfhR6hWPcX+ zW_pP&@nont1)sPOkHhb_?f;xiW2qWWbidZ0m)H`|ErgVxbTdm`lP9U-4SC+?fpr%O9SXi3w)je2F$_FeGee+kVAd5dZ{_W|Rs z+(Fj6-->xljIQ6=j_U*I7mS+Co3Tmp<%3U-;erlL;`zvKZ%LRpw?)0og=yaN$GHCGwe_T~xBsSUJ}KIh zQhQPxpK%rj?2y{^lvB~(Xr=>pz83Td;0bMe*Yn?I(pfHo+pbM$+p}*fN-}HnK$Fia zxh){TU-ss{$k4?~)at5|Fc$&(=DHd!mU(!Tu{KF4wz3Gl?7Y&F@(>F9RZY}6=D2My z!lGqKWABaZ@NehV33W&2CmE*E-L*wxwp+bXBh3A!e4mu8si1y^lf8%swWR{;7=8+ruvq;qJ!7 z0ZSZ=m|1JxRv4VywteB6mxl&Dxc?{8eB2zQ`SWHy&Hpy3hVytWtF?U@c${0ZCaft4 zT8};JdrTO8$7B?|&~igHI^cV6A*XdM>eOlIY)!x&t#u&m%#s2)cX3l4XLSW8Y5teo zVTIHfPejdgn0>9kD-yP6MN)XGC1UV_`+bkvcI1iuZNmUj6C}uZ^s?)kjmS9qv_uC0 zlQbWf78T}JX!I3z(81>ORifRHp&f5Zo<~aON4=K0s@U?8y@qSL36va~*xu%fdvNzI zBN+w|Jm$x@p6*?N-H@oK^kE!&UWdPDcybVt4S93LKy$d@b!On=dBH7npXL+4oN&w7 z=eRbUo~V+{Vqpye_-xsU=Lv+Uo*b2|z)XNHgkRvq|GEr}_vz6t z!FW8r?0geujm!fGJ`uNLRk{b~hPl7$_b>)pxaOt|ai|RkyH`0=z_sXqE zgAQ(&CO&(uGg#nQI8ooUmJB^>-LN%-?J2U8L(VwPzHjcK|K)N}=sNV}I@>=EJl!0C zmzibo#Pt4G;Q2&q4H{Lm&|r7_{8L9g(bD6F^v#57ebpns}jX4QjC6Rln9X^!-GXv)PltrRqsD=BwsERu}6YM)~w6l zXoVzcb!f@n+K60Nx+S1+$pCqKQ)~TvN?l|H$4RHxvFeC99%VnNKdOvLcOpmH{^v5A zzxBVXX0K2=@d_lGNAh6 zt>bgSP(p!VAMYcCW*==z+@WD4A+m3mb=ga7y6l33<}YTUV*a<6*%A@V6g{anFdOKJ zQ64u1Uu~*#&ppEriCD4cm}<-8ezS7_m$rQR33Z&xiM3ZJOZEO3ic4rksq|d@iaw*Q zV9UPvGH{N&PPevH8&&Dp%(RYx{=-2wh6F-IUN<^{hJiRJ$EL!cSfM{Vq+lnTnGL>1 z=@kALO@+PDNV;yGV8LO{%uUq;&{4iOY=7Tt z&@fhUq7cQlpFZMo;Lhjy^T6O(vz0{TtAcCC`TOp>tS$I|?VSlU6@UN7uU$e4Wobbo zOUW*o@7PNyL|RmeNQCSXNw!p!Eu}1xq-=?b%-EGIN!ILJmMj%Xss1zli08LF&r>n| zJLkFg-1BwkUUTovxvzPDw(sYQY?4T9+WZB1D09BQ=5RMcEA;$ysNZ{HpkMS_R9!i1 z(SJ1UKg^=i|Lo+Osj57>8K(WJSx|bsDvx7Q>1igx{28UktMW*l0h2ATCyR7uD1BVI zN;o?PZ&{sO=^4W-uq;Q@;se|dVPlI(rUVS~MJ95E^ceIVks5KXqyA`D;6wdlEXwa@ 
zFSmq+sN$TR%2Yl%2SD)>k7Gyn?E*t$V%eNP9kjPwHED&agJwP6E%&okzybQI8+RmC z0R1D^p-tH%geX0Eb`jKUfUB*$;hG+|mbnP1~c%DRHTOlGuQN=UGbp};n%&KWzc&U86J}6 zRA{LJ$>uVtE?M(>cV`m@~}V#UW9erxkye-6xhhS@gn|A75p%=>Vy7AH#6 zVwgIpDM`%Q4h2I^YM~hPtZJ!2c^LHPoUaGZ#}xhW<(tQMVUUmJjb@A}GPwo` zQhFA((?#AFcY=gN^4qs8(*ryA-5J}*s03Cx)s)ki05Ghqm8Z=h4J`dh7FK)$z@y*7 zxHp}f5a`9j-e&VQ>rE^DF|+u9kskPx>5xVB`5ZX?LXZL1>jAjJGvG#Z9O>IN;yz-A3E^=`rprlzID>f#~-r=J;h`*4Tc$0ZGQ%OO!nnTuP`&;>8y&o z{*7LaKOVXN(-D?*hJzEg+OcGV(P`Gq`+zud(IPi zV&HIaa^>=uYk-H}!unFHg@izVU+gZq*p=!0d@@kn%jD7vzc0*KJTle*=USC@me3Bu zx%)g7*yLWp9jR5O)rG`D|0b^6&w1hla8<0C$Gf5x5qj+9({w5nkwl-=EPOVCFiN|7 z3wG8bPl70tUk|?|272dMX_1xtX@5JPe*PTz^s=+<)8o+p8=szBFl$c`2645Xze?a< zM|5Y?o(5J7dJ$WBsxrKbj=R!p8tiqQN9R00sRyD5#JJ}j#x4S4k11E-0lptR-0wOs zo{XoNs&uqLQ0|K__I#SaJH$Hc!k85B2$X2sQobH6b)p%ezqrF2>IO@)EQov4DV z>FtF0^krJLUW`f8Ir=br+Q{Rtd*P1fJ&OnW{_4{^AxC+%4-)Is2XVP&UfcT~EHOE) zF|w=)NeN7LR;(C8q#lMn5fOd>-ZvFAG@fb#0`sX;fkO>3&?`l<+*aj8Y7;-XCOJbRQcUQoLeOUX^7g=|@kL4B4!+eQY1ym^^5T8Lc)S~yRgU4(6a zYD!_1o?Al9lGVCrmZ%D902j8`)f==nf;awpO%boeK+^%XW!^i5K>VeeJQiyKaKoVd zec=Qb=(H69;?Wd@D8240*@0spwZ6gKOh-T7Bi;uGCJZj>c5Z{4E$@>Hi4DR+jRJ0_ z&tAdG0*ggGMu=7VqN7gpo8ONDH&LG62>Av?@6$xZHQP&wY6UlKSivxIWUIg3wq;dF zv=?W+^rd!Upg-pkP{ET#@!RF;=g&c&UUtSro7u?IgYaLVL^ae%j&<@+SgvlU%J_L>ye@c~y8Gw!Kf;7&%7}^xa1M?Ctjz z!PYpyCh!^pt3!>~M;5IH$k-Kbc5iMVD_zQSVFNe#FzW7M9yJf7w4LiT;T|Fc`gv<9 z=*}2SPqc|KIcq@n1cSa;U(k5_AT09sPNaT#2ONE}FLukP0hl5yuG(@xvCy-%=W&wv zy#{un8bb$6s}aA2y783K{YcJ|FIT!HFCwK5C)Ag5Rw8yMCBO6uwh{wR1l4JQ3bIR7`6EhOcbK}nWat)*mEEC7zbK{pkK-0?`T&b0rs-^yR$^E1qW>(b=L-PfH=Syf~c$j zDhqvAzk9+6QdC;C_idyE5)Wix%adh^@o-(s0$p^cjEyM+27SXJwm1D4)VsC1 zZ&BcoZ+KuYx1bQ^HI=fJ!pwV%>_f@8;{V{`e%E=?Pz<$x(7O(#$7Y>ATC*BFyR0pu zAI%D6dJM0$d9nkm(yK>{s;B`=#6oJ>rDT9j)?%5EJa$Xa}x}?S`aBtpDXT$abSay zC&$r`Pe9OyDg z@>)O#wR}#YGlD_AiXxu12HU<*BYh7J{l3uaC(nu9MvM25?H0gZuZr4$6*mzBJrK0t zQ0*@ST+f}UIw`;l=zUE!?Wz|73qw{d?lczQl$~4P@_`IE74k1drN}{;m-P6;z9K@P zuQH1kVtl{(8{9>O<;r5?{jf~T*>~cv{tCUg&lXWmH)5f0;lBOyWmXp$wp(`f(b!|e 
ztWsdHvq&RSay-KO$#5)iPwsa*Qd10`oHk2x^vEX$`ojlv(~IA}`LC4z|69i#(j@p( zRe2uYyR#Yl{IgF_F`1{IEua2tt{%(NlSnDw`s4XzIZOoac>pWYFU?HV(L#eH9etWG zs2_T^#o<$GS1~QEF2_bDpT*W#B_6yr)VpIF_WTLAjf3y~M}$Bhp)aMw=*S0L8JT?w z4Y@#~1X8JB!~i1p3dJzwQi1sw<*Rb!$borS&B8#+caZ!%#Q~a|&k2Fv({M+4cI~uz zf9PR;e=grEII{R9Ll-g#8`YiImSp}CZj3*oTTD3!Z&!}CyL^aP=r0PnfisaEV0^bQ z?P{B1MBPKKk14$gsm^G9ve5brGNEQky_Gr#S!AQc{vx=Xn8ujLHtty^p7!_9Pc^cf zs=NOY@Ml6lbpgSl$HDm7-uL7)`rq?TPO+u@$ys*fGZwX6Jh^>p{E^=GBvRpI|9i~5 z?`-+H+Ks!$wAv@+^HE`{4KrMIIlPGXGR%7{mb2bH3l@D-da?`yTzcKx?%;tk)ZF~O zL|u>bqiSX^nxbwofFelc}9>#^T+C`!kqUf-dti^b#o3Q`7^;YY#(o6Ee5`@xn{! zSFB+D_a^>S#2p8Jw$%P-XWB8(F!5Wk#}e#ACa(uSHPeoB(!~GTxpMNeesrc?iTOs2 zuS$=b_=gacKU(m0D}D>+(YBMP=@g?8%IbV4FnDh(Y}ZS|E&>kQ$(~-sVg3kF<52Hh z%&btWTMiph4yPCFy^UJZ@Mklgs~|+R`WrV?c{?lztp$vmz4q|~j|IF}!ZDNn`ZiIs z^ByGN<5XPs+;|LnDs|Ft-YpVHG-!IdbLS`_YM<87SGlKd=Qo%KoGdMlo_+=Qz74G>dhNzmlbHo-o44ebG8QNcf({B!%=rfzJHi|>_jwE^~0vKSqyJY-yYmnrY!I>gV+ z=v6idK@Q|NjJ{OJL^9aVU)6mcG55=V;pVWRf{Ce_9@Mo@e z^Rp&C`J``;AM?_m?R}4%_?X&%_3d%B=W6?7rJG6U=eZA=ZcV@PkQEiyZdC9=*ON&d zO~#;KvuA#1EC#!capUei*vRtk36ph00qUiGf)|hIpIf^+HAp3&5a>g~s4ayumH;CU zqcO$SCE$5b!1D7iK0!6Xi@06a^g-9kwjX`BxF1>%8Cb4dyFbG8yWNUcu$dEe@X7nuh$6F`MhUZwp-z3q5VTuU@%FBk+&W-n)%97l~*7 zWV-y;9poCk=I$EdFi-;(omBIFj6`sM7Q0}H5Ci?0t0x&p>iYg;FmEdIK3lo^*_>WG zm8<`e_SaMU{JW|=`WY6$>{R7_QVQ0eZhvj;_A-D0OWTcEs9$Cq_Cz(ppJ_*6(5tQ* zm%|Hijt|7zoyNuzw*3c&1Q5?8+2!6HOWx~T@5SsUD$f$HjNXZuf!fL2@s z)%=0?kjRKb>f7|YE^MvC~w^uj#e(#6JggRrd3JsB;C*g5q*ey&pNFY3VyolB>DI}xNH zEw@MCstT!Q?xbwV@I*#0U%vQ?`5tm_l-ICH#+ewuo=hWR#D%}-AC(^e0aN{i+W%SU zXKTTWDW%8veE@sVO)g_iISWdUUks;Qg%v-rd^PQ#l^$QKp2UR*@n8~7hpKz(8@XF?5h=wVl9pLHUg_pb^Y{qFGysplX6w2A zr`2A=czIM>!XO-|pE9M;i- zwH<5JJRS{0zA7i**(NkX@d+)Q=W80F;Y7ENV#ij9{yLM{!i9~5s6G20?*|H@(`~c1 zs23QKi4VZttK(jQzbVq;QVQ+a#Yt=wU${jtE>Ni&s4q;@BR}*239mZrRQ%u;!UI<8 zg{E8pm&84TCs@)!sqclGy1jYCr0yCvy?H?!-0?rZ{ulW5F(T}z^ka*p#hAklVz#-V zDYsy6I}B>G9(#9dyf~+%BwQXFjW;%Yyp&yljxuN0;}QJZ6jpb|a)|NkY2T4zZzoUz 
zi$fOC>4p^G7<`WBT6{mWX=PgL3W{22(8GNdwO%bG*XK22Z&Cvp>1I^0iIo!qJ^%du zZFgL!n`ta|-6PAVI{*vDmF#w@nuh+_8ve1c)kD9;um2(?$>6L~1s01|e>_cn57{xi zA)bxxCerLCkZ+#i51xPgz}`-li_B~Dd!<^NMvPyd;+-4j8PoQU&`ZgrO22f5+RqB~Ulo2*?I)oZj@>*TZ*q--B z`{o!4Us>F*kF*WLVPBBPd+G*DDr&LpvDpIbeLr}(-*sMmHQo=MEg=Ijj2RtS9ABUd zy8C6Bi@TxwT3Z9v`kp~&wUnL>L_dQ#j7{2@3oD_^$HhJ*mJpM#S1z#OJ)=1dy^(*g z&9LnN9Keez1q=+pvh4mE;i7+roml84;hH7B4wXPM|2ga7rn|^!ZhfqHMjW!%ts*?8 z5<$jKlB~NwoB?h8+a!d8Qiy?GP2Ok^Pif2ll>QeeJ;j3U-TfLR(&ifU5;Vb1=$?vV zj(r%+#uxqW7~reha7y2k#6~Lkg?s$t0#u#%`EDG3Hy5qvzJbIjy;mV`#Tv;^kosNG zM=vPGp$v<<9inJEq!_`J_jrl9~LNhBtEtR1T>Ds?s|C#*`FvX^XyzK z;={?>ETZTOA~w^row%2Us3d(>uE|O!2KrA%izK$+YW!^@_;hm+!RMUi2>$eJyMK+~ z7liOR+_Wr$?{5HzJG%qgy;62q4}-otaOq||N3RvWwon5bZl+9f+&b$;? zOo-K)Ry*xPDk0DZEa7^8IA-UABYFbs94xP4WObva4CI;ydLB;qszY^e4Ce zTJyf_$~M)^vBzLvYMNFybu#i;U~Q&KO%$@XuG%BzTm~|JWWs^_tp|8kAZNm}{U$L= zpLoKYTvM<1xA*DsTjBTaY{owS&Zj4zb)Vi^fkZm=ZPi*kM`xS6osVHMhR6Oddyb;F zK3o*XqV;fZ-mNuwDsSV>T|9WtJk|T^ML6^p#^o7>sZlVeb@(o#X4|nRyuZ7PXBa9S zvIaVeeURru0cq{p=a9aYeS+@Q9O!tA&TvX}4z%Jtc`pMw3R&niUeQ^fMhNt#cP$iU z`Zd2nT|%;n;q4xP9bU+C`{@kA4n!w4gVqk z>>Bm6ovWuOo6gna<~`X@u2C;LxsRz_J$~Hy_Z9i6T)n*fw_JVBStSxlLh*hTUX%X^ z_zU)!3F7Iu2{NwTlAP8kGljVWjDg?2lkx$;wtslLb`=IZ98OiNqMwlu`v_B5^EEONp_o+y{F6}Ckg31{ucBdm}Ka!z3Z*1mH_37Dr5M*3wnBx%6;+2R_Mg`%=l9BQfPv+o#r~ofOHxC+TT@YK<4Y% ze4C`wAQth0C-3&$Cq(VJINroj)=i=W`o`$4Ug=g&h2~CjRx$ zUrzU!=7X~kcTtka4J0IF?NYr>mk`sT9W+@jDab($A%4j$7a-bzDn>Fz6Jz2JtQI?H zD^&SEzy25a^;t!wT0@6|QMKDNTVC!pMzsX*cZy>$TgvNrU?aM{qOu)tmRYfDtpN_b z-m{f{ACDEF7aADX5(RyDg8wr$Qaf~uf#bAy&J<%7J2>B;{$t3DDs)k!s?n{62hsz)@W~+BUn`29D89nk{IZ- zE|kjeqpJ8Vm40S0Z?^n;q1pE9{{{Mg_3OPfTz6W&ehg=cCkz9<1LzTd=Haz?fllm> zo7jk%`+k+2*cegADu+X#YQEKd-NH!t8L2E4pMdj&hx=XU<*53G^g*{KNVSP`hqhS_ zR6u#|YH&jyR2tNGg6(!9w0B=G)Mk(baj2zyt~9*~<%k=|D{@5;(=X5QYL@=WY3NyK z5^SGu9E2kkv`>b|48n(fUMXeSO+$ZPiCfU|2(f;BqIuLik5UxOPYO>E_;L+#b9#EL zAte|QOp|k68FCBBNEti3%h>^J;MX}8%oIXQ1fMxiBa)e>^tXiGmSXnJd(y0%_s5j} z$-F-qkBH*wxAxZS6-(U>(Q*ztdqE8PtID60uo1lxRdpOs(+N^SD{$z^A2XzOc%-7~ 
zksV*>K`(vQdMjr~6;y6N}w_BGi-Lz>M`l^c4Z$}IU;ngI&T^}D{qd{_%+3{bi^xaV` zeAJH~f{na78xlpMk%@IS7klpoBI-28%O%p{kypU>oa|#85G^;)la?ZE?vS74Hf0B*!R=?KTt~p z1QY-O00;oRV}nhoQxZGK9RL7EGywo10000)VRT_kZ((F*a!qe!Wo%qbZ((F*a$HDd zVPs`;E_8Twon2jXTQ`z@)>iF*2`OokdCb`BDOV-2ckJvwrf7?fSu0Tub^D;vXf(bXjh{B3wvRV|KRmyDd;4(nkL~B}{_~$VKR*1i*?--IsTtKffFfXYls;+neq7;rRaRVRPKBZvL^^|KD!^ z`;o730n_s8AoaeMRXd4JmOPw!toJ?@TQUvEzRr=NHGm-ELTf8V~{55JE$uXo4K z&wp$W+lS_Jo3J7Myg7aTdZvHbJ(NNMjeLhR1-`Gr_wOO2?`~mbz%(nA{c5$o`R|wQ z;m@D8e{3ITp}t?=yt-88X;A0zd(`9j-^blkrJczN#+teCJm-1&_m}6>n{$Oe{PyGi z^VjFY`SXQbDtaQl-G4Y8wwtF3^7{Gn%hNfNH;3&%Uy1>q6>Gn(E{5=Gvwzq&`!~biU!D(77Y-LG2SWSr1@Ys<@nRky|NOR{iZB%D zr|tgt)7RMoG3 zhR4!)UmcSh(f)UCa6{6--UdFQ8Tq_`RA<=x?O{;CrGp*x(0(i0oK#pD=H<)Ot9|&k z*$+(HPZxW6{?=07?%y3A&RN(rHN{Q(;oO~`yv*kBI2q2?kQ*lFQFr(B@>B>PcK;p= zezB$UX-w9Ab%0A1^n@>8j@whu_IMG>eEqL~Z9bod$zEHAiqMs=rnuU9{d}H*^{K{> ztODJ1EzM0n41V*3(_vDQMFf|1`N*WRbeGM?!^_r9UlEvQV0ufBHAIDhn(BzKn32YO zOvBE0$KEn!>>nX%<&oAT*+{DgvZZ;QlenMHGm_Kku={kLM=xDv_o;M?=Ye7Sd+B`_ zRQIW%W+e?n;s~`vYftxw=9yeuYsA{a;6dADF?>H4ZSECavgtd0CT;JGF`V7o^Rn=~ zc0HZfu|ulK3M!2wOkFDovlcM70_IM@+zS|`>?;AY7BII0=1#!e`s5gTH zb!Tv({tOP(p}~QAbjqvg1=OcgUJVE8)hVxryLY-8)-F|Ssb>{i>RQE?`c|=}&Q)xw zcU=#<&Px63deE`O4s<=}*uWm#cAvoqcH#B|8GK+LZoiPh2X^B26FL3l@PW;^{YOqe zIsCn|7j)UUE=%zh-4JSE)dIBo4(3}6g3P))5Sd?bb{GiDz1GUT?zbQoinkHbP}z<` z4GcF7I#!!)N=>ZgnsC`+pj#8u(Mebr6VOxRNQ3pU(8C&;p<12Pa9b;r%&cA} z4nfV-u&8bbO9n-!Fn5r0Uu`x{o4(ZsVzb@cZ8uId1VOFVgo2xj13@R7EeoP zfx^0lmec};l?yGg1&VXnrQ|YLoXsy9kip`7e$j*sPR!IKjrcZ^N|2b$O4{*lC6%CP zi49K9K}ScI$GP~h!obmJ@PpHzt=j#`YS!p~UpwEYQPexVzk{rb+(gXH=stNZLh`nTY=EZsqibcE(rFVBF(E-+}2z)k(dt38(ES@e49v42a^dBn|G3ie4FVvPin^-chlqo z5L{ZK!Fq<-8Nxc6LU>c_-Z^g_AXaBY;3^mZ3J1lPP(eA;_^d*ZCspIVN6A<#Q({aF z^nr5kAY2nb3WrBO#Wee^(`PV^1V-3mME}3pUah-NJ4a~zeK5(NVE9uMIwzr%8)Z`C zdi~frQb?iVVGh{yhKR`o+AY(h{%atjm$D_|Ij^6(qJ-0B%t4W%Xatpgri4`#=pbM; zO{K9Mp+36VIOnA3#ZKARY|6d{Q}(qpJ-6T16>~UU?+73T83UG-6oAKpg#n|N^kn@( zh`}g=<###7p3X6*E=%iKh3coJ*UJY#J?HP`Z?_9#4=*f@%T3DE)lIh%5_y#oEm=-N 
zc2Fi!X$T7?6xE0MN~bddNa9rYGh}HdfbJB~y$|x1U?LEd(pz$gKu}t5i6sI{sT0fb z1la2zXduuAbJYBKG+dy+nw%njBH^efmKX{W`Sm5a`)&$< zalighZGJlb8%OLW**Vj7kRUN~tqZWVhg}C{j&=>S^S2|ki`!k7&GG%?%VG0)*D(oqwl3)B&A$|06^yd0ceNDR3XaS7@_5?4-5*cI73bsg zY4cb#ws}0e3mga%fWXzj7_#m${mxoPUOHdnRzy~)S7QS=NYgW@*uW{W!3MsO4OVcG zRCEFHg2SYu6NnewCl%d5yy#46bd*gax>g!pWz&d`mPTjUGzquMjP9tNU6U^1iCXI4;|~ZWln2WIith<8o2~3)wA~KF4dCj-HG7oq#yy;O+rd%#t#g!aw_fV=obI@H$+bJ<1q9Mg7M8(LdH}$! zWyyh`wR3vmPtM{AvDS(q22Bxb$IT`L@dgPs1iLe5WXM#3l*yxqn#iMJyF#YIy zPCDO-(*y@}(>m^?D5@Y#FXUf)#v%eQH%N~iYu!+PP=-hkz^hikxa{hMz-aN11eORR ztzRLG+}zK%eK^3fBiMbqwc!8@s{2%LMfa{H-goPbIN{BTW8PHqzLSN>%#Zu%a5`Rv z%ia#T>k!VA~4iQh@rX1K;XOt8kob#L;DG1>x; zt3a~{$W?}F3rwya%|@UAbdhF?jxd2YxhX8d1is{B!wH)MtX`e+YB+Ic0Qxzz_NijUivV+d zt61?Gz+CUT9(0|F`y+7j$J)292OXQ(8&Pb+ZT}g3;wpqr? zA^}30YXlQ8yhJbny({Jf`t@^K1eE@W=lkx=YyZz&z%^RX`kM-yCY(KqqXfqb*8}SVZE$sL}QC(S?q#NG5)?M}vvRc+oDu zF=(_;t3x9?g=*p>26UppDf%BK#dbh=_<<80pc_5W!4(!7nPgV^=LEcD`RhoEoE+;j z;DsYSMtT*54D<`)g%f0~XC66RUtfZ_5Dmh;J3SHI#iw(n+Afm3{Vn&IQ-GL{vAZrT zj${`x^txCv)Eh_;Ewoi+5HP)2V5-OczF`HN{KDnJElz7wI=IuN)!8KHpupdz^pqd5SZ0mWjLp3wuUI%tA%n(%T z%{&DV6+@zY$V{h);(2k8G&45>pjo0>Ta2S@&E%5+6b*;`f_XC4v?Oe(aW-2u_o+ zR+(KdVPABtI>Qch0rBcAJJ1Qlt26CDHxQqhZ8MJr#cuK;neu5ev+i1F`H)Pxxu&Q) zYG>DlWV$Y0b_Oo$lG>r?6S0m@0(sa#bN+%s$^o8y`XqV*0Wf*z;$eJEvy@;!)rx~ zF33bDhZn9_DS9CjnjBudeq(f$pHS!1h}V;hzVZ|5e44~{l#K4Eon1GfzAjzjI!{KI z)DB%Ive18soMaZF>pBEZGBa*I>gk@g2QdxGDpEb&({>*wHyQohOW%2*I)JW28Fc1! 
zPv3D^bdova%I!8Ry2+);p6=!L85TX|Qe1Pom)~KKbVlqdpX!>^z5LF?qC*?E(!S(g zWO7~FYG0BeB+o+ZNs9XoH4=ELR;uiIX)qB}G>X%RV$y4AEVW z1fH_~fa?)k7)=P`dbIXkkJ^w+9f7$XnJ`PuMgv|)+4MdFN5ScKGz9-9iBk|@Cdd_g zHD&nH5HC@=fg6_2QwgM^2$9i6kVCv4wX7qT9~G&rvL`nc6{)PSCwCNWy-CXtU3fBR zb!AO;0bO-6=**gW(M#4PESn;;nqG92bqUL+$gZO$eGz-Q#wF~URN3{jq(5SB?yS!` zOV~jjx{44!WRBo@#!y~p9z$GgLVgMP%*>4dR975LcN$xFHW+|1&NUL>j6Pk5lz6{7 znEiPE3)HtolG96=OL+8GxGGOsXh(k$w>bcl z6y;bxXp1>AFpu$(I%q3!tK++<63VT-@U@`$DWJOmao$@A5a)q3K-``Y!DB%Iphyus z78C%A3?v8$(T7EOImk-`>o^p?)0eHDRqbF%$v~rmwRfEvwg6XDg>2cg`iTd z5G2WTr-1H#P`$XPCA2_LN(%%fwLnl>3j`&$z*1^qYX~__mTtP_*PNy_}(a{2s2_qJe#k)hfMn6A$Y%H+SS7Wd;M{xO(Jt zGUASor<{(*+c|ssz6T}a!FJA@jBl{~Jti1Tp&oh+A4-uvJZZvbIYJ1Ti#5Iz3X0fB z;~Sx%u)TC*2{C^K5VoLBCQ`S7zInkaGU&5y!_JQ@` zM`(X#*GoYu7GDl_6qI1`gIGe$VgV#r{2-HJbH2WiBZG=mE#B1~7E=s?ECNM~A7li# z1?p$`8iJXXbX2~(Dzvc0o0iX3T5)rMoa5Px;VxaCxYc0pZbr{3gn&Jfm`(4t%&%(L zXXU!o;3h}wx@XkS56#SrZ{48?r`yModj59-e@?HLk{&&p!PIgnxTsIB~S*}oWI!SOY0xKlGFsfsNcNwAX7QEL8^q}i3IKhI|7wbXC2FF^R z9>N15#U>zLc-K*}4Tu-sc~oo!;)QoT6D_~LB(!}esXy6oY2@%{z7^_jrdNa zv8Vin^n4oeJ<)Ler1VF5z2(!0XRpTA@_LgGB{Mch`>*RR@?V!O@$O{C25J9xoj$Lt zhr~H|#wKb1cbz`3OIPeu4ge(V7B4rU6S0C4FVh&#p0hT8jA`=(UcZy07%WTr=pjI8bryL__x(km!jP;Ikk-_;=~{mrYSZ5oOz7Hbog%vh6RM zqP)I!P5ZkhRiHE2{;o+C=nl5OYf=>*+IiG{G|`$KKBITG!s*)d;sk}*d&~WOq5JS! zSA;n0A-vWVA@0tI;Nn*RC~^iDzXCv!k^+j5%FQqZ<*C)Xgr%T7^;(ppVqbXS{+IxA z)a^?if;_c1H=HP$*za(+iIR!s7BV*5Niuc^jZv|+PGVtdV5dlF=3ZC`3Ct5Iu!C^J z1d4QojV`f82yTp%r0$~l9$jh!V?m3NF15iCq2r}peKUCi+?~u1IFe3k2yUZhOkODo=Y$HQ~Jp5lGCP8$>dd%)lSUfPb&-~Udg zWpn=gjufYpE`64Z(Q$`9i%0KJQ8Mn$XW0n3h~_w48nf^&F(Ddo;xr~=^TV>#regEM zwAy5PS}=}5)A3}Bub0J`U<^a1 zXR-`oWcM0bvJAtRu@e(URFfry3K@(g%P_XcQnrvVq)-WE$vRr>WRG}@vTsR3$jH9? 
z#{athxy<+VzVG+VT<4l|uItS2x$ob(pWl5y^E`9T>Yp?Ys>9A8EiJiR8s=XHG1y64juxwaxA3g*RbINq%d2YqB|m(V>)6@L zLRi?ZTaB-FVup1vcCqdAigCp?9nmbdOa(^^+o*CUUE82%oHY$ic^RGcoDGTQvK*#a zy|vaQT)Ccg)_DvVz}{J71dx3lotme?GbS*Q1clwO)GW6^WEZhn6!CbHY-f%zn>x^m zuOnVm$|=|52t66~3_(Q&u>CQXVUE6^0yy_dPPNbGyTPnz&K-uSO4E|ey)la^Nz-M6 z@b;s>#x9LNK(gy$ducY}Zl?&J3|IvEV}^n!mJ=Jf zx9WGJ%9hV+&o7uAkxRQXdTY0Qu+Y%1?70>}J*UOYNRzOW!x-+Hq537fRSEzkp}iBx zKW;t|GpvbYT6GDLTp-l%G8Z#i2qHS~04KIotFO6ZglD1is*LBHIGg!(H6;o&%ZI|a zlz}qZxMLbx`8*;Z&1?A!7fC02Ouv}jiZi(qeucAK!v@ZC3{uKiM*h0F6xWnj(bIN} zW#*^K`TO;fjVhdQF=YDJOzmNe-NJkt5VzJ|!^+IPqS#&oV&+~}_C`D_7iN>1_MMRh z!m~)2o?FCodyRRtm?UJR2?&}BEBJzV_*{Q4r(>cvk=GH-QeUojV&(@5D4F^)n$9VT4Bp)n_6Y733h@GIu-Joi)TsQPZLX-x*E=(o>A)& zuW1+7I-O_W|DZW!w_ic(l!Z06Gex{612(258TuaiU42@#%KAfg>;4{Of3JM~G<=Vj&O^jYzLp~bV)1t^6~yjwM^4aB zrJ&?yOK|BI6FJc?;w+yYBVS3!d{VMyvj19V=l1cTzt>t4$LE+tYE+3}a`BoywkB%4 zBlp2tG}PAP_&5(zx0L79T||b(QBTo8`0(loMKM(f#kSBdfw zR(G~=l=-qwd%w6bNmgYvZ9U{2pe0TM5!Ji-@H|5lojdoOKCJR*{nyagX`&MNWh*Vj zI%lP5$NmF-w-7$`;~eH*vnW_!Wh)8=;knnwGFFZ5MaT(6)^(7yCz!qzXIzRV z4IhC|Udc{j@Z{d$I(mP@s%~#XU=MV78h8mk=zoNoib@wiMJ4$AY2g2x6NWis{jv<} z^&2!;1G)5A+20>m!3*jLHnFpz2s~vH2xfOn$FsnuR3mn^4c^_5;3!~53zJVLzP(KPfVD2s2$61S zjUVU7UDt}yos_~a#&oez_nNCV$C{Vm8y{t?J$|1VPM6(nZ&L8w@3eqxFD{O?IiA1l zZEav4TZ>8x){29Dk=AzOvs}JFWWQ_U8C6)tygB){4ictz2-UEH<1g(eZa;#qQy(U< zeTwFF4w<-eGKo_Efj|>H#?8|nhj+aShr=zjvGy2m7cV`GJIu+>$8&WB6H+OP7a+*;t2sY@`**5fc$1iY7^@*`nD@l3VVVZ%4MSo^ZHVySH%yUo+d5 zq~cjLm(uNy?Ya*&2;)gGz8Rg8@R8#qPhtZ{KhMoCG<203E`ZF80BS)lYMcA<-I*4# z@!io0Vt>)WbWU$7aU=(w^`aZpmK zmgD0-acsikBPV0!Z@yI(e!ko{U{=9CJ0Ge++y05eqa z_@u{MIa~F*uqwG1>MdS3vOA)`2~VDSM}L@_#yQQ>6fz|eWNK9Yh8lzgISAabcnedU zEyl*%1%vmqFu~$oJ@?mI@P9bT0zj~aen}-vvJfDpcWX!?M>^es&$6!E;)0Z`$DUN$ z*lnvAuH6be`EKa~a;W5udQfqpS6>G7L4(TEw40X+)FAuLryr)(BVy`pJ9yNyW7<$3 z9O6W6f11nnDjL$J>O6HtDaN3b3s9;ZD785h5_}WjP95@v=HVROP@$XQ@sV`3N=-kn z(8Xz;Et!#YST^6iZ3=(;9;uG5k^Ph+i^u=s@2}vd0Qy6J~utB`ufJC)(!OI(CFx0Xe%e%(9U4(kD*$*+Vevz0lK`GR$scR 
zJuxk?Z%T;=zj>V_sb|A-v25J@$nx_ulNsj44{(pRbr2eGq;u(&- zO!g6|u)~>ojS|?!Z`(F)_7>2KPqyz>N5&*0MD?#{>^t5U^%rjoznQ|!rDp)HopfYaKgx^bPhb$O%;bS+wqf}CQyD<(uTDa3!E zI^?c}v%|XVr>n>MXqFHSroiiLAnO>%7^qppyOfSFR?9eG6ah{n(!y7=99jE^moxFP z&*orXoN1?6bPo-6)5Kju6F8$#51<$=wwva!`eQ~F7qqt4SUcrFT3Gw~zV1z?hh_xb zap}T3X3t5EKpNc%cq0e>#Kfx!f0KS;yTG3cn7PgjkvAt~#Ghu8BKhX2KWCP1?hHq8hLifFwIyN$y{~{~;oNxmg&m zrw`C%4SzD-a2;@eUO`)4$F?C_D59TT?3{TCZ$t|#eZ+z-JYuU{9;O~4zBrp*PwcvD zX>7T2&C0B1z8~Utn%Vec+M}uug^Q}W;02GmKI&lFNAiggPN&x%F15MaJoQRT75*{g zB)Ibed2V){cl<&mE?8jaf%nbZgxwj~=eMBO`Cl!o)~v3j%N0oHKi83!B#KMRcje12 zS}5c__lM(0iw%%(o%3dOEQvjGCCvHy0b`#dKdG$nmOnkZ3VAdmE83IHfLhgcul=De z{4f`QixOxo@yEQ6GSzhO&R^0Uk}hBHf9QBgBHMG)SuQ{$hf$%iw#)q-DV8J`dL{j- z&J^R0(map1|KtMqf>aD*3oy4ll}5OEW5R9pao^x7&LQwEQ`LBI(iVuw~$w+@R7r$2S5fRN62KGkcsEn|HNaQ+??DT&bry-EKoRCSB#t8zDD1T*F!gIuttzV z_An$HM`sz>ME7Ne2@~$0M8g(4dGzX?Hf^)4y%9UUg&FeNAm34sF_&8m;lgk$coIF2 z`LXe10l4idBK75GMwoHc+o$x1Sn}Uyviu29L{1z>i}~vpgYUdyVAks*;0X_?QrW|m z87r@3O(lme0IDLfvKdr3^Ee2=EzPpIp-;-WpM7W9s5k(;A%xIu6k88rzXQh)SWvN~ zz9)|hCZ4u7CA zlmL_~D-SS49sj{POG-G(3jF|1D&n_rzm$Ozjj~)lKy$tQ?`VHjyiy`l)`bViR*`>) z{9AJ=E2;xfzo@?k{mc4_(ss&1=Aap;=>NW%-+Dut;SZWAyYmm5`7JhOf;_^c z|4)~cyig{f1JJg+{|@?F+bAXV00tTNdx51ihEjzNP%gw%pipR%A)1zM|7>RRTY`p) LYL$FG71h50npy}B literal 0 HcmV?d00001 From c210916d9cd0c371478b69520f5b8751b3122a6a Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Thu, 13 Dec 2018 18:14:02 -0800 Subject: [PATCH 09/16] ...updating... 
--- .../DataView/ArrayDataViewBuilder.cs | 14 + .../Transforms/ValueMappingTransform.cs | 458 +++++++++++++++--- .../TermLookupTransformer.cs | 3 + .../DataPipe/TestDataPipe.cs | 14 +- .../Transformers/ValueMappingTests.cs | 33 +- 5 files changed, 433 insertions(+), 89 deletions(-) diff --git a/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs b/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs index c11d49923c..d5139449b9 100644 --- a/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs +++ b/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs @@ -85,6 +85,20 @@ public void AddColumn(string name, ValueGetter>> ge _names.Add(name); } + /// + /// Constructs a new key column from an array where values are copied to output simply + /// by being assigned. + /// + public void AddColumn(string name, ValueGetter>> getKeyValues, ulong keyMin, int keyCount, params ulong[] values) + { + _host.CheckValue(getKeyValues, nameof(getKeyValues)); + _host.CheckParam(keyCount > 0, nameof(keyCount)); + CheckLength(name, values); + _columns.Add(new AssignmentColumn(new KeyType(DataKind.U8, keyMin, keyCount), values)); + _getKeyValues.Add(name, getKeyValues); + _names.Add(name); + } + /// /// Creates a column with slot names from arrays. The added column will be re-interpreted as a buffer. 
/// diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs index 452792018d..9b424b499a 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs @@ -28,6 +28,9 @@ [assembly: LoadableClass(ValueMappingTransform.Summary, typeof(ValueMappingTransform), null, typeof(SignatureLoadModel), "Value Mapping Transform", ValueMappingTransform.LoaderSignature)] +[assembly: LoadableClass(typeof(IRowMapper), typeof(ValueMappingTransform), null, typeof(SignatureLoadRowMapper), + ValueMappingTransform.UserName, ValueMappingTransform.LoaderSignature)] + [assembly: LoadableClass("", typeof(IDataTransform), typeof(ValueMappingTransform), null, typeof(SignatureLoadDataTransform), "", ValueMappingTransform.TermLookupLoaderSignature)] @@ -114,41 +117,27 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) } } - /// - /// The ValueMappingTransform is a 1-1 mapping from a key to value. The key type and value type are specified - /// through TKeyType and TValueType. Arrays are supported for vector types which can be used as either a key or a value - /// or both. The mapping is specified, not trained by providiing a list of keys and a list of values. 
- /// - /// Specifies the key type - /// Specifies the value type - public sealed class ValueMappingTransform : ValueMappingTransform + internal class DataViewHelper { - /// - /// Constructs a ValueMappingTransform with a key type to value type - /// - /// Instance of the host environment - /// The list of keys that are TKeyType - /// The list of values that are TValueType - /// Specifies to treat the values as a - /// The specified columns to apply - public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool treatValuesAsKeyTypes, (string Input, string Output)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), - ConvertToDataView(env, keys, values, treatValuesAsKeyTypes), KeyColumnName, ValueColumnName, columns) - { } + public static PrimitiveType GetPrimitiveType(Type rawType, out bool isVectorType) + { + Type type = rawType; + isVectorType = false; + if (type.IsArray) + { + type = rawType.GetElementType(); + isVectorType = true; + } - /// - /// Constructs a ValueMappingTransform with a key type to value array type - /// - /// Instance of the host environment - /// The list of keys that are TKeyType - /// The list of values that are TValueType[] - /// The specified columns to apply - public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), - ConvertToDataView(env, keys, values), KeyColumnName, ValueColumnName, columns) - { } + if (!type.TryGetDataKind(out DataKind kind)) + { + throw new InvalidOperationException($"Unsupported type {type} used in mapping."); + } + + return PrimitiveType.FromKind(kind); + } - private static ValueGetter>> GetKeyValueGetter(TValueType[] values) + private static ValueGetter>> GetKeyValueGetter(TValue[] values) { return (ref VBuffer> dst) => @@ -160,19 +149,36 @@ private static ValueGetter>> 
GetKeyValueGetter(TVal }; } - private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool treatValuesAsKeyValue) + public static IDataView CreateDataView(IHostEnvironment env, + IEnumerable keys, + IEnumerable values, + string keyColumnName, + string valueColumnName) { // Build DataView from the mapping - var keyType = ValueMappingTransform.GetPrimitiveType(typeof(TKeyType), out bool isKeyVectorType); - var valueType = ValueMappingTransform.GetPrimitiveType(typeof(TValueType), out bool isValueVectorType); + var keyType = GetPrimitiveType(typeof(TKey), out bool isKeyVectorType); + var valueType = GetPrimitiveType(typeof(TValue), out bool isValueVectorType); + var dataViewBuilder = new ArrayDataViewBuilder(env); + dataViewBuilder.AddColumn(keyColumnName, keyType, keys.ToArray()); + dataViewBuilder.AddColumn(valueColumnName, valueType, values.ToArray()); + return dataViewBuilder.GetDataView(); + } - // If treatValuesAsKeyValues can only be used with non-vector types - env.Check(!(treatValuesAsKeyValue && valueType.IsVector), "Treating values as key value types can only be used on non-vector types."); + public static IDataView CreateDataView(IHostEnvironment env, + IEnumerable keys, + IEnumerable values, + string keyColumnName, + string valueColumnName, + bool treatValuesAsKeyTypes) + { + // Build DataView from the mapping + var keyType = GetPrimitiveType(typeof(TKey), out bool isKeyVectorType); + var valueType = GetPrimitiveType(typeof(TValue), out bool isValueVectorType); var dataViewBuilder = new ArrayDataViewBuilder(env); - dataViewBuilder.AddColumn(ValueMappingTransform.KeyColumnName, keyType, keys.ToArray()); - var valuesArray = values.ToArray(); - if (treatValuesAsKeyValue) + dataViewBuilder.AddColumn(keyColumnName, keyType, keys.ToArray()); + //var valuesArray = values.ToArray(); + if (treatValuesAsKeyTypes) { // If the values are key values, there are two different ways in which they are handled: // 1) If the 
values are of type uint, then it is assumed that these values are the @@ -180,21 +186,32 @@ private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable indices = values.Select((x) => Convert.ToUInt32(x)); + uint[] indices = values.Select((x) => Convert.ToUInt32(x) - 1).ToArray(); + var min = indices.Min(); + var max = indices.Max(); + int count = (int)(max - min + 1); + dataViewBuilder.AddColumn(valueColumnName, GetKeyValueGetter(indices), min, count, indices); + } + else if (valueType.RawKind == DataKind.U8) + { + ulong[] indices = values.Select((x) => Convert.ToUInt64(x) - 1).ToArray(); var min = indices.Min(); - dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName, GetKeyValueGetter(valuesArray), min, indices.Count(), indices.ToArray()); + var max = indices.Max(); + int count = (int)(max - min + 1); + dataViewBuilder.AddColumn(valueColumnName, GetKeyValueGetter(indices), min, count, indices); } else { // When generating the indices, treat each value as being unique, i.e. two values that are the same will // be assigned the same index. The dictionary is used to maintain uniqueness, indices will contain // the full list of indices (equal to the same length of values). - Dictionary keyTypeValueMapping = new Dictionary(); + Dictionary keyTypeValueMapping = new Dictionary(); uint[] indices = new uint[values.Count()]; - uint index = 0; + // Start the index at 1 since key types start at 1, 0 is invalid + uint index = 1; for(int i = 0; i < values.Count(); ++i) { - TValueType value = values.ElementAt(i); + TValue value = values.ElementAt(i); if(!keyTypeValueMapping.ContainsKey(value)) { keyTypeValueMapping.Add(value, index); @@ -205,27 +222,63 @@ private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable + /// The ValueMappingTransform is a 1-1 mapping from a key to value. The key type and value type are specified + /// through TKeyType and TValueType. 
Arrays are supported for vector types which can be used as either a key or a value + /// or both. The mapping is specified, not trained by providiing a list of keys and a list of values. + /// + /// Specifies the key type + /// Specifies the value type + public sealed class ValueMappingTransform : ValueMappingTransform + { + /// + /// Constructs a ValueMappingTransform with a key type to value type + /// + /// Instance of the host environment + /// The list of keys that are TKeyType + /// The list of values that are TValueType + /// Specifies to treat the values as a + /// The specified columns to apply + public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool treatValuesAsKeyTypes, (string Input, string Output)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), + ConvertToDataView(env, keys, values, treatValuesAsKeyTypes), KeyColumnName, ValueColumnName, columns) + { } + + /// + /// Constructs a ValueMappingTransform with a key type to value array type + /// + /// Instance of the host environment + /// The list of keys that are TKeyType + /// The list of values that are TValueType[] + /// The specified columns to apply + public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), + ConvertToDataView(env, keys, values), KeyColumnName, ValueColumnName, columns) + { } + + private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool treatValuesAsKeyValue) + => DataViewHelper.CreateDataView(env, + keys, + values, + ValueMappingTransform.KeyColumnName, + ValueMappingTransform.ValueColumnName, + treatValuesAsKeyValue); + + // Handler for vector value types private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values) - { - // Build DataView from 
the mapping - var keyType = ValueMappingTransform.GetPrimitiveType(typeof(TKeyType), out bool isKeyVectorType); - var valueType = ValueMappingTransform.GetPrimitiveType(typeof(TValueType), out bool isValueVectorType); - var dataViewBuilder = new ArrayDataViewBuilder(env); - dataViewBuilder.AddColumn(ValueMappingTransform.KeyColumnName, keyType, keys.ToArray()); - dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName, valueType, values.ToArray()); - return dataViewBuilder.GetDataView(); - } + => DataViewHelper.CreateDataView(env, keys, values, ValueMappingTransform.KeyColumnName, ValueMappingTransform.ValueColumnName); } public class ValueMappingTransform : OneToOneTransformerBase @@ -242,6 +295,7 @@ public class ValueMappingTransform : OneToOneTransformerBase protected static string KeyColumnName = "Key"; protected static string ValueColumnName = "Value"; private ValueMap _valueMap; + private Schema.Metadata _valueMetadata; public ColumnType ValueColumnType => _valueMap.ValueType; @@ -301,14 +355,28 @@ public sealed class Arguments [Argument(ArgumentType.AtMostOnce, IsInputFileName = true, HelpText = "The data file containing the terms", ShortName = "data", SortOrder = 2)] public string DataFile; + + [Argument(ArgumentType.AtMostOnce, + HelpText = "Specifies whether the values are key values or numeric, only valid when loader is not specified and the type of data is not an idv.")] + public bool ValuesAsKeyType = true; } - protected ValueMappingTransform(IHostEnvironment env, IDataView lookupMap, string keyColumn, string valueColumn, (string Input, string Output)[] columns) + protected ValueMappingTransform(IHostEnvironment env, IDataView lookupMap, + string keyColumn, string valueColumn, (string Input, string Output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), columns) { env.CheckNonEmpty(keyColumn, nameof(keyColumn), "A key column must be specified when passing in an IDataView for the value mapping"); 
env.CheckNonEmpty(valueColumn, nameof(valueColumn), "A value column must be specified when passing in an IDataView for the value mapping"); _valueMap = CreateValueMapFromDataView(lookupMap, keyColumn, valueColumn); + env.Assert(lookupMap.Schema.TryGetColumnIndex(valueColumn, out int valueColumnIdx)); + _valueMetadata = CopyMetadata(lookupMap.Schema[valueColumnIdx].Metadata); + } + + private Schema.Metadata CopyMetadata(Schema.Metadata metadata) + { + var meta = new MetadataBuilder(); + meta.Add(metadata, x=> true); + return meta.GetMetadata(); } private ValueMap CreateValueMapFromDataView(IDataView dataView, string keyColumn, string valueColumn) @@ -318,19 +386,147 @@ private ValueMap CreateValueMapFromDataView(IDataView dataView, string keyColumn Host.Check(dataView.Schema.TryGetColumnIndex(valueColumn, out int valueIdx), "Value column " + valueColumn + " does not exist in the given dataview"); var keyType = dataView.Schema.GetColumnType(keyIdx); var valueType = dataView.Schema.GetColumnType(valueIdx); - var valueMap = ValueMap.Create(keyType, valueType); + var valueMap = ValueMap.Create(keyType, valueType, _valueMetadata); using (var cursor = dataView.GetRowCursor(c=> c == keyIdx || c == valueIdx)) valueMap.Train(Host, cursor); return valueMap; } + private static TextLoader.Column GenerateValueColumn(IHostEnvironment env, + IDataView loader, + string valueColumnName, + int keyIdx, + int valueIdx) + { + // Scan the source to determine the min max of the column + ulong keyMin = ulong.MinValue; + ulong keyMax = ulong.MinValue; + + // scan the input to create convert the values as key types + using (var cursor = loader.GetRowCursor(c => true)) + { + using(var ch = env.Start("Processing key values")) + { + var getKey = cursor.GetGetter>(keyIdx); + var getValue = cursor.GetGetter>(valueIdx); + int countNonKeys = 0; + + ReadOnlyMemory key = default; + ReadOnlyMemory value = default; + while(cursor.MoveNext()) + { + getKey(ref key); + getValue(ref value); + + ulong res; 
+ // Try to parse the text as a key value between 1 and ulong.MaxValue. If this succeeds and res>0, + // we update max and min accordingly. If res==0 it means the value is missing, in which case we ignore it for + // computing max and min. + if (Microsoft.ML.Runtime.Data.Conversion.Conversions.Instance.TryParseKey(in value, 1, ulong.MaxValue, out res)) + { + if (res < keyMin && res != 0) + keyMin = res; + if (res > keyMax) + keyMax = res; + } + // If parsing as key did not succeed, the value can still be 0, so we try parsing it as a ulong. If it succeeds, + // then the value is 0, and we update min accordingly. + else if (Microsoft.ML.Runtime.Data.Conversion.Conversions.Instance.TryParse(in value, out res)) + { + keyMin = 0; + } + //If parsing as a ulong fails, we increment the counter for the non-key values. + else + { + if (countNonKeys < 5) + ch.Warning("Key '{0}' in mapping file is mapped to non key value '{1}'", key, value); + countNonKeys++; + } + } + } + } + + TextLoader.Column valueColumn = new TextLoader.Column(valueColumnName, DataKind.U4, 1); + if (keyMax - keyMin < (ulong)int.MaxValue) + { + valueColumn.KeyRange = new KeyRange(keyMin, keyMax); + } + else if (keyMax - keyMin < (ulong)uint.MaxValue) + { + valueColumn.KeyRange = new KeyRange(keyMin); + } + else + { + valueColumn.Type = DataKind.U8; + valueColumn.KeyRange = new KeyRange(keyMin); + } + + return valueColumn; + } + + private static ValueMappingTransform CreateTransformInvoke(IHostEnvironment env, + IDataView idv, + string keyColumnName, + string valueColumnName, + bool treatValuesAsKeyTypes, + (string Input, string Output)[] columns) + { + // Read in the data + // scan the input to create convert the values as key types + List keys = new List(); + List values = new List(); + + idv.Schema.TryGetColumnIndex(keyColumnName, out int keyIdx); + idv.Schema.TryGetColumnIndex(valueColumnName, out int valueIdx); + using (var cursor = idv.GetRowCursor(c => true)) + { + using(var ch = 
env.Start("Processing key values")) + { + TKeyType key = default; + TValueType value = default; + var getKey = cursor.GetGetter(keyIdx); + var getValue = cursor.GetGetter(valueIdx); + while(cursor.MoveNext()) + { + try + { + getKey(ref key); + } + catch(InvalidOperationException) + { + ch.Warning("Invalid key parsed, row will be skipped."); + continue; + } + + try + { + getValue(ref value); + } + catch(InvalidOperationException) + { + ch.Warning("Invalid value parsed for key {key}, row will be skipped."); + continue; + } + + keys.Add(key); + values.Add(value); + } + } + } + + return new ValueMappingTransform(env, keys, values, treatValuesAsKeyTypes, columns); + } + private static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(args, nameof(args)); env.Assert(!string.IsNullOrWhiteSpace(args.DataFile)); - env.AssertNonEmpty(args.KeyColumn); - env.AssertNonEmpty(args.ValueColumn); + env.CheckValueOrNull(args.KeyColumn); + env.CheckValueOrNull(args.ValueColumn); + + var keyColumnName = (string.IsNullOrEmpty(args.KeyColumn)) ? KeyColumnName : args.KeyColumn; + var valueColumnName = (string.IsNullOrEmpty(args.ValueColumn)) ? ValueColumnName : args.ValueColumn; IMultiStreamSource fileSource = new MultiFileSource(args.DataFile); IDataView loader; @@ -340,10 +536,104 @@ private static IDataTransform Create(IHostEnvironment env, Arguments args, IData } else { - loader = new BinaryLoader(env, new BinaryLoader.Arguments(), fileSource); + var extension = Path.GetExtension(args.DataFile); + if (extension.Equals(".idv", StringComparison.OrdinalIgnoreCase)) + loader = new BinaryLoader(env, new BinaryLoader.Arguments(), fileSource); + else if (extension.Equals(".tdv")) + loader = new TransposeLoader(env, new TransposeLoader.Arguments(), fileSource); + else + { + // The user has not specified how to load this file. This will attempt to load the + // data file as two text columns. 
If the user has also specified ValuesAsKeyTypes, + // this will default to the key column as a text column and the value column as a uint column + + // Set the keyColumnName and valueColumnName to the default values. + keyColumnName = KeyColumnName; + valueColumnName = ValueColumnName; + + TextLoader.Column keyColumn = new TextLoader.Column(keyColumnName, DataKind.TXT, 0); + TextLoader.Column valueColumn = new TextLoader.Column(valueColumnName, DataKind.TXT, 1); + /* + if (args.ValuesAsKeyType) + { + valueColumn = new TextLoader.Column(valueColumnName, DataKind.U8, 1); + }*/ + + var txtArgs = new TextLoader.Arguments() + { + Column=new TextLoader.Column[] + { + keyColumn, + valueColumn + } + }; + + //loader = TextLoader.ReadFile(env, txtArgs, fileSource); + var textLoader = TextLoader.ReadFile(env, txtArgs, fileSource); + //env.Assert(textLoader.Schema.TryGetColumnIndex(keyColumnName, out int keyColumnIndex)); + //env.Assert(textLoader.Schema.TryGetColumnIndex(valueColumnName, out int valueColumnIndex)); + + // Default to a text loader. KeyType and ValueType are assumed to be string + // types unless ValueAsKeyType is specified. 
+ //TextLoader.Column keyColumn = new TextLoader.Column(keyColumnName, DataKind.TXT, keyColumnIndex); + //TextLoader.Column valueColumn = new TextLoader.Column(valueColumnName, DataKind.TXT, valueColumnIndex); + if (args.ValuesAsKeyType) + { + valueColumn = GenerateValueColumn(env, textLoader, valueColumnName, 0, 1); + // Change ValueColumn to be of type U4 + //valueColumn = new TextLoader.Column(valueColumnName, DataKind.U4, valueColumnIndex); + //GenerateKeyRangeAndMinFromValues(env, textLoader, keyColumnIndex, valueColumnIndex, out ulong min, out ulong max); + //valueColumn.KeyRange = new KeyRange(min, max); + } + + loader = TextLoader.Create( + env, + new TextLoader.Arguments() + { + Column = new TextLoader.Column[] + { + keyColumn, + valueColumn + } + }, + fileSource); + } } - var transformer = new ValueMappingTransform(env, loader, args.KeyColumn, args.ValueColumn, args.Column.Select(x => (x.Source, x.Name)).ToArray()); + env.AssertValue(loader); + env.Assert(loader.Schema.TryGetColumnIndex(keyColumnName, out int keyColumnIndex)); + env.Assert(loader.Schema.TryGetColumnIndex(valueColumnName, out int valueColumnIndex)); + + ValueMappingTransform transformer = null; + (string Source, string Name)[] columns = args.Column.Select(x => (x.Source, x.Name)).ToArray(); + /* + Func del = CreateTransformInvoke; + var meth = del.Method.GetGenericMethodDefinition().MakeGenericMethod(loader.Schema[keyColumnIndex].Type.RawType, + loader.Schema[valueColumnIndex].Type.RawType); + transformer = (ValueMappingTransform)meth.Invoke(null, new object[] { env, + loader, + keyColumnName, + valueColumnName, + args.ValuesAsKeyType, + columns + }); + /* + if (args.ValuesAsKeyType) + { + Func del = CreateTransformInvoke; + var meth = del.Method.GetGenericMethodDefinition().MakeGenericMethod(loader.Schema[keyColumnIndex].Type.RawType, + loader.Schema[valueColumnIndex].Type.RawType); + transformer = (ValueMappingTransform)meth.Invoke(null, new object[] { env, + loader, + keyColumnName, + 
valueColumnName, + args.ValuesAsKeyType, + columns + }); + } + else + */ + transformer = new ValueMappingTransform(env, loader, keyColumnName, valueColumnName, columns); return transformer.MakeDataTransform(input); } @@ -418,6 +708,9 @@ private static byte[] ReadAllBytes(IExceptionContext ectx, BinaryReader rdr) protected static IDataTransform Create(IHostEnvironment env, ModelLoadContext ctx, IDataView input) => Create(env, ctx).MakeDataTransform(input); + private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, ISchema inputSchema) + => Create(env, ctx).MakeRowMapper(Schema.Create(inputSchema)); + protected static PrimitiveType GetPrimitiveType(Type rawType, out bool isVectorType) { Type type = rawType; @@ -461,16 +754,18 @@ public ValueMap(ColumnType keyType, ColumnType valueType) ValueType = valueType; } - public static ValueMap Create(ColumnType keyType, ColumnType valueType) + public static ValueMap Create(ColumnType keyType, ColumnType valueType, Schema.Metadata valueMetadata) { - Func del = CreateValueMapInvoke; + Func del = CreateValueMapInvoke; var meth = del.Method.GetGenericMethodDefinition().MakeGenericMethod(keyType.RawType, valueType.RawType); - return (ValueMap)meth.Invoke(null, new object[] { keyType, valueType }); + return (ValueMap)meth.Invoke(null, new object[] { keyType, valueType, valueMetadata }); } - private static ValueMap CreateValueMapInvoke(ColumnType keyType, ColumnType valueType) + private static ValueMap CreateValueMapInvoke(ColumnType keyType, + ColumnType valueType, + Schema.Metadata valueMetadata) { - return new ValueMap(keyType, valueType); + return new ValueMap(keyType, valueType, valueMetadata); } public abstract void Train(IHostEnvironment env, IRowCursor cursor); @@ -487,6 +782,7 @@ private class ValueMap : ValueMap { private Dictionary _mapping; private TValueType _missingValue; + private Schema.Metadata _valueMetadata; private Dictionary CreateDictionary() { @@ -495,10 +791,11 @@ private Dictionary 
CreateDictionary() return new Dictionary(); } - public ValueMap(ColumnType keyType, ColumnType valueType) + public ValueMap(ColumnType keyType, ColumnType valueType, Schema.Metadata valueMetadata) : base(keyType, valueType) { _mapping = CreateDictionary(); + _valueMetadata = valueMetadata; } public override void Train(IHostEnvironment env, IRowCursor cursor) @@ -515,7 +812,7 @@ public override void Train(IHostEnvironment env, IRowCursor cursor) // First check if there is a String->ValueType conversion method. If so, call the conversion method with an // empty string, the returned value will be the new missing value. // NOTE this will return NA for R4 and R8 types. - if (Runtime.Data.Conversion.Conversions.Instance.TryGetStandardConversion, TValueType>( + if (Microsoft.ML.Runtime.Data.Conversion.Conversions.Instance.TryGetStandardConversion, TValueType>( TextType.Instance, ValueType, out conv, @@ -565,14 +862,12 @@ public override Delegate GetGetter(IRow input, int index) } public override IDataView GetDataView(IHostEnvironment env) - { - var dataViewBuilder = new ArrayDataViewBuilder(env); - var keyType = ValueMappingTransform.GetPrimitiveType(typeof(TKeyType), out bool isKeyVectorType); - var valueType = ValueMappingTransform.GetPrimitiveType(typeof(TValueType), out bool isValueVectorType); - dataViewBuilder.AddColumn(ValueMappingTransform.KeyColumnName, keyType, _mapping.Keys.ToArray()); - dataViewBuilder.AddColumn(ValueMappingTransform.ValueColumnName,valueType, _mapping.Values.ToArray()); - return dataViewBuilder.GetDataView(); - } + => DataViewHelper.CreateDataView(env, + _mapping.Keys, + _mapping.Values, + ValueMappingTransform.KeyColumnName, + ValueMappingTransform.ValueColumnName, + ValueType.IsKey); private static TValueType GetVector(TValueType value) { @@ -633,23 +928,26 @@ private static BinaryLoader GetLoader(IHostEnvironment env, byte[] bytes) protected override IRowMapper MakeRowMapper(Schema schema) { - return new Mapper(this, Schema.Create(schema), 
_valueMap, ColumnPairs); + return new Mapper(this, Schema.Create(schema), _valueMap, _valueMetadata, ColumnPairs); } private sealed class Mapper : OneToOneMapperBase { private readonly Schema _inputSchema; private readonly ValueMap _valueMap; + private readonly Schema.Metadata _valueMetadata; private readonly (string Source, string Name)[] _columns; private readonly ValueMappingTransform _parent; internal Mapper(ValueMappingTransform transform, Schema inputSchema, ValueMap valueMap, + Schema.Metadata valueMetadata, (string input, string output)[] columns) : base(transform.Host.Register(nameof(Mapper)), transform, inputSchema) { _inputSchema = inputSchema; + _valueMetadata = valueMetadata; _valueMap = valueMap; _columns = columns; _parent = transform; @@ -666,11 +964,13 @@ protected override Delegate MakeGetter(IRow input, int iinfo, Func ac protected override Schema.DetachedColumn[] GetOutputColumnsCore() { + var md = new MetadataBuilder(); + var result = new Schema.DetachedColumn[_columns.Length]; for (int i = 0; i < _columns.Length; i++) { var srcCol = _inputSchema[_columns[i].Source]; - result[i] = new Schema.DetachedColumn(_columns[i].Name, _valueMap.ValueType, srcCol.Metadata); + result[i] = new Schema.DetachedColumn(_columns[i].Name, _valueMap.ValueType, md.GetMetadata()); } return result; } diff --git a/src/Microsoft.ML.Transforms/TermLookupTransformer.cs b/src/Microsoft.ML.Transforms/TermLookupTransformer.cs index 63be541419..a2fe8cba1c 100644 --- a/src/Microsoft.ML.Transforms/TermLookupTransformer.cs +++ b/src/Microsoft.ML.Transforms/TermLookupTransformer.cs @@ -19,6 +19,9 @@ [assembly: LoadableClass(TermLookupTransformer.Summary, typeof(TermLookupTransformer), typeof(TermLookupTransformer.Arguments), typeof(SignatureDataTransform), "Term Lookup Transform", "TermLookup", "Lookup", "LookupTransform", "TermLookupTransform")] +//[assembly: LoadableClass(TermLookupTransformer.Summary, typeof(TermLookupTransformer), null, typeof(SignatureLoadDataTransform), + 
//"Term Lookup Transform", TermLookupTransformer.LoaderSignature)] + namespace Microsoft.ML.Transforms.Categorical { using Conditional = System.Diagnostics.ConditionalAttribute; diff --git a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs index e2b35aeb1a..3cb18dcbe6 100644 --- a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs +++ b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs @@ -43,7 +43,7 @@ public void SavePipeLabelParsers() "loader=Text{col=RawLabel:TXT:0 col=Names:TXT:1-2 col=Features:TXT:3-4 header+}", "xf=AutoLabel{col=AutoLabel:RawLabel}", "xf=Term{col=StringLabel:RawLabel terms={Wirtschaft,Gesundheit,Deutschland,Ausland,Unterhaltung,Sport,Technik & Wissen}}", - string.Format("xf=TermLookup{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=ValueMap{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), "xf=SelectColumns{keepcol=RawLabel keepcol=AutoLabel keepcol=StringLabel keepcol=FileLabel hidden=-}" }); @@ -63,7 +63,7 @@ public void SavePipeLabelParsers() TestCore(pathData, true, new[] { "loader=Text{col=RawLabel:TXT:0 col=Names:TXT:1-2 col=Features:TXT:3-4 header+}", - string.Format("xf=TermLookup{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=ValueMap{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), "xf=SelectColumns{keepcol=RawLabel keepcol=FileLabel hidden=-}" }, suffix: "1"); @@ -83,7 +83,7 @@ public void SavePipeLabelParsers() TestCore(pathData, true, new[] { "loader=Text{col=RawLabel:TXT:0 col=Names:TXT:1-2 col=Features:TXT:3-4 header+}", - string.Format("xf=TermLookup{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=ValueMap{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), "xf=SelectColumns{keepcol=RawLabel keepcol=FileLabel hidden=-}" }, suffix: "2"); @@ -103,7 +103,7 @@ public void SavePipeLabelParsers() TestCore(pathData, true, new[] { 
"loader=Text{col=RawLabel:TXT:0 col=Names:TXT:1-2 col=Features:TXT:3-4 header+}", - string.Format("xf=TermLookup{{key=- col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=ValueMap{{valuesAsKeyType=- col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), "xf=SelectColumns{keepcol=RawLabel keepcol=FileLabel hidden=-}" }, suffix: "3"); @@ -128,8 +128,8 @@ public void SavePipeLabelParsers() TestCore(pathData, true, new[] { "loader=Text{col=RawLabel:TXT:0 col=Names:TXT:1-2 col=Features:TXT:3-4 header+}", - string.Format("xf=TermLookup{{key=- col=FileLabelNum:RawLabel data={{{0}}}}}", mappingPathData), - string.Format("xf=TermLookup{{col=FileLabelKey:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=ValueMap{{valuesAsKeyType=- col=FileLabelNum:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=ValueMap{{col=FileLabelKey:RawLabel data={{{0}}}}}", mappingPathData), "xf=SelectColumns{keepcol=RawLabel keepcol=FileLabelNum keepcol=FileLabelKey hidden=-}" }, suffix: "4"); writer.WriteLine(ProgressLogLine); @@ -153,7 +153,7 @@ public void SavePipeLabelParsers() TestCore(pathData, true, new[] { "loader=Text{col=RawLabel:TXT:0 col=Names:TXT:1-2 col=Features:TXT:3-4 header+}", - string.Format("xf=TermLookup{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=ValueMap{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), "xf=SelectColumns{keepcol=RawLabel keepcol=FileLabel hidden=-}" }, suffix: "5"); diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs index 9cc85c1a0e..6890adf157 100644 --- a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -256,13 +256,13 @@ public void ValueMappingValuesAsStringKeyTypes() // The expected values will contain the generated key type values. 
uint dValue = 1; getterD(ref dValue); - Assert.Equal(1, dValue); + Assert.Equal(2, dValue); uint eValue = 0; getterE(ref eValue); - Assert.Equal(0, eValue); + Assert.Equal(1, eValue); uint fValue = 0; getterF(ref fValue); - Assert.Equal(0, fValue); + Assert.Equal(1, fValue); } [Fact] @@ -290,6 +290,33 @@ void TestCommandLine() + @" col=A:B loader=Text{col=ID:U8:0 col=Text:TX:1 sep=, header=+} } in=f:\1.txt" }), (int)0); } + [Fact] + void TestCommandLineNoLoader() + { + var dataFile = GetDataPath("lm.labels.txt"); + Assert.Equal(Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0 col=B:R4:1 col=C:R4:2} xf=valuemap{data=" + + dataFile + + @" col=A:B } in=f:\1.txt" }), (int)0); + } + + [Fact] + void TestCommandLineNoLoaderWithColumnNames() + { + var dataFile = GetDataPath("lm.labels.txt"); + Assert.Equal(Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0 col=B:R4:1 col=C:R4:2} xf=valuemap{data=" + + dataFile + + @" col=A:B key=foo value=bar} in=f:\1.txt" }), (int)0); + } + + [Fact] + void TestCommandLineNoLoaderWithoutTreatValuesAsKeys() + { + var dataFile = GetDataPath("lm.labels.txt"); + Assert.Equal(Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0 col=B:R4:1 col=C:R4:2} xf=valuemap{data=" + + dataFile + + @" col=A:B valuesAsKeyType=-} in=f:\1.txt" }), (int)0); + } + [Fact] void TestSavingAndLoading() { From 5445667d40553ca83091dec0076ce0fae0a2ed8f Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Thu, 13 Dec 2018 18:28:32 -0800 Subject: [PATCH 10/16] ...updating... 
--- .../Transforms/ValueMappingTransform.cs | 14 +++++++------- .../Transformers/ValueMappingTests.cs | 7 ++++--- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs index 9b424b499a..54e3f17667 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs @@ -99,7 +99,7 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) { Host.CheckValue(inputSchema, nameof(inputSchema)); - var resultDic = inputSchema.Columns.ToDictionary(x => x.Name); + var resultDic = inputSchema.ToDictionary(x => x.Name); var vectorKind = Transformer.ValueColumnType.IsVector ? SchemaShape.Column.VectorKind.Vector : SchemaShape.Column.VectorKind.Scalar; var isKey = Transformer.ValueColumnType.IsKey; var columnType = (isKey) ? PrimitiveType.FromKind(DataKind.U4) : @@ -768,9 +768,9 @@ private static ValueMap CreateValueMapInvoke(ColumnType ke return new ValueMap(keyType, valueType, valueMetadata); } - public abstract void Train(IHostEnvironment env, IRowCursor cursor); + public abstract void Train(IHostEnvironment env, RowCursor cursor); - public abstract Delegate GetGetter(IRow input, int index); + public abstract Delegate GetGetter(Row input, int index); public abstract IDataView GetDataView(IHostEnvironment env); } @@ -798,7 +798,7 @@ public ValueMap(ColumnType keyType, ColumnType valueType, Schema.Metadata valueM _valueMetadata = valueMetadata; } - public override void Train(IHostEnvironment env, IRowCursor cursor) + public override void Train(IHostEnvironment env, RowCursor cursor) { // Validate that the conversion is supported for non-vector types bool identity; @@ -840,7 +840,7 @@ public override void Train(IHostEnvironment env, IRowCursor cursor) } } - public override Delegate GetGetter(IRow input, int index) + public override Delegate GetGetter(Row input, int index) { 
var src = default(TKeyType); ValueGetter getSrc = input.GetGetter(index); @@ -926,7 +926,7 @@ private static BinaryLoader GetLoader(IHostEnvironment env, byte[] bytes) return new BinaryLoader(env, new BinaryLoader.Arguments(), strm); } - protected override IRowMapper MakeRowMapper(Schema schema) + private protected override IRowMapper MakeRowMapper(Schema schema) { return new Mapper(this, Schema.Create(schema), _valueMap, _valueMetadata, ColumnPairs); } @@ -953,7 +953,7 @@ internal Mapper(ValueMappingTransform transform, _parent = transform; } - protected override Delegate MakeGetter(IRow input, int iinfo, Func activeOutput, out Action disposer) + protected override Delegate MakeGetter(Row input, int iinfo, Func activeOutput, out Action disposer) { Host.AssertValue(input); Host.Assert(0 <= iinfo && iinfo < _columns.Length); diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs index 6890adf157..54d1882799 100644 --- a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -5,7 +5,6 @@ using Microsoft.ML.Core.Data; using Microsoft.ML.Data; -using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Model; using Microsoft.ML.Runtime.RunTests; @@ -154,7 +153,7 @@ public void ValueMappingOutputSchema() var estimator = new ValueMappingEstimator, int>(Env, keys, values, new[] { ("A", "D"), ("B", "E"), ("C", "F") }); var outputSchema = estimator.GetOutputSchema(SchemaShape.Create(dataView.Schema)); - Assert.Equal(6, outputSchema.Columns.Length); + Assert.Equal(6, outputSchema.Count()); Assert.True(outputSchema.TryFindColumn("D", out SchemaShape.Column dColumn)); Assert.True(outputSchema.TryFindColumn("E", out SchemaShape.Column eColumn)); Assert.True(outputSchema.TryFindColumn("F", out SchemaShape.Column fColumn)); @@ -180,7 +179,7 @@ public void ValueMappingWithValuesAsKeyTypesOutputSchema() 
var estimator = new ValueMappingEstimator, ReadOnlyMemory>(Env, keys, values, true, new[] { ("A", "D"), ("B", "E"), ("C", "F") }); var outputSchema = estimator.GetOutputSchema(SchemaShape.Create(dataView.Schema)); - Assert.Equal(6, outputSchema.Columns.Length); + Assert.Equal(6, outputSchema.Count()); Assert.True(outputSchema.TryFindColumn("D", out SchemaShape.Column dColumn)); Assert.True(outputSchema.TryFindColumn("E", out SchemaShape.Column eColumn)); Assert.True(outputSchema.TryFindColumn("F", out SchemaShape.Column fColumn)); @@ -371,10 +370,12 @@ void TestValueMapBackCompatTermLookupKeyTypeValue() Assert.True(result.Schema.TryGetColumnIndex("Label", out int labelIdx)); Assert.True(result.Schema.TryGetColumnIndex("GroupId", out int groupIdx)); + /* Assert.True(result.Schema[labelIdx].Type.IsKey); var keyType = result.Schema[labelIdx].Type.AsKey; Assert.Equal((ulong)0, keyType.Min); Assert.Equal(5, keyType.KeyCount); + */ var t = result.GetColumn(Env, "Label"); uint s = t.First(); From cefe1a8579f9ed16d100f624b411358dd7a0ab00 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Wed, 19 Dec 2018 14:43:48 -0800 Subject: [PATCH 11/16] - Added support for the values to be represented as key types - Removed TermLookup - Updated tests to use ValueMapping instead of TermLookup. 
--- .../DataView/ArrayDataViewBuilder.cs | 4 +- .../Transforms/ValueMappingTransform.cs | 393 +++++----- .../TermLookupTransformer.cs | 708 ------------------ .../SavePipe/SavePipeLabelParsers-Schema.txt | 2 +- .../SavePipe/SavePipeLabelParsers1-Schema.txt | 2 +- .../SavePipe/SavePipeLabelParsers2-Schema.txt | 2 +- .../SavePipe/SavePipeLabelParsers3-Schema.txt | 2 +- .../SavePipe/SavePipeLabelParsers4-Schema.txt | 4 +- .../SavePipe/SavePipeLabelParsers4-out.txt | 110 ++- .../SavePipe/SavePipeLabelParsers5-Schema.txt | 2 +- .../Transformers/ValueMappingTests.cs | 77 +- 11 files changed, 349 insertions(+), 957 deletions(-) delete mode 100644 src/Microsoft.ML.Transforms/TermLookupTransformer.cs diff --git a/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs b/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs index 3e9159978b..9872be3f85 100644 --- a/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs +++ b/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs @@ -78,7 +78,7 @@ public void AddColumn(string name, PrimitiveType type, params T[] values) public void AddColumn(string name, ValueGetter>> getKeyValues, ulong keyMin, int keyCount, params uint[] values) { _host.CheckValue(getKeyValues, nameof(getKeyValues)); - _host.CheckParam(keyCount > 0, nameof(keyCount)); + //_host.CheckParam(keyCount > 0, nameof(keyCount)); CheckLength(name, values); _columns.Add(new AssignmentColumn(new KeyType(DataKind.U4, keyMin, keyCount), values)); _getKeyValues.Add(name, getKeyValues); @@ -92,7 +92,7 @@ public void AddColumn(string name, ValueGetter>> ge public void AddColumn(string name, ValueGetter>> getKeyValues, ulong keyMin, int keyCount, params ulong[] values) { _host.CheckValue(getKeyValues, nameof(getKeyValues)); - _host.CheckParam(keyCount > 0, nameof(keyCount)); + //_host.CheckParam(keyCount > 0, nameof(keyCount)); CheckLength(name, values); _columns.Add(new AssignmentColumn(new KeyType(DataKind.U8, keyMin, keyCount), values)); _getKeyValues.Add(name, 
getKeyValues); diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs index 54e3f17667..9170e57ac4 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs @@ -17,33 +17,33 @@ using System.Linq; using System.Text; -[assembly: LoadableClass(ValueMappingTransform.Summary, typeof(IDataTransform), typeof(ValueMappingTransform), - typeof(ValueMappingTransform.Arguments), typeof(SignatureDataTransform), - ValueMappingTransform.UserName, "ValueMapping", "ValueMappingTransform", ValueMappingTransform.ShortName, - DocName = "transform/ValueMappingTransform.md")] +[assembly: LoadableClass(ValueMappingTransformer.Summary, typeof(IDataTransform), typeof(ValueMappingTransformer), + typeof(ValueMappingTransformer.Arguments), typeof(SignatureDataTransform), + ValueMappingTransformer.UserName, "ValueMapping", "ValueMappingTransformer", ValueMappingTransformer.ShortName, + DocName = "transform/ValueMappingTransformer.md")] -[assembly: LoadableClass(ValueMappingTransform.Summary, typeof(IDataTransform), typeof(ValueMappingTransform), null, typeof(SignatureLoadDataTransform), - "Value Mapping Transform", ValueMappingTransform.LoaderSignature)] +[assembly: LoadableClass(ValueMappingTransformer.Summary, typeof(IDataTransform), typeof(ValueMappingTransformer), null, typeof(SignatureLoadDataTransform), + "Value Mapping Transform", ValueMappingTransformer.LoaderSignature)] -[assembly: LoadableClass(ValueMappingTransform.Summary, typeof(ValueMappingTransform), null, typeof(SignatureLoadModel), - "Value Mapping Transform", ValueMappingTransform.LoaderSignature)] +[assembly: LoadableClass(ValueMappingTransformer.Summary, typeof(ValueMappingTransformer), null, typeof(SignatureLoadModel), + "Value Mapping Transform", ValueMappingTransformer.LoaderSignature)] -[assembly: LoadableClass(typeof(IRowMapper), typeof(ValueMappingTransform), null, 
typeof(SignatureLoadRowMapper), - ValueMappingTransform.UserName, ValueMappingTransform.LoaderSignature)] +[assembly: LoadableClass(typeof(IRowMapper), typeof(ValueMappingTransformer), null, typeof(SignatureLoadRowMapper), + ValueMappingTransformer.UserName, ValueMappingTransformer.LoaderSignature)] -[assembly: LoadableClass("", typeof(IDataTransform), typeof(ValueMappingTransform), null, typeof(SignatureLoadDataTransform), - "", ValueMappingTransform.TermLookupLoaderSignature)] +[assembly: LoadableClass("", typeof(IDataTransform), typeof(ValueMappingTransformer), null, typeof(SignatureLoadDataTransform), + "", ValueMappingTransformer.TermLookupLoaderSignature)] namespace Microsoft.ML.Transforms { /// /// The ValueMappingEstimator is a 1-1 mapping from a key to value. The key type and value type are specified - /// through TKeyType and TValueType. Arrays are supported for vector types which can be used as either a key or a value + /// through TKey and TValue. Arrays are supported for vector types which can be used as either a key or a value /// or both. The mapping is specified, not trained by providiing a list of keys and a list of values. 
/// - /// Specifies the key type - /// Specifies the value type - public sealed class ValueMappingEstimator : TrivialEstimator> + /// Specifies the key type + /// Specifies the value type + public sealed class ValueMappingEstimator : TrivialEstimator> { private (string input, string output)[] _columns; @@ -51,12 +51,12 @@ public sealed class ValueMappingEstimator : TrivialEstimat /// Constructs the ValueMappingEstimator, key type -> value type mapping /// /// Instance of the host environment - /// The list of keys of TKeyType - /// The list of values of TValueType + /// The list of keys of TKey + /// The list of values of TValue /// The list of columns to apply - public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), - new ValueMappingTransform(env, keys, values, false, columns)) + public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), + new ValueMappingTransformer(env, keys, values, false, columns)) { _columns = columns; } @@ -65,13 +65,13 @@ public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, I /// Constructs the ValueMappingEstimator, key type -> value type mapping /// /// Instance of the host environment - /// The list of keys of TKeyType - /// The list of values of TValueType + /// The list of keys of TKey + /// The list of values of TValue /// Specifies to treat the values as a /// The list of columns to apply - public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool treatValuesAsKeyType, params (string input, string output)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), - new ValueMappingTransform(env, keys, values, 
treatValuesAsKeyType, columns)) + public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool treatValuesAsKeyType, params (string input, string output)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), + new ValueMappingTransformer(env, keys, values, treatValuesAsKeyType, columns)) { _columns = columns; } @@ -80,12 +80,12 @@ public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, I /// Constructs the ValueMappingEstimator, key type -> value array type mapping /// /// Instance of the host environment - /// The list of keys of TKeyType - /// The list of values of TValueType[] + /// The list of keys of TKey + /// The list of values of TValue[] /// The list of columns to apply - public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), - new ValueMappingTransform(env, keys, values, columns)) + public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), + new ValueMappingTransformer(env, keys, values, columns)) { _columns = columns; } @@ -117,9 +117,15 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) } } + /// + /// The DataViewHelper provides a set of static functions to create a DataView given a list of keys and values. 
+ /// internal class DataViewHelper { - public static PrimitiveType GetPrimitiveType(Type rawType, out bool isVectorType) + /// + /// Helper function to retrieve the Primitie type given a Type + /// + internal static PrimitiveType GetPrimitiveType(Type rawType, out bool isVectorType) { Type type = rawType; isVectorType = false; @@ -137,25 +143,31 @@ public static PrimitiveType GetPrimitiveType(Type rawType, out bool isVectorType return PrimitiveType.FromKind(kind); } - private static ValueGetter>> GetKeyValueGetter(TValue[] values) + /// + /// Helper function for a reverse lookup given value. This is used for generating the metadata of the value column. + /// + + private static ValueGetter>> GetKeyValueGetter(TKey[] keys) { return (ref VBuffer> dst) => { - var editor = VBufferEditor.Create(ref dst, values.Length); - for (int i = 0; i < values.Length; i++) - editor.Values[i] = values[i].ToString().AsMemory(); + var editor = VBufferEditor.Create(ref dst, keys.Length); + for (int i = 0; i < keys.Length; i++) + editor.Values[i] = keys[i].ToString().AsMemory(); dst = editor.Commit(); }; } - public static IDataView CreateDataView(IHostEnvironment env, + /// + /// Helper function to create an IDataView given a list of key and vector-based values + /// + internal static IDataView CreateDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values, string keyColumnName, string valueColumnName) { - // Build DataView from the mapping var keyType = GetPrimitiveType(typeof(TKey), out bool isKeyVectorType); var valueType = GetPrimitiveType(typeof(TValue), out bool isValueVectorType); var dataViewBuilder = new ArrayDataViewBuilder(env); @@ -164,41 +176,52 @@ public static IDataView CreateDataView(IHostEnvironment env, return dataViewBuilder.GetDataView(); } - public static IDataView CreateDataView(IHostEnvironment env, + /// + /// Helper function that builds the IDataView given a list of keys and non-vector values + /// + internal static IDataView 
CreateDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values, string keyColumnName, string valueColumnName, bool treatValuesAsKeyTypes) { - // Build DataView from the mapping var keyType = GetPrimitiveType(typeof(TKey), out bool isKeyVectorType); var valueType = GetPrimitiveType(typeof(TValue), out bool isValueVectorType); var dataViewBuilder = new ArrayDataViewBuilder(env); dataViewBuilder.AddColumn(keyColumnName, keyType, keys.ToArray()); - //var valuesArray = values.ToArray(); if (treatValuesAsKeyTypes) { - // If the values are key values, there are two different ways in which they are handled: - // 1) If the values are of type uint, then it is assumed that these values are the - // key values. In this case, the values are used for the key values. - // 2) If the values are not of type uint. Then key type values are generated as a number range starting at 0. + // When treating the values as KeyTypes, generate the unique + // set of values. This is used for generating the metadata of + // the column. + HashSet valueSet = new HashSet(); + HashSet keySet = new HashSet(); + for(int i = 0; i < values.Count(); ++i) + { + var v = values.ElementAt(i); + if (valueSet.Contains(v)) + continue; + valueSet.Add(v); + + var k = keys.ElementAt(i); + keySet.Add(k); + } + var metaKeys = keySet.ToArray(); + + // Key Values are treated in one of two ways: + // If the values are of type uint or ulong, these values are used directly as the keys types and no new keys are created. + // If the values are not of uint or ulong, then key values are generated as uints starting from 1, since 0 is missing key. 
if (valueType.RawKind == DataKind.U4) { - uint[] indices = values.Select((x) => Convert.ToUInt32(x) - 1).ToArray(); - var min = indices.Min(); - var max = indices.Max(); - int count = (int)(max - min + 1); - dataViewBuilder.AddColumn(valueColumnName, GetKeyValueGetter(indices), min, count, indices); + uint[] indices = values.Select((x) => Convert.ToUInt32(x)).ToArray(); + dataViewBuilder.AddColumn(valueColumnName, GetKeyValueGetter(metaKeys), 0, metaKeys.Length, indices); } else if (valueType.RawKind == DataKind.U8) { - ulong[] indices = values.Select((x) => Convert.ToUInt64(x) - 1).ToArray(); - var min = indices.Min(); - var max = indices.Max(); - int count = (int)(max - min + 1); - dataViewBuilder.AddColumn(valueColumnName, GetKeyValueGetter(indices), min, count, indices); + ulong[] indices = values.Select((x) => Convert.ToUInt64(x)).ToArray(); + dataViewBuilder.AddColumn(valueColumnName, GetKeyValueGetter(metaKeys), 0, metaKeys.Length, indices); } else { @@ -207,7 +230,7 @@ public static IDataView CreateDataView(IHostEnvironment env, // the full list of indices (equal to the same length of values). Dictionary keyTypeValueMapping = new Dictionary(); uint[] indices = new uint[values.Count()]; - // Start the index at 1 since key types start at 1, 0 is invalid + // Start the index at 1 uint index = 1; for(int i = 0; i < values.Count(); ++i) { @@ -222,7 +245,7 @@ public static IDataView CreateDataView(IHostEnvironment env, indices[i] = keyValue; } - dataViewBuilder.AddColumn(valueColumnName, GetKeyValueGetter(values.ToArray()), 0, indices.Count(), indices); + dataViewBuilder.AddColumn(valueColumnName, GetKeyValueGetter(metaKeys), 0, metaKeys.Count(), indices); } } else @@ -235,56 +258,56 @@ public static IDataView CreateDataView(IHostEnvironment env, } /// - /// The ValueMappingTransform is a 1-1 mapping from a key to value. The key type and value type are specified - /// through TKeyType and TValueType. 
Arrays are supported for vector types which can be used as either a key or a value + /// The ValueMappingTransformer is a 1-1 mapping from a key to value. The key type and value type are specified + /// through TKey and TValue. Arrays are supported for vector types which can be used as either a key or a value /// or both. The mapping is specified, not trained by providiing a list of keys and a list of values. /// - /// Specifies the key type - /// Specifies the value type - public sealed class ValueMappingTransform : ValueMappingTransform + /// Specifies the key type + /// Specifies the value type + public sealed class ValueMappingTransformer : ValueMappingTransformer { /// - /// Constructs a ValueMappingTransform with a key type to value type + /// Constructs a ValueMappingTransformer with a key type to value type /// /// Instance of the host environment - /// The list of keys that are TKeyType - /// The list of values that are TValueType + /// The list of keys that are TKey + /// The list of values that are TValue /// Specifies to treat the values as a /// The specified columns to apply - public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool treatValuesAsKeyTypes, (string Input, string Output)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), + public ValueMappingTransformer(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool treatValuesAsKeyTypes, (string Input, string Output)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransformer)), ConvertToDataView(env, keys, values, treatValuesAsKeyTypes), KeyColumnName, ValueColumnName, columns) { } /// - /// Constructs a ValueMappingTransform with a key type to value array type + /// Constructs a ValueMappingTransformer with a key type to value array type /// /// Instance of the host environment - /// The list of keys that are TKeyType - /// The list of values that are TValueType[] 
+ /// The list of keys that are TKey + /// The list of values that are TValue[] /// The specified columns to apply - public ValueMappingTransform(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), + public ValueMappingTransformer(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransformer)), ConvertToDataView(env, keys, values), KeyColumnName, ValueColumnName, columns) { } - private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool treatValuesAsKeyValue) + private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool treatValuesAsKeyValue) => DataViewHelper.CreateDataView(env, keys, values, - ValueMappingTransform.KeyColumnName, - ValueMappingTransform.ValueColumnName, + ValueMappingTransformer.KeyColumnName, + ValueMappingTransformer.ValueColumnName, treatValuesAsKeyValue); // Handler for vector value types - private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values) - => DataViewHelper.CreateDataView(env, keys, values, ValueMappingTransform.KeyColumnName, ValueMappingTransform.ValueColumnName); + private static IDataView ConvertToDataView(IHostEnvironment env, IEnumerable keys, IEnumerable values) + => DataViewHelper.CreateDataView(env, keys, values, ValueMappingTransformer.KeyColumnName, ValueMappingTransformer.ValueColumnName); } - public class ValueMappingTransform : OneToOneTransformerBase + public class ValueMappingTransformer : OneToOneTransformerBase { internal const string Summary = "Maps text values columns to new columns using a map dataset."; - internal const string LoaderSignature = "ValueMappingTransform"; + internal const string LoaderSignature = 
"ValueMappingTransformer"; internal const string UserName = "Value Mapping Transform"; internal const string ShortName = "ValueMap"; @@ -296,8 +319,10 @@ public class ValueMappingTransform : OneToOneTransformerBase protected static string ValueColumnName = "Value"; private ValueMap _valueMap; private Schema.Metadata _valueMetadata; + private byte[] _dataView; public ColumnType ValueColumnType => _valueMap.ValueType; + public Schema.Metadata ValueColumnMetadata => _valueMetadata; private static VersionInfo GetVersionInfo() { @@ -307,7 +332,7 @@ private static VersionInfo GetVersionInfo() verReadableCur: 0x00010001, verWeCanReadBack: 0x00010001, loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(ValueMappingTransform).Assembly.FullName); + loaderAssemblyName: typeof(ValueMappingTransformer).Assembly.FullName); } private static VersionInfo GetTermLookupVersionInfo() @@ -319,7 +344,7 @@ private static VersionInfo GetTermLookupVersionInfo() verReadableCur: 0x00010002, verWeCanReadBack: 0x00010002, loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(ValueMappingTransform).Assembly.FullName); + loaderAssemblyName: typeof(ValueMappingTransformer).Assembly.FullName); } public sealed class Column : OneToOneColumn @@ -361,15 +386,18 @@ public sealed class Arguments public bool ValuesAsKeyType = true; } - protected ValueMappingTransform(IHostEnvironment env, IDataView lookupMap, + protected ValueMappingTransformer(IHostEnvironment env, IDataView lookupMap, string keyColumn, string valueColumn, (string Input, string Output)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransform)), columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransformer)), columns) { env.CheckNonEmpty(keyColumn, nameof(keyColumn), "A key column must be specified when passing in an IDataView for the value mapping"); env.CheckNonEmpty(valueColumn, nameof(valueColumn), "A value column must be specified when passing 
in an IDataView for the value mapping"); _valueMap = CreateValueMapFromDataView(lookupMap, keyColumn, valueColumn); env.Assert(lookupMap.Schema.TryGetColumnIndex(valueColumn, out int valueColumnIdx)); _valueMetadata = CopyMetadata(lookupMap.Schema[valueColumnIdx].Metadata); + + // Create the byte array of the original IDataView, this is used for saving out the data. + _dataView = GetBytesFromDataView(Host, lookupMap, keyColumn, valueColumn); } private Schema.Metadata CopyMetadata(Schema.Metadata metadata) @@ -396,16 +424,17 @@ private static TextLoader.Column GenerateValueColumn(IHostEnvironment env, IDataView loader, string valueColumnName, int keyIdx, - int valueIdx) + int valueIdx, + string fileName) { // Scan the source to determine the min max of the column - ulong keyMin = ulong.MinValue; + ulong keyMin = ulong.MaxValue; ulong keyMax = ulong.MinValue; // scan the input to create convert the values as key types using (var cursor = loader.GetRowCursor(c => true)) { - using(var ch = env.Start("Processing key values")) + using(var ch = env.Start($"Processing key values from file {fileName}")) { var getKey = cursor.GetGetter>(keyIdx); var getValue = cursor.GetGetter>(valueIdx); @@ -439,10 +468,21 @@ private static TextLoader.Column GenerateValueColumn(IHostEnvironment env, else { if (countNonKeys < 5) - ch.Warning("Key '{0}' in mapping file is mapped to non key value '{1}'", key, value); + ch.Warning($"Key '{key}' in mapping file is mapped to non key value '{value}'"); countNonKeys++; } } + + if (countNonKeys > 0) + ch.Warning($"Found {countNonKeys} non key values in the file '{fileName}'"); + if (keyMin > keyMax) + { + keyMin = 0; + keyMax = uint.MaxValue - 1; + ch.Warning($"Did not find any valid key values in the file '{fileName}'"); + } + else + ch.Info($"Found key values in the range {keyMin} to {keyMax} in the file '{fileName}'"); } } @@ -464,7 +504,7 @@ private static TextLoader.Column GenerateValueColumn(IHostEnvironment env, return valueColumn; } - 
private static ValueMappingTransform CreateTransformInvoke(IHostEnvironment env, + private static ValueMappingTransformer CreateTransformInvoke(IHostEnvironment env, IDataView idv, string keyColumnName, string valueColumnName, @@ -473,8 +513,8 @@ private static ValueMappingTransform CreateTransformInvoke { // Read in the data // scan the input to create convert the values as key types - List keys = new List(); - List values = new List(); + List keys = new List(); + List values = new List(); idv.Schema.TryGetColumnIndex(keyColumnName, out int keyIdx); idv.Schema.TryGetColumnIndex(valueColumnName, out int valueIdx); @@ -482,10 +522,10 @@ private static ValueMappingTransform CreateTransformInvoke { using(var ch = env.Start("Processing key values")) { - TKeyType key = default; - TValueType value = default; - var getKey = cursor.GetGetter(keyIdx); - var getValue = cursor.GetGetter(valueIdx); + TKey key = default; + TValue value = default; + var getKey = cursor.GetGetter(keyIdx); + var getValue = cursor.GetGetter(valueIdx); while(cursor.MoveNext()) { try @@ -514,7 +554,7 @@ private static ValueMappingTransform CreateTransformInvoke } } - return new ValueMappingTransform(env, keys, values, treatValuesAsKeyTypes, columns); + return new ValueMappingTransformer(env, keys, values, treatValuesAsKeyTypes, columns); } private static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input) @@ -550,40 +590,38 @@ private static IDataTransform Create(IHostEnvironment env, Arguments args, IData // Set the keyColumnName and valueColumnName to the default values. keyColumnName = KeyColumnName; valueColumnName = ValueColumnName; + TextLoader.Column keyColumn = default; + TextLoader.Column valueColumn = default; - TextLoader.Column keyColumn = new TextLoader.Column(keyColumnName, DataKind.TXT, 0); - TextLoader.Column valueColumn = new TextLoader.Column(valueColumnName, DataKind.TXT, 1); - /* + // Default to a text loader. 
KeyType and ValueType are assumed to be string + // types unless ValueAsKeyType is specified. if (args.ValuesAsKeyType) { - valueColumn = new TextLoader.Column(valueColumnName, DataKind.U8, 1); - }*/ + keyColumn = new TextLoader.Column(keyColumnName, DataKind.TXT, 0); + valueColumn = new TextLoader.Column(valueColumnName, DataKind.TXT, 1); + var txtArgs = new TextLoader.Arguments() + { + Column=new TextLoader.Column[] + { + keyColumn, + valueColumn + } + }; - var txtArgs = new TextLoader.Arguments() - { - Column=new TextLoader.Column[] + try { - keyColumn, - valueColumn + var textLoader = TextLoader.ReadFile(env, txtArgs, fileSource); + valueColumn = GenerateValueColumn(env, textLoader, valueColumnName, 0, 1, args.DataFile); } - }; - - //loader = TextLoader.ReadFile(env, txtArgs, fileSource); - var textLoader = TextLoader.ReadFile(env, txtArgs, fileSource); - //env.Assert(textLoader.Schema.TryGetColumnIndex(keyColumnName, out int keyColumnIndex)); - //env.Assert(textLoader.Schema.TryGetColumnIndex(valueColumnName, out int valueColumnIndex)); - - // Default to a text loader. KeyType and ValueType are assumed to be string - // types unless ValueAsKeyType is specified. 
- //TextLoader.Column keyColumn = new TextLoader.Column(keyColumnName, DataKind.TXT, keyColumnIndex); - //TextLoader.Column valueColumn = new TextLoader.Column(valueColumnName, DataKind.TXT, valueColumnIndex); - if (args.ValuesAsKeyType) + catch(Exception ex) + { + throw env.Except(ex, "Failed to parse the lookup file '{args.DataFile}' in ValueMappingTransformerer"); + } + } + else { - valueColumn = GenerateValueColumn(env, textLoader, valueColumnName, 0, 1); - // Change ValueColumn to be of type U4 - //valueColumn = new TextLoader.Column(valueColumnName, DataKind.U4, valueColumnIndex); - //GenerateKeyRangeAndMinFromValues(env, textLoader, keyColumnIndex, valueColumnIndex, out ulong min, out ulong max); - //valueColumn.KeyRange = new KeyRange(min, max); + keyColumn = new TextLoader.Column(keyColumnName, DataKind.TXT, 0); + valueColumn = new TextLoader.Column(valueColumnName, DataKind.R4, 1); } loader = TextLoader.Create( @@ -604,36 +642,9 @@ private static IDataTransform Create(IHostEnvironment env, Arguments args, IData env.Assert(loader.Schema.TryGetColumnIndex(keyColumnName, out int keyColumnIndex)); env.Assert(loader.Schema.TryGetColumnIndex(valueColumnName, out int valueColumnIndex)); - ValueMappingTransform transformer = null; + ValueMappingTransformer transformer = null; (string Source, string Name)[] columns = args.Column.Select(x => (x.Source, x.Name)).ToArray(); - /* - Func del = CreateTransformInvoke; - var meth = del.Method.GetGenericMethodDefinition().MakeGenericMethod(loader.Schema[keyColumnIndex].Type.RawType, - loader.Schema[valueColumnIndex].Type.RawType); - transformer = (ValueMappingTransform)meth.Invoke(null, new object[] { env, - loader, - keyColumnName, - valueColumnName, - args.ValuesAsKeyType, - columns - }); - /* - if (args.ValuesAsKeyType) - { - Func del = CreateTransformInvoke; - var meth = del.Method.GetGenericMethodDefinition().MakeGenericMethod(loader.Schema[keyColumnIndex].Type.RawType, - loader.Schema[valueColumnIndex].Type.RawType); 
- transformer = (ValueMappingTransform)meth.Invoke(null, new object[] { env, - loader, - keyColumnName, - valueColumnName, - args.ValuesAsKeyType, - columns - }); - } - else - */ - transformer = new ValueMappingTransform(env, loader, keyColumnName, valueColumnName, columns); + transformer = new ValueMappingTransformer(env, loader, keyColumnName, valueColumnName, columns); return transformer.MakeDataTransform(input); } @@ -654,7 +665,7 @@ private static bool CheckModelVersion(ModelLoadContext ctx, VersionInfo versionI } } - protected static ValueMappingTransform Create(IHostEnvironment env, ModelLoadContext ctx) + protected static ValueMappingTransformer Create(IHostEnvironment env, ModelLoadContext ctx) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(ctx, nameof(ctx)); @@ -686,7 +697,7 @@ protected static ValueMappingTransform Create(IHostEnvironment env, ModelLoadCon var binaryLoader = GetLoader(env, rgb); var keyColumnName = (termLookupModel) ? "Term" : KeyColumnName; - return new ValueMappingTransform(env, binaryLoader, keyColumnName, ValueColumnName, columns); + return new ValueMappingTransformer(env, binaryLoader, keyColumnName, ValueColumnName, columns); } private static byte[] ReadAllBytes(IExceptionContext ectx, BinaryReader rdr) @@ -735,9 +746,8 @@ public override void Save(ModelSaveContext ctx) ctx.SetVersionInfo(GetVersionInfo()); SaveColumns(ctx); - // convert value map to a dataview and serialize as bytes - var bytes = GetBytesFromDataView(Host, _valueMap.GetDataView(Host), KeyColumnName, ValueColumnName); - ctx.SaveBinaryStream(DefaultMapName, w => w.Write(bytes)); + // Save out the byte stream of the IDataView of the data source + ctx.SaveBinaryStream(DefaultMapName, w => w.Write(_dataView)); } /// @@ -761,11 +771,11 @@ public static ValueMap Create(ColumnType keyType, ColumnType valueType, Schema.M return (ValueMap)meth.Invoke(null, new object[] { keyType, valueType, valueMetadata }); } - private static ValueMap 
CreateValueMapInvoke(ColumnType keyType, + private static ValueMap CreateValueMapInvoke(ColumnType keyType, ColumnType valueType, Schema.Metadata valueMetadata) { - return new ValueMap(keyType, valueType, valueMetadata); + return new ValueMap(keyType, valueType, valueMetadata); } public abstract void Train(IHostEnvironment env, RowCursor cursor); @@ -776,19 +786,19 @@ private static ValueMap CreateValueMapInvoke(ColumnType ke } /// - /// Implementation mapping class that maps a key of TKeyType to a specified value of TValueType. + /// Implementation mapping class that maps a key of TKey to a specified value of TValue. /// - private class ValueMap : ValueMap + private class ValueMap : ValueMap { - private Dictionary _mapping; - private TValueType _missingValue; + private Dictionary _mapping; + private TValue _missingValue; private Schema.Metadata _valueMetadata; - private Dictionary CreateDictionary() + private Dictionary CreateDictionary() { - if (typeof(TKeyType) == typeof(ReadOnlyMemory)) - return new Dictionary, TValueType>(new ReadOnlyMemoryUtils.ReadonlyMemoryCharComparer()) as Dictionary; - return new Dictionary(); + if (typeof(TKey) == typeof(ReadOnlyMemory)) + return new Dictionary, TValue>(new ReadOnlyMemoryUtils.ReadonlyMemoryCharComparer()) as Dictionary; + return new Dictionary(); } public ValueMap(ColumnType keyType, ColumnType valueType, Schema.Metadata valueMetadata) @@ -798,11 +808,14 @@ public ValueMap(ColumnType keyType, ColumnType valueType, Schema.Metadata valueM _valueMetadata = valueMetadata; } + /// + /// Generates the mapping based on the IDataView + /// public override void Train(IHostEnvironment env, RowCursor cursor) { // Validate that the conversion is supported for non-vector types bool identity; - ValueMapper, TValueType> conv; + ValueMapper, TValue> conv; // For keys that are not in the mapping, the missingValue will be returned. 
_missingValue = default; @@ -812,24 +825,24 @@ public override void Train(IHostEnvironment env, RowCursor cursor) // First check if there is a String->ValueType conversion method. If so, call the conversion method with an // empty string, the returned value will be the new missing value. // NOTE this will return NA for R4 and R8 types. - if (Microsoft.ML.Runtime.Data.Conversion.Conversions.Instance.TryGetStandardConversion, TValueType>( + if (Microsoft.ML.Runtime.Data.Conversion.Conversions.Instance.TryGetStandardConversion, TValue>( TextType.Instance, ValueType, out conv, out identity)) { - TValueType value = default; + TValue value = default; conv(string.Empty.AsMemory(), ref value); _missingValue = value; } } - var keyGetter = cursor.GetGetter(0); - var valueGetter = cursor.GetGetter(1); + var keyGetter = cursor.GetGetter(0); + var valueGetter = cursor.GetGetter(1); while(cursor.MoveNext()) { - TKeyType key = default; - TValueType value = default; + TKey key = default; + TValue value = default; keyGetter(ref key); valueGetter(ref value); if (_mapping.ContainsKey(key)) @@ -842,10 +855,10 @@ public override void Train(IHostEnvironment env, RowCursor cursor) public override Delegate GetGetter(Row input, int index) { - var src = default(TKeyType); - ValueGetter getSrc = input.GetGetter(index); - ValueGetter retVal = - (ref TValueType dst) => + var src = default(TKey); + ValueGetter getSrc = input.GetGetter(index); + ValueGetter retVal = + (ref TValue dst) => { getSrc(ref src); if (_mapping.ContainsKey(src)) @@ -865,27 +878,31 @@ public override IDataView GetDataView(IHostEnvironment env) => DataViewHelper.CreateDataView(env, _mapping.Keys, _mapping.Values, - ValueMappingTransform.KeyColumnName, - ValueMappingTransform.ValueColumnName, + ValueMappingTransformer.KeyColumnName, + ValueMappingTransformer.ValueColumnName, ValueType.IsKey); - private static TValueType GetVector(TValueType value) + private static TValue GetVector(TValue value) { if (value is VBuffer 
valueRef) { VBuffer dest = default; valueRef.CopyTo(ref dest); - if (dest is TValueType destRef) + if (dest is TValue destRef) return destRef; } return default; } - private static TValueType GetValue(TValueType value) + private static TValue GetValue(TValue value) => value; } + /// + /// Retrieves the byte array given a dataview and columns + /// + private static byte[] GetBytesFromDataView(IHost host, IDataView lookup, string keyColumn, string valueColumn) { Contracts.AssertValue(host); @@ -896,9 +913,9 @@ private static byte[] GetBytesFromDataView(IHost host, IDataView lookup, string var schema = lookup.Schema; if (!schema.TryGetColumnIndex(keyColumn, out int colKey)) - throw host.ExceptUserArg(nameof(Arguments.KeyColumn), "column not found: '{0}'", keyColumn); + throw host.ExceptUserArg(nameof(Arguments.KeyColumn), $"Key column not found: '{keyColumn}'"); if (!schema.TryGetColumnIndex(valueColumn, out int colValue)) - throw host.ExceptUserArg(nameof(Arguments.ValueColumn), "column not found: '{0}'", valueColumn); + throw host.ExceptUserArg(nameof(Arguments.ValueColumn), $"Value column not found: '{valueColumn}'"); var cols = new List<(string Source, string Name)>() { @@ -937,9 +954,9 @@ private sealed class Mapper : OneToOneMapperBase private readonly ValueMap _valueMap; private readonly Schema.Metadata _valueMetadata; private readonly (string Source, string Name)[] _columns; - private readonly ValueMappingTransform _parent; + private readonly ValueMappingTransformer _parent; - internal Mapper(ValueMappingTransform transform, + internal Mapper(ValueMappingTransformer transform, Schema inputSchema, ValueMap valueMap, Schema.Metadata valueMetadata, @@ -964,13 +981,11 @@ protected override Delegate MakeGetter(Row input, int iinfo, Func act protected override Schema.DetachedColumn[] GetOutputColumnsCore() { - var md = new MetadataBuilder(); - var result = new Schema.DetachedColumn[_columns.Length]; for (int i = 0; i < _columns.Length; i++) { var srcCol = 
_inputSchema[_columns[i].Source]; - result[i] = new Schema.DetachedColumn(_columns[i].Name, _valueMap.ValueType, md.GetMetadata()); + result[i] = new Schema.DetachedColumn(_columns[i].Name, _valueMap.ValueType, _valueMetadata); } return result; } diff --git a/src/Microsoft.ML.Transforms/TermLookupTransformer.cs b/src/Microsoft.ML.Transforms/TermLookupTransformer.cs deleted file mode 100644 index 31f252cc4c..0000000000 --- a/src/Microsoft.ML.Transforms/TermLookupTransformer.cs +++ /dev/null @@ -1,708 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Runtime; -using Microsoft.ML.Runtime.CommandLine; -using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.Data.IO; -using Microsoft.ML.Runtime.Internal.Utilities; -using Microsoft.ML.Runtime.Model; -using Microsoft.ML.Transforms.Categorical; -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Reflection; -using System.Text; - -[assembly: LoadableClass(TermLookupTransformer.Summary, typeof(TermLookupTransformer), typeof(TermLookupTransformer.Arguments), typeof(SignatureDataTransform), - "Term Lookup Transform", "TermLookup", "Lookup", "LookupTransform", "TermLookupTransform")] - -//[assembly: LoadableClass(TermLookupTransformer.Summary, typeof(TermLookupTransformer), null, typeof(SignatureLoadDataTransform), - //"Term Lookup Transform", TermLookupTransformer.LoaderSignature)] - -namespace Microsoft.ML.Transforms.Categorical -{ - using Conditional = System.Diagnostics.ConditionalAttribute; - - /// - /// This transform maps text values columns to new columns using a map dataset provided through its arguments. 
- /// - public sealed class TermLookupTransformer : OneToOneTransformBase - { - public sealed class Column : OneToOneColumn - { - public static Column Parse(string str) - { - var res = new Column(); - if (res.TryParse(str)) - return res; - return null; - } - - public bool TryUnparse(StringBuilder sb) - { - Contracts.AssertValue(sb); - return TryUnparseCore(sb); - } - } - - public sealed class Arguments - { - [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)] - public Column[] Column; - - [Argument(ArgumentType.AtMostOnce, IsInputFileName = true, HelpText = "The data file containing the terms", ShortName = "data", SortOrder = 2)] - public string DataFile; - - [Argument(ArgumentType.Multiple, HelpText = "The data loader", NullName = "", SignatureType = typeof(SignatureDataLoader))] - public IComponentFactory Loader; - - [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the text column containing the terms", ShortName = "term")] - public string TermColumn; - - [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the column containing the values", ShortName = "value")] - public string ValueColumn; - - [Argument(ArgumentType.AtMostOnce, - HelpText = "If term and value columns are unspecified, specifies whether the values are key values or numeric.", ShortName = "key")] - public bool KeyValues = true; - } - - /// - /// Holds the values that the terms map to. 
- /// - private abstract class ValueMap - { - public readonly ColumnType Type; - - protected ValueMap(ColumnType type) - { - Type = type; - } - - public static ValueMap Create(ColumnType type) - { - Contracts.AssertValue(type); - - if (!type.IsVector) - { - Func> del = CreatePrimitive; - var meth = del.GetMethodInfo().GetGenericMethodDefinition().MakeGenericMethod(type.RawType); - return (ValueMap)meth.Invoke(null, new object[] { type }); - } - else - { - Func> del = CreateVector; - var meth = del.GetMethodInfo().GetGenericMethodDefinition().MakeGenericMethod(type.ItemType.RawType); - return (ValueMap)meth.Invoke(null, new object[] { type }); - } - } - - public static OneValueMap CreatePrimitive(PrimitiveType type) - { - Contracts.AssertValue(type); - Contracts.Assert(type.RawType == typeof(TVal)); - return new OneValueMap(type); - } - - public static VecValueMap CreateVector(VectorType type) - { - Contracts.AssertValue(type); - Contracts.Assert(type.ItemType.RawType == typeof(TVal)); - return new VecValueMap(type); - } - - public abstract void Train(IExceptionContext ectx, RowCursor cursor, int colTerm, int colValue); - - public abstract Delegate GetGetter(ValueGetter> getSrc); - } - - /// - /// Holds the values that the terms map to - where the destination type is TRes. - /// - private abstract class ValueMap : ValueMap - { - private NormStr.Pool _terms; - private TRes[] _values; - - protected ValueMap(ColumnType type) - : base(type) - { - Contracts.Assert(type.RawType == typeof(TRes)); - } - - /// - /// Bind this value map to the given cursor for "training". 
- /// - public override void Train(IExceptionContext ectx, RowCursor cursor, int colTerm, int colValue) - { - Contracts.AssertValue(ectx); - ectx.Assert(_terms == null); - ectx.Assert(_values == null); - ectx.AssertValue(cursor); - ectx.Assert(0 <= colTerm && colTerm < cursor.Schema.ColumnCount); - ectx.Assert(cursor.Schema.GetColumnType(colTerm).IsText); - ectx.Assert(0 <= colValue && colValue < cursor.Schema.ColumnCount); - ectx.Assert(cursor.Schema.GetColumnType(colValue).Equals(Type)); - - var getTerm = cursor.GetGetter>(colTerm); - var getValue = cursor.GetGetter(colValue); - var terms = new NormStr.Pool(); - var values = new List(); - - ReadOnlyMemory term = default; - while (cursor.MoveNext()) - { - getTerm(ref term); - // REVIEW: Should we trim? - term = ReadOnlyMemoryUtils.TrimSpaces(term); - var nstr = terms.Add(term); - if (nstr.Id != values.Count) - throw ectx.Except("Duplicate term in lookup data: '{0}'", nstr); - - TRes res = default(TRes); - getValue(ref res); - values.Add(res); - ectx.Assert(terms.Count == values.Count); - } - - _terms = terms; - _values = values.ToArray(); - ectx.Assert(_terms.Count == _values.Length); - } - - /// - /// Given the term getter, produce a value getter from this value map. 
- /// - public override Delegate GetGetter(ValueGetter> getTerm) - { - Contracts.Assert(_terms != null); - Contracts.Assert(_values != null); - Contracts.Assert(_terms.Count == _values.Length); - - return GetGetterCore(getTerm); - } - - private ValueGetter GetGetterCore(ValueGetter> getTerm) - { - var src = default(ReadOnlyMemory); - return - (ref TRes dst) => - { - getTerm(ref src); - src = ReadOnlyMemoryUtils.TrimSpaces(src); - var nstr = _terms.Get(src); - if (nstr == null) - GetMissing(ref dst); - else - { - Contracts.Assert(0 <= nstr.Id && nstr.Id < _values.Length); - CopyValue(in _values[nstr.Id], ref dst); - } - }; - } - - protected abstract void GetMissing(ref TRes dst); - - protected abstract void CopyValue(in TRes src, ref TRes dst); - } - - /// - /// Holds the values that the terms map to when the destination type is a PrimitiveType (non-vector). - /// - private sealed class OneValueMap : ValueMap - { - private readonly TRes _badValue; - - public OneValueMap(PrimitiveType type) - : base(type) - { - // REVIEW: This uses the fact that standard conversions map NA to NA to get the NA for TRes. - // We should probably have a mapping from type to its bad value somewhere, perhaps in Conversions. - bool identity; - ValueMapper, TRes> conv; - if (Runtime.Data.Conversion.Conversions.Instance.TryGetStandardConversion, TRes>(TextType.Instance, type, - out conv, out identity)) - { - //Empty string will map to NA for R4 and R8, the only two types that can - //handle missing values. - var bad = String.Empty.AsMemory(); - conv(in bad, ref _badValue); - } - } - - protected override void GetMissing(ref TRes dst) - { - dst = _badValue; - } - - protected override void CopyValue(in TRes src, ref TRes dst) - { - dst = src; - } - } - - /// - /// Holds the values that the terms map to when the destination type is a VectorType. - /// TItem is the represtation type for the vector's ItemType. 
- /// - private sealed class VecValueMap : ValueMap> - { - public VecValueMap(VectorType type) - : base(type) - { - } - - protected override void GetMissing(ref VBuffer dst) - { - VBufferUtils.Resize(ref dst, Type.VectorSize, 0); - } - - protected override void CopyValue(in VBuffer src, ref VBuffer dst) - { - src.CopyTo(ref dst); - } - } - - public const string LoaderSignature = "TermLookupTransform"; - - internal const string Summary = "Maps text values columns to new columns using a map dataset."; -/* - private static VersionInfo GetVersionInfo() - { - return new VersionInfo( - modelSignature: "TXTLOOKT", - // verWrittenCur: 0x00010001, // Initial. - verWrittenCur: 0x00010002, // Dropped sizeof(Float). - verReadableCur: 0x00010002, - verWeCanReadBack: 0x00010002, - loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(TermLookupTransformer).Assembly.FullName); - } -*/ - // This is the byte array containing the binary .idv file contents for the lookup data. - // This is persisted; the _termMap and _valueMap are constructed from it. - private readonly byte[] _bytes; - - // The BinaryLoader over the byte array above. We keep this - // active simply for metadata requests. - private readonly BinaryLoader _ldr; - - // The value map. - private readonly ValueMap _valueMap; - - // Stream names for the binary idv streams. - private const string DefaultMapName = "DefaultMap.idv"; - - private const string RegistrationName = "TextLookup"; - - /// - /// Public constructor corresponding to SignatureDataTransform. 
- /// - public TermLookupTransformer(IHostEnvironment env, Arguments args, IDataView input) - : base(env, RegistrationName, env.CheckRef(args, nameof(args)).Column, - input, TestIsText) - { - Host.AssertNonEmpty(Infos); - Host.Assert(Infos.Length == Utils.Size(args.Column)); - - Host.CheckUserArg(!string.IsNullOrWhiteSpace(args.DataFile), nameof(args.DataFile), "must specify dataFile"); - Host.CheckUserArg(string.IsNullOrEmpty(args.TermColumn) == string.IsNullOrEmpty(args.ValueColumn), nameof(args.TermColumn), - "Either both term and value column should be specified, or neither."); - - using (var ch = Host.Start("Training")) - { - _bytes = GetBytes(Host, Infos, args); - _ldr = GetLoader(Host, _bytes); - _valueMap = Train(ch, _ldr); - SetMetadata(); - } - } - - public TermLookupTransformer(IHostEnvironment env, IDataView input, IDataView lookup, string sourceTerm, string sourceValue, string targetTerm, string targetValue) - : base(env, RegistrationName, new[] { new Column { Name = sourceValue, Source = sourceTerm } }, input, TestIsText) - { - Host.AssertNonEmpty(Infos); - Host.CheckValue(input, nameof(input)); - Host.CheckValue(lookup, nameof(lookup)); - Host.Assert(Infos.Length == 1); - Host.CheckNonEmpty(targetTerm, nameof(targetTerm), "Term column must be specified when passing in a data view as lookup table."); - Host.CheckNonEmpty(targetValue, nameof(targetValue), "Value column must be specified when passing in a data view as lookup table."); - - using (var ch = Host.Start("Training")) - { - _bytes = GetBytesFromDataView(Host, lookup, targetTerm, targetValue); - _ldr = GetLoader(Host, _bytes); - _valueMap = Train(ch, _ldr); - SetMetadata(); - } - } - - // This method is called if only a datafile is specified, without a loader/term and value columns. - // It determines the type of the Value column and returns the appropriate TextLoader component factory. 
- private static IComponentFactory GetLoaderFactory(string filename, bool keyValues, IHost host) - { - Contracts.AssertValue(host); - - // If the user specified non-key values, we define the value column to be numeric. - if (!keyValues) - return ComponentFactoryUtils.CreateFromFunction( - (env, files) => new TextLoader( - env, new[] - { - new TextLoader.Column("Term", DataKind.TX, 0), - new TextLoader.Column("Value", DataKind.Num, 1) - }, dataSample: files).Read(files) as IDataLoader); - - // If the user specified key values, we scan the values to determine the range of the key type. - ulong min = ulong.MaxValue; - ulong max = ulong.MinValue; - try - { - var file = new MultiFileSource(filename); - var data = new TextLoader(host, new[] - { - new TextLoader.Column("Term", DataKind.TX, 0), - new TextLoader.Column("Value", DataKind.TX, 1) - }, - dataSample: file - ).Read(file); - - using (var cursor = data.GetRowCursor(c => true)) - { - var getTerm = cursor.GetGetter>(0); - var getVal = cursor.GetGetter>(1); - ReadOnlyMemory txt = default; - - using (var ch = host.Start("Creating Text Lookup Loader")) - { - long countNonKeys = 0; - while (cursor.MoveNext()) - { - getVal(ref txt); - ulong res; - // Try to parse the text as a key value between 1 and ulong.MaxValue. If this succeeds and res>0, - // we update max and min accordingly. If res==0 it means the value is missing, in which case we ignore it for - // computing max and min. - if (Runtime.Data.Conversion.Conversions.Instance.TryParseKey(in txt, 1, ulong.MaxValue, out res)) - { - if (res < min && res != 0) - min = res; - if (res > max) - max = res; - } - // If parsing as key did not succeed, the value can still be 0, so we try parsing it as a ulong. If it succeeds, - // then the value is 0, and we update min accordingly. 
- else if (Runtime.Data.Conversion.Conversions.Instance.TryParse(in txt, out res)) - { - ch.Assert(res == 0); - min = 0; - } - //If parsing as a ulong fails, we increment the counter for the non-key values. - else - { - var term = default(ReadOnlyMemory); - getTerm(ref term); - if (countNonKeys < 5) - ch.Warning("Term '{0}' in mapping file is mapped to non key value '{1}'", term, txt); - countNonKeys++; - } - } - if (countNonKeys > 0) - ch.Warning("Found {0} non key values in the file '{1}'", countNonKeys, filename); - if (min > max) - { - min = 0; - max = uint.MaxValue - 1; - ch.Warning("did not find any valid key values in the file '{0}'", filename); - } - else - ch.Info("Found key values in the range {0} to {1} in the file '{2}'", min, max, filename); - } - } - } - catch (Exception e) - { - throw host.Except(e, "Failed to parse the lookup file '{0}' in TermLookupTransform", filename); - } - - TextLoader.Column valueColumn = new TextLoader.Column("Value", DataKind.U4, 1); - if (max - min < (ulong)int.MaxValue) - { - valueColumn.KeyRange = new KeyRange(min, max); - } - else if (max - min < (ulong)uint.MaxValue) - { - valueColumn.KeyRange = new KeyRange(min); - } - else - { - valueColumn.Type = DataKind.U8; - valueColumn.KeyRange = new KeyRange(min); - } - - return ComponentFactoryUtils.CreateFromFunction( - (env, files) => new TextLoader( - env, - columns: new[] - { - new TextLoader.Column("Term", DataKind.TX, 0), - valueColumn - }, - dataSample: files).Read(files) as IDataLoader); - } - - // This saves the lookup data as a byte array encoded as a binary .idv file. 
- private static byte[] GetBytes(IHost host, ColInfo[] infos, Arguments args) - { - Contracts.AssertValue(host); - host.AssertNonEmpty(infos); - host.AssertValue(args); - - string dataFile = args.DataFile; - IComponentFactory loaderFactory = args.Loader; - string termColumn; - string valueColumn; - if (!string.IsNullOrEmpty(args.TermColumn)) - { - host.Assert(!string.IsNullOrEmpty(args.ValueColumn)); - termColumn = args.TermColumn; - valueColumn = args.ValueColumn; - } - else - { - var ext = Path.GetExtension(dataFile); - if (loaderFactory != null || string.Equals(ext, ".idv", StringComparison.OrdinalIgnoreCase)) - throw host.ExceptUserArg(nameof(args.TermColumn), "Term and value columns needed."); - loaderFactory = GetLoaderFactory(args.DataFile, args.KeyValues, host); - termColumn = "Term"; - valueColumn = "Value"; - } - return GetBytesOne(host, dataFile, loaderFactory, termColumn, valueColumn); - } - - private static byte[] GetBytesFromDataView(IHost host, IDataView lookup, string termColumn, string valueColumn) - { - Contracts.AssertValue(host); - host.AssertValue(lookup); - host.AssertNonEmpty(termColumn); - host.AssertNonEmpty(valueColumn); - - int colTerm; - int colValue; - var schema = lookup.Schema; - - if (!schema.TryGetColumnIndex(termColumn, out colTerm)) - throw host.ExceptUserArg(nameof(Arguments.TermColumn), "column not found: '{0}'", termColumn); - if (!schema.TryGetColumnIndex(valueColumn, out colValue)) - throw host.ExceptUserArg(nameof(Arguments.ValueColumn), "column not found: '{0}'", valueColumn); - - // REVIEW: Should we allow term to be a vector of text (each term in the vector - // would map to the same value)? 
- var typeTerm = schema.GetColumnType(colTerm); - host.CheckUserArg(typeTerm.IsText, nameof(Arguments.TermColumn), "term column must contain text"); - var typeValue = schema.GetColumnType(colValue); - var cols = new List<(string Source, string Name)>() - { - (termColumn, "Term"), - (valueColumn, "Value") - }; - - var view = new ColumnCopyingTransformer(host, cols.ToArray()).Transform(lookup); - view = ColumnSelectingTransformer.CreateKeep(host, view, cols.Select(x=>x.Name).ToArray()); - - var saver = new BinarySaver(host, new BinarySaver.Arguments()); - using (var strm = new MemoryStream()) - { - saver.SaveData(strm, view, 0, 1); - return strm.ToArray(); - } - } - - private static byte[] GetBytesOne(IHost host, string dataFile, IComponentFactory loaderFactory, - string termColumn, string valueColumn) - { - Contracts.AssertValue(host); - host.Assert(!string.IsNullOrWhiteSpace(dataFile)); - host.AssertNonEmpty(termColumn); - host.AssertNonEmpty(valueColumn); - - IMultiStreamSource fileSource = new MultiFileSource(dataFile); - IDataLoader loader; - if (loaderFactory == null) - { - // REVIEW: Should there be defaults for loading from text? - var ext = Path.GetExtension(dataFile); - bool isBinary = string.Equals(ext, ".idv", StringComparison.OrdinalIgnoreCase); - bool isTranspose = string.Equals(ext, ".tdv", StringComparison.OrdinalIgnoreCase); - if (!isBinary && !isTranspose) - throw host.ExceptUserArg(nameof(Arguments.Loader), "must specify the loader"); - host.Assert(isBinary != isTranspose); // One or the other must be true. 
- if (isBinary) - { - loader = new BinaryLoader(host, new BinaryLoader.Arguments(), fileSource); - } - else - { - loader = new TransposeLoader(host, new TransposeLoader.Arguments(), fileSource); - } - } - else - { - loader = loaderFactory.CreateComponent(host, fileSource); - } - - return GetBytesFromDataView(host, loader, termColumn, valueColumn); - } - - private static BinaryLoader GetLoader(IHostEnvironment env, byte[] bytes) - { - env.AssertValue(env); - env.AssertValue(bytes); - - var strm = new MemoryStream(bytes, writable: false); - return new BinaryLoader(env, new BinaryLoader.Arguments(), strm); - } - - private static ValueMap Train(IExceptionContext ectx, BinaryLoader ldr) - { - Contracts.AssertValue(ectx); - ectx.AssertValue(ldr); - ectx.Assert(ldr.Schema.ColumnCount == 2); - - // REVIEW: Should we allow term to be a vector of text (each term in the vector - // would map to the same value)? - ectx.Assert(ldr.Schema.GetColumnType(0).IsText); - - var schema = ldr.Schema; - var typeValue = schema.GetColumnType(1); - - // REVIEW: We should know the number of rows - use that info to set initial capacity. - var values = ValueMap.Create(typeValue); - using (var cursor = ldr.GetRowCursor(c => true)) - values.Train(ectx, cursor, 0, 1); - return values; - } - - private TermLookupTransformer(IChannel ch, ModelLoadContext ctx, IHost host, IDataView input) - : base(host, ctx, input, TestIsText) - { - Host.AssertValue(ch); - - // *** Binary format *** - // - ch.AssertNonEmpty(Infos); - - // Extra streams: - // DefaultMap.idv - byte[] rgb = null; - Action fn = r => rgb = ReadAllBytes(ch, r); - - if (!ctx.TryLoadBinaryStream(DefaultMapName, fn)) - throw ch.ExceptDecode(); - _bytes = rgb; - - // Process the bytes into the loader and map. 
- _ldr = GetLoader(Host, _bytes); - ValidateLoader(ch, _ldr); - _valueMap = Train(ch, _ldr); - SetMetadata(); - } - - private static byte[] ReadAllBytes(IExceptionContext ectx, BinaryReader rdr) - { - Contracts.AssertValue(ectx); - ectx.AssertValue(rdr); - ectx.Assert(rdr.BaseStream.CanSeek); - - long size = rdr.BaseStream.Length; - ectx.CheckDecode(size <= int.MaxValue); - - var rgb = new byte[(int)size]; - int cb = rdr.Read(rgb, 0, rgb.Length); - ectx.CheckDecode(cb == rgb.Length); - - return rgb; - } - /* - public static TermLookupTransformer Create(IHostEnvironment env, ModelLoadContext ctx, IDataView input) - { - Contracts.CheckValue(env, nameof(env)); - var h = env.Register(RegistrationName); - h.CheckValue(ctx, nameof(ctx)); - ctx.CheckAtModel(GetVersionInfo()); - h.CheckValue(input, nameof(input)); - return h.Apply("Loading Model", ch => new TermLookupTransformer(ch, ctx, h, input)); - } - */ - - public override void Save(ModelSaveContext ctx) - { - /* - Host.CheckValue(ctx, nameof(ctx)); - ctx.CheckAtModel(); - ctx.SetVersionInfo(GetVersionInfo()); - - // *** Binary format *** - // - SaveBase(ctx); - - // Extra streams: - // DefaultMap.idv - Host.Assert(_ldr != null); - Host.AssertValue(_bytes); - DebugValidateLoader(_ldr); - ctx.SaveBinaryStream(DefaultMapName, w => w.Write(_bytes)); - */ - } - - [Conditional("DEBUG")] - private static void DebugValidateLoader(BinaryLoader ldr) - { - Contracts.Assert(ldr != null); - Contracts.Assert(ldr.Schema.ColumnCount == 2); - Contracts.Assert(ldr.Schema.GetColumnType(0).IsText); - } - - private static void ValidateLoader(IExceptionContext ectx, BinaryLoader ldr) - { - if (ldr == null) - return; - ectx.CheckDecode(ldr.Schema.ColumnCount == 2); - ectx.CheckDecode(ldr.Schema.GetColumnType(0).IsText); - } - - protected override ColumnType GetColumnTypeCore(int iinfo) - { - Contracts.Assert(0 <= iinfo & iinfo < Infos.Length); - return _valueMap.Type; - } - - private void SetMetadata() - { - // Metadata is passed through 
from the Value column of the map data view. - var md = Metadata; - for (int iinfo = 0; iinfo < Infos.Length; iinfo++) - { - using (var bldr = md.BuildMetadata(iinfo, _ldr.Schema, 1)) - { - // No additional metadata. - } - } - md.Seal(); - } - - protected override Delegate GetGetterCore(IChannel ch, Row input, int iinfo, out Action disposer) - { - Host.AssertValueOrNull(ch); - Host.AssertValue(input); - Host.Assert(0 <= iinfo && iinfo < Infos.Length); - disposer = null; - - var getSrc = GetSrcGetter>(input, iinfo); - return _valueMap.GetGetter(getSrc); - } - } -} diff --git a/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers-Schema.txt b/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers-Schema.txt index 6402981be7..f45353c79a 100644 --- a/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers-Schema.txt +++ b/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers-Schema.txt @@ -34,7 +34,7 @@ StringLabel: Key Metadata 'KeyValues': Vec: Length=7, Count=7 [0] 'Wirtschaft', [1] 'Gesundheit', [2] 'Deutschland', [3] 'Ausland', [4] 'Unterhaltung', [5] 'Sport', [6] 'Technik & Wissen' ----- TermLookupTransformer ---- +---- RowToRowMapperTransform ---- 6 columns: RawLabel: Text Names: Vec diff --git a/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers1-Schema.txt b/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers1-Schema.txt index 1e8446cc3e..17c269f1d8 100644 --- a/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers1-Schema.txt +++ b/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers1-Schema.txt @@ -7,7 +7,7 @@ Features: Vec Metadata 'SlotNames': Vec: Length=2, Count=2 [0] 'weg fuer milliardenhilfe frei', [1] 'vor dem parlamentsgebaeude toben strassenkaempfe zwischen demonstranten drinnen haben die griechischen abgeordneten das drastische sparpaket am abend endgueltig beschlossen die entscheidung ist eine wichtige voraussetzung fuer die auszahlung von weiteren acht milliarden euro hilfsgeldern athen das griechische parlament hat 
einem umfassenden sparpaket endgueltig zugestimmt' ----- TermLookupTransformer ---- +---- RowToRowMapperTransform ---- 4 columns: RawLabel: Text Names: Vec diff --git a/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers2-Schema.txt b/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers2-Schema.txt index f40e727ef0..2ad6cfab86 100644 --- a/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers2-Schema.txt +++ b/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers2-Schema.txt @@ -7,7 +7,7 @@ Features: Vec Metadata 'SlotNames': Vec: Length=2, Count=2 [0] 'weg fuer milliardenhilfe frei', [1] 'vor dem parlamentsgebaeude toben strassenkaempfe zwischen demonstranten drinnen haben die griechischen abgeordneten das drastische sparpaket am abend endgueltig beschlossen die entscheidung ist eine wichtige voraussetzung fuer die auszahlung von weiteren acht milliarden euro hilfsgeldern athen das griechische parlament hat einem umfassenden sparpaket endgueltig zugestimmt' ----- TermLookupTransformer ---- +---- RowToRowMapperTransform ---- 4 columns: RawLabel: Text Names: Vec diff --git a/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers3-Schema.txt b/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers3-Schema.txt index eb6fccd5db..64ac99b379 100644 --- a/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers3-Schema.txt +++ b/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers3-Schema.txt @@ -7,7 +7,7 @@ Features: Vec Metadata 'SlotNames': Vec: Length=2, Count=2 [0] 'weg fuer milliardenhilfe frei', [1] 'vor dem parlamentsgebaeude toben strassenkaempfe zwischen demonstranten drinnen haben die griechischen abgeordneten das drastische sparpaket am abend endgueltig beschlossen die entscheidung ist eine wichtige voraussetzung fuer die auszahlung von weiteren acht milliarden euro hilfsgeldern athen das griechische parlament hat einem umfassenden sparpaket endgueltig zugestimmt' ----- TermLookupTransformer ---- +---- RowToRowMapperTransform ---- 4 
columns: RawLabel: Text Names: Vec diff --git a/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers4-Schema.txt b/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers4-Schema.txt index 750b267e78..f35b76301f 100644 --- a/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers4-Schema.txt +++ b/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers4-Schema.txt @@ -7,7 +7,7 @@ Features: Vec Metadata 'SlotNames': Vec: Length=2, Count=2 [0] 'weg fuer milliardenhilfe frei', [1] 'vor dem parlamentsgebaeude toben strassenkaempfe zwischen demonstranten drinnen haben die griechischen abgeordneten das drastische sparpaket am abend endgueltig beschlossen die entscheidung ist eine wichtige voraussetzung fuer die auszahlung von weiteren acht milliarden euro hilfsgeldern athen das griechische parlament hat einem umfassenden sparpaket endgueltig zugestimmt' ----- TermLookupTransformer ---- +---- RowToRowMapperTransform ---- 4 columns: RawLabel: Text Names: Vec @@ -17,7 +17,7 @@ Metadata 'SlotNames': Vec: Length=2, Count=2 [0] 'weg fuer milliardenhilfe frei', [1] 'vor dem parlamentsgebaeude toben strassenkaempfe zwischen demonstranten drinnen haben die griechischen abgeordneten das drastische sparpaket am abend endgueltig beschlossen die entscheidung ist eine wichtige voraussetzung fuer die auszahlung von weiteren acht milliarden euro hilfsgeldern athen das griechische parlament hat einem umfassenden sparpaket endgueltig zugestimmt' FileLabelNum: R4 ----- TermLookupTransformer ---- +---- RowToRowMapperTransform ---- 5 columns: RawLabel: Text Names: Vec diff --git a/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers4-out.txt b/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers4-out.txt index bf71ea5899..4d443c426b 100644 --- a/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers4-out.txt +++ b/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers4-out.txt @@ -1,15 +1,25 @@ Bad value at line 5 in column Value Processed 7 rows with 1 bad values and 0 
format errors Bad value at line 5 in column Value +Processed 7 rows with 1 bad values and 0 format errors + Bad value at line 5 in column Value Processed 7 rows with 1 bad values and 0 format errors Wrote 7 rows across 2 columns in %Time% -Warning: Term 'Wirtschaft' in mapping file is mapped to non key value '3.14' -Warning: Term 'Gesundheit' in mapping file is mapped to non key value '0.1' -Warning: Term 'Deutschland' in mapping file is mapped to non key value '1.5' -Warning: Term 'Ausland' in mapping file is mapped to non key value '0.5' -Warning: Term 'Unterhaltung' in mapping file is mapped to non key value '1a' +Warning: Key 'Wirtschaft' in mapping file is mapped to non key value '3.14' +Warning: Key 'Gesundheit' in mapping file is mapped to non key value '0.1' +Warning: Key 'Deutschland' in mapping file is mapped to non key value '1.5' +Warning: Key 'Ausland' in mapping file is mapped to non key value '0.5' +Warning: Key 'Unterhaltung' in mapping file is mapped to non key value '1a' Warning: Found 7 non key values in the file '%Output% -Warning: did not find any valid key values in the file '%Output% +Warning: Did not find any valid key values in the file '%Output% + Bad value at line 1 in column Value + Bad value at line 2 in column Value + Bad value at line 3 in column Value + Bad value at line 4 in column Value + Bad value at line 5 in column Value + Bad value at line 6 in column Value + Bad value at line 7 in column Value +Processed 7 rows with 7 bad values and 0 format errors Bad value at line 1 in column Value Bad value at line 2 in column Value Bad value at line 3 in column Value @@ -27,6 +37,8 @@ Processed 7 rows with 7 bad values and 0 format errors Bad value at line 7 in column Value Processed 7 rows with 7 bad values and 0 format errors Wrote 7 rows across 2 columns in %Time% +Wrote 7 rows across 2 columns in %Time% +Wrote 7 rows across 2 columns in %Time% Wrote 119 rows of length 3 Wrote 119 rows across 3 columns in %Time% --- Progress log --- @@ 
-36,48 +48,66 @@ Wrote 119 rows across 3 columns in %Time% [2] 'BinarySaver' started. [2] (%Time%) 7 rows [2] 'BinarySaver' finished in %Time%. -[3] 'TextSaver: saving data' started. -[3] (%Time%) 119 rows -[3] 'TextSaver: saving data' finished in %Time%. -[4] 'BinarySaver #2' started. +[3] 'BinarySaver #2' started. +[3] (%Time%) 7 rows +[3] 'BinarySaver #2' finished in %Time%. +[4] 'TextSaver: saving data' started. [4] (%Time%) 119 rows -[4] 'BinarySaver #2' finished in %Time%. +[4] 'TextSaver: saving data' finished in %Time%. [5] 'BinarySaver #3' started. -[5] (%Time%) 7 rows +[5] (%Time%) 119 rows [5] 'BinarySaver #3' finished in %Time%. -[6] 'TextSaver: saving data #2' started. -[6] (%Time%) 119 rows -[6] 'TextSaver: saving data #2' finished in %Time%. -[7] 'BinarySaver #4' started. -[7] (%Time%) 119 rows -[7] 'BinarySaver #4' finished in %Time%. -[8] 'BinarySaver #5' started. -[8] (%Time%) 7 rows -[8] 'BinarySaver #5' finished in %Time%. -[9] 'TextSaver: saving data #3' started. +[6] 'BinarySaver #4' started. +[6] (%Time%) 7 rows +[6] 'BinarySaver #4' finished in %Time%. +[7] 'BinarySaver #5' started. +[7] (%Time%) 7 rows +[7] 'BinarySaver #5' finished in %Time%. +[8] 'TextSaver: saving data #2' started. +[8] (%Time%) 119 rows +[8] 'TextSaver: saving data #2' finished in %Time%. +[9] 'BinarySaver #6' started. [9] (%Time%) 119 rows -[9] 'TextSaver: saving data #3' finished in %Time%. -[10] 'BinarySaver #6' started. -[10] (%Time%) 119 rows -[10] 'BinarySaver #6' finished in %Time%. -[11] 'BinarySaver #7' started. +[9] 'BinarySaver #6' finished in %Time%. +[10] 'BinarySaver #7' started. +[10] (%Time%) 7 rows +[10] 'BinarySaver #7' finished in %Time%. +[11] 'BinarySaver #8' started. [11] (%Time%) 7 rows -[11] 'BinarySaver #7' finished in %Time%. -[12] 'TextSaver: saving data #4' started. +[11] 'BinarySaver #8' finished in %Time%. +[12] 'TextSaver: saving data #3' started. [12] (%Time%) 119 rows -[12] 'TextSaver: saving data #4' finished in %Time%. 
-[13] 'BinarySaver #8' started. +[12] 'TextSaver: saving data #3' finished in %Time%. +[13] 'BinarySaver #9' started. [13] (%Time%) 119 rows -[13] 'BinarySaver #8' finished in %Time%. -[14] 'BinarySaver #9' started. +[13] 'BinarySaver #9' finished in %Time%. +[14] 'BinarySaver #10' started. [14] (%Time%) 7 rows -[14] 'BinarySaver #9' finished in %Time%. -[15] 'BinarySaver #10' started. +[14] 'BinarySaver #10' finished in %Time%. +[15] 'BinarySaver #11' started. [15] (%Time%) 7 rows -[15] 'BinarySaver #10' finished in %Time%. -[16] 'TextSaver: saving data #5' started. +[15] 'BinarySaver #11' finished in %Time%. +[16] 'TextSaver: saving data #4' started. [16] (%Time%) 119 rows -[16] 'TextSaver: saving data #5' finished in %Time%. -[17] 'BinarySaver #11' started. +[16] 'TextSaver: saving data #4' finished in %Time%. +[17] 'BinarySaver #12' started. [17] (%Time%) 119 rows -[17] 'BinarySaver #11' finished in %Time%. +[17] 'BinarySaver #12' finished in %Time%. +[18] 'BinarySaver #13' started. +[18] (%Time%) 7 rows +[18] 'BinarySaver #13' finished in %Time%. +[19] 'BinarySaver #14' started. +[19] (%Time%) 7 rows +[19] 'BinarySaver #14' finished in %Time%. +[20] 'BinarySaver #15' started. +[20] (%Time%) 7 rows +[20] 'BinarySaver #15' finished in %Time%. +[21] 'BinarySaver #16' started. +[21] (%Time%) 7 rows +[21] 'BinarySaver #16' finished in %Time%. +[22] 'TextSaver: saving data #5' started. +[22] (%Time%) 119 rows +[22] 'TextSaver: saving data #5' finished in %Time%. +[23] 'BinarySaver #17' started. +[23] (%Time%) 119 rows +[23] 'BinarySaver #17' finished in %Time%. 
diff --git a/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers5-Schema.txt b/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers5-Schema.txt index 68614d1599..f46173577b 100644 --- a/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers5-Schema.txt +++ b/test/BaselineOutput/Common/SavePipe/SavePipeLabelParsers5-Schema.txt @@ -7,7 +7,7 @@ Features: Vec Metadata 'SlotNames': Vec: Length=2, Count=2 [0] 'weg fuer milliardenhilfe frei', [1] 'vor dem parlamentsgebaeude toben strassenkaempfe zwischen demonstranten drinnen haben die griechischen abgeordneten das drastische sparpaket am abend endgueltig beschlossen die entscheidung ist eine wichtige voraussetzung fuer die auszahlung von weiteren acht milliarden euro hilfsgeldern athen das griechische parlament hat einem umfassenden sparpaket endgueltig zugestimmt' ----- TermLookupTransformer ---- +---- RowToRowMapperTransform ---- 4 columns: RawLabel: Text Names: Vec diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs index 54d1882799..a98bba9d92 100644 --- a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -142,6 +142,18 @@ public void ValueMappingMissingKey() Assert.Equal(1, fValue); } + [Fact] + void TestDuplicateKeys() + { + var data = new[] { new TestClass() { A = "barTest", B = "test", C = "foo" } }; + var dataView = ComponentCreation.CreateDataView(Env, data); + + IEnumerable> keys = new List>() { "foo".AsMemory(), "foo".AsMemory() }; + IEnumerable values = new List() { 1, 2 }; + + Assert.Throws(() => new ValueMappingEstimator, int>(Env, keys, values, new[] { ("A", "D"), ("B", "E"), ("C", "F") })); + } + [Fact] public void ValueMappingOutputSchema() { @@ -199,7 +211,7 @@ public void ValueMappingWithValuesAsKeyTypesOutputSchema() [Fact] public void ValueMappingValuesAsUintKeyTypes() { - var data = new[] { new TestClass() { A = "bar", B = "test", C = 
"foo" } }; + var data = new[] { new TestClass() { A = "bar", B = "test2", C = "wahoo" } }; var dataView = ComponentCreation.CreateDataView(Env, data); IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() }; @@ -222,19 +234,61 @@ public void ValueMappingValuesAsUintKeyTypes() uint dValue = 1; getterD(ref dValue); Assert.Equal(25, dValue); + + // Should be 0 as test2 is a missing key uint eValue = 0; getterE(ref eValue); - Assert.Equal(42, eValue); + Assert.Equal(0, eValue); + + // Testing the last key uint fValue = 0; getterF(ref fValue); - Assert.Equal(51, fValue); + Assert.Equal(61, fValue); } + [Fact] + public void ValueMappingValuesAsUlongKeyTypes() + { + var data = new[] { new TestClass() { A = "bar", B = "test2", C = "wahoo" } }; + var dataView = ComponentCreation.CreateDataView(Env, data); + + IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() }; + + // These are the expected key type values + IEnumerable values = new List() { 51, Int32.MaxValue, 42, 61 }; + + var estimator = new ValueMappingEstimator, ulong>(Env, keys, values, true, new[] { ("A", "D"), ("B", "E"), ("C", "F") }); + + var t = estimator.Fit(dataView); + + var result = t.Transform(dataView); + var cursor = result.GetRowCursor((col) => true); + var getterD = cursor.GetGetter(3); + var getterE = cursor.GetGetter(4); + var getterF = cursor.GetGetter(5); + cursor.MoveNext(); + + // The expected values will contain the actual uints and are not generated. 
+ ulong dValue = 1; + getterD(ref dValue); + Assert.Equal(Int32.MaxValue, dValue); + + // Should be 0 as test2 is a missing key + ulong eValue = 0; + getterE(ref eValue); + Assert.Equal(0, eValue); + + // Testing the last key + ulong fValue = 0; + getterF(ref fValue); + Assert.Equal(61, fValue); + } + [Fact] public void ValueMappingValuesAsStringKeyTypes() { - var data = new[] { new TestClass() { A = "bar", B = "test", C = "foo" } }; + var data = new[] { new TestClass() { A = "bar", B = "test", C = "notfound" } }; var dataView = ComponentCreation.CreateDataView(Env, data); IEnumerable> keys = new List>() { "foo".AsMemory(), "bar".AsMemory(), "test".AsMemory(), "wahoo".AsMemory() }; @@ -252,16 +306,20 @@ public void ValueMappingValuesAsStringKeyTypes() var getterF = cursor.GetGetter(5); cursor.MoveNext(); - // The expected values will contain the generated key type values. + // The expected values will contain the generated key type values starting from 1. uint dValue = 1; getterD(ref dValue); Assert.Equal(2, dValue); + + // eValue will equal 1 since foo1 occurs first. 
uint eValue = 0; getterE(ref eValue); Assert.Equal(1, eValue); + + // fValue will be 0 since its missing uint fValue = 0; getterF(ref fValue); - Assert.Equal(1, fValue); + Assert.Equal(0, fValue); } [Fact] @@ -338,6 +396,7 @@ void TestSavingAndLoading() } } + [Fact] void TestValueMapBackCompatTermLookup() { @@ -370,12 +429,8 @@ void TestValueMapBackCompatTermLookupKeyTypeValue() Assert.True(result.Schema.TryGetColumnIndex("Label", out int labelIdx)); Assert.True(result.Schema.TryGetColumnIndex("GroupId", out int groupIdx)); - /* Assert.True(result.Schema[labelIdx].Type.IsKey); - var keyType = result.Schema[labelIdx].Type.AsKey; - Assert.Equal((ulong)0, keyType.Min); - Assert.Equal(5, keyType.KeyCount); - */ + Assert.Equal(5, result.Schema[labelIdx].Type.ItemType.KeyCount); var t = result.GetColumn(Env, "Label"); uint s = t.First(); From 671832288d7ea647372d304e73fb466937c29720 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Wed, 19 Dec 2018 14:48:23 -0800 Subject: [PATCH 12/16] - Renaming ValueMappingTransform to ValueMappingTransformer --- .../{ValueMappingTransform.cs => ValueMappingTransformer.cs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/Microsoft.ML.Data/Transforms/{ValueMappingTransform.cs => ValueMappingTransformer.cs} (100%) diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs similarity index 100% rename from src/Microsoft.ML.Data/Transforms/ValueMappingTransform.cs rename to src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs From 47b605d149cdebb2b3efbeaad25b04631b43e190 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Wed, 19 Dec 2018 16:47:47 -0800 Subject: [PATCH 13/16] - Updates after merging to master --- .../Transforms/ValueMappingTransformer.cs | 10 +- .../TermLookupTransformer.cs | 705 ------------------ .../Transformers/ValueMappingTests.cs | 2 +- 3 files changed, 6 insertions(+), 711 deletions(-) delete mode 100644 
src/Microsoft.ML.Transforms/TermLookupTransformer.cs diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs index 9170e57ac4..c15f3cb2ff 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs @@ -412,8 +412,8 @@ private ValueMap CreateValueMapFromDataView(IDataView dataView, string keyColumn // Confirm that the key and value columns exist in the dataView Host.Check(dataView.Schema.TryGetColumnIndex(keyColumn, out int keyIdx), "Key column " + keyColumn + " does not exist in the given dataview"); Host.Check(dataView.Schema.TryGetColumnIndex(valueColumn, out int valueIdx), "Value column " + valueColumn + " does not exist in the given dataview"); - var keyType = dataView.Schema.GetColumnType(keyIdx); - var valueType = dataView.Schema.GetColumnType(valueIdx); + var keyType = dataView.Schema[keyIdx].Type; + var valueType = dataView.Schema[valueIdx].Type; var valueMap = ValueMap.Create(keyType, valueType, _valueMetadata); using (var cursor = dataView.GetRowCursor(c=> c == keyIdx || c == valueIdx)) valueMap.Train(Host, cursor); @@ -719,8 +719,8 @@ private static byte[] ReadAllBytes(IExceptionContext ectx, BinaryReader rdr) protected static IDataTransform Create(IHostEnvironment env, ModelLoadContext ctx, IDataView input) => Create(env, ctx).MakeDataTransform(input); - private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, ISchema inputSchema) - => Create(env, ctx).MakeRowMapper(Schema.Create(inputSchema)); + private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Schema inputSchema) + => Create(env, ctx).MakeRowMapper(inputSchema); protected static PrimitiveType GetPrimitiveType(Type rawType, out bool isVectorType) { @@ -945,7 +945,7 @@ private static BinaryLoader GetLoader(IHostEnvironment env, byte[] bytes) private protected override IRowMapper MakeRowMapper(Schema 
schema) { - return new Mapper(this, Schema.Create(schema), _valueMap, _valueMetadata, ColumnPairs); + return new Mapper(this, schema, _valueMap, _valueMetadata, ColumnPairs); } private sealed class Mapper : OneToOneMapperBase diff --git a/src/Microsoft.ML.Transforms/TermLookupTransformer.cs b/src/Microsoft.ML.Transforms/TermLookupTransformer.cs deleted file mode 100644 index 0b3b1a2c22..0000000000 --- a/src/Microsoft.ML.Transforms/TermLookupTransformer.cs +++ /dev/null @@ -1,705 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Runtime; -using Microsoft.ML.Runtime.CommandLine; -using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.Data.IO; -using Microsoft.ML.Runtime.Internal.Utilities; -using Microsoft.ML.Runtime.Model; -using Microsoft.ML.Transforms.Categorical; -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Reflection; -using System.Text; - -[assembly: LoadableClass(TermLookupTransformer.Summary, typeof(TermLookupTransformer), typeof(TermLookupTransformer.Arguments), typeof(SignatureDataTransform), - "Term Lookup Transform", "TermLookup", "Lookup", "LookupTransform", "TermLookupTransform")] - -[assembly: LoadableClass(TermLookupTransformer.Summary, typeof(TermLookupTransformer), null, typeof(SignatureLoadDataTransform), - "Term Lookup Transform", TermLookupTransformer.LoaderSignature)] - -namespace Microsoft.ML.Transforms.Categorical -{ - using Conditional = System.Diagnostics.ConditionalAttribute; - - /// - /// This transform maps text values columns to new columns using a map dataset provided through its arguments. 
- /// - public sealed class TermLookupTransformer : OneToOneTransformBase - { - public sealed class Column : OneToOneColumn - { - public static Column Parse(string str) - { - var res = new Column(); - if (res.TryParse(str)) - return res; - return null; - } - - public bool TryUnparse(StringBuilder sb) - { - Contracts.AssertValue(sb); - return TryUnparseCore(sb); - } - } - - public sealed class Arguments - { - [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)] - public Column[] Column; - - [Argument(ArgumentType.AtMostOnce, IsInputFileName = true, HelpText = "The data file containing the terms", ShortName = "data", SortOrder = 2)] - public string DataFile; - - [Argument(ArgumentType.Multiple, HelpText = "The data loader", NullName = "", SignatureType = typeof(SignatureDataLoader))] - public IComponentFactory Loader; - - [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the text column containing the terms", ShortName = "term")] - public string TermColumn; - - [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the column containing the values", ShortName = "value")] - public string ValueColumn; - - [Argument(ArgumentType.AtMostOnce, - HelpText = "If term and value columns are unspecified, specifies whether the values are key values or numeric.", ShortName = "key")] - public bool KeyValues = true; - } - - /// - /// Holds the values that the terms map to. 
- /// - private abstract class ValueMap - { - public readonly ColumnType Type; - - protected ValueMap(ColumnType type) - { - Type = type; - } - - public static ValueMap Create(ColumnType type) - { - Contracts.AssertValue(type); - - if (!type.IsVector) - { - Func> del = CreatePrimitive; - var meth = del.GetMethodInfo().GetGenericMethodDefinition().MakeGenericMethod(type.RawType); - return (ValueMap)meth.Invoke(null, new object[] { type }); - } - else - { - Func> del = CreateVector; - var meth = del.GetMethodInfo().GetGenericMethodDefinition().MakeGenericMethod(type.ItemType.RawType); - return (ValueMap)meth.Invoke(null, new object[] { type }); - } - } - - public static OneValueMap CreatePrimitive(PrimitiveType type) - { - Contracts.AssertValue(type); - Contracts.Assert(type.RawType == typeof(TVal)); - return new OneValueMap(type); - } - - public static VecValueMap CreateVector(VectorType type) - { - Contracts.AssertValue(type); - Contracts.Assert(type.ItemType.RawType == typeof(TVal)); - return new VecValueMap(type); - } - - public abstract void Train(IExceptionContext ectx, RowCursor cursor, int colTerm, int colValue); - - public abstract Delegate GetGetter(ValueGetter> getSrc); - } - - /// - /// Holds the values that the terms map to - where the destination type is TRes. - /// - private abstract class ValueMap : ValueMap - { - private NormStr.Pool _terms; - private TRes[] _values; - - protected ValueMap(ColumnType type) - : base(type) - { - Contracts.Assert(type.RawType == typeof(TRes)); - } - - /// - /// Bind this value map to the given cursor for "training". 
- /// - public override void Train(IExceptionContext ectx, RowCursor cursor, int colTerm, int colValue) - { - Contracts.AssertValue(ectx); - ectx.Assert(_terms == null); - ectx.Assert(_values == null); - ectx.AssertValue(cursor); - ectx.Assert(0 <= colTerm && colTerm < cursor.Schema.Count); - ectx.Assert(cursor.Schema[colTerm].Type.IsText); - ectx.Assert(0 <= colValue && colValue < cursor.Schema.Count); - ectx.Assert(cursor.Schema[colValue].Type.Equals(Type)); - - var getTerm = cursor.GetGetter>(colTerm); - var getValue = cursor.GetGetter(colValue); - var terms = new NormStr.Pool(); - var values = new List(); - - ReadOnlyMemory term = default; - while (cursor.MoveNext()) - { - getTerm(ref term); - // REVIEW: Should we trim? - term = ReadOnlyMemoryUtils.TrimSpaces(term); - var nstr = terms.Add(term); - if (nstr.Id != values.Count) - throw ectx.Except("Duplicate term in lookup data: '{0}'", nstr); - - TRes res = default(TRes); - getValue(ref res); - values.Add(res); - ectx.Assert(terms.Count == values.Count); - } - - _terms = terms; - _values = values.ToArray(); - ectx.Assert(_terms.Count == _values.Length); - } - - /// - /// Given the term getter, produce a value getter from this value map. 
- /// - public override Delegate GetGetter(ValueGetter> getTerm) - { - Contracts.Assert(_terms != null); - Contracts.Assert(_values != null); - Contracts.Assert(_terms.Count == _values.Length); - - return GetGetterCore(getTerm); - } - - private ValueGetter GetGetterCore(ValueGetter> getTerm) - { - var src = default(ReadOnlyMemory); - return - (ref TRes dst) => - { - getTerm(ref src); - src = ReadOnlyMemoryUtils.TrimSpaces(src); - var nstr = _terms.Get(src); - if (nstr == null) - GetMissing(ref dst); - else - { - Contracts.Assert(0 <= nstr.Id && nstr.Id < _values.Length); - CopyValue(in _values[nstr.Id], ref dst); - } - }; - } - - protected abstract void GetMissing(ref TRes dst); - - protected abstract void CopyValue(in TRes src, ref TRes dst); - } - - /// - /// Holds the values that the terms map to when the destination type is a PrimitiveType (non-vector). - /// - private sealed class OneValueMap : ValueMap - { - private readonly TRes _badValue; - - public OneValueMap(PrimitiveType type) - : base(type) - { - // REVIEW: This uses the fact that standard conversions map NA to NA to get the NA for TRes. - // We should probably have a mapping from type to its bad value somewhere, perhaps in Conversions. - bool identity; - ValueMapper, TRes> conv; - if (Runtime.Data.Conversion.Conversions.Instance.TryGetStandardConversion, TRes>(TextType.Instance, type, - out conv, out identity)) - { - //Empty string will map to NA for R4 and R8, the only two types that can - //handle missing values. - var bad = String.Empty.AsMemory(); - conv(in bad, ref _badValue); - } - } - - protected override void GetMissing(ref TRes dst) - { - dst = _badValue; - } - - protected override void CopyValue(in TRes src, ref TRes dst) - { - dst = src; - } - } - - /// - /// Holds the values that the terms map to when the destination type is a VectorType. - /// TItem is the represtation type for the vector's ItemType. 
- /// - private sealed class VecValueMap : ValueMap> - { - public VecValueMap(VectorType type) - : base(type) - { - } - - protected override void GetMissing(ref VBuffer dst) - { - VBufferUtils.Resize(ref dst, Type.VectorSize, 0); - } - - protected override void CopyValue(in VBuffer src, ref VBuffer dst) - { - src.CopyTo(ref dst); - } - } - - public const string LoaderSignature = "TermLookupTransform"; - - internal const string Summary = "Maps text values columns to new columns using a map dataset."; - - private static VersionInfo GetVersionInfo() - { - return new VersionInfo( - modelSignature: "TXTLOOKT", - // verWrittenCur: 0x00010001, // Initial. - verWrittenCur: 0x00010002, // Dropped sizeof(Float). - verReadableCur: 0x00010002, - verWeCanReadBack: 0x00010002, - loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(TermLookupTransformer).Assembly.FullName); - } - - // This is the byte array containing the binary .idv file contents for the lookup data. - // This is persisted; the _termMap and _valueMap are constructed from it. - private readonly byte[] _bytes; - - // The BinaryLoader over the byte array above. We keep this - // active simply for metadata requests. - private readonly BinaryLoader _ldr; - - // The value map. - private readonly ValueMap _valueMap; - - // Stream names for the binary idv streams. - private const string DefaultMapName = "DefaultMap.idv"; - - private const string RegistrationName = "TextLookup"; - - /// - /// Public constructor corresponding to SignatureDataTransform. 
- /// - public TermLookupTransformer(IHostEnvironment env, Arguments args, IDataView input) - : base(env, RegistrationName, env.CheckRef(args, nameof(args)).Column, - input, TestIsText) - { - Host.AssertNonEmpty(Infos); - Host.Assert(Infos.Length == Utils.Size(args.Column)); - - Host.CheckUserArg(!string.IsNullOrWhiteSpace(args.DataFile), nameof(args.DataFile), "must specify dataFile"); - Host.CheckUserArg(string.IsNullOrEmpty(args.TermColumn) == string.IsNullOrEmpty(args.ValueColumn), nameof(args.TermColumn), - "Either both term and value column should be specified, or neither."); - - using (var ch = Host.Start("Training")) - { - _bytes = GetBytes(Host, Infos, args); - _ldr = GetLoader(Host, _bytes); - _valueMap = Train(ch, _ldr); - SetMetadata(); - } - } - - public TermLookupTransformer(IHostEnvironment env, IDataView input, IDataView lookup, string sourceTerm, string sourceValue, string targetTerm, string targetValue) - : base(env, RegistrationName, new[] { new Column { Name = sourceValue, Source = sourceTerm } }, input, TestIsText) - { - Host.AssertNonEmpty(Infos); - Host.CheckValue(input, nameof(input)); - Host.CheckValue(lookup, nameof(lookup)); - Host.Assert(Infos.Length == 1); - Host.CheckNonEmpty(targetTerm, nameof(targetTerm), "Term column must be specified when passing in a data view as lookup table."); - Host.CheckNonEmpty(targetValue, nameof(targetValue), "Value column must be specified when passing in a data view as lookup table."); - - using (var ch = Host.Start("Training")) - { - _bytes = GetBytesFromDataView(Host, lookup, targetTerm, targetValue); - _ldr = GetLoader(Host, _bytes); - _valueMap = Train(ch, _ldr); - SetMetadata(); - } - } - - // This method is called if only a datafile is specified, without a loader/term and value columns. - // It determines the type of the Value column and returns the appropriate TextLoader component factory. 
- private static IComponentFactory GetLoaderFactory(string filename, bool keyValues, IHost host) - { - Contracts.AssertValue(host); - - // If the user specified non-key values, we define the value column to be numeric. - if (!keyValues) - return ComponentFactoryUtils.CreateFromFunction( - (env, files) => new TextLoader( - env, new[] - { - new TextLoader.Column("Term", DataKind.TX, 0), - new TextLoader.Column("Value", DataKind.Num, 1) - }, dataSample: files).Read(files) as IDataLoader); - - // If the user specified key values, we scan the values to determine the range of the key type. - ulong min = ulong.MaxValue; - ulong max = ulong.MinValue; - try - { - var file = new MultiFileSource(filename); - var data = new TextLoader(host, new[] - { - new TextLoader.Column("Term", DataKind.TX, 0), - new TextLoader.Column("Value", DataKind.TX, 1) - }, - dataSample: file - ).Read(file); - - using (var cursor = data.GetRowCursor(c => true)) - { - var getTerm = cursor.GetGetter>(0); - var getVal = cursor.GetGetter>(1); - ReadOnlyMemory txt = default; - - using (var ch = host.Start("Creating Text Lookup Loader")) - { - long countNonKeys = 0; - while (cursor.MoveNext()) - { - getVal(ref txt); - ulong res; - // Try to parse the text as a key value between 1 and ulong.MaxValue. If this succeeds and res>0, - // we update max and min accordingly. If res==0 it means the value is missing, in which case we ignore it for - // computing max and min. - if (Runtime.Data.Conversion.Conversions.Instance.TryParseKey(in txt, 1, ulong.MaxValue, out res)) - { - if (res < min && res != 0) - min = res; - if (res > max) - max = res; - } - // If parsing as key did not succeed, the value can still be 0, so we try parsing it as a ulong. If it succeeds, - // then the value is 0, and we update min accordingly. 
- else if (Runtime.Data.Conversion.Conversions.Instance.TryParse(in txt, out res)) - { - ch.Assert(res == 0); - min = 0; - } - //If parsing as a ulong fails, we increment the counter for the non-key values. - else - { - var term = default(ReadOnlyMemory); - getTerm(ref term); - if (countNonKeys < 5) - ch.Warning("Term '{0}' in mapping file is mapped to non key value '{1}'", term, txt); - countNonKeys++; - } - } - if (countNonKeys > 0) - ch.Warning("Found {0} non key values in the file '{1}'", countNonKeys, filename); - if (min > max) - { - min = 0; - max = uint.MaxValue - 1; - ch.Warning("did not find any valid key values in the file '{0}'", filename); - } - else - ch.Info("Found key values in the range {0} to {1} in the file '{2}'", min, max, filename); - } - } - } - catch (Exception e) - { - throw host.Except(e, "Failed to parse the lookup file '{0}' in TermLookupTransform", filename); - } - - TextLoader.Column valueColumn = new TextLoader.Column("Value", DataKind.U4, 1); - if (max - min < (ulong)int.MaxValue) - { - valueColumn.KeyRange = new KeyRange(min, max); - } - else if (max - min < (ulong)uint.MaxValue) - { - valueColumn.KeyRange = new KeyRange(min); - } - else - { - valueColumn.Type = DataKind.U8; - valueColumn.KeyRange = new KeyRange(min); - } - - return ComponentFactoryUtils.CreateFromFunction( - (env, files) => new TextLoader( - env, - columns: new[] - { - new TextLoader.Column("Term", DataKind.TX, 0), - valueColumn - }, - dataSample: files).Read(files) as IDataLoader); - } - - // This saves the lookup data as a byte array encoded as a binary .idv file. 
- private static byte[] GetBytes(IHost host, ColInfo[] infos, Arguments args) - { - Contracts.AssertValue(host); - host.AssertNonEmpty(infos); - host.AssertValue(args); - - string dataFile = args.DataFile; - IComponentFactory loaderFactory = args.Loader; - string termColumn; - string valueColumn; - if (!string.IsNullOrEmpty(args.TermColumn)) - { - host.Assert(!string.IsNullOrEmpty(args.ValueColumn)); - termColumn = args.TermColumn; - valueColumn = args.ValueColumn; - } - else - { - var ext = Path.GetExtension(dataFile); - if (loaderFactory != null || string.Equals(ext, ".idv", StringComparison.OrdinalIgnoreCase)) - throw host.ExceptUserArg(nameof(args.TermColumn), "Term and value columns needed."); - loaderFactory = GetLoaderFactory(args.DataFile, args.KeyValues, host); - termColumn = "Term"; - valueColumn = "Value"; - } - return GetBytesOne(host, dataFile, loaderFactory, termColumn, valueColumn); - } - - private static byte[] GetBytesFromDataView(IHost host, IDataView lookup, string termColumn, string valueColumn) - { - Contracts.AssertValue(host); - host.AssertValue(lookup); - host.AssertNonEmpty(termColumn); - host.AssertNonEmpty(valueColumn); - - int colTerm; - int colValue; - var schema = lookup.Schema; - - if (!schema.TryGetColumnIndex(termColumn, out colTerm)) - throw host.ExceptUserArg(nameof(Arguments.TermColumn), "column not found: '{0}'", termColumn); - if (!schema.TryGetColumnIndex(valueColumn, out colValue)) - throw host.ExceptUserArg(nameof(Arguments.ValueColumn), "column not found: '{0}'", valueColumn); - - // REVIEW: Should we allow term to be a vector of text (each term in the vector - // would map to the same value)? 
- var typeTerm = schema[colTerm].Type; - host.CheckUserArg(typeTerm.IsText, nameof(Arguments.TermColumn), "term column must contain text"); - var typeValue = schema[colValue].Type; - var cols = new List<(string Source, string Name)>() - { - (termColumn, "Term"), - (valueColumn, "Value") - }; - - var view = new ColumnCopyingTransformer(host, cols.ToArray()).Transform(lookup); - view = ColumnSelectingTransformer.CreateKeep(host, view, cols.Select(x=>x.Name).ToArray()); - - var saver = new BinarySaver(host, new BinarySaver.Arguments()); - using (var strm = new MemoryStream()) - { - saver.SaveData(strm, view, 0, 1); - return strm.ToArray(); - } - } - - private static byte[] GetBytesOne(IHost host, string dataFile, IComponentFactory loaderFactory, - string termColumn, string valueColumn) - { - Contracts.AssertValue(host); - host.Assert(!string.IsNullOrWhiteSpace(dataFile)); - host.AssertNonEmpty(termColumn); - host.AssertNonEmpty(valueColumn); - - IMultiStreamSource fileSource = new MultiFileSource(dataFile); - IDataLoader loader; - if (loaderFactory == null) - { - // REVIEW: Should there be defaults for loading from text? - var ext = Path.GetExtension(dataFile); - bool isBinary = string.Equals(ext, ".idv", StringComparison.OrdinalIgnoreCase); - bool isTranspose = string.Equals(ext, ".tdv", StringComparison.OrdinalIgnoreCase); - if (!isBinary && !isTranspose) - throw host.ExceptUserArg(nameof(Arguments.Loader), "must specify the loader"); - host.Assert(isBinary != isTranspose); // One or the other must be true. 
- if (isBinary) - { - loader = new BinaryLoader(host, new BinaryLoader.Arguments(), fileSource); - } - else - { - loader = new TransposeLoader(host, new TransposeLoader.Arguments(), fileSource); - } - } - else - { - loader = loaderFactory.CreateComponent(host, fileSource); - } - - return GetBytesFromDataView(host, loader, termColumn, valueColumn); - } - - private static BinaryLoader GetLoader(IHostEnvironment env, byte[] bytes) - { - env.AssertValue(env); - env.AssertValue(bytes); - - var strm = new MemoryStream(bytes, writable: false); - return new BinaryLoader(env, new BinaryLoader.Arguments(), strm); - } - - private static ValueMap Train(IExceptionContext ectx, BinaryLoader ldr) - { - Contracts.AssertValue(ectx); - ectx.AssertValue(ldr); - ectx.Assert(ldr.Schema.Count == 2); - - // REVIEW: Should we allow term to be a vector of text (each term in the vector - // would map to the same value)? - ectx.Assert(ldr.Schema[0].Type.IsText); - - var schema = ldr.Schema; - var typeValue = schema[1].Type; - - // REVIEW: We should know the number of rows - use that info to set initial capacity. - var values = ValueMap.Create(typeValue); - using (var cursor = ldr.GetRowCursor(c => true)) - values.Train(ectx, cursor, 0, 1); - return values; - } - - private TermLookupTransformer(IChannel ch, ModelLoadContext ctx, IHost host, IDataView input) - : base(host, ctx, input, TestIsText) - { - Host.AssertValue(ch); - - // *** Binary format *** - // - ch.AssertNonEmpty(Infos); - - // Extra streams: - // DefaultMap.idv - byte[] rgb = null; - Action fn = r => rgb = ReadAllBytes(ch, r); - - if (!ctx.TryLoadBinaryStream(DefaultMapName, fn)) - throw ch.ExceptDecode(); - _bytes = rgb; - - // Process the bytes into the loader and map. 
- _ldr = GetLoader(Host, _bytes); - ValidateLoader(ch, _ldr); - _valueMap = Train(ch, _ldr); - SetMetadata(); - } - - private static byte[] ReadAllBytes(IExceptionContext ectx, BinaryReader rdr) - { - Contracts.AssertValue(ectx); - ectx.AssertValue(rdr); - ectx.Assert(rdr.BaseStream.CanSeek); - - long size = rdr.BaseStream.Length; - ectx.CheckDecode(size <= int.MaxValue); - - var rgb = new byte[(int)size]; - int cb = rdr.Read(rgb, 0, rgb.Length); - ectx.CheckDecode(cb == rgb.Length); - - return rgb; - } - - public static TermLookupTransformer Create(IHostEnvironment env, ModelLoadContext ctx, IDataView input) - { - Contracts.CheckValue(env, nameof(env)); - var h = env.Register(RegistrationName); - h.CheckValue(ctx, nameof(ctx)); - ctx.CheckAtModel(GetVersionInfo()); - h.CheckValue(input, nameof(input)); - return h.Apply("Loading Model", ch => new TermLookupTransformer(ch, ctx, h, input)); - } - - public override void Save(ModelSaveContext ctx) - { - Host.CheckValue(ctx, nameof(ctx)); - ctx.CheckAtModel(); - ctx.SetVersionInfo(GetVersionInfo()); - - // *** Binary format *** - // - SaveBase(ctx); - - // Extra streams: - // DefaultMap.idv - Host.Assert(_ldr != null); - Host.AssertValue(_bytes); - DebugValidateLoader(_ldr); - ctx.SaveBinaryStream(DefaultMapName, w => w.Write(_bytes)); - } - - [Conditional("DEBUG")] - private static void DebugValidateLoader(BinaryLoader ldr) - { - Contracts.Assert(ldr != null); - Contracts.Assert(ldr.Schema.Count == 2); - Contracts.Assert(ldr.Schema[0].Type.IsText); - } - - private static void ValidateLoader(IExceptionContext ectx, BinaryLoader ldr) - { - if (ldr == null) - return; - ectx.CheckDecode(ldr.Schema.Count == 2); - ectx.CheckDecode(ldr.Schema[0].Type.IsText); - } - - protected override ColumnType GetColumnTypeCore(int iinfo) - { - Contracts.Assert(0 <= iinfo & iinfo < Infos.Length); - return _valueMap.Type; - } - - private void SetMetadata() - { - // Metadata is passed through from the Value column of the map data view. 
- var md = Metadata; - for (int iinfo = 0; iinfo < Infos.Length; iinfo++) - { - using (var bldr = md.BuildMetadata(iinfo, _ldr.Schema, 1)) - { - // No additional metadata. - } - } - md.Seal(); - } - - protected override Delegate GetGetterCore(IChannel ch, Row input, int iinfo, out Action disposer) - { - Host.AssertValueOrNull(ch); - Host.AssertValue(input); - Host.Assert(0 <= iinfo && iinfo < Infos.Length); - disposer = null; - - var getSrc = GetSrcGetter>(input, iinfo); - return _valueMap.GetGetter(getSrc); - } - } -} diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs index a98bba9d92..a1eb5a1523 100644 --- a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -390,7 +390,7 @@ void TestSavingAndLoading() ms.Position = 0; var loadedTransformer = TransformerChain.LoadFrom(Env, ms); var result = loadedTransformer.Transform(dataView); - Assert.Equal(5, result.Schema.ColumnCount); + Assert.Equal(5, result.Schema.Count); Assert.True(result.Schema.TryGetColumnIndex("D", out int col)); Assert.True(result.Schema.TryGetColumnIndex("E", out col)); } From 658864455edff2ea057d017d09b8ba028fcd68c2 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Wed, 19 Dec 2018 17:27:24 -0800 Subject: [PATCH 14/16] - Fixing release build. 
--- src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs index c15f3cb2ff..93bb1f5cb3 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs @@ -393,7 +393,8 @@ protected ValueMappingTransformer(IHostEnvironment env, IDataView lookupMap, env.CheckNonEmpty(keyColumn, nameof(keyColumn), "A key column must be specified when passing in an IDataView for the value mapping"); env.CheckNonEmpty(valueColumn, nameof(valueColumn), "A value column must be specified when passing in an IDataView for the value mapping"); _valueMap = CreateValueMapFromDataView(lookupMap, keyColumn, valueColumn); - env.Assert(lookupMap.Schema.TryGetColumnIndex(valueColumn, out int valueColumnIdx)); + int valueColumnIdx = 0; + env.Assert(lookupMap.Schema.TryGetColumnIndex(valueColumn, out valueColumnIdx)); _valueMetadata = CopyMetadata(lookupMap.Schema[valueColumnIdx].Metadata); // Create the byte array of the original IDataView, this is used for saving out the data. From ce8c95bd24e13babcb1c18525a173a2740580f43 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Thu, 20 Dec 2018 16:35:20 -0800 Subject: [PATCH 15/16] - Updating based upon feedback." 
--- .../DataView/ArrayDataViewBuilder.cs | 14 ++++++++++++-- .../Transforms/ConversionsExtensionsCatalog.cs | 15 +++++++++++++++ .../Transforms/ExtensionsCatalog.cs | 2 -- .../Transforms/ValueMappingTransformer.cs | 2 +- .../DataPipe/TestDataPipe.cs | 14 +++++++------- 5 files changed, 35 insertions(+), 12 deletions(-) diff --git a/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs b/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs index f74aff277f..a6d9f754d9 100644 --- a/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs +++ b/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs @@ -75,10 +75,15 @@ public void AddColumn(string name, PrimitiveType type, params T[] values) /// Constructs a new key column from an array where values are copied to output simply /// by being assigned. /// + /// The name of the column. + /// The delegate that does a reverse lookup based upon the given key. This is for metadata creation + /// The minimum to use. + /// The count of unique keys specified in values + /// The values to add to the column. Note that since this is creating a column, the values will be offset by 1. public void AddColumn(string name, ValueGetter>> getKeyValues, ulong keyMin, int keyCount, params uint[] values) { _host.CheckValue(getKeyValues, nameof(getKeyValues)); - //_host.CheckParam(keyCount > 0, nameof(keyCount)); + _host.CheckParam(keyCount > 0, nameof(keyCount)); CheckLength(name, values); _columns.Add(new AssignmentColumn(new KeyType(DataKind.U4, keyMin, keyCount), values)); _getKeyValues.Add(name, getKeyValues); @@ -89,10 +94,15 @@ public void AddColumn(string name, ValueGetter>> ge /// Constructs a new key column from an array where values are copied to output simply /// by being assigned. /// + /// The name of the column. + /// The delegate that does a reverse lookup based upon the given key. This is for metadata creation + /// The minimum to use. + /// The count of unique keys specified in values + /// The values to add to the column. 
Note that since this is creating a column, the values will be offset by 1. public void AddColumn(string name, ValueGetter>> getKeyValues, ulong keyMin, int keyCount, params ulong[] values) { _host.CheckValue(getKeyValues, nameof(getKeyValues)); - //_host.CheckParam(keyCount > 0, nameof(keyCount)); + _host.CheckParam(keyCount > 0, nameof(keyCount)); CheckLength(name, values); _columns.Add(new AssignmentColumn(new KeyType(DataKind.U8, keyMin, keyCount), values)); _getKeyValues.Add(name, getKeyValues); diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index 615211ef1d..ac26042956 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -129,8 +129,23 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co => new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns, file, termsColumn, loaderFactory); } + /// + /// Extensions for the ValueMapping Estimator + /// + public static class ToMappedValueCatalog { + /// + /// Maps specified keys to specified values + /// + /// The key type. + /// The value type. + /// The categorical transform's catalog + /// The list of keys to use for the mapping. The mapping is 1-1 with values. This list must be the same length as values and + /// cannot contain duplicate keys. + /// The list of values to pair with the keys for the mapping. This list must be equal to the same length as keys. + /// The columns to apply this transform on. 
+ /// public static ValueMappingEstimator ValueMap( this TransformsCatalog catalog, IEnumerable keys, diff --git a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs index 6479aacfcf..defe52ee65 100644 --- a/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs @@ -5,7 +5,6 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Transforms; -using System.Collections.Generic; namespace Microsoft.ML { @@ -62,5 +61,4 @@ public static ColumnSelectingEstimator SelectColumns(this TransformsCatalog cata => new ColumnSelectingEstimator(CatalogUtils.GetEnvironment(catalog), keepColumns, null, keepHidden, ColumnSelectingTransformer.Defaults.IgnoreMissing); } - } diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs index 93bb1f5cb3..5d1917bd07 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs @@ -19,7 +19,7 @@ [assembly: LoadableClass(ValueMappingTransformer.Summary, typeof(IDataTransform), typeof(ValueMappingTransformer), typeof(ValueMappingTransformer.Arguments), typeof(SignatureDataTransform), - ValueMappingTransformer.UserName, "ValueMapping", "ValueMappingTransformer", ValueMappingTransformer.ShortName, + ValueMappingTransformer.UserName, "ValueMapping", "ValueMappingTransformer", ValueMappingTransformer.ShortName, "TermLookup", DocName = "transform/ValueMappingTransformer.md")] [assembly: LoadableClass(ValueMappingTransformer.Summary, typeof(IDataTransform), typeof(ValueMappingTransformer), null, typeof(SignatureLoadDataTransform), diff --git a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs index 9480b8bc68..37a5cf6560 100644 --- a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs +++ 
b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs @@ -43,7 +43,7 @@ public void SavePipeLabelParsers() "loader=Text{col=RawLabel:TXT:0 col=Names:TXT:1-2 col=Features:TXT:3-4 header+}", "xf=AutoLabel{col=AutoLabel:RawLabel}", "xf=Term{col=StringLabel:RawLabel terms={Wirtschaft,Gesundheit,Deutschland,Ausland,Unterhaltung,Sport,Technik & Wissen}}", - string.Format("xf=ValueMap{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=TermLookup{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), "xf=SelectColumns{keepcol=RawLabel keepcol=AutoLabel keepcol=StringLabel keepcol=FileLabel hidden=-}" }); @@ -63,7 +63,7 @@ public void SavePipeLabelParsers() TestCore(pathData, true, new[] { "loader=Text{col=RawLabel:TXT:0 col=Names:TXT:1-2 col=Features:TXT:3-4 header+}", - string.Format("xf=ValueMap{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=TermLookup{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), "xf=SelectColumns{keepcol=RawLabel keepcol=FileLabel hidden=-}" }, suffix: "1"); @@ -83,7 +83,7 @@ public void SavePipeLabelParsers() TestCore(pathData, true, new[] { "loader=Text{col=RawLabel:TXT:0 col=Names:TXT:1-2 col=Features:TXT:3-4 header+}", - string.Format("xf=ValueMap{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=TermLookup{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), "xf=SelectColumns{keepcol=RawLabel keepcol=FileLabel hidden=-}" }, suffix: "2"); @@ -103,7 +103,7 @@ public void SavePipeLabelParsers() TestCore(pathData, true, new[] { "loader=Text{col=RawLabel:TXT:0 col=Names:TXT:1-2 col=Features:TXT:3-4 header+}", - string.Format("xf=ValueMap{{valuesAsKeyType=- col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=TermLookup{{valuesAsKeyType=- col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), "xf=SelectColumns{keepcol=RawLabel keepcol=FileLabel hidden=-}" }, suffix: "3"); @@ -128,8 +128,8 @@ public void 
SavePipeLabelParsers() TestCore(pathData, true, new[] { "loader=Text{col=RawLabel:TXT:0 col=Names:TXT:1-2 col=Features:TXT:3-4 header+}", - string.Format("xf=ValueMap{{valuesAsKeyType=- col=FileLabelNum:RawLabel data={{{0}}}}}", mappingPathData), - string.Format("xf=ValueMap{{col=FileLabelKey:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=TermLookup{{valuesAsKeyType=- col=FileLabelNum:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=TermLookup{{col=FileLabelKey:RawLabel data={{{0}}}}}", mappingPathData), "xf=SelectColumns{keepcol=RawLabel keepcol=FileLabelNum keepcol=FileLabelKey hidden=-}" }, suffix: "4"); writer.WriteLine(ProgressLogLine); @@ -153,7 +153,7 @@ public void SavePipeLabelParsers() TestCore(pathData, true, new[] { "loader=Text{col=RawLabel:TXT:0 col=Names:TXT:1-2 col=Features:TXT:3-4 header+}", - string.Format("xf=ValueMap{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=TermLookup{{col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), "xf=SelectColumns{keepcol=RawLabel keepcol=FileLabel hidden=-}" }, suffix: "5"); From 88f759e49c03845d28986376c2cd981c39fcca9f Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Fri, 21 Dec 2018 08:55:15 -0800 Subject: [PATCH 16/16] - Updating from further feedback --- .../DataView/ArrayDataViewBuilder.cs | 24 +-- .../ConversionsExtensionsCatalog.cs | 9 +- .../Transforms/ValueMappingTransformer.cs | 170 ++++++++---------- .../DataPipe/TestDataPipe.cs | 4 +- .../Transformers/ValueMappingTests.cs | 9 +- 5 files changed, 85 insertions(+), 131 deletions(-) diff --git a/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs b/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs index 923d5bd90d..882b7e33cb 100644 --- a/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs +++ b/src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs @@ -80,31 +80,13 @@ public void AddColumn(string name, PrimitiveType type, params T[] values) /// The minimum to use. 
/// The count of unique keys specified in values /// The values to add to the column. Note that since this is creating a column, the values will be offset by 1. - public void AddColumn(string name, ValueGetter>> getKeyValues, ulong keyMin, int keyCount, params uint[] values) + public void AddColumn(string name, ValueGetter>> getKeyValues, ulong keyMin, int keyCount, params T1[] values) { _host.CheckValue(getKeyValues, nameof(getKeyValues)); _host.CheckParam(keyCount > 0, nameof(keyCount)); CheckLength(name, values); - _columns.Add(new AssignmentColumn(new KeyType(DataKind.U4, keyMin, keyCount), values)); - _getKeyValues.Add(name, getKeyValues); - _names.Add(name); - } - - /// - /// Constructs a new key column from an array where values are copied to output simply - /// by being assigned. - /// - /// The name of the column. - /// The delegate that does a reverse lookup based upon the given key. This is for metadata creation - /// The minimum to use. - /// The count of unique keys specified in values - /// The values to add to the column. Note that since this is creating a column, the values will be offset by 1. 
- public void AddColumn(string name, ValueGetter>> getKeyValues, ulong keyMin, int keyCount, params ulong[] values) - { - _host.CheckValue(getKeyValues, nameof(getKeyValues)); - _host.CheckParam(keyCount > 0, nameof(keyCount)); - CheckLength(name, values); - _columns.Add(new AssignmentColumn(new KeyType(DataKind.U8, keyMin, keyCount), values)); + values.GetType().GetElementType().TryGetDataKind(out DataKind kind); + _columns.Add(new AssignmentColumn(new KeyType(kind, keyMin, keyCount), values)); _getKeyValues.Add(name, getKeyValues); _names.Add(name); } diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index ac26042956..db6dcd3d14 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -127,14 +127,7 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co string termsColumn = null, IComponentFactory loaderFactory = null) => new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns, file, termsColumn, loaderFactory); - } - /// - /// Extensions for the ValueMapping Estimator - /// - - public static class ToMappedValueCatalog - { /// /// Maps specified keys to specified values /// @@ -147,7 +140,7 @@ public static class ToMappedValueCatalog /// The columns to apply this transform on. 
/// public static ValueMappingEstimator ValueMap( - this TransformsCatalog catalog, + this TransformsCatalog.ConversionTransforms catalog, IEnumerable keys, IEnumerable values, params (string source, string name)[] columns) diff --git a/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs b/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs index 5d1917bd07..15a0ccccfc 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs @@ -10,7 +10,7 @@ using Microsoft.ML.Runtime.Data.IO; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Model; -using Microsoft.ML.Transforms; +using Microsoft.ML.Transforms.Conversions; using System; using System.Collections.Generic; using System.IO; @@ -19,11 +19,11 @@ [assembly: LoadableClass(ValueMappingTransformer.Summary, typeof(IDataTransform), typeof(ValueMappingTransformer), typeof(ValueMappingTransformer.Arguments), typeof(SignatureDataTransform), - ValueMappingTransformer.UserName, "ValueMapping", "ValueMappingTransformer", ValueMappingTransformer.ShortName, "TermLookup", - DocName = "transform/ValueMappingTransformer.md")] + ValueMappingTransformer.UserName, "ValueMapping", "ValueMappingTransformer", ValueMappingTransformer.ShortName, + "TermLookup", "Lookup", "LookupTransform", DocName = "transform/ValueMappingTransformer.md")] [assembly: LoadableClass(ValueMappingTransformer.Summary, typeof(IDataTransform), typeof(ValueMappingTransformer), null, typeof(SignatureLoadDataTransform), - "Value Mapping Transform", ValueMappingTransformer.LoaderSignature)] + "Value Mapping Transform", ValueMappingTransformer.LoaderSignature, ValueMappingTransformer.TermLookupLoaderSignature)] [assembly: LoadableClass(ValueMappingTransformer.Summary, typeof(ValueMappingTransformer), null, typeof(SignatureLoadModel), "Value Mapping Transform", ValueMappingTransformer.LoaderSignature)] @@ -31,18 +31,15 @@ [assembly: 
LoadableClass(typeof(IRowMapper), typeof(ValueMappingTransformer), null, typeof(SignatureLoadRowMapper), ValueMappingTransformer.UserName, ValueMappingTransformer.LoaderSignature)] -[assembly: LoadableClass("", typeof(IDataTransform), typeof(ValueMappingTransformer), null, typeof(SignatureLoadDataTransform), - "", ValueMappingTransformer.TermLookupLoaderSignature)] - -namespace Microsoft.ML.Transforms +namespace Microsoft.ML.Transforms.Conversions { /// /// The ValueMappingEstimator is a 1-1 mapping from a key to value. The key type and value type are specified /// through TKey and TValue. Arrays are supported for vector types which can be used as either a key or a value /// or both. The mapping is specified, not trained by providiing a list of keys and a list of values. /// - /// Specifies the key type - /// Specifies the value type + /// Specifies the key type. + /// Specifies the value type. public sealed class ValueMappingEstimator : TrivialEstimator> { private (string input, string output)[] _columns; @@ -50,10 +47,10 @@ public sealed class ValueMappingEstimator : TrivialEstimator /// Constructs the ValueMappingEstimator, key type -> value type mapping /// - /// Instance of the host environment - /// The list of keys of TKey - /// The list of values of TValue - /// The list of columns to apply + /// The environment to use. + /// The list of keys of TKey. + /// The list of values of TValue. + /// The list of columns to apply. 
public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), new ValueMappingTransformer(env, keys, values, false, columns)) @@ -64,11 +61,11 @@ public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnum /// /// Constructs the ValueMappingEstimator, key type -> value type mapping /// - /// Instance of the host environment - /// The list of keys of TKey - /// The list of values of TValue - /// Specifies to treat the values as a - /// The list of columns to apply + /// The environment to use. + /// The list of keys of TKey. + /// The list of values of TValue. + /// Specifies to treat the values as a . + /// The list of columns to apply. public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool treatValuesAsKeyType, params (string input, string output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), new ValueMappingTransformer(env, keys, values, treatValuesAsKeyType, columns)) @@ -79,10 +76,10 @@ public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnum /// /// Constructs the ValueMappingEstimator, key type -> value array type mapping /// - /// Instance of the host environment - /// The list of keys of TKey - /// The list of values of TValue[] - /// The list of columns to apply + /// The environment to use. + /// The list of keys of TKey. + /// The list of values of TValue[]. + /// The list of columns to apply. 
public ValueMappingEstimator(IHostEnvironment env, IEnumerable keys, IEnumerable values, params (string input, string output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingEstimator)), new ValueMappingTransformer(env, keys, values, columns)) @@ -135,10 +132,8 @@ internal static PrimitiveType GetPrimitiveType(Type rawType, out bool isVectorTy isVectorType = true; } - if (!type.TryGetDataKind(out DataKind kind)) - { + if (!type.TryGetDataKind(out DataKind kind)) throw new InvalidOperationException($"Unsupported type {type} used in mapping."); - } return PrimitiveType.FromKind(kind); } @@ -198,7 +193,7 @@ internal static IDataView CreateDataView(IHostEnvironment env, // the column. HashSet valueSet = new HashSet(); HashSet keySet = new HashSet(); - for(int i = 0; i < values.Count(); ++i) + for (int i = 0; i < values.Count(); ++i) { var v = values.ElementAt(i); if (valueSet.Contains(v)) @@ -229,13 +224,13 @@ internal static IDataView CreateDataView(IHostEnvironment env, // be assigned the same index. The dictionary is used to maintain uniqueness, indices will contain // the full list of indices (equal to the same length of values). 
Dictionary keyTypeValueMapping = new Dictionary(); - uint[] indices = new uint[values.Count()]; + uint[] indices = new uint[values.Count()]; // Start the index at 1 uint index = 1; - for(int i = 0; i < values.Count(); ++i) + for (int i = 0; i < values.Count(); ++i) { TValue value = values.ElementAt(i); - if(!keyTypeValueMapping.ContainsKey(value)) + if (!keyTypeValueMapping.ContainsKey(value)) { keyTypeValueMapping.Add(value, index); index++; @@ -249,9 +244,7 @@ internal static IDataView CreateDataView(IHostEnvironment env, } } else - { dataViewBuilder.AddColumn(valueColumnName, valueType, values.ToArray()); - } return dataViewBuilder.GetDataView(); } @@ -267,26 +260,26 @@ internal static IDataView CreateDataView(IHostEnvironment env, public sealed class ValueMappingTransformer : ValueMappingTransformer { /// - /// Constructs a ValueMappingTransformer with a key type to value type + /// Constructs a ValueMappingTransformer with a key type to value type. /// - /// Instance of the host environment - /// The list of keys that are TKey - /// The list of values that are TValue - /// Specifies to treat the values as a + /// The environment to use. + /// The list of keys that are TKey. + /// The list of values that are TValue. + /// Specifies to treat the values as a . /// The specified columns to apply - public ValueMappingTransformer(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool treatValuesAsKeyTypes, (string Input, string Output)[] columns) + public ValueMappingTransformer(IHostEnvironment env, IEnumerable keys, IEnumerable values, bool treatValuesAsKeyTypes, (string input, string output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransformer)), ConvertToDataView(env, keys, values, treatValuesAsKeyTypes), KeyColumnName, ValueColumnName, columns) { } /// - /// Constructs a ValueMappingTransformer with a key type to value array type + /// Constructs a ValueMappingTransformer with a key type to value array type. 
/// - /// Instance of the host environment - /// The list of keys that are TKey - /// The list of values that are TValue[] - /// The specified columns to apply - public ValueMappingTransformer(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string Input, string Output)[] columns) + /// The environment to use. + /// The list of keys that are TKey. + /// The list of values that are TValue[]. + /// The specified columns to apply. + public ValueMappingTransformer(IHostEnvironment env, IEnumerable keys, IEnumerable values, (string input, string output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransformer)), ConvertToDataView(env, keys, values), KeyColumnName, ValueColumnName, columns) { } @@ -369,45 +362,39 @@ public sealed class Arguments [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", ShortName = "col", SortOrder = 1)] public Column[] Column; - [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the column containing the keys", ShortName = "key")] + [Argument(ArgumentType.AtMostOnce, IsInputFileName = true, HelpText = "The data file containing the terms", ShortName = "data", SortOrder = 2)] + public string DataFile; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the column containing the keys", ShortName = "keyCol, term, TermColumn")] public string KeyColumn; - [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the column containing the values", ShortName = "value")] + [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the column containing the values", ShortName = "valueCol, value")] public string ValueColumn; [Argument(ArgumentType.Multiple, HelpText = "The data loader", NullName = "", SignatureType = typeof(SignatureDataLoader))] public IComponentFactory Loader; - [Argument(ArgumentType.AtMostOnce, IsInputFileName = true, HelpText = "The data file containing the terms", ShortName = "data", SortOrder 
= 2)] - public string DataFile; - [Argument(ArgumentType.AtMostOnce, - HelpText = "Specifies whether the values are key values or numeric, only valid when loader is not specified and the type of data is not an idv.")] + HelpText = "Specifies whether the values are key values or numeric, only valid when loader is not specified and the type of data is not an idv.", + ShortName = "key")] public bool ValuesAsKeyType = true; } protected ValueMappingTransformer(IHostEnvironment env, IDataView lookupMap, - string keyColumn, string valueColumn, (string Input, string Output)[] columns) + string keyColumn, string valueColumn, (string input, string output)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ValueMappingTransformer)), columns) { - env.CheckNonEmpty(keyColumn, nameof(keyColumn), "A key column must be specified when passing in an IDataView for the value mapping"); - env.CheckNonEmpty(valueColumn, nameof(valueColumn), "A value column must be specified when passing in an IDataView for the value mapping"); + Host.CheckNonEmpty(keyColumn, nameof(keyColumn), "A key column must be specified when passing in an IDataView for the value mapping"); + Host.CheckNonEmpty(valueColumn, nameof(valueColumn), "A value column must be specified when passing in an IDataView for the value mapping"); _valueMap = CreateValueMapFromDataView(lookupMap, keyColumn, valueColumn); int valueColumnIdx = 0; - env.Assert(lookupMap.Schema.TryGetColumnIndex(valueColumn, out valueColumnIdx)); - _valueMetadata = CopyMetadata(lookupMap.Schema[valueColumnIdx].Metadata); + Host.Assert(lookupMap.Schema.TryGetColumnIndex(valueColumn, out valueColumnIdx)); + _valueMetadata = lookupMap.Schema[valueColumnIdx].Metadata; // Create the byte array of the original IDataView, this is used for saving out the data. 
_dataView = GetBytesFromDataView(Host, lookupMap, keyColumn, valueColumn); } - private Schema.Metadata CopyMetadata(Schema.Metadata metadata) - { - var meta = new MetadataBuilder(); - meta.Add(metadata, x=> true); - return meta.GetMetadata(); - } - private ValueMap CreateValueMapFromDataView(IDataView dataView, string keyColumn, string valueColumn) { // Confirm that the key and value columns exist in the dataView @@ -416,7 +403,7 @@ private ValueMap CreateValueMapFromDataView(IDataView dataView, string keyColumn var keyType = dataView.Schema[keyIdx].Type; var valueType = dataView.Schema[valueIdx].Type; var valueMap = ValueMap.Create(keyType, valueType, _valueMetadata); - using (var cursor = dataView.GetRowCursor(c=> c == keyIdx || c == valueIdx)) + using (var cursor = dataView.GetRowCursor(c => c == keyIdx || c == valueIdx)) valueMap.Train(Host, cursor); return valueMap; } @@ -435,7 +422,7 @@ private static TextLoader.Column GenerateValueColumn(IHostEnvironment env, // scan the input to create convert the values as key types using (var cursor = loader.GetRowCursor(c => true)) { - using(var ch = env.Start($"Processing key values from file {fileName}")) + using (var ch = env.Start($"Processing key values from file {fileName}")) { var getKey = cursor.GetGetter>(keyIdx); var getValue = cursor.GetGetter>(valueIdx); @@ -443,7 +430,7 @@ private static TextLoader.Column GenerateValueColumn(IHostEnvironment env, ReadOnlyMemory key = default; ReadOnlyMemory value = default; - while(cursor.MoveNext()) + while (cursor.MoveNext()) { getKey(ref key); getValue(ref value); @@ -510,7 +497,7 @@ private static ValueMappingTransformer CreateTransformInvoke(IHost string keyColumnName, string valueColumnName, bool treatValuesAsKeyTypes, - (string Input, string Output)[] columns) + (string input, string output)[] columns) { // Read in the data // scan the input to create convert the values as key types @@ -521,19 +508,19 @@ private static ValueMappingTransformer 
CreateTransformInvoke(IHost idv.Schema.TryGetColumnIndex(valueColumnName, out int valueIdx); using (var cursor = idv.GetRowCursor(c => true)) { - using(var ch = env.Start("Processing key values")) + using (var ch = env.Start("Processing key values")) { TKey key = default; TValue value = default; var getKey = cursor.GetGetter(keyIdx); var getValue = cursor.GetGetter(valueIdx); - while(cursor.MoveNext()) + while (cursor.MoveNext()) { try { getKey(ref key); } - catch(InvalidOperationException) + catch (InvalidOperationException) { ch.Warning("Invalid key parsed, row will be skipped."); continue; @@ -543,7 +530,7 @@ private static ValueMappingTransformer CreateTransformInvoke(IHost { getValue(ref value); } - catch(InvalidOperationException) + catch (InvalidOperationException) { ch.Warning("Invalid value parsed for key {key}, row will be skipped."); continue; @@ -602,7 +589,7 @@ private static IDataTransform Create(IHostEnvironment env, Arguments args, IData valueColumn = new TextLoader.Column(valueColumnName, DataKind.TXT, 1); var txtArgs = new TextLoader.Arguments() { - Column=new TextLoader.Column[] + Column = new TextLoader.Column[] { keyColumn, valueColumn @@ -614,7 +601,7 @@ private static IDataTransform Create(IHostEnvironment env, Arguments args, IData var textLoader = TextLoader.ReadFile(env, txtArgs, fileSource); valueColumn = GenerateValueColumn(env, textLoader, valueColumnName, 0, 1, args.DataFile); } - catch(Exception ex) + catch (Exception ex) { throw env.Except(ex, "Failed to parse the lookup file '{args.DataFile}' in ValueMappingTransformerer"); } @@ -645,7 +632,7 @@ private static IDataTransform Create(IHostEnvironment env, Arguments args, IData ValueMappingTransformer transformer = null; (string Source, string Name)[] columns = args.Column.Select(x => (x.Source, x.Name)).ToArray(); - transformer = new ValueMappingTransformer(env, loader, keyColumnName, valueColumnName, columns); + transformer = new ValueMappingTransformer(env, loader, keyColumnName, 
valueColumnName, columns); return transformer.MakeDataTransform(input); } @@ -733,10 +720,8 @@ protected static PrimitiveType GetPrimitiveType(Type rawType, out bool isVectorT isVectorType = true; } - if (!type.TryGetDataKind(out DataKind kind)) - { - throw new InvalidOperationException($"Unsupported type {type} used in mapping."); - } + if (!type.TryGetDataKind(out DataKind kind)) + throw Contracts.Except($"Unsupported type {type} used in mapping."); return PrimitiveType.FromKind(kind); } @@ -767,17 +752,15 @@ public ValueMap(ColumnType keyType, ColumnType valueType) public static ValueMap Create(ColumnType keyType, ColumnType valueType, Schema.Metadata valueMetadata) { - Func del = CreateValueMapInvoke; - var meth = del.Method.GetGenericMethodDefinition().MakeGenericMethod(keyType.RawType, valueType.RawType); - return (ValueMap)meth.Invoke(null, new object[] { keyType, valueType, valueMetadata }); + Func del = CreateValueMapInvoke; + var meth = del.Method.GetGenericMethodDefinition().MakeGenericMethod(keyType.RawType, valueType.RawType); + return (ValueMap)meth.Invoke(null, new object[] { keyType, valueType, valueMetadata }); } private static ValueMap CreateValueMapInvoke(ColumnType keyType, ColumnType valueType, Schema.Metadata valueMetadata) - { - return new ValueMap(keyType, valueType, valueMetadata); - } + => new ValueMap(keyType, valueType, valueMetadata); public abstract void Train(IHostEnvironment env, RowCursor cursor); @@ -819,7 +802,7 @@ public override void Train(IHostEnvironment env, RowCursor cursor) ValueMapper, TValue> conv; // For keys that are not in the mapping, the missingValue will be returned. - _missingValue = default; + _missingValue = default; if (!ValueType.IsVector) { // For handling missing values, this follows how a missing value is handled when loading from a text source. 
@@ -840,16 +823,15 @@ public override void Train(IHostEnvironment env, RowCursor cursor) var keyGetter = cursor.GetGetter(0); var valueGetter = cursor.GetGetter(1); - while(cursor.MoveNext()) + while (cursor.MoveNext()) { TKey key = default; TValue value = default; keyGetter(ref key); valueGetter(ref value); if (_mapping.ContainsKey(key)) - { throw env.Except($"Duplicate keys in data '{key}'"); - } + _mapping.Add(key, value); } } @@ -870,7 +852,7 @@ public override Delegate GetGetter(Row input, int index) dst = Utils.MarshalInvoke(GetValue, ValueType.RawType, _mapping[src]); } else - dst = _missingValue; + dst = _missingValue; }; return retVal; } @@ -896,14 +878,12 @@ private static TValue GetVector(TValue value) return default; } - private static TValue GetValue(TValue value) - => value; + private static TValue GetValue(TValue value) => value; } /// /// Retrieves the byte array given a dataview and columns /// - private static byte[] GetBytesFromDataView(IHost host, IDataView lookup, string keyColumn, string valueColumn) { Contracts.AssertValue(host); @@ -913,9 +893,9 @@ private static byte[] GetBytesFromDataView(IHost host, IDataView lookup, string var schema = lookup.Schema; - if (!schema.TryGetColumnIndex(keyColumn, out int colKey)) + if (!schema.GetColumnOrNull(keyColumn).HasValue) throw host.ExceptUserArg(nameof(Arguments.KeyColumn), $"Key column not found: '{keyColumn}'"); - if (!schema.TryGetColumnIndex(valueColumn, out int colValue)) + if (!schema.GetColumnOrNull(valueColumn).HasValue) throw host.ExceptUserArg(nameof(Arguments.ValueColumn), $"Value column not found: '{valueColumn}'"); var cols = new List<(string Source, string Name)>() @@ -925,7 +905,7 @@ private static byte[] GetBytesFromDataView(IHost host, IDataView lookup, string }; var view = new ColumnCopyingTransformer(host, cols.ToArray()).Transform(lookup); - view = ColumnSelectingTransformer.CreateKeep(host, view, cols.Select(x=>x.Name).ToArray()); + view = 
ColumnSelectingTransformer.CreateKeep(host, view, cols.Select(x => x.Name).ToArray()); var saver = new BinarySaver(host, new BinarySaver.Arguments()); using (var strm = new MemoryStream()) @@ -946,7 +926,7 @@ private static BinaryLoader GetLoader(IHostEnvironment env, byte[] bytes) private protected override IRowMapper MakeRowMapper(Schema schema) { - return new Mapper(this, schema, _valueMap, _valueMetadata, ColumnPairs); + return new Mapper(this, schema, _valueMap, _valueMetadata, ColumnPairs); } private sealed class Mapper : OneToOneMapperBase diff --git a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs index 0ee5620c30..fe51159d25 100644 --- a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs +++ b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs @@ -103,7 +103,7 @@ public void SavePipeLabelParsers() TestCore(pathData, true, new[] { "loader=Text{col=RawLabel:TXT:0 col=Names:TXT:1-2 col=Features:TXT:3-4 header+}", - string.Format("xf=TermLookup{{valuesAsKeyType=- col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=TermLookup{{key=- col=FileLabel:RawLabel data={{{0}}}}}", mappingPathData), "xf=SelectColumns{keepcol=RawLabel keepcol=FileLabel hidden=-}" }, suffix: "3"); @@ -128,7 +128,7 @@ public void SavePipeLabelParsers() TestCore(pathData, true, new[] { "loader=Text{col=RawLabel:TXT:0 col=Names:TXT:1-2 col=Features:TXT:3-4 header+}", - string.Format("xf=TermLookup{{valuesAsKeyType=- col=FileLabelNum:RawLabel data={{{0}}}}}", mappingPathData), + string.Format("xf=TermLookup{{key=- col=FileLabelNum:RawLabel data={{{0}}}}}", mappingPathData), string.Format("xf=TermLookup{{col=FileLabelKey:RawLabel data={{{0}}}}}", mappingPathData), "xf=SelectColumns{keepcol=RawLabel keepcol=FileLabelNum keepcol=FileLabelKey hidden=-}" }, suffix: "4"); diff --git a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs 
b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs index a1eb5a1523..99fcfa0020 100644 --- a/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ValueMappingTests.cs @@ -1,4 +1,3 @@ -// // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. @@ -9,7 +8,7 @@ using Microsoft.ML.Runtime.Model; using Microsoft.ML.Runtime.RunTests; using Microsoft.ML.Runtime.Tools; -using Microsoft.ML.Transforms; +using Microsoft.ML.Transforms.Conversions; using System; using System.Collections.Generic; using System.IO; @@ -334,7 +333,7 @@ public void ValueMappingWorkout() IEnumerable values = new List() { 1, 2, 3, 4 }; // Workout on value mapping - var est = ML.Transforms.ValueMap(keys, values, new[] { ("A", "D"), ("B", "E"), ("C", "F") }); + var est = ML.Transforms.Conversion.ValueMap(keys, values, new[] { ("A", "D"), ("B", "E"), ("C", "F") }); TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView); } @@ -342,7 +341,7 @@ public void ValueMappingWorkout() void TestCommandLine() { var dataFile = GetDataPath("QuotingData.csv"); - Assert.Equal(Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0 col=B:R4:1 col=C:R4:2} xf=valuemap{key=ID value=Text data=" + Assert.Equal(Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0 col=B:R4:1 col=C:R4:2} xf=valuemap{keyCol=ID valueCol=Text data=" + dataFile + @" col=A:B loader=Text{col=ID:U8:0 col=Text:TX:1 sep=, header=+} } in=f:\1.txt" }), (int)0); } @@ -362,7 +361,7 @@ void TestCommandLineNoLoaderWithColumnNames() var dataFile = GetDataPath("lm.labels.txt"); Assert.Equal(Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0 col=B:R4:1 col=C:R4:2} xf=valuemap{data=" + dataFile - + @" col=A:B key=foo value=bar} in=f:\1.txt" }), (int)0); + + @" col=A:B keyCol=foo valueCol=bar} in=f:\1.txt" }), (int)0); } [Fact]