From 607d25acfd05273d819ed07da432f06d51a79a03 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Tue, 30 Oct 2018 11:44:03 -0700 Subject: [PATCH 1/7] namespace moves and make it build --- .../SubsetSelector/BootstrapSelector.cs | 1 + .../AssemblyRegistration.cs | 1 + .../Runtime/EntryPoints/TrainTestSplit.cs | 1 + .../BootstrapSampleTransform.cs | 2 +- .../CategoricalCatalog.cs | 3 +-- .../CategoricalHashTransform.cs | 2 +- .../CategoricalTransform.cs | 3 ++- .../CompositeTransform.cs | 7 ++----- src/Microsoft.ML.Transforms/GcnTransform.cs | 10 +++++----- src/Microsoft.ML.Transforms/GroupTransform.cs | 5 +++-- .../HashJoinTransform.cs | 19 +++++++++---------- .../Text/TextTransform.cs | 1 + .../WrappedGcnTransformers.cs | 3 ++- .../KMeansAndLogisticRegressionBench.cs | 1 + .../PredictionEngineBench.cs | 3 +-- .../Text/MultiClassClassification.cs | 1 + .../TestPredictors.cs | 1 + .../EnvironmentExtensions.cs | 1 + .../TensorflowTests.cs | 1 + .../Transformers/CategoricalHashTests.cs | 1 + .../Transformers/NAIndicatorTests.cs | 1 + 21 files changed, 38 insertions(+), 30 deletions(-) diff --git a/src/Microsoft.ML.Ensemble/Selector/SubsetSelector/BootstrapSelector.cs b/src/Microsoft.ML.Ensemble/Selector/SubsetSelector/BootstrapSelector.cs index 97dda8aeba..102b9d10d5 100644 --- a/src/Microsoft.ML.Ensemble/Selector/SubsetSelector/BootstrapSelector.cs +++ b/src/Microsoft.ML.Ensemble/Selector/SubsetSelector/BootstrapSelector.cs @@ -8,6 +8,7 @@ using Microsoft.ML.Runtime.Ensemble.Selector; using Microsoft.ML.Runtime.Ensemble.Selector.SubsetSelector; using Microsoft.ML.Runtime.EntryPoints; +using Microsoft.ML.Transforms; [assembly: LoadableClass(typeof(BootstrapSelector), typeof(BootstrapSelector.Arguments), typeof(SignatureEnsembleDataSelector), BootstrapSelector.UserName, BootstrapSelector.LoadName)] diff --git a/src/Microsoft.ML.Legacy/AssemblyRegistration.cs b/src/Microsoft.ML.Legacy/AssemblyRegistration.cs index 7540c132e9..df272e76a6 100644 --- a/src/Microsoft.ML.Legacy/AssemblyRegistration.cs +++ b/src/Microsoft.ML.Legacy/AssemblyRegistration.cs @@ -9,6 +9,7 @@ using Microsoft.ML.Trainers.PCA; using Microsoft.ML.Runtime.Sweeper; using Microsoft.ML.Runtime.Tools; +using Microsoft.ML.Transforms.Categorical; using System; using System.Reflection; diff --git a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/TrainTestSplit.cs b/src/Microsoft.ML.Legacy/Runtime/EntryPoints/TrainTestSplit.cs index 5b7e735380..76f4157beb 100644 --- a/src/Microsoft.ML.Legacy/Runtime/EntryPoints/TrainTestSplit.cs +++ b/src/Microsoft.ML.Legacy/Runtime/EntryPoints/TrainTestSplit.cs @@ -7,6 +7,7 @@ using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Transforms; +using Microsoft.ML.Transforms.Conversions; [assembly: LoadableClass(typeof(void), typeof(TrainTestSplit), null, typeof(SignatureEntryPointModule), "TrainTestSplit")] diff --git a/src/Microsoft.ML.Transforms/BootstrapSampleTransform.cs b/src/Microsoft.ML.Transforms/BootstrapSampleTransform.cs index 221ec1bdd5..10dad94cdd 100644 --- a/src/Microsoft.ML.Transforms/BootstrapSampleTransform.cs +++ b/src/Microsoft.ML.Transforms/BootstrapSampleTransform.cs @@ -19,7 +19,7 @@ [assembly: EntryPointModule(typeof(BootstrapSample))] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms { /// /// This class approximates bootstrap sampling of a dataview. diff --git a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs index f5766d9edc..a396b46c3b 100644 --- a/src/Microsoft.ML.Transforms/CategoricalCatalog.cs +++ b/src/Microsoft.ML.Transforms/CategoricalCatalog.cs @@ -4,8 +4,7 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; -using System; -using System.Collections.Generic; +using Microsoft.ML.Transforms.Categorical; namespace Microsoft.ML { diff --git a/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs b/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs index 453cc9b822..d313274fee 100644 --- a/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs +++ b/src/Microsoft.ML.Transforms/CategoricalHashTransform.cs @@ -20,7 +20,7 @@ [assembly: LoadableClass(CategoricalHashTransform.Summary, typeof(IDataTransform), typeof(CategoricalHashTransform), typeof(CategoricalHashTransform.Arguments), typeof(SignatureDataTransform), CategoricalHashTransform.UserName, "CategoricalHashTransform", "CatHashTransform", "CategoricalHash", "CatHash")] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms.Categorical { public sealed class CategoricalHashTransform : ITransformer, ICanSaveModel { diff --git a/src/Microsoft.ML.Transforms/CategoricalTransform.cs b/src/Microsoft.ML.Transforms/CategoricalTransform.cs index 07240890a0..ab85fea9f2 100644 --- a/src/Microsoft.ML.Transforms/CategoricalTransform.cs +++ b/src/Microsoft.ML.Transforms/CategoricalTransform.cs @@ -22,7 +22,8 @@ CategoricalTransform.UserName, "CategoricalTransform", "CatTransform", "Categorical", "Cat")] [assembly: LoadableClass(typeof(void), typeof(Categorical), null, typeof(SignatureEntryPointModule), "Categorical")] -namespace Microsoft.ML.Runtime.Data + +namespace Microsoft.ML.Transforms.Categorical { /// public sealed class CategoricalTransform : ITransformer, ICanSaveModel diff --git a/src/Microsoft.ML.Transforms/CompositeTransform.cs b/src/Microsoft.ML.Transforms/CompositeTransform.cs index d9cd238399..180a613b32 100644 --- a/src/Microsoft.ML.Transforms/CompositeTransform.cs +++ b/src/Microsoft.ML.Transforms/CompositeTransform.cs @@ -2,13 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; -using System.Collections.Generic; -using System.Linq; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Model; +using Microsoft.ML.Transforms; // REVIEW: This is a temporary hack code to allow loading old saved loader models. Delete it once it is no longer needed. @@ -16,7 +13,7 @@ [assembly: LoadableClass(typeof(IDataTransform), typeof(CompositeTransform), null, typeof(SignatureLoadDataTransform), "Composite Transform", CompositeTransform.LoaderSignature)] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms { public static class CompositeTransform { diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index 0f42621d72..009ac0e4d4 100644 --- a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -2,10 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using Float = System.Single; - -using System; -using System.Text; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; @@ -13,6 +9,10 @@ using Microsoft.ML.Runtime.Internal.CpuMath; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Model; +using Microsoft.ML.Transforms.Projections; +using System; +using System.Text; +using Float = System.Single; [assembly: LoadableClass(LpNormNormalizerTransform.GcnSummary, typeof(LpNormNormalizerTransform), typeof(LpNormNormalizerTransform.GcnArguments), typeof(SignatureDataTransform), LpNormNormalizerTransform.UserNameGn, "GcnTransform", LpNormNormalizerTransform.ShortNameGn)] @@ -25,7 +25,7 @@ [assembly: EntryPointModule(typeof(LpNormalization))] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms.Projections { /// /// Lp-Norm (vector/row-wise) normalization transform. Has the following two set of arguments: diff --git a/src/Microsoft.ML.Transforms/GroupTransform.cs b/src/Microsoft.ML.Transforms/GroupTransform.cs index 75224c52e3..07175140cd 100644 --- a/src/Microsoft.ML.Transforms/GroupTransform.cs +++ b/src/Microsoft.ML.Transforms/GroupTransform.cs @@ -12,6 +12,7 @@ using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Model; +using Microsoft.ML.Transforms; [assembly: LoadableClass(GroupTransform.Summary, typeof(GroupTransform), typeof(GroupTransform.Arguments), typeof(SignatureDataTransform), GroupTransform.UserName, GroupTransform.ShortName)] @@ -19,9 +20,9 @@ [assembly: LoadableClass(GroupTransform.Summary, typeof(GroupTransform), null, typeof(SignatureLoadDataTransform), GroupTransform.UserName, GroupTransform.LoaderSignature)] -[assembly: EntryPointModule(typeof(GroupingOperations))] +[assembly: EntryPointModule(typeof(Microsoft.ML.Transforms.GroupingOperations))] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms { /// /// This transform essentially performs the following SQL-like operation: diff --git a/src/Microsoft.ML.Transforms/HashJoinTransform.cs b/src/Microsoft.ML.Transforms/HashJoinTransform.cs index bd3d007fa8..2a3f5b3550 100644 --- a/src/Microsoft.ML.Transforms/HashJoinTransform.cs +++ b/src/Microsoft.ML.Transforms/HashJoinTransform.cs @@ -2,20 +2,19 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using Float = System.Single; - -using System; -using System.Linq; -using System.Reflection; -using System.Text; -using System.Threading; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.Data.Conversion; using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Model; +using Microsoft.ML.Transforms.Conversions; +using System; +using System.Linq; +using System.Reflection; +using System.Text; +using System.Threading; +using Float = System.Single; [assembly: LoadableClass(HashJoinTransform.Summary, typeof(HashJoinTransform), typeof(HashJoinTransform.Arguments), typeof(SignatureDataTransform), HashJoinTransform.UserName, "HashJoinTransform", HashJoinTransform.RegistrationName)] @@ -25,7 +24,7 @@ [assembly: EntryPointModule(typeof(HashJoin))] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms.Conversions { /// /// This transform hashes its input columns. Each column is hashed separately, and within each @@ -656,7 +655,7 @@ private HashDelegate ComposeHashDelegate() // Default case: convert to text and hash as a string. var sb = default(StringBuilder); - var conv = Conversions.Instance.GetStringConversion(); + var conv = Runtime.Data.Conversion.Conversions.Instance.GetStringConversion(); return (ref TSrc value, uint seed) => { diff --git a/src/Microsoft.ML.Transforms/Text/TextTransform.cs b/src/Microsoft.ML.Transforms/Text/TextTransform.cs index 2c77451906..b95850f1ac 100644 --- a/src/Microsoft.ML.Transforms/Text/TextTransform.cs +++ b/src/Microsoft.ML.Transforms/Text/TextTransform.cs @@ -14,6 +14,7 @@ using Microsoft.ML.Runtime.TextAnalytics; using Microsoft.ML.StaticPipe; using Microsoft.ML.StaticPipe.Runtime; +using Microsoft.ML.Transforms.Projections; using Microsoft.ML.Transforms.Text; using System; using System.Collections.Generic; diff --git a/src/Microsoft.ML.Transforms/WrappedGcnTransformers.cs b/src/Microsoft.ML.Transforms/WrappedGcnTransformers.cs index 946a2abc84..20620ba489 100644 --- a/src/Microsoft.ML.Transforms/WrappedGcnTransformers.cs +++ b/src/Microsoft.ML.Transforms/WrappedGcnTransformers.cs @@ -7,9 +7,10 @@ using Microsoft.ML.Runtime.Data; using Microsoft.ML.StaticPipe; using Microsoft.ML.StaticPipe.Runtime; +using Microsoft.ML.Transforms.Projections; using System.Collections.Generic; using System.Linq; -using static Microsoft.ML.Runtime.Data.LpNormNormalizerTransform; +using static Microsoft.ML.Transforms.Projections.LpNormNormalizerTransform; namespace Microsoft.ML.Transforms { diff --git a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs index d68d598ade..c56eae60aa 100644 --- a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs +++ b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs @@ -9,6 +9,7 @@ using Microsoft.ML.Runtime.Learners; using Microsoft.ML.Trainers.KMeans; using Microsoft.ML.Transforms; +using Microsoft.ML.Transforms.Categorical; using Microsoft.ML.Transforms.Normalizers; namespace Microsoft.ML.Benchmarks diff --git a/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs b/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs index 3c42bd420f..6401ab803d 100644 --- a/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs +++ b/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs @@ -6,10 +6,9 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.Learners; +using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Text; -using Microsoft.ML.Trainers; namespace Microsoft.ML.Benchmarks { diff --git a/test/Microsoft.ML.Benchmarks/Text/MultiClassClassification.cs b/test/Microsoft.ML.Benchmarks/Text/MultiClassClassification.cs index 74ec99c2bf..50524107e7 100644 --- a/test/Microsoft.ML.Benchmarks/Text/MultiClassClassification.cs +++ b/test/Microsoft.ML.Benchmarks/Text/MultiClassClassification.cs @@ -9,6 +9,7 @@ using Microsoft.ML.Runtime.Tools; using Microsoft.ML.Trainers.Online; using Microsoft.ML.Trainers; +using Microsoft.ML.Transforms.Categorical; using System.IO; namespace Microsoft.ML.Benchmarks diff --git a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs index ba4c4aaa7d..1b3c13d4b0 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs @@ -20,6 +20,7 @@ namespace Microsoft.ML.Runtime.RunTests using Microsoft.ML.Trainers.FastTree.Internal; using Microsoft.ML.Trainers.Online; using Microsoft.ML.Trainers.SymSgd; + using Microsoft.ML.Transforms.Categorical; using System.Linq; using System.Runtime.InteropServices; using Xunit; diff --git a/test/Microsoft.ML.TestFramework/EnvironmentExtensions.cs b/test/Microsoft.ML.TestFramework/EnvironmentExtensions.cs index d3b2c4451f..180a192b5d 100644 --- a/test/Microsoft.ML.TestFramework/EnvironmentExtensions.cs +++ b/test/Microsoft.ML.TestFramework/EnvironmentExtensions.cs @@ -9,6 +9,7 @@ using Microsoft.ML.Trainers.FastTree; using Microsoft.ML.Trainers.KMeans; using Microsoft.ML.Trainers.PCA; +using Microsoft.ML.Transforms.Categorical; namespace Microsoft.ML.TestFramework { diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index 47a77c0009..15a9f985bd 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -11,6 +11,7 @@ using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Normalizers; using Microsoft.ML.Transforms.TensorFlow; +using Microsoft.ML.Transforms.Categorical; using System; using System.Collections.Generic; using System.IO; diff --git a/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs b/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs index 6bed53f855..8ddfcf4411 100644 --- a/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/CategoricalHashTests.cs @@ -9,6 +9,7 @@ using Microsoft.ML.Runtime.RunTests; using Microsoft.ML.Runtime.Tools; using Microsoft.ML.Transforms; +using Microsoft.ML.Transforms.Categorical; using System; using System.IO; using System.Linq; diff --git a/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs b/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs index 1ff3f6d7f0..a6f9e7723e 100644 --- a/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NAIndicatorTests.cs @@ -9,6 +9,7 @@ using Microsoft.ML.Runtime.RunTests; using Microsoft.ML.Runtime.Tools; using Microsoft.ML.Transforms; +using Microsoft.ML.Transforms.Categorical; using System; using System.IO; using Xunit; From 55b68e4cf4a9505135311d2300d12057ef67d6ab Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Tue, 30 Oct 2018 12:35:06 -0700 Subject: [PATCH 2/7] Re-generating the entrypoints --- src/Microsoft.ML.Legacy/CSharpApi.cs | 88 ------------------- .../Common/EntryPoints/core_ep-list.tsv | 19 ++-- .../Common/EntryPoints/core_manifest.json | 64 -------------- 3 files changed, 9 insertions(+), 162 deletions(-) diff --git a/src/Microsoft.ML.Legacy/CSharpApi.cs b/src/Microsoft.ML.Legacy/CSharpApi.cs index 9f5b2e8791..c17ae9168a 100644 --- a/src/Microsoft.ML.Legacy/CSharpApi.cs +++ b/src/Microsoft.ML.Legacy/CSharpApi.cs @@ -1582,18 +1582,6 @@ public void Add(Microsoft.ML.Legacy.Transforms.Scorer input, Microsoft.ML.Legacy _jsonNodes.Add(Serialize("Transforms.Scorer", input, output)); } - public Microsoft.ML.Legacy.Transforms.Segregator.Output Add(Microsoft.ML.Legacy.Transforms.Segregator input) - { - var output = new Microsoft.ML.Legacy.Transforms.Segregator.Output(); - Add(input, output); - return output; - } - - public void Add(Microsoft.ML.Legacy.Transforms.Segregator input, Microsoft.ML.Legacy.Transforms.Segregator.Output output) - { - _jsonNodes.Add(Serialize("Transforms.Segregator", input, output)); - } - public Microsoft.ML.Legacy.Transforms.SentimentAnalyzer.Output Add(Microsoft.ML.Legacy.Transforms.SentimentAnalyzer input) { var output = new Microsoft.ML.Legacy.Transforms.SentimentAnalyzer.Output(); @@ -16431,82 +16419,6 @@ public sealed class Output } } - namespace Legacy.Transforms - { - public enum UngroupTransformUngroupMode - { - Inner = 0, - Outer = 1, - First = 2 - } - - - /// - /// - public sealed partial class Segregator : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem - { - - - /// - /// Columns to unroll, or 'pivot' - /// - public string[] Column { get; set; } - - /// - /// Specifies how to unroll multiple pivot columns of different size. - /// - public UngroupTransformUngroupMode Mode { get; set; } = UngroupTransformUngroupMode.Inner; - - /// - /// Input dataset - /// - public Var Data { get; set; } = new Var(); - - - public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITransformOutput - { - /// - /// Transformed dataset - /// - public Var OutputData { get; set; } = new Var(); - - /// - /// Transform model - /// - public Var Model { get; set; } = new Var(); - - } - public Var GetInputData() => Data; - - public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) - { - if (previousStep != null) - { - if (!(previousStep is ILearningPipelineDataStep dataStep)) - { - throw new InvalidOperationException($"{ nameof(Segregator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } - - Data = dataStep.Data; - } - Output output = experiment.Add(this); - return new SegregatorPipelineStep(output); - } - - private class SegregatorPipelineStep : ILearningPipelineDataStep - { - public SegregatorPipelineStep(Output output) - { - Data = output.OutputData; - Model = output.Model; - } - - public Var Data { get; } - public Var Model { get; } - } - } - } - namespace Legacy.Transforms { diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index 327f4ce556..9610359afd 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -75,17 +75,17 @@ Trainers.StochasticDualCoordinateAscentClassifier The SDCA linear multi-class cl Trainers.StochasticDualCoordinateAscentRegressor The SDCA linear regression trainer. Microsoft.ML.Trainers.Sdca TrainRegression Microsoft.ML.Trainers.SdcaRegressionTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+RegressionOutput Trainers.StochasticGradientDescentBinaryClassifier Train an Hogwild SGD binary model. Microsoft.ML.Trainers.StochasticGradientDescentClassificationTrainer TrainBinary Microsoft.ML.Trainers.StochasticGradientDescentClassificationTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+BinaryClassificationOutput Trainers.SymSgdBinaryClassifier Train a symbolic SGD. Microsoft.ML.Trainers.SymSgd.SymSgdClassificationTrainer TrainSymSgd Microsoft.ML.Trainers.SymSgd.SymSgdClassificationTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+BinaryClassificationOutput -Transforms.ApproximateBootstrapSampler Approximate bootstrap sampling. Microsoft.ML.Runtime.Data.BootstrapSample GetSample Microsoft.ML.Runtime.Data.BootstrapSampleTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput +Transforms.ApproximateBootstrapSampler Approximate bootstrap sampling. Microsoft.ML.Transforms.BootstrapSample GetSample Microsoft.ML.Transforms.BootstrapSampleTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.BinaryPredictionScoreColumnsRenamer For binary prediction, it renames the PredictedLabel and Score columns to include the name of the positive class. Microsoft.ML.Runtime.EntryPoints.ScoreModel RenameBinaryPredictionScoreColumns Microsoft.ML.Runtime.EntryPoints.ScoreModel+RenameBinaryPredictionScoreColumnsInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.BinNormalizer The values are assigned into equidensity bins and a value is mapped to its bin_number/number_of_bins. Microsoft.ML.Runtime.Data.Normalize Bin Microsoft.ML.Transforms.Normalizers.NormalizeTransform+BinArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput -Transforms.CategoricalHashOneHotVectorizer Converts the categorical value into an indicator array by hashing the value and using the hash as an index in the bag. If the input column is a vector, a single indicator bag is returned for it. Microsoft.ML.Runtime.Data.Categorical CatTransformHash Microsoft.ML.Runtime.Data.CategoricalHashTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput -Transforms.CategoricalOneHotVectorizer Converts the categorical value into an indicator array by building a dictionary of categories based on the data and using the id in the dictionary as the index in the array. Microsoft.ML.Runtime.Data.Categorical CatTransformDict Microsoft.ML.Runtime.Data.CategoricalTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput +Transforms.CategoricalHashOneHotVectorizer Converts the categorical value into an indicator array by hashing the value and using the hash as an index in the bag. If the input column is a vector, a single indicator bag is returned for it. Microsoft.ML.Transforms.Categorical.Categorical CatTransformHash Microsoft.ML.Transforms.Categorical.CategoricalHashTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput +Transforms.CategoricalOneHotVectorizer Converts the categorical value into an indicator array by building a dictionary of categories based on the data and using the id in the dictionary as the index in the array. Microsoft.ML.Transforms.Categorical.Categorical CatTransformDict Microsoft.ML.Transforms.Categorical.CategoricalTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.CharacterTokenizer Character-oriented tokenizer where text is considered a sequence of characters. Microsoft.ML.Transforms.Text.TextAnalytics CharTokenize Microsoft.ML.Transforms.Text.CharTokenizeTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ColumnConcatenator Concatenates one or more columns of the same item type. Microsoft.ML.Runtime.EntryPoints.SchemaManipulation ConcatColumns Microsoft.ML.Runtime.Data.ConcatTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ColumnCopier Duplicates columns from the dataset Microsoft.ML.Runtime.EntryPoints.SchemaManipulation CopyColumns Microsoft.ML.Transforms.CopyColumnsTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ColumnSelector Selects a set of columns, dropping all others Microsoft.ML.Runtime.EntryPoints.SchemaManipulation SelectColumns Microsoft.ML.Transforms.SelectColumnsTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ColumnTypeConverter Converts a column to a different type, using standard conversions. Microsoft.ML.Transforms.TypeConversion Convert Microsoft.ML.Transforms.ConvertTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput -Transforms.CombinerByContiguousGroupId Groups values of a scalar column into a vector, by a contiguous group ID Microsoft.ML.Runtime.Data.GroupingOperations Group Microsoft.ML.Runtime.Data.GroupTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput +Transforms.CombinerByContiguousGroupId Groups values of a scalar column into a vector, by a contiguous group ID Microsoft.ML.Transforms.GroupingOperations Group Microsoft.ML.Transforms.GroupTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ConditionalNormalizer Normalize the columns only if needed Microsoft.ML.Runtime.Data.Normalize IfNeeded Microsoft.ML.Transforms.Normalizers.NormalizeTransform+MinMaxArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MacroOutput`1[Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput] Transforms.DataCache Caches using the specified cache option. Microsoft.ML.Runtime.EntryPoints.Cache CacheData Microsoft.ML.Runtime.EntryPoints.Cache+CacheInput Microsoft.ML.Runtime.EntryPoints.Cache+CacheOutput Transforms.DatasetScorer Score a dataset with a predictor model Microsoft.ML.Runtime.EntryPoints.ScoreModel Score Microsoft.ML.Runtime.EntryPoints.ScoreModel+Input Microsoft.ML.Runtime.EntryPoints.ScoreModel+Output @@ -94,19 +94,19 @@ Transforms.Dictionarizer Converts input values (words, numbers, etc.) to index i Transforms.FeatureCombiner Combines all the features into one feature column. Microsoft.ML.Runtime.EntryPoints.FeatureCombiner PrepareFeatures Microsoft.ML.Runtime.EntryPoints.FeatureCombiner+FeatureCombinerInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.FeatureSelectorByCount Selects the slots for which the count of non-default values is greater than or equal to a threshold. Microsoft.ML.Transforms.SelectFeatures CountSelect Microsoft.ML.Transforms.CountFeatureSelectionTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.FeatureSelectorByMutualInformation Selects the top k slots across all specified columns ordered by their mutual information with the label column. Microsoft.ML.Transforms.SelectFeatures MutualInformationSelect Microsoft.ML.Runtime.Data.MutualInformationFeatureSelectionTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput -Transforms.GlobalContrastNormalizer Performs a global contrast normalization on input values: Y = (s * X - M) / D, where s is a scale, M is mean and D is either L2 norm or standard deviation. Microsoft.ML.Runtime.Data.LpNormalization GcNormalize Microsoft.ML.Runtime.Data.LpNormNormalizerTransform+GcnArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput -Transforms.HashConverter Converts column values into hashes. This transform accepts both numeric and text inputs, both single and vector-valued columns. Microsoft.ML.Runtime.Data.HashJoin Apply Microsoft.ML.Runtime.Data.HashJoinTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput +Transforms.GlobalContrastNormalizer Performs a global contrast normalization on input values: Y = (s * X - M) / D, where s is a scale, M is mean and D is either L2 norm or standard deviation. Microsoft.ML.Transforms.Projections.LpNormalization GcNormalize Microsoft.ML.Transforms.Projections.LpNormNormalizerTransform+GcnArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput +Transforms.HashConverter Converts column values into hashes. This transform accepts both numeric and text inputs, both single and vector-valued columns. Microsoft.ML.Transforms.Conversions.HashJoin Apply Microsoft.ML.Transforms.Conversions.HashJoinTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ImageGrayscale Convert image into grayscale. Microsoft.ML.Runtime.ImageAnalytics.EntryPoints.ImageAnalytics ImageGrayscale Microsoft.ML.Runtime.ImageAnalytics.ImageGrayscaleTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ImageLoader Load images from files. Microsoft.ML.Runtime.ImageAnalytics.EntryPoints.ImageAnalytics ImageLoader Microsoft.ML.Runtime.ImageAnalytics.ImageLoaderTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ImagePixelExtractor Extract color plane(s) from an image. Options include scaling, offset and conversion to floating point. Microsoft.ML.Runtime.ImageAnalytics.EntryPoints.ImageAnalytics ImagePixelExtractor Microsoft.ML.Runtime.ImageAnalytics.ImagePixelExtractorTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ImageResizer Scales an image to specified dimensions using one of the three scale types: isotropic with padding, isotropic with cropping or anisotropic. In case of isotropic padding, transparent color is used to pad resulting image. Microsoft.ML.Runtime.ImageAnalytics.EntryPoints.ImageAnalytics ImageResizer Microsoft.ML.Runtime.ImageAnalytics.ImageResizerTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput -Transforms.KeyToTextConverter KeyToValueTransform utilizes KeyValues metadata to map key indices to the corresponding values in the KeyValues metadata. Microsoft.ML.Runtime.Data.Categorical KeyToText Microsoft.ML.Transforms.Categorical.KeyToValueTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput +Transforms.KeyToTextConverter KeyToValueTransform utilizes KeyValues metadata to map key indices to the corresponding values in the KeyValues metadata. Microsoft.ML.Transforms.Categorical.Categorical KeyToText Microsoft.ML.Transforms.Categorical.KeyToValueTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.LabelColumnKeyBooleanConverter Transforms the label to either key or bool (if needed) to make it suitable for classification. Microsoft.ML.Runtime.EntryPoints.FeatureCombiner PrepareClassificationLabel Microsoft.ML.Runtime.EntryPoints.FeatureCombiner+ClassificationLabelInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.LabelIndicator Label remapper used by OVA Microsoft.ML.Transforms.LabelIndicatorTransform LabelIndicator Microsoft.ML.Transforms.LabelIndicatorTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.LabelToFloatConverter Transforms the label to float to make it suitable for regression. Microsoft.ML.Runtime.EntryPoints.FeatureCombiner PrepareRegressionLabel Microsoft.ML.Runtime.EntryPoints.FeatureCombiner+RegressionLabelInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.LightLda The LDA transform implements LightLDA, a state-of-the-art implementation of Latent Dirichlet Allocation. Microsoft.ML.Transforms.Text.TextAnalytics LightLda Microsoft.ML.Runtime.TextAnalytics.LdaTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.LogMeanVarianceNormalizer Normalizes the data based on the computed mean and variance of the logarithm of the data. Microsoft.ML.Runtime.Data.Normalize LogMeanVar Microsoft.ML.Transforms.Normalizers.NormalizeTransform+LogMeanVarArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput -Transforms.LpNormalizer Normalize vectors (rows) individually by rescaling them to unit norm (L2, L1 or LInf). Performs the following operation on a vector X: Y = (X - M) / D, where M is mean and D is either L2 norm, L1 norm or LInf norm. Microsoft.ML.Runtime.Data.LpNormalization Normalize Microsoft.ML.Runtime.Data.LpNormNormalizerTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput +Transforms.LpNormalizer Normalize vectors (rows) individually by rescaling them to unit norm (L2, L1 or LInf). Performs the following operation on a vector X: Y = (X - M) / D, where M is mean and D is either L2 norm, L1 norm or LInf norm. Microsoft.ML.Transforms.Projections.LpNormalization Normalize Microsoft.ML.Transforms.Projections.LpNormNormalizerTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ManyHeterogeneousModelCombiner Combines a sequence of TransformModels and a PredictorModel into a single PredictorModel. Microsoft.ML.Runtime.EntryPoints.ModelOperations CombineModels Microsoft.ML.Runtime.EntryPoints.ModelOperations+PredictorModelInput Microsoft.ML.Runtime.EntryPoints.ModelOperations+PredictorModelOutput Transforms.MeanVarianceNormalizer Normalizes the data based on the computed mean and variance of the data. Microsoft.ML.Runtime.Data.Normalize MeanVar Microsoft.ML.Transforms.Normalizers.NormalizeTransform+MeanVarArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.MinMaxNormalizer Normalizes the data based on the observed minimum and maximum values of the data. Microsoft.ML.Runtime.Data.Normalize MinMax Microsoft.ML.Transforms.Normalizers.NormalizeTransform+MinMaxArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput @@ -128,11 +128,10 @@ Transforms.RowSkipFilter Allows limiting input to a subset of rows by skipping a Transforms.RowTakeFilter Allows limiting input to a subset of rows by taking N first rows. Microsoft.ML.Runtime.EntryPoints.SelectRows TakeFilter Microsoft.ML.Transforms.SkipTakeFilter+TakeArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ScoreColumnSelector Selects only the last score columns and the extra columns specified in the arguments. Microsoft.ML.Runtime.EntryPoints.ScoreModel SelectColumns Microsoft.ML.Runtime.EntryPoints.ScoreModel+ScoreColumnSelectorInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.Scorer Turn the predictor model into a transform model Microsoft.ML.Runtime.EntryPoints.ScoreModel MakeScoringTransform Microsoft.ML.Runtime.EntryPoints.ScoreModel+ModelInput Microsoft.ML.Runtime.EntryPoints.ScoreModel+Output -Transforms.Segregator Un-groups vector columns into sequences of rows, inverse of Group transform Microsoft.ML.Runtime.Data.GroupingOperations Ungroup Microsoft.ML.Runtime.Data.UngroupTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.SentimentAnalyzer Uses a pretrained sentiment model to score input strings Microsoft.ML.Transforms.Text.TextAnalytics AnalyzeSentiment Microsoft.ML.Runtime.TextAnalytics.SentimentAnalyzingTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.TensorFlowScorer Transforms the data using the TensorFlow model. Microsoft.ML.Transforms.TensorFlowTransform TensorFlowScorer Microsoft.ML.Transforms.TensorFlowTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.TextFeaturizer A transform that turns a collection of text documents into numerical feature vectors. The feature vectors are normalized counts of (word and/or character) ngrams in a given tokenized text. Microsoft.ML.Transforms.Text.TextAnalytics TextTransform Microsoft.ML.Transforms.Text.TextFeaturizingEstimator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput -Transforms.TextToKeyConverter Converts input values (words, numbers, etc.) to index in a dictionary. Microsoft.ML.Runtime.Data.Categorical TextToKey Microsoft.ML.Transforms.Categorical.TermTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput +Transforms.TextToKeyConverter Converts input values (words, numbers, etc.) to index in a dictionary. Microsoft.ML.Transforms.Categorical.Categorical TextToKey Microsoft.ML.Transforms.Categorical.TermTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.TrainTestDatasetSplitter Split the dataset into train and test sets Microsoft.ML.Runtime.EntryPoints.TrainTestSplit Split Microsoft.ML.Runtime.EntryPoints.TrainTestSplit+Input Microsoft.ML.Runtime.EntryPoints.TrainTestSplit+Output Transforms.TreeLeafFeaturizer Trains a tree ensemble, or loads it from a file, then maps a numeric feature vector to three outputs: 1. A vector containing the individual tree outputs of the tree ensemble. 2. A vector indicating the leaves that the feature vector falls on in the tree ensemble. 3. A vector indicating the paths that the feature vector falls on in the tree ensemble. If a both a model file and a trainer are specified - will use the model file. If neither are specified, will train a default FastTree model. This can handle key labels by training a regression model towards their optionally permuted indices. Microsoft.ML.Runtime.Data.TreeFeaturize Featurizer Microsoft.ML.Runtime.Data.TreeEnsembleFeaturizerTransform+ArgumentsForEntryPoint Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.TwoHeterogeneousModelCombiner Combines a TransformModel and a PredictorModel into a single PredictorModel. Microsoft.ML.Runtime.EntryPoints.ModelOperations CombineTwoModels Microsoft.ML.Runtime.EntryPoints.ModelOperations+SimplePredictorModelInput Microsoft.ML.Runtime.EntryPoints.ModelOperations+PredictorModelOutput diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 3c174d996e..52c449f55d 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -22449,70 +22449,6 @@ } ] }, - { - "Name": "Transforms.Segregator", - "Desc": "Un-groups vector columns into sequences of rows, inverse of Group transform", - "FriendlyName": "Un-group Transform", - "ShortName": "Ungroup", - "Inputs": [ - { - "Name": "Data", - "Type": "DataView", - "Desc": "Input dataset", - "Required": true, - "SortOrder": 1.0, - "IsNullable": false - }, - { - "Name": "Column", - "Type": { - "Kind": "Array", - "ItemType": "String" - }, - "Desc": "Columns to unroll, or 'pivot'", - "Aliases": [ - "col" - ], - "Required": true, - "SortOrder": 150.0, - "IsNullable": false - }, - { - "Name": "Mode", - "Type": { - "Kind": "Enum", - "Values": [ - "Inner", - "Outer", - "First" - ] - }, - "Desc": "Specifies how to unroll multiple pivot columns of different size.", - "Required": false, - "SortOrder": 150.0, - "IsNullable": false, - "Default": "Inner" - } - ], - "Outputs": [ - { - "Name": "OutputData", - "Type": "DataView", - "Desc": "Transformed dataset" - }, - { - "Name": "Model", - "Type": "TransformModel", - "Desc": "Transform model" - } - ], - "InputKind": [ - "ITransformInput" - ], - "OutputKind": [ - "ITransformOutput" - ] - }, { "Name": "Transforms.SentimentAnalyzer", "Desc": "Uses a pretrained sentiment model to score input strings", From dd83490a2204ca0badd4df1b7adb8f9f9e9866a5 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Tue, 30 Oct 2018 14:32:46 -0700 Subject: [PATCH 3/7] keeping group and ungroup changes together. Regenerating the ep manifest. --- src/Microsoft.ML.Legacy/CSharpApi.cs | 88 +++++++++++++++++++ .../UngroupTransform.cs | 7 +- .../Common/EntryPoints/core_ep-list.tsv | 1 + .../Common/EntryPoints/core_manifest.json | 64 ++++++++++++++ 4 files changed, 157 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.ML.Legacy/CSharpApi.cs b/src/Microsoft.ML.Legacy/CSharpApi.cs index c17ae9168a..9f5b2e8791 100644 --- a/src/Microsoft.ML.Legacy/CSharpApi.cs +++ b/src/Microsoft.ML.Legacy/CSharpApi.cs @@ -1582,6 +1582,18 @@ public void Add(Microsoft.ML.Legacy.Transforms.Scorer input, Microsoft.ML.Legacy _jsonNodes.Add(Serialize("Transforms.Scorer", input, output)); } + public Microsoft.ML.Legacy.Transforms.Segregator.Output Add(Microsoft.ML.Legacy.Transforms.Segregator input) + { + var output = new Microsoft.ML.Legacy.Transforms.Segregator.Output(); + Add(input, output); + return output; + } + + public void Add(Microsoft.ML.Legacy.Transforms.Segregator input, Microsoft.ML.Legacy.Transforms.Segregator.Output output) + { + _jsonNodes.Add(Serialize("Transforms.Segregator", input, output)); + } + public Microsoft.ML.Legacy.Transforms.SentimentAnalyzer.Output Add(Microsoft.ML.Legacy.Transforms.SentimentAnalyzer input) { var output = new Microsoft.ML.Legacy.Transforms.SentimentAnalyzer.Output(); @@ -16419,6 +16431,82 @@ public sealed class Output } } + namespace Legacy.Transforms + { + public enum UngroupTransformUngroupMode + { + Inner = 0, + Outer = 1, + First = 2 + } + + + /// + /// + public sealed partial class Segregator : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITransformInput, Microsoft.ML.Legacy.ILearningPipelineItem + { + + + /// + /// Columns to unroll, or 'pivot' + /// + public string[] Column { get; set; } + + /// + /// Specifies how to unroll multiple pivot columns of different size. + /// + public UngroupTransformUngroupMode Mode { get; set; } = UngroupTransformUngroupMode.Inner; + + /// + /// Input dataset + /// + public Var Data { get; set; } = new Var(); + + + public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITransformOutput + { + /// + /// Transformed dataset + /// + public Var OutputData { get; set; } = new Var(); + + /// + /// Transform model + /// + public Var Model { get; set; } = new Var(); + + } + public Var GetInputData() => Data; + + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) + { + if (previousStep != null) + { + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(Segregator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } + + Data = dataStep.Data; + } + Output output = experiment.Add(this); + return new SegregatorPipelineStep(output); + } + + private class SegregatorPipelineStep : ILearningPipelineDataStep + { + public SegregatorPipelineStep(Output output) + { + Data = output.OutputData; + Model = output.Model; + } + + public Var Data { get; } + public Var Model { get; } + } + } + } + namespace Legacy.Transforms { diff --git a/src/Microsoft.ML.Transforms/UngroupTransform.cs b/src/Microsoft.ML.Transforms/UngroupTransform.cs index ad2de0e30a..5601ce0ee9 100644 --- a/src/Microsoft.ML.Transforms/UngroupTransform.cs +++ b/src/Microsoft.ML.Transforms/UngroupTransform.cs @@ -13,6 +13,7 @@ using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.Internal.Utilities; using Microsoft.ML.Runtime.Model; +using Microsoft.ML.Transforms; [assembly: LoadableClass(UngroupTransform.Summary, typeof(UngroupTransform), typeof(UngroupTransform.Arguments), typeof(SignatureDataTransform), UngroupTransform.UserName, UngroupTransform.ShortName)] @@ -20,7 +21,7 @@ [assembly: LoadableClass(UngroupTransform.Summary, typeof(UngroupTransform), null, typeof(SignatureLoadDataTransform), UngroupTransform.UserName, UngroupTransform.LoaderSignature)] -namespace Microsoft.ML.Runtime.Data +namespace Microsoft.ML.Transforms { // This can be thought of as an inverse of GroupTransform. For all specified vector columns @@ -267,7 +268,7 @@ public SchemaImpl(IExceptionContext ectx, Schema inputSchema, UngroupMode mode, _pivotIndex[info.Index] = i; } - AsSchema = Data.Schema.Create(this); + AsSchema = Runtime.Data.Schema.Create(this); } private static void CheckAndBind(IExceptionContext ectx, ISchema inputSchema, @@ -613,7 +614,7 @@ private ValueGetter MakeGetter(int col, PrimitiveType itemType) // cachedIndex == row.Count || _pivotColPosition <= row.Indices[cachedIndex]. int cachedIndex = 0; VBuffer row = default(VBuffer); - T naValue = Conversions.Instance.GetNAOrDefault(itemType); + T naValue = Runtime.Data.Conversion.Conversions.Instance.GetNAOrDefault(itemType); return (ref T value) => { diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index 9610359afd..c7142d0501 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -128,6 +128,7 @@ Transforms.RowSkipFilter Allows limiting input to a subset of rows by skipping a Transforms.RowTakeFilter Allows limiting input to a subset of rows by taking N first rows. Microsoft.ML.Runtime.EntryPoints.SelectRows TakeFilter Microsoft.ML.Transforms.SkipTakeFilter+TakeArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ScoreColumnSelector Selects only the last score columns and the extra columns specified in the arguments. Microsoft.ML.Runtime.EntryPoints.ScoreModel SelectColumns Microsoft.ML.Runtime.EntryPoints.ScoreModel+ScoreColumnSelectorInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.Scorer Turn the predictor model into a transform model Microsoft.ML.Runtime.EntryPoints.ScoreModel MakeScoringTransform Microsoft.ML.Runtime.EntryPoints.ScoreModel+ModelInput Microsoft.ML.Runtime.EntryPoints.ScoreModel+Output +Transforms.Segregator Un-groups vector columns into sequences of rows, inverse of Group transform Microsoft.ML.Transforms.GroupingOperations Ungroup Microsoft.ML.Transforms.UngroupTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.SentimentAnalyzer Uses a pretrained sentiment model to score input strings Microsoft.ML.Transforms.Text.TextAnalytics AnalyzeSentiment Microsoft.ML.Runtime.TextAnalytics.SentimentAnalyzingTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.TensorFlowScorer Transforms the data using the TensorFlow model. Microsoft.ML.Transforms.TensorFlowTransform TensorFlowScorer Microsoft.ML.Transforms.TensorFlowTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.TextFeaturizer A transform that turns a collection of text documents into numerical feature vectors. The feature vectors are normalized counts of (word and/or character) ngrams in a given tokenized text. Microsoft.ML.Transforms.Text.TextAnalytics TextTransform Microsoft.ML.Transforms.Text.TextFeaturizingEstimator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 52c449f55d..3c174d996e 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -22449,6 +22449,70 @@ } ] }, + { + "Name": "Transforms.Segregator", + "Desc": "Un-groups vector columns into sequences of rows, inverse of Group transform", + "FriendlyName": "Un-group Transform", + "ShortName": "Ungroup", + "Inputs": [ + { + "Name": "Data", + "Type": "DataView", + "Desc": "Input dataset", + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + }, + { + "Name": "Column", + "Type": { + "Kind": "Array", + "ItemType": "String" + }, + "Desc": "Columns to unroll, or 'pivot'", + "Aliases": [ + "col" + ], + "Required": true, + "SortOrder": 150.0, + "IsNullable": false + }, + { + "Name": "Mode", + "Type": { + "Kind": "Enum", + "Values": [ + "Inner", + "Outer", + "First" + ] + }, + "Desc": "Specifies how to unroll multiple pivot columns of different size.", + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": "Inner" + } + ], + "Outputs": [ + { + "Name": "OutputData", + "Type": "DataView", + "Desc": "Transformed dataset" + }, + { + "Name": "Model", + "Type": "TransformModel", + "Desc": "Transform model" + } + ], + "InputKind": [ + "ITransformInput" + ], + "OutputKind": [ + "ITransformOutput" + ] + }, { "Name": "Transforms.SentimentAnalyzer", "Desc": "Uses a pretrained sentiment model to score input strings", From 14935628858d09a667c35555997600dc45df6505 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Tue, 30 Oct 2018 14:36:46 -0700 Subject: [PATCH 4/7] post merge fix. --- test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs b/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs index c1bd93f17f..3caf54f6f3 100644 --- a/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/CategoricalTests.cs @@ -9,6 +9,7 @@ using Microsoft.ML.Runtime.RunTests; using Microsoft.ML.Runtime.Tools; using Microsoft.ML.Transforms; +using Microsoft.ML.Transforms.Categorical; using System; using System.IO; using System.Linq; From 54b08709ce57681f43396a7c68f02ea6db300b9e Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Tue, 30 Oct 2018 16:03:59 -0700 Subject: [PATCH 5/7] Rff is projection. Reverting changes to enable codege --- src/Microsoft.ML.Transforms/RffTransform.cs | 4 ++-- .../UnitTests/TestEntryPoints.cs | 11 ++++++----- test/Microsoft.ML.Tests/CSharpCodeGen.cs | 2 +- test/Microsoft.ML.Tests/Transformers/RffTests.cs | 4 ++-- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Microsoft.ML.Transforms/RffTransform.cs b/src/Microsoft.ML.Transforms/RffTransform.cs index 204f88b3ca..75b4d2e777 100644 --- a/src/Microsoft.ML.Transforms/RffTransform.cs +++ b/src/Microsoft.ML.Transforms/RffTransform.cs @@ -12,7 +12,7 @@ using Microsoft.ML.Runtime.Numeric; using Microsoft.ML.StaticPipe; using Microsoft.ML.StaticPipe.Runtime; -using Microsoft.ML.Transforms; +using Microsoft.ML.Transforms.Projections; using System; using System.Collections.Generic; using System.Linq; @@ -30,7 +30,7 @@ [assembly: LoadableClass(typeof(IRowMapper), typeof(RffTransform), null, typeof(SignatureLoadRowMapper), "Random Fourier Features Transform", RffTransform.LoaderSignature)] -namespace Microsoft.ML.Transforms +namespace Microsoft.ML.Transforms.Projections { public sealed class RffTransform : OneToOneTransformerBase { diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index a772b43bbe..2dd3fd8f31 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -10,7 +10,6 @@ using Microsoft.ML.Runtime.Ensemble.OutputCombiners; using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Runtime.EntryPoints.JsonUtils; -using Microsoft.ML.Trainers.FastTree; using Microsoft.ML.Runtime.ImageAnalytics; using Microsoft.ML.Runtime.Internal.Calibration; using Microsoft.ML.Runtime.Internal.Internallearn; @@ -18,14 +17,17 @@ using Microsoft.ML.Runtime.Learners; using Microsoft.ML.Runtime.LightGBM; using Microsoft.ML.Runtime.Model.Onnx; -using Microsoft.ML.Trainers.PCA; using Microsoft.ML.Runtime.PipelineInference; -using Microsoft.ML.Trainers.SymSgd; using Microsoft.ML.Runtime.TextAnalytics; using Microsoft.ML.Runtime.TimeSeriesProcessing; +using Microsoft.ML.Trainers; +using Microsoft.ML.Trainers.FastTree; +using Microsoft.ML.Trainers.PCA; +using Microsoft.ML.Trainers.SymSgd; using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Categorical; using Microsoft.ML.Transforms.Normalizers; +using Microsoft.ML.Transforms.Projections; using Microsoft.ML.Transforms.Text; using Newtonsoft.Json; using Newtonsoft.Json.Linq; @@ -36,7 +38,6 @@ using System.Text.RegularExpressions; using Xunit; using Xunit.Abstractions; -using Microsoft.ML.Trainers; namespace Microsoft.ML.Runtime.RunTests { @@ -248,7 +249,7 @@ private string GetBuildPrefix() #endif } - [Fact] + [Fact(Skip = "Execute this test if you want to regenerate the core_manifest and core_ep_list files")] public void RegenerateEntryPointCatalog() { var (epListContents, jObj) = BuildManifests(); diff --git a/test/Microsoft.ML.Tests/CSharpCodeGen.cs b/test/Microsoft.ML.Tests/CSharpCodeGen.cs index 30ed45d94a..d16a9924f1 100644 --- a/test/Microsoft.ML.Tests/CSharpCodeGen.cs +++ b/test/Microsoft.ML.Tests/CSharpCodeGen.cs @@ -15,7 +15,7 @@ public CSharpCodeGen(ITestOutputHelper output) : base(output) { } - [Fact] + [Fact(Skip = "Execute this test if you want to regenerate CSharpApi file")] public void RegenerateCSharpApi() { var basePath = GetDataPath("../../src/Microsoft.ML.Legacy/CSharpApi.cs"); diff --git a/test/Microsoft.ML.Tests/Transformers/RffTests.cs b/test/Microsoft.ML.Tests/Transformers/RffTests.cs index 60c6ad84a3..80d404cf9c 100644 --- a/test/Microsoft.ML.Tests/Transformers/RffTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/RffTests.cs @@ -1,11 +1,11 @@ -using Microsoft.ML.Runtime; -using Microsoft.ML.Runtime.Api; +using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Data.IO; using Microsoft.ML.Runtime.Model; using Microsoft.ML.Runtime.RunTests; using Microsoft.ML.Runtime.Tools; using Microsoft.ML.Transforms; +using Microsoft.ML.Transforms.Projections; using System; using System.IO; using System.Linq; From a3a6a994eea2688aad28682990eca14efbd00bc2 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Wed, 31 Oct 2018 11:19:52 -0700 Subject: [PATCH 6/7] regeneratign the ep list --- test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index a5b54a6eec..3092dc8fc2 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -78,7 +78,7 @@ Trainers.SymSgdBinaryClassifier Train a symbolic SGD. Microsoft.ML.Trainers.SymS Transforms.ApproximateBootstrapSampler Approximate bootstrap sampling. Microsoft.ML.Transforms.BootstrapSample GetSample Microsoft.ML.Transforms.BootstrapSampleTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.BinaryPredictionScoreColumnsRenamer For binary prediction, it renames the PredictedLabel and Score columns to include the name of the positive class. Microsoft.ML.Runtime.EntryPoints.ScoreModel RenameBinaryPredictionScoreColumns Microsoft.ML.Runtime.EntryPoints.ScoreModel+RenameBinaryPredictionScoreColumnsInput Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.BinNormalizer The values are assigned into equidensity bins and a value is mapped to its bin_number/number_of_bins. Microsoft.ML.Runtime.Data.Normalize Bin Microsoft.ML.Transforms.Normalizers.NormalizeTransform+BinArguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput -Transforms.CategoricalHashOneHotVectorizer Converts the categorical value into an indicator array by hashing the value and using the hash as an index in the bag. If the input column is a vector, a single indicator bag is returned for it. Microsoft.ML.Transforms.Categorical.Categorical CatTransformHash Microsoft.ML.Runtime.Data.CategoricalHashTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput +Transforms.CategoricalHashOneHotVectorizer Converts the categorical value into an indicator array by hashing the value and using the hash as an index in the bag. If the input column is a vector, a single indicator bag is returned for it. Microsoft.ML.Transforms.Categorical.Categorical CatTransformHash Microsoft.ML.Transforms.Categorical.CategoricalHashTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.CategoricalOneHotVectorizer Converts the categorical value into an indicator array by building a dictionary of categories based on the data and using the id in the dictionary as the index in the array. Microsoft.ML.Transforms.Categorical.Categorical CatTransformDict Microsoft.ML.Transforms.Categorical.CategoricalTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.CharacterTokenizer Character-oriented tokenizer where text is considered a sequence of characters. Microsoft.ML.Transforms.Text.TextAnalytics CharTokenize Microsoft.ML.Transforms.Text.CharTokenizeTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput Transforms.ColumnConcatenator Concatenates one or more columns of the same item type. Microsoft.ML.Runtime.EntryPoints.SchemaManipulation ConcatColumns Microsoft.ML.Runtime.Data.ConcatTransform+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+TransformOutput From e6037f0b3083a3e312987acbdbf87c7ec859485a Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Wed, 31 Oct 2018 21:34:53 -0700 Subject: [PATCH 7/7] merge conflict --- test/Microsoft.ML.Tests/Transformers/ConvertTests.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/test/Microsoft.ML.Tests/Transformers/ConvertTests.cs b/test/Microsoft.ML.Tests/Transformers/ConvertTests.cs index 060af9c6be..8e72219609 100644 --- a/test/Microsoft.ML.Tests/Transformers/ConvertTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/ConvertTests.cs @@ -10,6 +10,7 @@ using Microsoft.ML.Runtime.Tools; using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Conversions; +using Microsoft.ML.Transforms.Categorical; using System; using System.IO; using System.Linq;