From ec4f0fd97dd5b1339fd95d087a23792bc57ec0f2 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Wed, 2 Jan 2019 14:27:39 -0800 Subject: [PATCH 1/4] Add test and clean names --- .../Trainer/Binary/EnsembleTrainer.cs | 2 +- .../MultiClass/MetaMulticlassTrainer.cs | 2 +- .../Standard/Online/LinearSvm.cs | 72 ++++++++------- .../StandardLearnersCatalog.cs | 8 +- test/Microsoft.ML.Tests/Scenarios/OvaTest.cs | 2 +- .../TrainerEstimators/OnlineLinearTests.cs | 87 ++++++++++++++++--- 6 files changed, 117 insertions(+), 56 deletions(-) diff --git a/src/Microsoft.ML.Ensemble/Trainer/Binary/EnsembleTrainer.cs b/src/Microsoft.ML.Ensemble/Trainer/Binary/EnsembleTrainer.cs index c9438c0169..bd74ae8b27 100644 --- a/src/Microsoft.ML.Ensemble/Trainer/Binary/EnsembleTrainer.cs +++ b/src/Microsoft.ML.Ensemble/Trainer/Binary/EnsembleTrainer.cs @@ -58,7 +58,7 @@ public Arguments() BasePredictors = new[] { ComponentFactoryUtils.CreateFromFunction( - env => new LinearSvm(env)) + env => new LinearSvmTrainer(env)) }; } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MetaMulticlassTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MetaMulticlassTrainer.cs index e241f1b587..1395c8b3bc 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MetaMulticlassTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MetaMulticlassTrainer.cs @@ -87,7 +87,7 @@ private TScalarTrainer CreateTrainer() { return Args.PredictorType != null ? Args.PredictorType.CreateComponent(Host) : - new LinearSvm(Host, new LinearSvm.Arguments()); + new LinearSvmTrainer(Host, new LinearSvmTrainer.Arguments()); } private protected IDataView MapLabelsCore(ColumnType type, InPredicate equalsTarget, RoleMappedData data) diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs index d1c192821e..c1ce4fea2d 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs @@ -15,22 +15,21 @@ using Microsoft.ML.Numeric; using Microsoft.ML.Trainers.Online; using Microsoft.ML.Training; -using Float = System.Single; -[assembly: LoadableClass(LinearSvm.Summary, typeof(LinearSvm), typeof(LinearSvm.Arguments), +[assembly: LoadableClass(LinearSvmTrainer.Summary, typeof(LinearSvmTrainer), typeof(LinearSvmTrainer.Arguments), new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer), typeof(SignatureFeatureScorerTrainer) }, - LinearSvm.UserNameValue, - LinearSvm.LoadNameValue, - LinearSvm.ShortName)] + LinearSvmTrainer.UserNameValue, + LinearSvmTrainer.LoadNameValue, + LinearSvmTrainer.ShortName)] -[assembly: LoadableClass(typeof(void), typeof(LinearSvm), null, typeof(SignatureEntryPointModule), "LinearSvm")] +[assembly: LoadableClass(typeof(void), typeof(LinearSvmTrainer), null, typeof(SignatureEntryPointModule), "LinearSvm")] namespace Microsoft.ML.Trainers.Online { /// /// Linear SVM that implements PEGASOS for training. See: http://ttic.uchicago.edu/~shai/papers/ShalevSiSr07.pdf /// - public sealed class LinearSvm : OnlineLinearTrainer, LinearBinaryModelParameters> + public sealed class LinearSvmTrainer : OnlineLinearTrainer, LinearBinaryModelParameters> { internal const string LoadNameValue = "LinearSVM"; internal const string ShortName = "svm"; @@ -47,7 +46,7 @@ public sealed class Arguments : OnlineLinearArguments [Argument(ArgumentType.AtMostOnce, HelpText = "Regularizer constant", ShortName = "lambda", SortOrder = 50)] [TGUI(SuggestedSweeps = "0.00001-0.1;log;inc:10")] [TlcModule.SweepableFloatParamAttribute("Lambda", 0.00001f, 0.1f, 10, isLogScale: true)] - public Float Lambda = (Float)0.001; + public float Lambda = 0.001f; [Argument(ArgumentType.AtMostOnce, HelpText = "Batch size", ShortName = "batch", SortOrder = 190)] [TGUI(Label = "Batch Size")] @@ -78,16 +77,16 @@ private sealed class TrainState : TrainStateBase // weightsUpdate/weightsUpdateScale/biasUpdate are similar to weights/weightsScale/bias, in that // all elements of weightsUpdate are considered to be multiplied by weightsUpdateScale, and the // bias update term is not considered to be multiplied by the scale. - private VBuffer _weightsUpdate; - private Float _weightsUpdateScale; - private Float _biasUpdate; + private VBuffer _weightsUpdate; + private float _weightsUpdateScale; + private float _biasUpdate; private readonly int _batchSize; private readonly bool _noBias; private readonly bool _performProjection; private readonly float _lambda; - public TrainState(IChannel ch, int numFeatures, LinearModelParameters predictor, LinearSvm parent) + public TrainState(IChannel ch, int numFeatures, LinearModelParameters predictor, LinearSvmTrainer parent) : base(ch, numFeatures, predictor, parent) { _batchSize = parent.Args.BatchSize; @@ -101,7 +100,7 @@ public TrainState(IChannel ch, int numFeatures, LinearModelParameters predictor, if (predictor == null) VBufferUtils.Densify(ref Weights); - _weightsUpdate = VBufferUtils.CreateEmpty(numFeatures); + _weightsUpdate = VBufferUtils.CreateEmpty(numFeatures); } @@ -119,7 +118,7 @@ private void BeginBatch() VBufferUtils.Resize(ref _weightsUpdate, _weightsUpdate.Length, 0); } - private void FinishBatch(in VBuffer weightsUpdate, Float weightsUpdateScale) + private void FinishBatch(in VBuffer weightsUpdate, float weightsUpdateScale) { if (_numBatchExamples > 0) UpdateWeights(in weightsUpdate, weightsUpdateScale); @@ -129,19 +128,19 @@ private void FinishBatch(in VBuffer weightsUpdate, Float weightsUpdateSca /// /// Observe an example and update weights if necesary. /// - public override void ProcessDataInstance(IChannel ch, in VBuffer feat, Float label, Float weight) + public override void ProcessDataInstance(IChannel ch, in VBuffer feat, float label, float weight) { base.ProcessDataInstance(ch, in feat, label, weight); // compute the update and update if needed - Float output = Margin(in feat); - Float trueOutput = (label > 0 ? 1 : -1); - Float loss = output * trueOutput - 1; + float output = Margin(in feat); + float trueOutput = (label > 0 ? 1 : -1); + float loss = output * trueOutput - 1; // Accumulate the update if there is a loss and we have larger batches. if (_batchSize > 1 && loss < 0) { - Float currentBiasUpdate = trueOutput * weight; + float currentBiasUpdate = trueOutput * weight; _biasUpdate += currentBiasUpdate; // Only aggregate in the case where we're handling multiple instances. if (_weightsUpdate.GetValues().Length == 0) @@ -160,7 +159,7 @@ public override void ProcessDataInstance(IChannel ch, in VBuffer feat, Fl Contracts.Assert(_weightsUpdate.GetValues().Length == 0); // If we aren't aggregating multiple instances, just use the instance's // vector directly. - Float currentBiasUpdate = trueOutput * weight; + float currentBiasUpdate = trueOutput * weight; _biasUpdate += currentBiasUpdate; FinishBatch(in feat, currentBiasUpdate); } @@ -174,13 +173,13 @@ public override void ProcessDataInstance(IChannel ch, in VBuffer feat, Fl /// Updates the weights at the end of the batch. Since weightsUpdate can be an instance /// feature vector, this function should not change the contents of weightsUpdate. /// - private void UpdateWeights(in VBuffer weightsUpdate, Float weightsUpdateScale) + private void UpdateWeights(in VBuffer weightsUpdate, float weightsUpdateScale) { Contracts.Assert(_batch > 0); // REVIEW: This is really odd - normally lambda is small, so the learning rate is initially huge!?!?! // Changed from the paper's recommended rate = 1 / (lambda * t) to rate = 1 / (1 + lambda * t). - Float rate = 1 / (1 + _lambda * _batch); + float rate = 1 / (1 + _lambda * _batch); // w_{t+1/2} = (1 - eta*lambda) w_t + eta/k * totalUpdate WeightsScale *= 1 - rate * _lambda; @@ -194,7 +193,7 @@ private void UpdateWeights(in VBuffer weightsUpdate, Float weightsUpdateS // w_{t+1} = min{1, 1/sqrt(lambda)/|w_{t+1/2}|} * w_{t+1/2} if (_performProjection) { - Float normalizer = 1 / (MathUtils.Sqrt(_lambda) * VectorUtils.Norm(Weights) * Math.Abs(WeightsScale)); + float normalizer = 1 / (MathUtils.Sqrt(_lambda) * VectorUtils.Norm(Weights) * Math.Abs(WeightsScale)); if (normalizer < 1) { // REVIEW: Why would we not scale _bias if we're scaling the weights? @@ -208,7 +207,7 @@ private void UpdateWeights(in VBuffer weightsUpdate, Float weightsUpdateS /// /// Return the raw margin from the decision hyperplane. /// - public override Float Margin(in VBuffer feat) + public override float Margin(in VBuffer feat) => Bias + VectorUtils.DotProduct(in feat, in Weights) * WeightsScale; public override LinearBinaryModelParameters CreatePredictor() @@ -222,7 +221,7 @@ public override LinearBinaryModelParameters CreatePredictor() protected override bool NeedCalibration => true; /// - /// Initializes a new instance of . + /// Initializes a new instance of . /// /// The environment to use. /// The name of the label column. @@ -230,13 +229,13 @@ public override LinearBinaryModelParameters CreatePredictor() /// The optional name of the weights column. /// The number of training iteraitons. /// A delegate to supply more advanced arguments to the algorithm. - public LinearSvm(IHostEnvironment env, + public LinearSvmTrainer(IHostEnvironment env, string labelColumn = DefaultColumnNames.Label, string featureColumn = DefaultColumnNames.Features, string weightsColumn = null, int numIterations = Arguments.OnlineDefaultArgs.NumIterations, Action advancedSettings = null) - :this(env, InvokeAdvanced(advancedSettings, new Arguments + : this(env, InvokeAdvanced(advancedSettings, new Arguments { LabelColumn = labelColumn, FeatureColumn = featureColumn, @@ -246,8 +245,8 @@ public LinearSvm(IHostEnvironment env, { } - internal LinearSvm(IHostEnvironment env, Arguments args) - : base(args, env, UserNameValue, MakeLabelColumn(args.LabelColumn)) + internal LinearSvmTrainer(IHostEnvironment env, Arguments args) + : base(args, env, UserNameValue, TrainerUtils.MakeBoolScalarLabel(args.LabelColumn)) { Contracts.CheckUserArg(args.Lambda > 0, nameof(args.Lambda), UserErrorPositive); Contracts.CheckUserArg(args.BatchSize > 0, nameof(args.BatchSize), UserErrorPositive); @@ -261,9 +260,8 @@ protected override SchemaShape.Column[] GetOutputColumnsCore(SchemaShape inputSc { return new[] { - new SchemaShape.Column(DefaultColumnNames.Score, SchemaShape.Column.VectorKind.Scalar, NumberType.R4, false), - new SchemaShape.Column(DefaultColumnNames.Probability, SchemaShape.Column.VectorKind.Scalar, NumberType.R4, false), - new SchemaShape.Column(DefaultColumnNames.PredictedLabel, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false) + new SchemaShape.Column(DefaultColumnNames.Score, SchemaShape.Column.VectorKind.Scalar, NumberType.R4, false, new SchemaShape(MetadataUtils.GetTrainerOutputMetadata())), + new SchemaShape.Column(DefaultColumnNames.PredictedLabel, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false, new SchemaShape(MetadataUtils.GetTrainerOutputMetadata())) }; } @@ -278,11 +276,6 @@ private protected override TrainStateBase MakeState(IChannel ch, int numFeatures return new TrainState(ch, numFeatures, predictor, this); } - private static SchemaShape.Column MakeLabelColumn(string labelColumn) - { - return new SchemaShape.Column(labelColumn, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false); - } - [TlcModule.EntryPoint(Name = "Trainers.LinearSvmBinaryClassifier", Desc = "Train a linear SVM.", UserName = UserNameValue, ShortName = ShortName)] public static CommonOutputs.BinaryClassificationOutput TrainLinearSvm(IHostEnvironment env, Arguments input) { @@ -292,12 +285,15 @@ public static CommonOutputs.BinaryClassificationOutput TrainLinearSvm(IHostEnvir EntryPointUtils.CheckInputArgs(host, input); return LearnerEntryPointsUtils.Train(host, input, - () => new LinearSvm(host, input), + () => new LinearSvmTrainer(host, input), () => LearnerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.LabelColumn), calibrator: input.Calibrator, maxCalibrationExamples: input.MaxCalibrationExamples); } protected override BinaryPredictionTransformer MakeTransformer(LinearBinaryModelParameters model, Schema trainSchema) => new BinaryPredictionTransformer(Host, model, trainSchema, FeatureColumn.Name); + + public BinaryPredictionTransformer Train(IDataView trainData, IPredictor initialPredictor = null) + => TrainTransformer(trainData, initPredictor: initialPredictor); } } diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index 726c606eeb..c5018748f8 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -384,7 +384,7 @@ public static Pkpd PairwiseCoupling(this MulticlassClassificationContext.Multicl } /// - /// Predict a target using a linear binary classification model trained with the trainer. + /// Predict a target using a linear binary classification model trained with the trainer. /// /// /// @@ -403,15 +403,15 @@ public static Pkpd PairwiseCoupling(this MulticlassClassificationContext.Multicl /// The optional name of the weights column. /// The number of training iteraitons. /// A delegate to supply more advanced arguments to the algorithm. - public static LinearSvm LinearSupportVectorMachines(this BinaryClassificationContext.BinaryClassificationTrainers ctx, + public static LinearSvmTrainer LinearSupportVectorMachines(this BinaryClassificationContext.BinaryClassificationTrainers ctx, string labelColumn = DefaultColumnNames.Label, string featureColumn = DefaultColumnNames.Features, string weightsColumn = null, int numIterations = OnlineLinearArguments.OnlineDefaultArgs.NumIterations, - Action advancedSettings = null) + Action advancedSettings = null) { Contracts.CheckValue(ctx, nameof(ctx)); - return new LinearSvm(CatalogUtils.GetEnvironment(ctx), labelColumn, featureColumn, weightsColumn, numIterations, advancedSettings); + return new LinearSvmTrainer(CatalogUtils.GetEnvironment(ctx), labelColumn, featureColumn, weightsColumn, numIterations, advancedSettings); } } } diff --git a/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs b/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs index cb4d0a0af7..e3e84bccd5 100644 --- a/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs +++ b/test/Microsoft.ML.Tests/Scenarios/OvaTest.cs @@ -136,7 +136,7 @@ public void OvaLinearSvm() var data = mlContext.Data.Cache(reader.Read(GetDataPath(dataPath))); // Pipeline - var pipeline = new Ova(mlContext, new LinearSvm(mlContext, numIterations: 100), useProbabilities: false); + var pipeline = new Ova(mlContext, new LinearSvmTrainer(mlContext, numIterations: 100), useProbabilities: false); var model = pipeline.Fit(data); var predictions = model.Transform(data); diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/OnlineLinearTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/OnlineLinearTests.cs index e134af0df1..fb46523341 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/OnlineLinearTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/OnlineLinearTests.cs @@ -17,30 +17,95 @@ public void OnlineLinearWorkout() { var dataPath = GetDataPath("breast-cancer.txt"); - var data = TextLoader.CreateReader(Env, ctx => (Label: ctx.LoadFloat(0), Features: ctx.LoadFloat(1, 10))) + var regressionData = TextLoader.CreateReader(ML, ctx => (Label: ctx.LoadFloat(0), Features: ctx.LoadFloat(1, 10))) .Read(dataPath); - var pipe = data.MakeNewEstimator() + var regressionPipe = regressionData.MakeNewEstimator() .Append(r => (r.Label, Features: r.Features.Normalize())); - var trainData = pipe.Fit(data).Transform(data).AsDynamic; + var regressionTrainData = regressionPipe.Fit(regressionData).Transform(regressionData).AsDynamic; - var ogdTrainer = new OnlineGradientDescentTrainer(Env, "Label", "Features"); - TestEstimatorCore(ogdTrainer, trainData); - var ogdModel = ogdTrainer.Fit(trainData); - ogdTrainer.Train(trainData, ogdModel.Model); + var ogdTrainer = new OnlineGradientDescentTrainer(ML, "Label", "Features"); + TestEstimatorCore(ogdTrainer, regressionTrainData); + var ogdModel = ogdTrainer.Fit(regressionTrainData); + ogdTrainer.Train(regressionTrainData, ogdModel.Model); - var apTrainer = new AveragedPerceptronTrainer(Env, "Label", "Features", lossFunction: new HingeLoss(), advancedSettings: s => + var binaryData = TextLoader.CreateReader(ML, ctx => (Label: ctx.LoadBool(0), Features: ctx.LoadFloat(1, 10))) + .Read(dataPath); + + var binaryPipe = binaryData.MakeNewEstimator() + .Append(r => (r.Label, Features: r.Features.Normalize())); + + var binaryTrainData = binaryPipe.Fit(binaryData).Transform(binaryData).AsDynamic; + var apTrainer = new AveragedPerceptronTrainer(ML, "Label", "Features", lossFunction: new HingeLoss(), advancedSettings: s => { s.LearningRate = 0.5f; }); - TestEstimatorCore(apTrainer, trainData); + TestEstimatorCore(apTrainer, binaryTrainData); - var apModel = apTrainer.Fit(trainData); - apTrainer.Train(trainData, apModel.Model); + var apModel = apTrainer.Fit(binaryTrainData); + apTrainer.Train(binaryTrainData, apModel.Model); + + var svmTrainer = new LinearSvmTrainer(ML, "Label", "Features"); + TestEstimatorCore(svmTrainer, binaryTrainData); + + var svmModel = svmTrainer.Fit(binaryTrainData); + svmTrainer.Train(binaryTrainData, apModel.Model); Done(); } + + + [Fact] + public void OnlineLinearWorkout1() + { + // load data + var textLoader = new TextLoader(ML, + new TextLoader.Arguments() + { + Separator = ",", + HasHeader = true, + Column = new[] + { + new TextLoader.Column("Age", DataKind.R4, 0), + new TextLoader.Column("Workclass", DataKind.TX, 1), + new TextLoader.Column("Fnlwgt", DataKind.R4, 2), + new TextLoader.Column("Education", DataKind.TX, 3), + new TextLoader.Column("EducationNum", DataKind.R4, 4), + new TextLoader.Column("MaritalStatus", DataKind.TX, 5), + new TextLoader.Column("Occupation", DataKind.TX, 6), + new TextLoader.Column("Relationship", DataKind.TX, 7), + new TextLoader.Column("Race", DataKind.TX, 8), + new TextLoader.Column("Sex", DataKind.TX, 9), + new TextLoader.Column("CapitalGain", DataKind.R4, 10), + new TextLoader.Column("CapitalLoss", DataKind.R4, 11), + new TextLoader.Column("HoursPerWeek", DataKind.R4, 12), + new TextLoader.Column("NativeCountry", DataKind.TX, 13), + new TextLoader.Column("Label", DataKind.Bool, 14), + } + }); + var trainDataPath = "F:/tlc/test/adult.train"; + var trainData = textLoader.Read(trainDataPath); + var validationData = textLoader.Read(trainDataPath); + var testData = textLoader.Read(trainDataPath); + + // preprocess + var preprocessorEstimator = ML.Transforms.Categorical.OneHotEncoding("Workclass", "Workclass") + .Append(ML.Transforms.Categorical.OneHotEncoding("Education", "Education")) + .Append(ML.Transforms.Categorical.OneHotEncoding("MaritalStatus", "MaritalStatus")) + .Append(ML.Transforms.Categorical.OneHotEncoding("Occupation", "Occupation")) + .Append(ML.Transforms.Categorical.OneHotEncoding("Relationship", "Relationship")) + .Append(ML.Transforms.Categorical.OneHotEncoding("Race", "Race")) + .Append(ML.Transforms.Categorical.OneHotEncoding("Sex", "Sex")) + .Append(ML.Transforms.Categorical.OneHotEncoding("NativeCountry", "NativeCountry")) + .Append(ML.Transforms.Concatenate(DefaultColumnNames.Features, + "Age", "Workclass", "Fnlwgt", "Education", "EducationNum", "MaritalStatus", "Occupation", "Relationship", + "Race", "Sex", "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry")); + // train model + var trainer = ML.BinaryClassification.Trainers.LinearSupportVectorMachines(); + var estimatorChain = preprocessorEstimator.Append(trainer); + var model = estimatorChain.Fit(trainData); + } } } From 39bcd5bdbd07edbe2dc015492a9bc4c9ea50ce2c Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Wed, 2 Jan 2019 14:27:56 -0800 Subject: [PATCH 2/4] remove test --- .../TrainerEstimators/OnlineLinearTests.cs | 52 ------------------- 1 file changed, 52 deletions(-) diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/OnlineLinearTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/OnlineLinearTests.cs index fb46523341..32060e4587 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/OnlineLinearTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/OnlineLinearTests.cs @@ -55,57 +55,5 @@ public void OnlineLinearWorkout() Done(); } - - - [Fact] - public void OnlineLinearWorkout1() - { - // load data - var textLoader = new TextLoader(ML, - new TextLoader.Arguments() - { - Separator = ",", - HasHeader = true, - Column = new[] - { - new TextLoader.Column("Age", DataKind.R4, 0), - new TextLoader.Column("Workclass", DataKind.TX, 1), - new TextLoader.Column("Fnlwgt", DataKind.R4, 2), - new TextLoader.Column("Education", DataKind.TX, 3), - new TextLoader.Column("EducationNum", DataKind.R4, 4), - new TextLoader.Column("MaritalStatus", DataKind.TX, 5), - new TextLoader.Column("Occupation", DataKind.TX, 6), - new TextLoader.Column("Relationship", DataKind.TX, 7), - new TextLoader.Column("Race", DataKind.TX, 8), - new TextLoader.Column("Sex", DataKind.TX, 9), - new TextLoader.Column("CapitalGain", DataKind.R4, 10), - new TextLoader.Column("CapitalLoss", DataKind.R4, 11), - new TextLoader.Column("HoursPerWeek", DataKind.R4, 12), - new TextLoader.Column("NativeCountry", DataKind.TX, 13), - new TextLoader.Column("Label", DataKind.Bool, 14), - } - }); - var trainDataPath = "F:/tlc/test/adult.train"; - var trainData = textLoader.Read(trainDataPath); - var validationData = textLoader.Read(trainDataPath); - var testData = textLoader.Read(trainDataPath); - - // preprocess - var preprocessorEstimator = ML.Transforms.Categorical.OneHotEncoding("Workclass", "Workclass") - .Append(ML.Transforms.Categorical.OneHotEncoding("Education", "Education")) - .Append(ML.Transforms.Categorical.OneHotEncoding("MaritalStatus", "MaritalStatus")) - .Append(ML.Transforms.Categorical.OneHotEncoding("Occupation", "Occupation")) - .Append(ML.Transforms.Categorical.OneHotEncoding("Relationship", "Relationship")) - .Append(ML.Transforms.Categorical.OneHotEncoding("Race", "Race")) - .Append(ML.Transforms.Categorical.OneHotEncoding("Sex", "Sex")) - .Append(ML.Transforms.Categorical.OneHotEncoding("NativeCountry", "NativeCountry")) - .Append(ML.Transforms.Concatenate(DefaultColumnNames.Features, - "Age", "Workclass", "Fnlwgt", "Education", "EducationNum", "MaritalStatus", "Occupation", "Relationship", - "Race", "Sex", "CapitalGain", "CapitalLoss", "HoursPerWeek", "NativeCountry")); - // train model - var trainer = ML.BinaryClassification.Trainers.LinearSupportVectorMachines(); - var estimatorChain = preprocessorEstimator.Append(trainer); - var model = estimatorChain.Fit(trainData); - } } } From 55460fc20ca3d4a848d2b9f8fc09e8513834180c Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Wed, 2 Jan 2019 14:38:57 -0800 Subject: [PATCH 3/4] it's always entrypoints --- test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index 165a3f923d..75f83c5030 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -57,7 +57,7 @@ Trainers.LightGbmBinaryClassifier Train a LightGBM binary classification model. Trainers.LightGbmClassifier Train a LightGBM multi class model. Microsoft.ML.LightGBM.LightGbm TrainMultiClass Microsoft.ML.LightGBM.LightGbmArguments Microsoft.ML.EntryPoints.CommonOutputs+MulticlassClassificationOutput Trainers.LightGbmRanker Train a LightGBM ranking model. Microsoft.ML.LightGBM.LightGbm TrainRanking Microsoft.ML.LightGBM.LightGbmArguments Microsoft.ML.EntryPoints.CommonOutputs+RankingOutput Trainers.LightGbmRegressor LightGBM Regression Microsoft.ML.LightGBM.LightGbm TrainRegression Microsoft.ML.LightGBM.LightGbmArguments Microsoft.ML.EntryPoints.CommonOutputs+RegressionOutput -Trainers.LinearSvmBinaryClassifier Train a linear SVM. Microsoft.ML.Trainers.Online.LinearSvm TrainLinearSvm Microsoft.ML.Trainers.Online.LinearSvm+Arguments Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput +Trainers.LinearSvmBinaryClassifier Train a linear SVM. Microsoft.ML.Trainers.Online.LinearSvmTrainer TrainLinearSvm Microsoft.ML.Trainers.Online.LinearSvmTrainer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput Trainers.LogisticRegressionBinaryClassifier Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can be used as a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistical function. Microsoft.ML.Learners.LogisticRegression TrainBinary Microsoft.ML.Learners.LogisticRegression+Arguments Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput Trainers.LogisticRegressionClassifier Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can be used as a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistical function. Microsoft.ML.Learners.LogisticRegression TrainMultiClass Microsoft.ML.Learners.MulticlassLogisticRegression+Arguments Microsoft.ML.EntryPoints.CommonOutputs+MulticlassClassificationOutput Trainers.NaiveBayesClassifier Train a MultiClassNaiveBayesTrainer. Microsoft.ML.Trainers.MultiClassNaiveBayesTrainer TrainMultiClassNaiveBayesTrainer Microsoft.ML.Trainers.MultiClassNaiveBayesTrainer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+MulticlassClassificationOutput From 4631b4a65cabd237b8dbf725d3d761640cf3d597 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Wed, 2 Jan 2019 14:59:37 -0800 Subject: [PATCH 4/4] => --- .../Standard/Online/LinearSvm.cs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs index c1ce4fea2d..71d59cc430 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/LinearSvm.cs @@ -272,9 +272,7 @@ private protected override void CheckLabels(RoleMappedData data) } private protected override TrainStateBase MakeState(IChannel ch, int numFeatures, LinearModelParameters predictor) - { - return new TrainState(ch, numFeatures, predictor, this); - } + => new TrainState(ch, numFeatures, predictor, this); [TlcModule.EntryPoint(Name = "Trainers.LinearSvmBinaryClassifier", Desc = "Train a linear SVM.", UserName = UserNameValue, ShortName = ShortName)] public static CommonOutputs.BinaryClassificationOutput TrainLinearSvm(IHostEnvironment env, Arguments input) @@ -291,9 +289,9 @@ public static CommonOutputs.BinaryClassificationOutput TrainLinearSvm(IHostEnvir } protected override BinaryPredictionTransformer MakeTransformer(LinearBinaryModelParameters model, Schema trainSchema) - => new BinaryPredictionTransformer(Host, model, trainSchema, FeatureColumn.Name); + => new BinaryPredictionTransformer(Host, model, trainSchema, FeatureColumn.Name); public BinaryPredictionTransformer Train(IDataView trainData, IPredictor initialPredictor = null) - => TrainTransformer(trainData, initPredictor: initialPredictor); + => TrainTransformer(trainData, initPredictor: initialPredictor); } }