Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Microsoft.ML/Models/ClassificationMetrics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namespace Microsoft.ML.Models
/// </summary>
public sealed class ClassificationMetrics
{
/// <summary>
/// Shared instance created through the private parameterless constructor.
/// Declared <c>readonly</c> so the shared reference cannot be reassigned by other code.
/// </summary>
public static readonly ClassificationMetrics Empty = new ClassificationMetrics();
// Private: code outside this class cannot call this parameterless constructor.
private ClassificationMetrics()
{
}
Expand Down
121 changes: 121 additions & 0 deletions test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Learners;

namespace Microsoft.ML.Benchmarks
{
/// <summary>
/// Benchmark that trains KMeans through a train-and-score transform, appends the
/// resulting "Score" column to the features, and then trains logistic regression
/// on the combined feature vector.
/// </summary>
public class KMeansAndLogisticRegressionBench
{
    // Path of the training file; assigned in Setup and read by the training routine.
    private static string s_dataPath;

    /// <summary>Benchmark entry point; delegates to <see cref="TrainKMeansAndLRCore"/>.</summary>
    /// <returns>The predictor returned by the logistic regression trainer.</returns>
    [Benchmark]
    public IPredictor TrainKMeansAndLR()
    {
        return TrainKMeansAndLRCore();
    }

    /// <summary>
    /// Global setup: resolves the "adult.train" data path and assigns the empty
    /// metrics instance to the SDCA benchmark's static metrics field.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        s_dataPath = Program.GetDataPath("adult.train");

        // NOTE(review): presumably set so that code reading s_metrics elsewhere does not
        // observe null when only this benchmark runs - confirm against the reader.
        StochasticDualCoordinateAscentClassifierBench.s_metrics = Models.ClassificationMetrics.Empty;
    }

    /// <summary>
    /// Builds the data pipeline (load, categorical transform, min-max normalization,
    /// concatenation, KMeans train-and-score) and trains logistic regression on it.
    /// </summary>
    /// <returns>The predictor returned by <c>LogisticRegression.Train</c>.</returns>
    private static IPredictor TrainKMeansAndLRCore()
    {
        using (var env = new TlcEnvironment(seed: 1))
        {
            // Pipeline: describe the three logical columns read from the text file.
            var labelColumn = new TextLoader.Column()
            {
                Name = "Label",
                Source = new[] { new TextLoader.Range() { Min = 14, Max = 14 } },
                Type = DataKind.R4
            };

            var categoricalColumn = new TextLoader.Column()
            {
                Name = "CatFeatures",
                Source = new[]
                {
                    new TextLoader.Range() { Min = 1, Max = 1 },
                    new TextLoader.Range() { Min = 3, Max = 3 },
                    new TextLoader.Range() { Min = 5, Max = 9 },
                    new TextLoader.Range() { Min = 13, Max = 13 }
                },
                Type = DataKind.TX
            };

            var numericColumn = new TextLoader.Column()
            {
                Name = "NumFeatures",
                Source = new[]
                {
                    new TextLoader.Range() { Min = 0, Max = 0 },
                    new TextLoader.Range() { Min = 2, Max = 2 },
                    new TextLoader.Range() { Min = 4, Max = 4 },
                    new TextLoader.Range() { Min = 10, Max = 12 }
                },
                Type = DataKind.R4
            };

            var loaderArgs = new TextLoader.Arguments()
            {
                HasHeader = true,
                Separator = ",",
                Column = new[] { labelColumn, categoricalColumn, numericColumn }
            };
            var loader = new TextLoader(env, loaderArgs, new MultiFileSource(s_dataPath));

            // Each step is typed as IDataTransform, matching the single reassigned
            // IDataTransform variable this chain was originally written with.
            var categoricalArgs = new CategoricalTransform.Arguments
            {
                Column = new[]
                {
                    new CategoricalTransform.Column { Name = "CatFeatures", Source = "CatFeatures" }
                }
            };
            IDataTransform categorical = CategoricalTransform.Create(env, categoricalArgs, loader);

            IDataTransform normalized = NormalizeTransform.CreateMinMaxNormalizer(env, categorical, "NumFeatures");
            IDataTransform features = new ConcatTransform(env, normalized, "Features", "NumFeatures", "CatFeatures");

            var kMeansArgs = new TrainAndScoreTransform.Arguments
            {
                Trainer = new SubComponent<ITrainer, SignatureTrainer>("KMeans", "k=100"),
                FeatureColumn = "Features"
            };
            IDataTransform scored = TrainAndScoreTransform.Create(env, kMeansArgs, features);

            // Append the "Score" column produced above to the existing "Features" column.
            IDataTransform combined = new ConcatTransform(env, scored, "Features", "Features", "Score");

            // Train
            var logisticRegressionArgs = new LogisticRegression.Arguments() { EnforceNonNegativity = true, OptTol = 1e-3f };
            var logisticRegression = new LogisticRegression(env, logisticRegressionArgs);
            return logisticRegression.Train(new RoleMappedData(combined, label: "Label", feature: "Features"));
        }
    }

    /// <summary>
    /// Row type with a label and four float measurements mapped to columns "0" through "4".
    /// </summary>
    /// <remarks>
    /// NOTE(review): not referenced by any member of this class - confirm whether it is needed here.
    /// </remarks>
    public class IrisData
    {
        [Column("0")]
        public float Label;

        [Column("1")]
        public float SepalLength;

        [Column("2")]
        public float SepalWidth;

        [Column("3")]
        public float PetalLength;

        [Column("4")]
        public float PetalWidth;
    }

    /// <summary>
    /// Prediction type whose array field is bound to the "Score" column.
    /// </summary>
    /// <remarks>
    /// NOTE(review): not referenced by any member of this class - confirm whether it is needed here.
    /// </remarks>
    public class IrisPrediction
    {
        [ColumnName("Score")]
        public float[] PredictedLabels;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
<PackageReference Include="BenchmarkDotNet" Version="0.10.14" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\src\Microsoft.ML.KMeansClustering\Microsoft.ML.KMeansClustering.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.StandardLearners\Microsoft.ML.StandardLearners.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML\Microsoft.ML.csproj" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using Microsoft.ML.Data;
using Microsoft.ML.Models;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Learners;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms;
using System;
using System.Collections.Generic;
using System.Linq;

namespace Microsoft.ML.Benchmarks
{
Expand All @@ -20,6 +21,7 @@ public class StochasticDualCoordinateAscentClassifierBench
internal static ClassificationMetrics s_metrics;
private static PredictionModel<IrisData, IrisPrediction> s_trainedModel;
private static string s_dataPath;
private static string s_sentimentDataPath;
private static IrisData[][] s_batches;
private static readonly int[] s_batchSizes = new int[] { 1, 2, 5 };
private readonly Random r = new Random(0);
Expand All @@ -44,14 +46,18 @@ public class StochasticDualCoordinateAscentClassifierBench
/// <summary>
/// Benchmarks prediction with the trained model over the batch stored at index 2 of <c>s_batches</c>.
/// </summary>
/// <returns>The predictions returned by the trained model for that batch.</returns>
[Benchmark]
public IEnumerable<IrisPrediction> PredictIrisBatchOf5()
{
    return s_trainedModel.Predict(s_batches[2]);
}

/// <summary>Benchmark entry point; delegates to <see cref="TrainSentimentCore"/>.</summary>
/// <returns>The predictor returned by the sentiment training routine.</returns>
[Benchmark]
public IPredictor TrainSentiment()
{
    return TrainSentimentCore();
}

[GlobalSetup]
public void Setup()
{
s_dataPath = Program.GetDataPath("iris.txt");
s_sentimentDataPath = Program.GetDataPath("wikipedia-detox-250-line-data.tsv");
s_trainedModel = TrainCore();
IrisPrediction prediction = s_trainedModel.Predict(s_example);

var testData = new TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true);
var testData = new Data.TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true);
var evaluator = new ClassificationEvaluator();
s_metrics = evaluator.Evaluate(s_trainedModel, testData);

Expand All @@ -71,7 +77,7 @@ private static PredictionModel<IrisData, IrisPrediction> TrainCore()
{
var pipeline = new LearningPipeline();

pipeline.Add(new TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true));
pipeline.Add(new Data.TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true));
pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
"SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));

Expand All @@ -81,6 +87,75 @@ private static PredictionModel<IrisData, IrisPrediction> TrainCore()
return model;
}

/// <summary>
/// Builds the sentiment pipeline (tab-separated text loading, text transform with
/// token output, word embeddings using the Sswe pretrained model kind) and trains
/// an SDCA multi-class trainer on the resulting "Features" column.
/// </summary>
/// <returns>The predictor returned by <c>SdcaMultiClassTrainer.Train</c>.</returns>
private static IPredictor TrainSentimentCore()
{
    using (var env = new TlcEnvironment(seed: 1))
    {
        // Pipeline: column 0 is read as the label, column 1 as the text.
        var labelColumn = new TextLoader.Column()
        {
            Name = "Label",
            Source = new[] { new TextLoader.Range() { Min = 0, Max = 0 } },
            Type = DataKind.Num
        };

        var sentimentColumn = new TextLoader.Column()
        {
            Name = "SentimentText",
            Source = new[] { new TextLoader.Range() { Min = 1, Max = 1 } },
            Type = DataKind.Text
        };

        var loaderArgs = new TextLoader.Arguments()
        {
            AllowQuoting = false,
            AllowSparse = false,
            Separator = "tab",
            HasHeader = true,
            Column = new[] { labelColumn, sentimentColumn }
        };
        var loader = new TextLoader(env, loaderArgs, new MultiFileSource(s_sentimentDataPath));

        // Both feature extractors are set to null and token output is turned on.
        var textArgs = new TextTransform.Arguments()
        {
            Column = new TextTransform.Column
            {
                Name = "WordEmbeddings",
                Source = new[] { "SentimentText" }
            },
            KeepDiacritics = false,
            KeepPunctuations = false,
            TextCase = Runtime.TextAnalytics.TextNormalizerTransform.CaseNormalizationMode.Lower,
            OutputTokens = true,
            StopWordsRemover = new Runtime.TextAnalytics.PredefinedStopWordsRemoverFactory(),
            VectorNormalizer = TextTransform.TextNormKind.None,
            CharFeatureExtractor = null,
            WordFeatureExtractor = null,
        };
        var text = TextTransform.Create(env, textArgs, loader);

        // NOTE(review): "WordEmbeddings_TransformedText" is presumably the token column
        // emitted by the text transform because OutputTokens is set - confirm.
        var embeddingArgs = new WordEmbeddingsTransform.Arguments()
        {
            Column = new[]
            {
                new WordEmbeddingsTransform.Column
                {
                    Name = "Features",
                    Source = "WordEmbeddings_TransformedText"
                }
            },
            ModelKind = WordEmbeddingsTransform.PretrainedModelKind.Sswe,
        };
        var trans = new WordEmbeddingsTransform(env, embeddingArgs, text);

        // Train
        var sdca = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments() { MaxIterations = 20 });
        return sdca.Train(new RoleMappedData(trans, label: "Label", feature: "Features"));
    }
}

public class IrisData
{
[Column("0")]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.FastTree;
using Microsoft.ML.Runtime.Internal.Calibration;
using Microsoft.ML.Runtime.Learners;
using Microsoft.ML.Runtime.Model;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms;
Expand Down Expand Up @@ -193,6 +194,7 @@ public void TrainAndPredictSentimentModelWithDirectionInstantiationTestWithWordE
Assert.Equal(1.0, (double)summary[0].Value, 1);
}
}

private BinaryClassificationMetrics EvaluateBinary(IHostEnvironment env, IDataView scoredData)
{
var dataEval = new RoleMappedData(scoredData, label: "Label", feature: "Features", opt: true);
Expand Down