Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Microsoft.ML/Models/ClassificationMetrics.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namespace Microsoft.ML.Models
/// </summary>
public sealed class ClassificationMetrics
{
/// <summary>
/// Shared instance created through the private parameterless constructor.
/// Declared readonly so that other code cannot replace the shared instance.
/// </summary>
public static readonly ClassificationMetrics Empty = new ClassificationMetrics();
// Private constructor: construction is restricted to code inside this class.
private ClassificationMetrics() { }
Expand Down
121 changes: 121 additions & 0 deletions test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Learners;

namespace Microsoft.ML.Benchmarks
{
/// <summary>
/// Benchmark that featurizes the "adult.train" data, scores it with a KMeans trainer
/// and then trains a logistic regression model on the combined features.
/// </summary>
public class KMeansAndLogisticRegressionBench
{
    // Resolved once in Setup and read by the benchmark body.
    private static string s_dataPath;

    /// <summary>
    /// Benchmark entry point; delegates to <see cref="TrainKMeansAndLRCore"/>.
    /// </summary>
    [Benchmark]
    public IPredictor TrainKMeansAndLR()
    {
        return TrainKMeansAndLRCore();
    }

    /// <summary>
    /// Resolves the data file path and assigns the empty metrics instance to the
    /// metrics field of <see cref="StochasticDualCoordinateAscentClassifierBench"/>.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        s_dataPath = Program.GetDataPath("adult.train");
        StochasticDualCoordinateAscentClassifierBench.s_metrics = Models.ClassificationMetrics.Empty;
    }

    /// <summary>
    /// Builds the loader and transform chain, then trains logistic regression on it.
    /// </summary>
    /// <returns>The predictor returned by the logistic regression trainer.</returns>
    private static IPredictor TrainKMeansAndLRCore()
    {
        using (var env = new TlcEnvironment(seed: 1))
        {
            // Pipeline
            TextLoader source = CreateLoader(env, s_dataPath);
            IDataTransform pipeline = BuildFeaturePipeline(env, source);

            // Train
            var lrArguments = new LogisticRegression.Arguments() { EnforceNonNegativity = true, OptTol = 1e-3f };
            var learner = new LogisticRegression(env, lrArguments);
            var roles = new RoleMappedData(pipeline, label: "Label", feature: "Features");
            return learner.Train(roles);
        }
    }

    // Creates the comma-separated text loader with the Label, CatFeatures and NumFeatures columns.
    private static TextLoader CreateLoader(TlcEnvironment env, string path)
    {
        var labelColumn = new TextLoader.Column()
        {
            Name = "Label",
            Source = new[] { Span(14, 14) },
            Type = DataKind.R4
        };

        var categoricalColumn = new TextLoader.Column()
        {
            Name = "CatFeatures",
            Source = new[] { Span(1, 1), Span(3, 3), Span(5, 9), Span(13, 13) },
            Type = DataKind.TX
        };

        var numericColumn = new TextLoader.Column()
        {
            Name = "NumFeatures",
            Source = new[] { Span(0, 0), Span(2, 2), Span(4, 4), Span(10, 12) },
            Type = DataKind.R4
        };

        var loaderArguments = new TextLoader.Arguments()
        {
            HasHeader = true,
            Separator = ",",
            Column = new[] { labelColumn, categoricalColumn, numericColumn }
        };

        return new TextLoader(env, loaderArguments, new MultiFileSource(path));
    }

    // Builds a column range covering the source columns min..max.
    private static TextLoader.Range Span(int min, int max)
    {
        return new TextLoader.Range() { Min = min, Max = max };
    }

    // Applies the categorical, normalization, concatenation and KMeans train-and-score steps in order.
    private static IDataTransform BuildFeaturePipeline(TlcEnvironment env, TextLoader source)
    {
        var categoricalArguments = new CategoricalTransform.Arguments
        {
            Column = new[]
            {
                new CategoricalTransform.Column { Name = "CatFeatures", Source = "CatFeatures" }
            }
        };
        IDataTransform current = CategoricalTransform.Create(env, categoricalArguments, source);

        current = NormalizeTransform.CreateMinMaxNormalizer(env, current, "NumFeatures");
        current = new ConcatTransform(env, current, "Features", "NumFeatures", "CatFeatures");

        var kMeansArguments = new TrainAndScoreTransform.Arguments
        {
            Trainer = new SubComponent<ITrainer, SignatureTrainer>("KMeans", "k=100"),
            FeatureColumn = "Features"
        };
        current = TrainAndScoreTransform.Create(env, kMeansArguments, current);

        // Append the "Score" column to the existing "Features" column.
        current = new ConcatTransform(env, current, "Features", "Features", "Score");
        return current;
    }

    /// <summary>
    /// Row type with a label and four float measurements mapped to columns "0" through "4".
    /// </summary>
    public class IrisData
    {
        [Column("0")]
        public float Label;

        [Column("1")]
        public float SepalLength;

        [Column("2")]
        public float SepalWidth;

        [Column("3")]
        public float PetalLength;

        [Column("4")]
        public float PetalWidth;
    }

    /// <summary>
    /// Prediction type whose array field is bound to the "Score" column.
    /// </summary>
    public class IrisPrediction
    {
        [ColumnName("Score")]
        public float[] PredictedLabels;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
<PackageReference Include="BenchmarkDotNet" Version="$(BenchmarkDotNetVersion)" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\src\Microsoft.ML.KMeansClustering\Microsoft.ML.KMeansClustering.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.StandardLearners\Microsoft.ML.StandardLearners.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML\Microsoft.ML.csproj" />
</ItemGroup>
Expand Down
3 changes: 2 additions & 1 deletion test/Microsoft.ML.Benchmarks/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ static void Main(string[] args)
/// <summary>
/// Builds the benchmark configuration: the default config plus an in-process job
/// (1 warmup iteration, at most 20 iterations), one classification-metrics column
/// and the memory diagnoser.
/// </summary>
/// <returns>The composed configuration.</returns>
private static IConfig CreateCustomConfig()
    => DefaultConfig.Instance
        .With(Job.Default
            .WithWarmupCount(1) // 1 warmup iteration is enough for the benchmarks we have here
            .WithMaxIterationCount(20)
            .With(InProcessToolchain.Instance))
        // Registered exactly once; nameof ties the first argument to the ClassificationMetrics member name.
        .With(new ClassificationMetricsColumn(nameof(ClassificationMetrics.AccuracyMacro), "Macro-average accuracy of the model"))
        .With(MemoryDiagnoser.Default);

internal static string GetDataPath(string name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,17 @@

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Engines;
using Microsoft.ML.Data;
using Microsoft.ML.Models;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Learners;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Threading;

namespace Microsoft.ML.Benchmarks
{
Expand All @@ -19,6 +23,7 @@ public class StochasticDualCoordinateAscentClassifierBench
// Evaluation metrics; assigned in Setup from the evaluator result and also by KMeansAndLogisticRegressionBench.Setup.
internal static ClassificationMetrics s_metrics;
// Model produced by TrainCore() during Setup.
private static PredictionModel<IrisData, IrisPrediction> s_trainedModel;
// Path of "iris.txt", resolved in Setup.
private static string s_dataPath;
// Path of "wikipedia-detox-250-line-data.tsv", resolved in Setup and read by TrainSentimentCore.
private static string s_sentimentDataPath;
// NOTE(review): where this is populated is not visible here; presumably one batch per entry of s_batchSizes — confirm.
private static IrisData[][] s_batches;
private static readonly int[] s_batchSizes = new int[] { 1, 2, 5 };
// Random number generator constructed with the fixed seed 0.
private readonly Random r = new Random(0);
Expand All @@ -31,14 +36,18 @@ public class StochasticDualCoordinateAscentClassifierBench
PetalWidth = 5.1f,
};

/// <summary>
/// Benchmark entry point; delegates to TrainSentimentCore and returns its predictor.
/// </summary>
[Benchmark]
public IPredictor TrainSentiment()
{
    return TrainSentimentCore();
}

[GlobalSetup]
public void Setup()
{
s_dataPath = Program.GetDataPath("iris.txt");
s_sentimentDataPath = Program.GetDataPath("wikipedia-detox-250-line-data.tsv");
s_trainedModel = TrainCore();
IrisPrediction prediction = s_trainedModel.Predict(s_example);

var testData = new TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true);
var testData = new Data.TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true);
var evaluator = new ClassificationEvaluator();
s_metrics = evaluator.Evaluate(s_trainedModel, testData);

Expand Down Expand Up @@ -79,7 +88,7 @@ private static PredictionModel<IrisData, IrisPrediction> TrainCore()
{
var pipeline = new LearningPipeline();

pipeline.Add(new TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true));
pipeline.Add(new Data.TextLoader(s_dataPath).CreateFrom<IrisData>(useHeader: true));
pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
"SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));

Expand All @@ -89,6 +98,77 @@ private static PredictionModel<IrisData, IrisPrediction> TrainCore()
return model;
}

/// <summary>
/// Loads the sentiment data file, applies the text and word-embeddings transforms
/// and trains an SDCA multi-class model on the resulting "Features" column.
/// </summary>
/// <returns>The predictor returned by the trainer.</returns>
private static IPredictor TrainSentimentCore()
{
    // The input file contains numbers with `.` decimal separator, so the work below runs under the invariant culture.
    // The previous culture is restored afterwards so the change does not leak into whatever runs next on this thread.
    CultureInfo previousCulture = Thread.CurrentThread.CurrentCulture;
    Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
    try
    {
        var dataPath = s_sentimentDataPath;
        using (var env = new TlcEnvironment(seed: 1))
        {
            // Pipeline
            var loader = new TextLoader(env,
                new TextLoader.Arguments()
                {
                    AllowQuoting = false,
                    AllowSparse = false,
                    Separator = "tab",
                    HasHeader = true,
                    Column = new[]
                    {
                        new TextLoader.Column()
                        {
                            Name = "Label",
                            Source = new [] { new TextLoader.Range() { Min=0, Max=0} },
                            Type = DataKind.Num
                        },

                        new TextLoader.Column()
                        {
                            Name = "SentimentText",
                            Source = new [] { new TextLoader.Range() { Min=1, Max=1} },
                            Type = DataKind.Text
                        }
                    }
                }, new MultiFileSource(dataPath));

            var text = TextTransform.Create(env, new TextTransform.Arguments()
            {
                Column = new TextTransform.Column
                {
                    Name = "WordEmbeddings",
                    Source = new[] { "SentimentText" }
                },
                KeepDiacritics = false,
                KeepPunctuations = false,
                TextCase = Runtime.TextAnalytics.TextNormalizerTransform.CaseNormalizationMode.Lower,
                OutputTokens = true,
                StopWordsRemover = new Runtime.TextAnalytics.PredefinedStopWordsRemoverFactory(),
                VectorNormalizer = TextTransform.TextNormKind.None,
                CharFeatureExtractor = null,
                WordFeatureExtractor = null,
            },
            loader);

            // NOTE(review): "WordEmbeddings_TransformedText" is presumably the token column emitted
            // by the text transform because OutputTokens is set — confirm.
            var trans = new WordEmbeddingsTransform(env, new WordEmbeddingsTransform.Arguments()
            {
                Column = new[]
                {
                    new WordEmbeddingsTransform.Column
                    {
                        Name = "Features",
                        Source = "WordEmbeddings_TransformedText"
                    }
                },
                ModelKind = WordEmbeddingsTransform.PretrainedModelKind.Sswe,
            }, text);

            // Train
            var trainer = new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments() { MaxIterations = 20 });

            var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
            return trainer.Train(trainRoles);
        }
    }
    finally
    {
        Thread.CurrentThread.CurrentCulture = previousCulture;
    }
}

public class IrisData
{
[Column("0")]
Expand Down