Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/Microsoft.ML.Api/GenerateCodeCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ public void Run()
bool isScoreColumn = scorer.Schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.ScoreColumnSetId, i) != null;

var sb = isScoreColumn ? scoreSb : nonScoreSb;

if (sb.Length > 0)
sb.AppendLine();

Expand Down
13 changes: 11 additions & 2 deletions src/Microsoft.ML.Api/GeneratedCodeTemplate.csresource
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Legacy;
using Microsoft.ML.Runtime.Api;

namespace MLGeneratedCode
{
Expand Down Expand Up @@ -31,6 +32,14 @@ public class ScoredOutput
{
/*#SCORE_CLASS_DECL#*/
/*#/SCORE_CLASS_DECL#*/

// These are all remaining available columns, either supplied as the input, or intermediate
// columns generated by the transforms. Materializing these columns has a performance cost,
// so they are commented out. Feel free to uncomment any column that is useful for your scenario.
#if false
/*#SCORED_EXAMPLE_CLASS_DECL#*/
/*#/SCORED_EXAMPLE_CLASS_DECL#*/
#endif
}

/*public static void Main(string[] args)
Expand All @@ -45,7 +54,7 @@ PredictAsync(modelPath);
/// This method demonstrates how to run prediction.
///
///</summary>
public static void Predict(string modelPath)
public static async void PredictAsync(string modelPath)
{
var model = await PredictionModel.ReadAsync<InputData, ScoredOutput>(modelPath);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PredictionModel [](start = 22, length = 15)

PredictionModel is in Microsoft.ML.Legacy, which we don't reference in the usings.
I doubt this code would compile.
Also, do we actually want it to generate Legacy things, or should we switch to the new API?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I opened issue #1738 to convert the command to use the new API.


Expand Down
273 changes: 273 additions & 0 deletions test/BaselineOutput/Common/Command/CommandShowSchemaModel-out.txt

Large diffs are not rendered by default.

88 changes: 88 additions & 0 deletions test/BaselineOutput/Common/Command/codegen-out.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using Microsoft.ML.Legacy;
using Microsoft.ML.Runtime.Api;

namespace MLGeneratedCode
{
    /// <summary>
    /// Sample program showing how to load a trained model and run predictions with it.
    /// </summary>
    public class Program
    {
        /// <summary>
        /// This is the input to the trained model.
        ///
        /// In most pipelines, not all columns that are used in training are also used in scoring. Namely, the label
        /// and weight columns are almost never required at scoring time. Since we don't know which columns
        /// are 'optional' in this sense, all the columns are listed below.
        ///
        /// You are free to remove any fields from the below class. If the fields are not required for scoring, the model
        /// will continue to work. Otherwise, an exception will be thrown when a prediction engine is created.
        ///
        /// </summary>
        public class InputData
        {
            public Single Label;

            [VectorType(5)]
            [ColumnName("F!1")]
            public Single[] Column1 = new Single[5];

            [VectorType(4)]
            public Single[] F2 = new Single[4];
        }

        /// <summary>
        /// This is the output of the scored model, the prediction.
        ///
        ///</summary>
        public class ScoredOutput
        {
            public Boolean PredictedLabel;

            public Single Score;

            public Single Probability;

            // These are all remaining available columns, either supplied as the input, or intermediate
            // columns generated by the transforms. Materializing these columns has a performance cost,
            // so they are commented out. Feel free to uncomment any column that is useful for your scenario.
#if false
            public Single Label;

            [VectorType(5)]
            [ColumnName("F!1")]
            public Single[] Column1;

            [VectorType(4)]
            public Single[] F2;

            [VectorType(9)]
            public Single[] Features;
#endif
        }

        /*public static void Main(string[] args)
        {
            string modelPath;
            modelPath = "model.zip";
            // Block until the prediction sample has finished; otherwise the process
            // could exit before the asynchronous work completes.
            PredictAsync(modelPath).GetAwaiter().GetResult();
        }*/

        /// <summary>
        /// This method demonstrates how to run prediction.
        ///
        ///</summary>
        /// <param name="modelPath">Path handed to <c>PredictionModel.ReadAsync</c> to load the model.</param>
        /// <returns>
        /// A task representing the whole load-and-predict sequence. Returning <see cref="Task"/> rather than
        /// <c>void</c> lets callers await completion and observe any exception raised while loading or predicting.
        /// </returns>
        public static async Task PredictAsync(string modelPath)
        {
            var model = await PredictionModel.ReadAsync<InputData, ScoredOutput>(modelPath);

            var inputData = new InputData();
            // TODO: populate the example's features.

            // Single-example prediction.
            var score = model.Predict(inputData);
            // TODO: consume the resulting score.

            // Batch prediction over a collection of examples.
            var scores = model.Predict(new List<InputData> { inputData, inputData });
            // TODO: consume the resulting scores.
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ public TestParallelFasttreeInterface(ITestOutputHelper helper)
{
}

[Fact]
[Fact(Skip = "'checker' is not a valid value for the 'parag' argument in FastTree")]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we provide a valid value instead of skipping the test?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we do skip the test, should there be an issue filed on how to fix it?


In reply to: 237247906 [](ancestors = 237247906)

[TestCategory("ParallelFasttree")]
public void CheckFastTreeParallelInterface()
{
Expand All @@ -185,7 +185,8 @@ public void CheckFastTreeParallelInterface()
var trainArgs = string.Format(
"train data={{{0}}} loader=Text{{col=Label:0 col=F!1:1-5 col=F2:6-9}} xf=Concat{{col=Features:F!1,F2}} tr=FastTreeBinaryClassification{{lr=0.1 nl=12 mil=10 iter=1 parag=checker}} out={{{1}}}",
dataPath, modelOutPath);
MainForTest(trainArgs);
var res = MainForTest(trainArgs);
Assert.Equal(0, res);
}
}
}
7 changes: 4 additions & 3 deletions test/Microsoft.ML.TestFramework/BaseTestBaseline.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Runtime.Tools;
Expand Down Expand Up @@ -100,6 +101,7 @@ protected override void Initialize()
_env = new ConsoleEnvironment(42, outWriter: LogWriter, errWriter: LogWriter)
.AddStandardComponents();
ML = new MLContext(42);
ML.AddStandardComponents();
}

// This method is used by subclass to dispose of disposable objects
Expand Down Expand Up @@ -815,10 +817,9 @@ protected static StreamReader OpenReader(string path)
/// This method is used in unit tests when the output is not baselined.
/// If the output is to be baselined and compared, the other overload should be used.
/// </summary>
protected static int MainForTest(string args)
protected int MainForTest(string args)
{
var env = new MLContext();
return Maml.MainCore(env, args, false);
return Maml.MainCore(ML, args, false);
}
}

Expand Down
2 changes: 2 additions & 0 deletions test/Microsoft.ML.TestFramework/EnvironmentExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Ensemble;
using Microsoft.ML.Runtime.Learners;
Expand All @@ -28,6 +29,7 @@ public static TEnvironment AddStandardComponents<TEnvironment>(this TEnvironment
#pragma warning disable 612
env.ComponentCatalog.RegisterAssembly(typeof(Experiment).Assembly); // ML.Legacy
#pragma warning restore 612
env.ComponentCatalog.RegisterAssembly(typeof(ComponentCreation).Assembly); // ML.Api
return env;
}
}
Expand Down
60 changes: 42 additions & 18 deletions test/Microsoft.ML.TestFramework/TestCommandBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -672,16 +672,15 @@ public void CommandShowSchema()
}

[TestCategory(Cat)]
[Fact(Skip = "Need CoreTLC specific baseline update")]
[Fact]
public void CommandShowSchemaModel()
{
string trainDataPath = GetDataPath(@"..\UCI", "adult.test.tiny");
string trainDataPath = GetDataPath("adult.tiny.with-schema.txt");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be TestDatasets.adult.testFilename?

string modelPath = ModelPath().Path;
string args =
string.Format(
@"train data={{{0}}}
loader=Text{{
sep=,
header=+
col=NumFeatures:Num:9-14
col=CatFeaturesText:TX:0~*
Expand Down Expand Up @@ -922,29 +921,27 @@ public void CommandCrossValidationAndSave()
// multiple different FastTree (Ranking and Classification for example) instances in different threads.
// FastTree internally fails if we try to run it simultaneously and if this happens we wouldn't get model file for training.
[TestCategory(Cat)]
[Fact(Skip = "Need CoreTLC specific baseline update")]
[Fact]
public void CommandTrainFastTreeInDifferentThreads()
{
var dataPath = GetDataPath("vw.dat");
var firstModelOutPath = CreateOutputPath("TreeTransform-model2.zip");
var secondModelOutPath = CreateOutputPath("TreeTransform-model1.zip");
var trainArgs = "Train tr=SDCA loader=TextLoader{sep=space col=Label:R4:0 col=Features:R4:1 col=Name:TX:2,5-17 col=Cat:TX:3 col=Cat01:TX:4}" + "xf=CategoricalTransform{col=Cat col=Cat01} xf=Concat{col=Features:Features,Cat,Cat01} xf=TreeFeat{tr=FastTreeBinaryClassification} xf=TreeFeat" + "{tr=FastTreeRanking} xf=Concat{col=Features:Features,Leaves,Paths,Trees}";
var dataPath = GetDataPath(TestDatasets.adult.testFilename);
var firstModelOutPath = DeleteOutputPath("TreeTransform-model2.zip");
var secondModelOutPath = DeleteOutputPath("TreeTransform-model1.zip");
var trainArgs = $"Train tr=SDCA {TestDatasets.adult.loaderSettings} {TestDatasets.adult.mamlExtraSettings[0]} {TestDatasets.adult.mamlExtraSettings[1]}" +
" xf=TreeFeat{tr=FastTreeBinaryClassification} xf=TreeFeat{tr=FastTreeRanking} xf=Concat{col=Features:Features,Leaves,Paths,Trees}";

var firsttrainArgs = string.Format("{0} data={1} out={2}", trainArgs, dataPath, firstModelOutPath.Path);
var secondTrainArgs = string.Format("{0} data={1} out={2}", trainArgs, dataPath, secondModelOutPath.Path);
var firsttrainArgs = $"{trainArgs} data={dataPath} out={firstModelOutPath}";
var secondTrainArgs = $"{trainArgs} data={dataPath} out={secondModelOutPath}";

var t = new Task[2];
t[0] = new Task(() => { MainForTest(firsttrainArgs); });
t[1] = new Task(() => { MainForTest(secondTrainArgs); });
var t = new Task<int>[2];
t[0] = new Task<int>(() => MainForTest(firsttrainArgs));
t[1] = new Task<int>(() => MainForTest(secondTrainArgs));
t[0].Start();
t[1].Start();
Task.WaitAll(t);

if (!File.Exists(firstModelOutPath.Path))
Fail("First model doesn't exist");
if (!File.Exists(secondModelOutPath.Path))
Fail("Second model doesn't exist");
Done();
Assert.Equal(0, t[0].Result);
Assert.Equal(0, t[1].Result);
}

[TestCategory(Cat), TestCategory("FastTree")]
Expand Down Expand Up @@ -2096,5 +2093,32 @@ public void Datatypes()
TestCore("savedata", intermediateData.Path, "loader=binary", "saver=text", textOutputPath.Arg("dout"));
Done();
}

/// <summary>
/// Trains a model on breast-cancer.txt, runs the "codegen" command on the saved model, and
/// checks the generated C# file "codegen-out.cs" in the "Command" output folder.
/// </summary>
[Fact]
public void CommandCodeGen()
{
// On non-Windows platforms the test returns immediately, so it is reported as passed
// without exercising anything.
if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
return;

// REVIEW: this tests that the generated output matches the baseline. This does NOT baseline
// the console output. Currently, there's no console output either, but if some is added, a baseline test
// will be in order.

// First, train a model on breast-cancer.
var dataPath = GetDataPath("breast-cancer.txt");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TestDatasets.breastCancer.testFilename?

// Output locations for the trained model and the generated source file.
// NOTE(review): DeleteOutputPath presumably clears any stale file from a previous run - confirm.
var modelOutPath = DeleteOutputPath("Command", "codegen-model.zip");
var csOutPath = DeleteOutputPath("Command", "codegen-out.cs");

// Doubled braces in the format string are escapes that produce literal '{' and '}' in the command line.
var trainArgs = string.Format(
"train data={{{0}}} loader=Text{{col=Label:0 col=F!1:1-5 col=F2:6-9}} xf=Concat{{col=Features:F!1,F2}} tr=lr out={{{1}}}",
dataPath, modelOutPath);
// NOTE(review): the exit code returned by MainForTest is ignored here and in the codegen call below,
// whereas other tests in this change assert that it equals 0. Consider asserting it here as well.
MainForTest(trainArgs);

// Now, generate the prediction code.
MainForTest(string.Format("codegen in={{{0}}} cs={{{1}}} modelNameOverride=model.zip", modelOutPath, csOutPath));
CheckEquality("Command", "codegen-out.cs");

Done();
}
}
}