-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Fix CodeGen command and add a unit test #1654
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6a5093a
936e596
3eec8c4
d1c9eea
7ce9ae7
f9cfbc5
13a5ee3
d5d4d18
f8c5737
b904b08
a2e5b5f
0e1008e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| using System; | ||
| using System.Collections.Generic; | ||
| using Microsoft.ML.Legacy; | ||
| using Microsoft.ML.Runtime.Api; | ||
|
|
||
| namespace MLGeneratedCode | ||
| { | ||
| public class Program | ||
| { | ||
| /// <summary> | ||
| /// This is the input to the trained model. | ||
| /// | ||
| /// In most pipelines, not all columns that are used in training are also used in scoring. Namely, the label | ||
| /// and weight columns are almost never required at scoring time. Since we don't know which columns | ||
| /// are 'optional' in this sense, all the columns are listed below. | ||
| /// | ||
| /// You are free to remove any fields from the below class. If the fields are not required for scoring, the model | ||
| /// will continue to work. Otherwise, the exception will be thrown when a prediction engine is created. | ||
| /// | ||
| /// </summary> | ||
| public class InputData | ||
| { | ||
| public Single Label; | ||
|
|
||
| [VectorType(5)] | ||
| [ColumnName("F!1")] | ||
| public Single[] Column1 = new Single[5]; | ||
|
|
||
| [VectorType(4)] | ||
| public Single[] F2 = new Single[4]; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// This is the output of the scored model, the prediction. | ||
| /// | ||
| ///</summary> | ||
| public class ScoredOutput | ||
| { | ||
| public Boolean PredictedLabel; | ||
|
|
||
| public Single Score; | ||
|
|
||
| public Single Probability; | ||
|
|
||
| // These are all remaining available columns, either supplied as the input, or intermediate | ||
| // columns generated by the transforms. Materializing these columns has a performance cost, | ||
| // so they are commented out. Feel free to uncomment any column that is useful for your scenario. | ||
| #if false | ||
| public Single Label; | ||
|
|
||
| [VectorType(5)] | ||
| [ColumnName("F!1")] | ||
| public Single[] Column1; | ||
|
|
||
| [VectorType(4)] | ||
| public Single[] F2; | ||
|
|
||
| [VectorType(9)] | ||
| public Single[] Features; | ||
| #endif | ||
| } | ||
|
|
||
| /*public static void Main(string[] args) | ||
| { | ||
| string modelPath; | ||
| modelPath = "model.zip"; | ||
| PredictAsync(modelPath); | ||
| }*/ | ||
|
|
||
| /// <summary> | ||
| /// This method demonstrates how to run prediction. | ||
| /// | ||
| ///</summary> | ||
| public static async void PredictAsync(string modelPath) | ||
| { | ||
| var model = await PredictionModel.ReadAsync<InputData, ScoredOutput>(modelPath); | ||
|
|
||
| var inputData = new InputData(); | ||
| // TODO: populate the example's features. | ||
|
|
||
| var score = model.Predict(inputData); | ||
| // TODO: consume the resulting score. | ||
|
|
||
| var scores = model.Predict(new List<InputData> { inputData, inputData }); | ||
| // TODO: consume the resulting scores. | ||
| } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -173,7 +173,7 @@ public TestParallelFasttreeInterface(ITestOutputHelper helper) | |
| { | ||
| } | ||
|
|
||
| [Fact] | ||
| [Fact(Skip = "'checker' is not a valid value for the 'parag' argument in FastTree")] | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we provide a valid value instead of skipping the test?
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we do skip the test, should an issue be filed describing how to fix it? In reply to: 237247906 [](ancestors = 237247906) |
||
| [TestCategory("ParallelFasttree")] | ||
| public void CheckFastTreeParallelInterface() | ||
| { | ||
|
|
@@ -185,7 +185,8 @@ public void CheckFastTreeParallelInterface() | |
| var trainArgs = string.Format( | ||
| "train data={{{0}}} loader=Text{{col=Label:0 col=F!1:1-5 col=F2:6-9}} xf=Concat{{col=Features:F!1,F2}} tr=FastTreeBinaryClassification{{lr=0.1 nl=12 mil=10 iter=1 parag=checker}} out={{{1}}}", | ||
| dataPath, modelOutPath); | ||
| MainForTest(trainArgs); | ||
| var res = MainForTest(trainArgs); | ||
| Assert.Equal(0, res); | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -672,16 +672,15 @@ public void CommandShowSchema() | |
| } | ||
|
|
||
| [TestCategory(Cat)] | ||
| [Fact(Skip = "Need CoreTLC specific baseline update")] | ||
| [Fact] | ||
| public void CommandShowSchemaModel() | ||
| { | ||
| string trainDataPath = GetDataPath(@"..\UCI", "adult.test.tiny"); | ||
| string trainDataPath = GetDataPath("adult.tiny.with-schema.txt"); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can this be TestDatasets.adult.testFilename? |
||
| string modelPath = ModelPath().Path; | ||
| string args = | ||
| string.Format( | ||
| @"train data={{{0}}} | ||
| loader=Text{{ | ||
| sep=, | ||
| header=+ | ||
| col=NumFeatures:Num:9-14 | ||
| col=CatFeaturesText:TX:0~* | ||
|
|
@@ -922,29 +921,27 @@ public void CommandCrossValidationAndSave() | |
| // multiple different FastTree (Ranking and Classification for example) instances in different threads. | ||
| // FastTree internally fails if we try to run it simultaneously and if this happens we wouldn't get model file for training. | ||
| [TestCategory(Cat)] | ||
| [Fact(Skip = "Need CoreTLC specific baseline update")] | ||
| [Fact] | ||
| public void CommandTrainFastTreeInDifferentThreads() | ||
| { | ||
| var dataPath = GetDataPath("vw.dat"); | ||
| var firstModelOutPath = CreateOutputPath("TreeTransform-model2.zip"); | ||
| var secondModelOutPath = CreateOutputPath("TreeTransform-model1.zip"); | ||
| var trainArgs = "Train tr=SDCA loader=TextLoader{sep=space col=Label:R4:0 col=Features:R4:1 col=Name:TX:2,5-17 col=Cat:TX:3 col=Cat01:TX:4}" + "xf=CategoricalTransform{col=Cat col=Cat01} xf=Concat{col=Features:Features,Cat,Cat01} xf=TreeFeat{tr=FastTreeBinaryClassification} xf=TreeFeat" + "{tr=FastTreeRanking} xf=Concat{col=Features:Features,Leaves,Paths,Trees}"; | ||
| var dataPath = GetDataPath(TestDatasets.adult.testFilename); | ||
| var firstModelOutPath = DeleteOutputPath("TreeTransform-model2.zip"); | ||
| var secondModelOutPath = DeleteOutputPath("TreeTransform-model1.zip"); | ||
| var trainArgs = $"Train tr=SDCA {TestDatasets.adult.loaderSettings} {TestDatasets.adult.mamlExtraSettings[0]} {TestDatasets.adult.mamlExtraSettings[1]}" + | ||
| " xf=TreeFeat{tr=FastTreeBinaryClassification} xf=TreeFeat{tr=FastTreeRanking} xf=Concat{col=Features:Features,Leaves,Paths,Trees}"; | ||
|
|
||
| var firsttrainArgs = string.Format("{0} data={1} out={2}", trainArgs, dataPath, firstModelOutPath.Path); | ||
| var secondTrainArgs = string.Format("{0} data={1} out={2}", trainArgs, dataPath, secondModelOutPath.Path); | ||
| var firsttrainArgs = $"{trainArgs} data={dataPath} out={firstModelOutPath}"; | ||
| var secondTrainArgs = $"{trainArgs} data={dataPath} out={secondModelOutPath}"; | ||
|
|
||
| var t = new Task[2]; | ||
| t[0] = new Task(() => { MainForTest(firsttrainArgs); }); | ||
| t[1] = new Task(() => { MainForTest(secondTrainArgs); }); | ||
| var t = new Task<int>[2]; | ||
| t[0] = new Task<int>(() => MainForTest(firsttrainArgs)); | ||
| t[1] = new Task<int>(() => MainForTest(secondTrainArgs)); | ||
| t[0].Start(); | ||
| t[1].Start(); | ||
| Task.WaitAll(t); | ||
|
|
||
| if (!File.Exists(firstModelOutPath.Path)) | ||
| Fail("First model doesn't exist"); | ||
| if (!File.Exists(secondModelOutPath.Path)) | ||
| Fail("Second model doesn't exist"); | ||
| Done(); | ||
| Assert.Equal(0, t[0].Result); | ||
| Assert.Equal(0, t[1].Result); | ||
| } | ||
|
|
||
| [TestCategory(Cat), TestCategory("FastTree")] | ||
|
|
@@ -2096,5 +2093,32 @@ public void Datatypes() | |
| TestCore("savedata", intermediateData.Path, "loader=binary", "saver=text", textOutputPath.Arg("dout")); | ||
| Done(); | ||
| } | ||
|
|
||
| [Fact] | ||
| public void CommandCodeGen() | ||
| { | ||
| if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) | ||
| return; | ||
|
|
||
| // REVIEW: this tests that the generated output matches the baseline. This does NOT baseline | ||
| // the console output. Currently, there's no console output either, but if some is added, a baseline test | ||
| // will be in order. | ||
|
|
||
| // First, train a model on breast-cancer. | ||
| var dataPath = GetDataPath("breast-cancer.txt"); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could this use TestDatasets.breastCancer.testFilename instead of the hard-coded "breast-cancer.txt"? |
||
| var modelOutPath = DeleteOutputPath("Command", "codegen-model.zip"); | ||
| var csOutPath = DeleteOutputPath("Command", "codegen-out.cs"); | ||
|
|
||
| var trainArgs = string.Format( | ||
| "train data={{{0}}} loader=Text{{col=Label:0 col=F!1:1-5 col=F2:6-9}} xf=Concat{{col=Features:F!1,F2}} tr=lr out={{{1}}}", | ||
| dataPath, modelOutPath); | ||
| MainForTest(trainArgs); | ||
|
|
||
| // Now, generate the prediction code. | ||
| MainForTest(string.Format("codegen in={{{0}}} cs={{{1}}} modelNameOverride=model.zip", modelOutPath, csOutPath)); | ||
| CheckEquality("Command", "codegen-out.cs"); | ||
|
|
||
| Done(); | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PredictionModel is in Microsoft.ML.Legacy, which we don't reference in the usings.
I doubt this code would compile.
Also, do we actually want it to generate Legacy code, or should we switch to the new API?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I opened issue #1738 to convert the command to use the new API.