From a3d1c9eab65f8a2a5a38ce3379599cff21b03658 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Tue, 18 Dec 2018 07:44:20 -0800 Subject: [PATCH 01/20] Reached parity with TLC on breast-cancer data with all NaNs removed: OVERALL RESULTS --------------------------------------- L1(avg): 0.090945 (0.0000) L2(avg): 0.024789 (0.0000) RMS(avg): 0.157445 (0.0000) Loss-fn(avg): 0.024789 (0.0000) R Squared: 0.891027 (0.0000) --- docs/samples/Microsoft.ML.Samples/Program.cs | 92 ++++++++++++++- src/Microsoft.ML.FastTree/GamTrainer.cs | 117 +++++++++++++++++-- 2 files changed, 200 insertions(+), 9 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index d64bdb55a1..a2813707a0 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -1,4 +1,13 @@ -using Microsoft.ML.Samples.Dynamic; +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.Training; +using Microsoft.ML.Samples.Dynamic; +using System.Collections.Generic; +using System; +using System.IO; +using Microsoft.ML.Runtime.Tools; +using Microsoft.ML.Runtime.Internal.Utilities; +using System.Linq; namespace Microsoft.ML.Samples { @@ -6,7 +15,86 @@ internal static class Program { static void Main(string[] args) { - TensorFlowTransformExample.TensorFlowScoringSample(); + //MakeBinarySearchTree(10); + TestGam(); + } + + + private static int[] MakeBinarySearchTree(int numInternalNodes) + { + var binIndices = Enumerable.Range(0, numInternalNodes - 1).ToArray(); + var bstIndices = new List(); + + MakeBinarySearchTreeRecursive(binIndices, 0, binIndices.Length - 1, bstIndices); + var ret = bstIndices.ToArray(); + return ret; + } + + private static void MakeBinarySearchTreeRecursive( + int[] array, int lower, int upper, List bstIndices) + { + if (lower > upper) + { + var mid = (lower + upper) / 2; + bstIndices.Add(array[mid]); + MakeBinarySearchTreeRecursive(array, lower, mid - 1, bstIndices); + MakeBinarySearchTreeRecursive(array, mid + 1, upper, bstIndices); + } + } + + private static void TestGam() + { + var mlContext = new MLContext(seed: 0, conc: 0); + + var idv = mlContext.Data.CreateTextReader( + new TextLoader.Arguments() + { + HasHeader = false, + Column = new[] + { + new TextLoader.Column("Label", DataKind.R4, 0), + new TextLoader.Column("Features", DataKind.R4, 1, 9), + } + }).Read(@"F:\temp\ini\data\breast-cancer-noNan.txt"); + + //var pipeline = mlContext.Transforms.Normalize("Features") + // .Append(mlContext.Regression.Trainers.GeneralizedAdditiveModels()); + + var pipeline = mlContext.Regression.Trainers.GeneralizedAdditiveModels(); + + var model = pipeline.Fit(idv); + var data = model.Transform(idv); + + var roleMappedSchema = new RoleMappedSchema(data.Schema, false, + new KeyValuePair(RoleMappedSchema.ColumnRole.Feature, "Features"), + new KeyValuePair(RoleMappedSchema.ColumnRole.Label, "Label")); + + //using (var fs = File.Create(@"F:\temp\ini\model3.zip")) + // mlContext.Model.Save(model, fs); + + using (StreamWriter writer = new StreamWriter(@"F:\temp\ini\model5.ini")) + model.Model.SaveAsIni(writer, roleMappedSchema); + + var results = mlContext.Regression.Evaluate(data); + + //using (var fs = File.OpenRead(@"F:\temp\ini\model3.zip")) + //{ + // var loadedModel = mlContext.Model.Load(fs); + // var testOut = loadedModel.Transform(idv); + //} + + //var modelPath = @"F:\temp\model.zip"; + //using (var fs = File.Create(modelPath)) + // mlContext.Model.Save(model, fs); + + //var 
savePredCommand = new SavePredictorCommand( + // mlContext, + // new SavePredictorCommand.Arguments + // { + // InputModelFile = modelPath, + // }); + + //savePredCommand.Run(); } } } diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index 019613d787..bca3d62c31 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -649,8 +649,11 @@ public Stump(uint splitPoint, double lteValue, double gtValue) } public abstract class GamPredictorBase : PredictorBase, IValueMapper, - IFeatureContributionMapper, ICanSaveModel, ICanSaveInTextFormat, ICanSaveSummary + IFeatureContributionMapper, ICanSaveModel, ICanSaveInTextFormat, ICanSaveSummary, + ICanSaveInIniFormat { + private readonly TreeEnsemble _treeEnsemble; + private readonly double[][] _binUpperBounds; private readonly double[][] _binEffects; public readonly double Intercept; @@ -738,6 +741,8 @@ private protected GamPredictorBase(IHostEnvironment env, string name, newBinEffects.Clear(); newBinBoundaries.Clear(); } + + _treeEnsemble = ToTreeEnsemble(); } protected GamPredictorBase(IHostEnvironment env, string name, ModelLoadContext ctx) @@ -789,6 +794,8 @@ protected GamPredictorBase(IHostEnvironment env, string name, ModelLoadContext c _inputType = new VectorType(NumberType.Float, _inputLength); _outputType = NumberType.Float; + + _treeEnsemble = ToTreeEnsemble(); } public override void Save(ModelSaveContext ctx) @@ -833,12 +840,22 @@ private void Map(in VBuffer features, ref float response) double value = Intercept; var featuresValues = features.GetValues(); + Console.WriteLine(string.Join("\t", featuresValues.ToArray())); + if (features.IsDense) { for (int i = 0; i < featuresValues.Length; ++i) { if (_inputFeatureToDatasetFeatureMap.TryGetValue(i, out int j)) - value += GetBinEffect(j, featuresValues[i]); + { + var gamOutput = GetBinEffect(j, featuresValues[i]); + var treeOutput = _treeEnsemble.TreesArray[i].GetOutput(features); + var delta = Math.Abs(treeOutput - gamOutput); + if (delta > 1e-5) + Console.WriteLine(delta); + value += gamOutput; + + } } } else @@ -1028,11 +1045,97 @@ private void GetFeatureContributions(in VBuffer features, ref VBuffer - /// The GAM model visualization command. Because the data access commands must access private members of - /// , it is convenient to have the command itself nested within the base - /// predictor class. 
- /// + public TreeEnsemble ToTreeEnsemble() + { + var ensemble = new TreeEnsemble(); + + for (int i = 0; i < _numFeatures; i++) + { + var effects = _binEffects[i]; + var thresholds = _binUpperBounds[i]; + + Host.Assert(effects.Length == thresholds.Length); + var numLeaves = effects.Length; + var numInternalNodes = numLeaves - 1; + + var splitFeature = new int[numInternalNodes]; + var splitGain = new double[numInternalNodes]; + var rawThresholds = thresholds.Take(numInternalNodes).Select(x => (float)x).ToArray(); + var defaultValueForMissing = new float[numInternalNodes]; + var binIndices = Enumerable.Range(0, numInternalNodes).ToArray(); + + // Create a long tree + var lteChild = binIndices.Select(x => ~x).ToArray(); + var gtChild = binIndices.Take(numInternalNodes - 1).Select(x => x + 1).ToList(); + gtChild.Add(~numInternalNodes); + + var leafValues = effects; + + for (int j = 0; j < splitFeature.Length; j++) + splitFeature[j] = i; + + var tree = RegressionTree.Create(numLeaves, splitFeature, splitGain, + rawThresholds, defaultValueForMissing, lteChild, gtChild.ToArray(), leafValues, + categoricalSplitFeatures: new int[numInternalNodes][], + categoricalSplit: new bool[numInternalNodes]); + + ensemble.AddTree(tree); + } + + return ensemble; + } + + public void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator calibrator = null) + { + var ensemble = new TreeEnsemble(); + + for (int i = 0; i < _numFeatures; i++) + { + var effects = _binEffects[i]; + var thresholds = _binUpperBounds[i]; + + Host.Assert(effects.Length == thresholds.Length); + var numLeaves = effects.Length; + var numInternalNodes = numLeaves - 1; + + var splitFeature = new int[numInternalNodes]; + var splitGain = new double[numInternalNodes]; + var rawThresholds = thresholds.Take(numInternalNodes).Select(x => (float)x).ToArray(); + var defaultValueForMissing = new float[numInternalNodes]; + var binIndices = Enumerable.Range(0, numInternalNodes).ToArray(); + + // Create a long tree + var lteChild = binIndices.Select(x => ~x).ToArray(); + var gtChild = binIndices.Take(numInternalNodes - 1).Select(x => x + 1).ToList(); + gtChild.Add(~numInternalNodes); + + double[] leafValues; + + if (i == 0) + leafValues = effects.Select(x => x + Intercept).ToArray(); + else + leafValues = effects; + + for (int j = 0; j < splitFeature.Length; j++) + splitFeature[j] = i; + + var tree = RegressionTree.Create(numLeaves, splitFeature, splitGain, + rawThresholds, defaultValueForMissing, lteChild, gtChild.ToArray(), leafValues, + categoricalSplitFeatures: new int[numInternalNodes][], + categoricalSplit: new bool[numInternalNodes]); + + ensemble.AddTree(tree); + } + + var ini = ensemble.ToTreeEnsembleIni( + new FeaturesToContentMap(schema), + string.Empty, + appendFeatureGain: true, + includeZeroGainFeatures: false); + + writer.Write(ini); + } + internal sealed class VisualizationCommand : DataCommand.ImplBase { public const string Summary = "Loads a model trained with a GAM learner, and starts an interactive web session to visualize it."; From 49250c26fecde88dd4408ec78229a3726d1e669f Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Tue, 18 Dec 2018 08:06:44 -0800 Subject: [PATCH 02/20] Changed intercept to be a tree. Parity intact. 
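
For reference, the intercept handling introduced in this patch comes down to one extra tree whose two leaves both return the model intercept, so the tree contributes the same constant to every prediction and the ensemble sum matches the GAM score. A minimal standalone sketch of that idea (illustrative names only, not the ML.NET RegressionTree API):

    using System;

    internal static class InterceptTreeSketch
    {
        // A two-leaf stump: feature <= threshold picks the "lte" leaf, otherwise the "gt" leaf.
        private static double EvaluateStump(double feature, float threshold, double lteLeaf, double gtLeaf)
            => feature <= threshold ? lteLeaf : gtLeaf;

        private static void Main()
        {
            const double intercept = 0.42;

            // With both leaves set to the intercept the stump is a constant function,
            // so appending it to the per-feature trees adds exactly Intercept to every score.
            foreach (var x in new[] { -1.0, 0.0, 3.5 })
                Console.WriteLine(EvaluateStump(x, threshold: 0f, lteLeaf: intercept, gtLeaf: intercept)); // 0.42 every time
        }
    }
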
--- src/Microsoft.ML.FastTree/GamTrainer.cs | 51 ++++++++++++++++++------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index bca3d62c31..226c164aaa 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -840,7 +840,7 @@ private void Map(in VBuffer features, ref float response) double value = Intercept; var featuresValues = features.GetValues(); - Console.WriteLine(string.Join("\t", featuresValues.ToArray())); + //Console.WriteLine(string.Join("\t", featuresValues.ToArray())); if (features.IsDense) { @@ -1098,10 +1098,8 @@ public void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator ca var numLeaves = effects.Length; var numInternalNodes = numLeaves - 1; - var splitFeature = new int[numInternalNodes]; - var splitGain = new double[numInternalNodes]; + var splitFeatures = new int[numInternalNodes]; var rawThresholds = thresholds.Take(numInternalNodes).Select(x => (float)x).ToArray(); - var defaultValueForMissing = new float[numInternalNodes]; var binIndices = Enumerable.Range(0, numInternalNodes).ToArray(); // Create a long tree @@ -1111,22 +1109,47 @@ public void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator ca double[] leafValues; - if (i == 0) - leafValues = effects.Select(x => x + Intercept).ToArray(); - else - leafValues = effects; - - for (int j = 0; j < splitFeature.Length; j++) - splitFeature[j] = i; - - var tree = RegressionTree.Create(numLeaves, splitFeature, splitGain, - rawThresholds, defaultValueForMissing, lteChild, gtChild.ToArray(), leafValues, + //if (i == 0) + // leafValues = effects.Select(x => x + Intercept).ToArray(); + //else + // leafValues = effects; + + leafValues = effects; + + for (int j = 0; j < splitFeatures.Length; j++) + splitFeatures[j] = i; + + var tree = RegressionTree.Create( + numLeaves: numLeaves, + splitFeatures: splitFeatures, + rawThresholds: rawThresholds, + lteChild: lteChild, + gtChild: gtChild.ToArray(), + leafValues: leafValues, + // Ignored arguments + splitGain: new double[numInternalNodes], + defaultValueForMissing: new float[numInternalNodes], categoricalSplitFeatures: new int[numInternalNodes][], categoricalSplit: new bool[numInternalNodes]); ensemble.AddTree(tree); } + var interceptTree = RegressionTree.Create( + numLeaves: 2, + splitFeatures: new [] { 0 }, + rawThresholds: new [] { 0f }, + lteChild: new[] { ~0 }, + gtChild: new[] { ~1 }, + leafValues: new[] { Intercept, Intercept }, + // Ignored arguments + splitGain: new double[1], + defaultValueForMissing: new float[1], + categoricalSplitFeatures: new int[1][], + categoricalSplit: new bool[1]); + + ensemble.AddTree(interceptTree); + var ini = ensemble.ToTreeEnsembleIni( new FeaturesToContentMap(schema), string.Empty, From e6e918fc6fa2db4a9409363a706705ba9b7216a6 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Tue, 18 Dec 2018 08:20:48 -0800 Subject: [PATCH 03/20] Cleaned up intercept tree. 
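
The CreateRegressionTree helper factored out in this patch packages one feature's bins into a single-feature tree described by threshold/lteChild/gtChild arrays, where a bitwise-complemented index (~i) marks a leaf. A small self-contained sketch of that encoding for one feature, using the "long tree" chain layout from the earlier patches (illustrative names, not the ML.NET API):

    using System;

    internal static class BinsAsTreeSketch
    {
        // Walk a single-feature tree stored as parallel arrays: node i tests
        // value <= thresholds[i]; a non-negative child is another internal node,
        // a negative child encodes a leaf as the bitwise complement of its index.
        private static double ScoreTree(float[] thresholds, int[] lteChild, int[] gtChild, double[] leafValues, double value)
        {
            int node = 0;
            while (node >= 0)
                node = value <= thresholds[node] ? lteChild[node] : gtChild[node];
            return leafValues[~node];
        }

        private static void Main()
        {
            // One feature with three bins: (-inf, 1], (1, 2], (2, inf) and their effects.
            double[] binUpperBounds = { 1.0, 2.0, double.PositiveInfinity };
            double[] binEffects = { -0.2, 0.1, 0.4 };

            // "Long tree" layout: internal node i keeps bin i's upper bound,
            // sends <= to leaf i (~i) and > to the next node or to the last leaf.
            float[] thresholds = { (float)binUpperBounds[0], (float)binUpperBounds[1] };
            int[] lteChild = { ~0, ~1 };
            int[] gtChild = { 1, ~2 };

            foreach (var x in new[] { 0.5, 1.5, 5.0 })
                Console.WriteLine($"x={x}: {ScoreTree(thresholds, lteChild, gtChild, binEffects, x)}");
            // Prints -0.2, 0.1 and 0.4 -- the same values a direct bin lookup gives.
        }
    }
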
--- src/Microsoft.ML.FastTree/GamTrainer.cs | 49 +++++++++++++------------ 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index 226c164aaa..7f6cfbccae 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -1119,35 +1119,21 @@ public void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator ca for (int j = 0; j < splitFeatures.Length; j++) splitFeatures[j] = i; - var tree = RegressionTree.Create( - numLeaves: numLeaves, - splitFeatures: splitFeatures, - rawThresholds: rawThresholds, - lteChild: lteChild, - gtChild: gtChild.ToArray(), - leafValues: leafValues, - // Ignored arguments - splitGain: new double[numInternalNodes], - defaultValueForMissing: new float[numInternalNodes], - categoricalSplitFeatures: new int[numInternalNodes][], - categoricalSplit: new bool[numInternalNodes]); - + var tree = CreateRegressionTree(numLeaves, splitFeatures, rawThresholds, lteChild, gtChild.ToArray(), leafValues); ensemble.AddTree(tree); } - var interceptTree = RegressionTree.Create( + // Tried adding the intercept as the bias term for the final ini aggregator, + // but that didn't seem to have any effects during testing. + // Adding the intercept as a dummy tree with the output values being the model intercept, + // works for reaching parity. + var interceptTree = CreateRegressionTree( numLeaves: 2, - splitFeatures: new [] { 0 }, - rawThresholds: new [] { 0f }, + splitFeatures: new[] { 0 }, + rawThresholds: new[] { 0f }, lteChild: new[] { ~0 }, gtChild: new[] { ~1 }, - leafValues: new[] { Intercept, Intercept }, - // Ignored arguments - splitGain: new double[1], - defaultValueForMissing: new float[1], - categoricalSplitFeatures: new int[1][], - categoricalSplit: new bool[1]); - + leafValues: new[] { Intercept, Intercept }); ensemble.AddTree(interceptTree); var ini = ensemble.ToTreeEnsembleIni( @@ -1159,6 +1145,23 @@ public void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator ca writer.Write(ini); } + private static RegressionTree CreateRegressionTree( + int numLeaves, int[] splitFeatures, float[] rawThresholds, int[] lteChild, int[] gtChild, double[] leafValues) + { + var numInternalNodes = numLeaves - 1; + return RegressionTree.Create( + numLeaves: numLeaves, + splitFeatures: splitFeatures, + rawThresholds: rawThresholds, + lteChild: lteChild, + gtChild: gtChild.ToArray(), + leafValues: leafValues, + // Ignored arguments + splitGain: new double[numInternalNodes], + defaultValueForMissing: new float[numInternalNodes], + categoricalSplitFeatures: new int[numInternalNodes][], + categoricalSplit: new bool[numInternalNodes]); + } internal sealed class VisualizationCommand : DataCommand.ImplBase { public const string Summary = "Loads a model trained with a GAM learner, and starts an interactive web session to visualize it."; From dbe8695b5677f8d2211f5003a7e2d2fad8608cc2 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Tue, 18 Dec 2018 10:36:39 -0800 Subject: [PATCH 04/20] Checkpointing test program --- docs/samples/Microsoft.ML.Samples/Program.cs | 27 +------------------- 1 file changed, 1 insertion(+), 26 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index a2813707a0..81c73d7010 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -57,9 +57,6 @@ private static void TestGam() } 
}).Read(@"F:\temp\ini\data\breast-cancer-noNan.txt"); - //var pipeline = mlContext.Transforms.Normalize("Features") - // .Append(mlContext.Regression.Trainers.GeneralizedAdditiveModels()); - var pipeline = mlContext.Regression.Trainers.GeneralizedAdditiveModels(); var model = pipeline.Fit(idv); @@ -69,32 +66,10 @@ private static void TestGam() new KeyValuePair(RoleMappedSchema.ColumnRole.Feature, "Features"), new KeyValuePair(RoleMappedSchema.ColumnRole.Label, "Label")); - //using (var fs = File.Create(@"F:\temp\ini\model3.zip")) - // mlContext.Model.Save(model, fs); - - using (StreamWriter writer = new StreamWriter(@"F:\temp\ini\model5.ini")) + using (StreamWriter writer = new StreamWriter(@"F:\temp\ini\model7.ini")) model.Model.SaveAsIni(writer, roleMappedSchema); var results = mlContext.Regression.Evaluate(data); - - //using (var fs = File.OpenRead(@"F:\temp\ini\model3.zip")) - //{ - // var loadedModel = mlContext.Model.Load(fs); - // var testOut = loadedModel.Transform(idv); - //} - - //var modelPath = @"F:\temp\model.zip"; - //using (var fs = File.Create(modelPath)) - // mlContext.Model.Save(model, fs); - - //var savePredCommand = new SavePredictorCommand( - // mlContext, - // new SavePredictorCommand.Arguments - // { - // InputModelFile = modelPath, - // }); - - //savePredCommand.Run(); } } } From 0d49c9db57ed00cce34b8df72d9f1be975758d3e Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Tue, 18 Dec 2018 11:05:01 -0800 Subject: [PATCH 05/20] Tested with a more complicated pipeline with out-of-order features. Model creation: F:\Git\Tlc3\bin\Debug>maml.exe ini ini=F:\temp\ini\model1.ini out=F:\temp\ini\model1.ini.zip kind=Regression data=F:\temp\ini\data\breast-cancer-noNan.txt loader=TextLoader{col=Label:R4:0 col=F1:R4:1 col=F3:R4:3 col=F6:R4:6 col=F7:R4:7 col =F9:R4:9} xf=Concat{col=Features:F1,F9,F7,F6} Parity Results: OVERALL RESULTS --------------------------------------- L1(avg): 0.120008 (0.0000) L2(avg): 0.036569 (0.0000) RMS(avg): 0.191230 (0.0000) Loss-fn(avg): 0.036569 (0.0000) R Squared: 0.839242 (0.0000) --- docs/samples/Microsoft.ML.Samples/Program.cs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index 81c73d7010..44f68fefed 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -53,11 +53,16 @@ private static void TestGam() Column = new[] { new TextLoader.Column("Label", DataKind.R4, 0), - new TextLoader.Column("Features", DataKind.R4, 1, 9), + new TextLoader.Column("F1", DataKind.R4, 1), + new TextLoader.Column("F3", DataKind.R4, 3), + new TextLoader.Column("F6", DataKind.R4, 6), + new TextLoader.Column("F7", DataKind.R4, 7), + new TextLoader.Column("F9", DataKind.R4, 9), } }).Read(@"F:\temp\ini\data\breast-cancer-noNan.txt"); - var pipeline = mlContext.Regression.Trainers.GeneralizedAdditiveModels(); + var pipeline = mlContext.Transforms.Concatenate("Features", "F1", "F9", "F7", "F6") + .Append(mlContext.Regression.Trainers.GeneralizedAdditiveModels()); var model = pipeline.Fit(idv); var data = model.Transform(idv); @@ -66,8 +71,8 @@ private static void TestGam() new KeyValuePair(RoleMappedSchema.ColumnRole.Feature, "Features"), new KeyValuePair(RoleMappedSchema.ColumnRole.Label, "Label")); - using (StreamWriter writer = new StreamWriter(@"F:\temp\ini\model7.ini")) - model.Model.SaveAsIni(writer, roleMappedSchema); + using (StreamWriter writer = new StreamWriter(@"F:\temp\ini\model1.ini")) 
+ model.LastTransformer.Model.SaveAsIni(writer, roleMappedSchema); var results = mlContext.Regression.Evaluate(data); } From e22a82b4d086eb0f05b527821081a2f026f2b81c Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Tue, 18 Dec 2018 16:43:23 -0800 Subject: [PATCH 06/20] Parity results achieved for the optimized tree structure. OVERALL RESULTS --------------------------------------- L1(avg): 0.120008 (0.0000) L2(avg): 0.036569 (0.0000) RMS(avg): 0.191230 (0.0000) Loss-fn(avg): 0.036569 (0.0000) R Squared: 0.839242 (0.0000) --- docs/samples/Microsoft.ML.Samples/Program.cs | 43 +++++++++--- src/Microsoft.ML.FastTree/GamTrainer.cs | 71 ++++++++++++++------ 2 files changed, 83 insertions(+), 31 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index 44f68fefed..f53a7c9345 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -13,6 +13,8 @@ namespace Microsoft.ML.Samples { internal static class Program { + static IHostEnvironment Host = new MLContext(); + static void Main(string[] args) { //MakeBinarySearchTree(10); @@ -20,28 +22,49 @@ static void Main(string[] args) } - private static int[] MakeBinarySearchTree(int numInternalNodes) + private static (int[], int[], int[]) MakeBinarySearchTree(int numInternalNodes) { - var binIndices = Enumerable.Range(0, numInternalNodes - 1).ToArray(); + var binIndices = Enumerable.Range(0, numInternalNodes).ToArray(); var bstIndices = new List(); + var lteChild = new List(); + var gtChild = new List(); + var internalNodeId = numInternalNodes; - MakeBinarySearchTreeRecursive(binIndices, 0, binIndices.Length - 1, bstIndices); - var ret = bstIndices.ToArray(); + MakeBinarySearchTreeRecursive(binIndices, 0, binIndices.Length - 1, bstIndices, lteChild, gtChild, ref internalNodeId); + var ret = (bstIndices.ToArray(), lteChild.ToArray(), gtChild.ToArray()); return ret; } - private static void MakeBinarySearchTreeRecursive( - int[] array, int lower, int upper, List bstIndices) + private static int MakeBinarySearchTreeRecursive( + int[] array, int lower, int upper, + List bstIndices, List lteChild, List gtChild, ref int internalNodeId) { if (lower > upper) + { + // Base case: we've reached a leaf node + Assert(lower == upper + 1); + return lower + 100; + } + else { var mid = (lower + upper) / 2; - bstIndices.Add(array[mid]); - MakeBinarySearchTreeRecursive(array, lower, mid - 1, bstIndices); - MakeBinarySearchTreeRecursive(array, mid + 1, upper, bstIndices); + var left = MakeBinarySearchTreeRecursive( + array, lower, mid - 1, bstIndices, lteChild, gtChild, ref internalNodeId); + var right = MakeBinarySearchTreeRecursive( + array, mid + 1, upper, bstIndices, lteChild, gtChild, ref internalNodeId); + bstIndices.Insert(0, array[mid]); + lteChild.Insert(0, left); + gtChild.Insert(0, right); + return --internalNodeId; } } + private static void Assert(bool v) + { + if (!v) + throw new NotImplementedException(); + } + private static void TestGam() { var mlContext = new MLContext(seed: 0, conc: 0); @@ -71,7 +94,7 @@ private static void TestGam() new KeyValuePair(RoleMappedSchema.ColumnRole.Feature, "Features"), new KeyValuePair(RoleMappedSchema.ColumnRole.Label, "Label")); - using (StreamWriter writer = new StreamWriter(@"F:\temp\ini\model1.ini")) + using (StreamWriter writer = new StreamWriter(@"F:\temp\ini\model2.ini")) model.LastTransformer.Model.SaveAsIni(writer, roleMappedSchema); var results = mlContext.Regression.Evaluate(data); diff --git 
a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index 7f6cfbccae..7a349c8c6d 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -1089,37 +1089,30 @@ public void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator ca { var ensemble = new TreeEnsemble(); - for (int i = 0; i < _numFeatures; i++) + for (int featureIndex = 0; featureIndex < _numFeatures; featureIndex++) { - var effects = _binEffects[i]; - var thresholds = _binUpperBounds[i]; + var effects = _binEffects[featureIndex]; + var thresholds = _binUpperBounds[featureIndex]; Host.Assert(effects.Length == thresholds.Length); var numLeaves = effects.Length; var numInternalNodes = numLeaves - 1; - var splitFeatures = new int[numInternalNodes]; - var rawThresholds = thresholds.Take(numInternalNodes).Select(x => (float)x).ToArray(); - var binIndices = Enumerable.Range(0, numInternalNodes).ToArray(); - - // Create a long tree - var lteChild = binIndices.Select(x => ~x).ToArray(); - var gtChild = binIndices.Take(numInternalNodes - 1).Select(x => x + 1).ToList(); - gtChild.Add(~numInternalNodes); - - double[] leafValues; + var splitFeatures = Enumerable.Repeat(featureIndex, numInternalNodes).ToArray(); - //if (i == 0) - // leafValues = effects.Select(x => x + Intercept).ToArray(); - //else - // leafValues = effects; + var bstRet = MakeBinarySearchTree(numInternalNodes); + var rawThresholds = bstRet.Item1.Select(x => (float)thresholds[x]).ToArray(); + var lteChild = bstRet.Item2; + var gtChild = bstRet.Item3; - leafValues = effects; + // var binIndices = Enumerable.Range(0, numInternalNodes).ToArray(); - for (int j = 0; j < splitFeatures.Length; j++) - splitFeatures[j] = i; + // Create a long tree + //var lteChild = binIndices.Select(x => ~x).ToArray(); + //var gtChild = binIndices.Take(numInternalNodes - 1).Select(x => x + 1).ToList(); + //gtChild.Add(~numInternalNodes); - var tree = CreateRegressionTree(numLeaves, splitFeatures, rawThresholds, lteChild, gtChild.ToArray(), leafValues); + var tree = CreateRegressionTree(numLeaves, splitFeatures, rawThresholds, lteChild, gtChild.ToArray(), effects); ensemble.AddTree(tree); } @@ -1145,6 +1138,42 @@ public void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator ca writer.Write(ini); } + private (int[], int[], int[]) MakeBinarySearchTree(int numInternalNodes) + { + var binIndices = Enumerable.Range(0, numInternalNodes).ToArray(); + var bstIndices = new List(); + var lteChild = new List(); + var gtChild = new List(); + var internalNodeId = numInternalNodes; + + MakeBinarySearchTreeRecursive(binIndices, 0, binIndices.Length - 1, bstIndices, lteChild, gtChild, ref internalNodeId); + var ret = (bstIndices.ToArray(), lteChild.ToArray(), gtChild.ToArray()); + return ret; + } + + private int MakeBinarySearchTreeRecursive( + int[] array, int lower, int upper, + List bstIndices, List lteChild, List gtChild, ref int internalNodeId) + { + if (lower > upper) + { + // Base case: we've reached a leaf node + Host.Assert(lower == upper + 1); + return ~lower; + } + else + { + var mid = (lower + upper) / 2; + var left = MakeBinarySearchTreeRecursive( + array, lower, mid - 1, bstIndices, lteChild, gtChild, ref internalNodeId); + var right = MakeBinarySearchTreeRecursive( + array, mid + 1, upper, bstIndices, lteChild, gtChild, ref internalNodeId); + bstIndices.Insert(0, array[mid]); + lteChild.Insert(0, left); + gtChild.Insert(0, right); + return --internalNodeId; + } + } private static RegressionTree 
CreateRegressionTree( int numLeaves, int[] splitFeatures, float[] rawThresholds, int[] lteChild, int[] gtChild, double[] leafValues) { From cd783c2f2e142f95ec16799f12cab9a1e4ce8ebf Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Tue, 18 Dec 2018 17:50:05 -0800 Subject: [PATCH 07/20] Cleaned up BalancedTree algo --- src/Microsoft.ML.FastTree/GamTrainer.cs | 58 ++++++++++++------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index 7a349c8c6d..231e363895 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -1092,27 +1092,15 @@ public void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator ca for (int featureIndex = 0; featureIndex < _numFeatures; featureIndex++) { var effects = _binEffects[featureIndex]; - var thresholds = _binUpperBounds[featureIndex]; + var binThresholds = _binUpperBounds[featureIndex]; - Host.Assert(effects.Length == thresholds.Length); + Host.Assert(effects.Length == binThresholds.Length); var numLeaves = effects.Length; var numInternalNodes = numLeaves - 1; var splitFeatures = Enumerable.Repeat(featureIndex, numInternalNodes).ToArray(); - - var bstRet = MakeBinarySearchTree(numInternalNodes); - var rawThresholds = bstRet.Item1.Select(x => (float)thresholds[x]).ToArray(); - var lteChild = bstRet.Item2; - var gtChild = bstRet.Item3; - - // var binIndices = Enumerable.Range(0, numInternalNodes).ToArray(); - - // Create a long tree - //var lteChild = binIndices.Select(x => ~x).ToArray(); - //var gtChild = binIndices.Take(numInternalNodes - 1).Select(x => x + 1).ToList(); - //gtChild.Add(~numInternalNodes); - - var tree = CreateRegressionTree(numLeaves, splitFeatures, rawThresholds, lteChild, gtChild.ToArray(), effects); + var (treeThresholds, lteChild, gtChild) = CreateBalancedTree(numInternalNodes, binThresholds); + var tree = CreateRegressionTree(numLeaves, splitFeatures, treeThresholds, lteChild, gtChild.ToArray(), effects); ensemble.AddTree(tree); } @@ -1138,22 +1126,31 @@ public void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator ca writer.Write(ini); } - private (int[], int[], int[]) MakeBinarySearchTree(int numInternalNodes) + // GAM bins should be converted to balanced trees / binary search trees + // so that scoring takes O(log(n)) instead of O(n). The following utility + // creates a balanced tree. 
+ private (float[], int[], int[]) CreateBalancedTree(int numInternalNodes, double[] binThresholds) { var binIndices = Enumerable.Range(0, numInternalNodes).ToArray(); - var bstIndices = new List(); + var internalNodeIndices = new List(); var lteChild = new List(); var gtChild = new List(); var internalNodeId = numInternalNodes; - MakeBinarySearchTreeRecursive(binIndices, 0, binIndices.Length - 1, bstIndices, lteChild, gtChild, ref internalNodeId); - var ret = (bstIndices.ToArray(), lteChild.ToArray(), gtChild.ToArray()); - return ret; + CreateBalancedTreeRecursive( + 0, binIndices.Length - 1, internalNodeIndices, lteChild, gtChild, ref internalNodeId); + // internalNodeId should have been counted all the way down to 0 (root node) + Host.Assert(internalNodeId == 0); + + var tree = ( + thresholds: internalNodeIndices.Select(x => (float)binThresholds[binIndices[x]]).ToArray(), + lteChild: lteChild.ToArray(), + gtChild: gtChild.ToArray()); + return tree; } - private int MakeBinarySearchTreeRecursive( - int[] array, int lower, int upper, - List bstIndices, List lteChild, List gtChild, ref int internalNodeId) + private int CreateBalancedTreeRecursive(int lower, int upper, + List internalNodeIndices, List lteChild, List gtChild, ref int internalNodeId) { if (lower > upper) { @@ -1163,12 +1160,15 @@ private int MakeBinarySearchTreeRecursive( } else { + // This is postorder traversal algorithm and populating the internalNodeIndices/lte/gt lists in reverse. + // Preorder is the only option, because we need the results of both left/right recursions for populating the lists. + // As a result, lists are populated in reverse, because the root node should be the first item on the lists. var mid = (lower + upper) / 2; - var left = MakeBinarySearchTreeRecursive( - array, lower, mid - 1, bstIndices, lteChild, gtChild, ref internalNodeId); - var right = MakeBinarySearchTreeRecursive( - array, mid + 1, upper, bstIndices, lteChild, gtChild, ref internalNodeId); - bstIndices.Insert(0, array[mid]); + var left = CreateBalancedTreeRecursive( + lower, mid - 1, internalNodeIndices, lteChild, gtChild, ref internalNodeId); + var right = CreateBalancedTreeRecursive( + mid + 1, upper, internalNodeIndices, lteChild, gtChild, ref internalNodeId); + internalNodeIndices.Insert(0, mid); lteChild.Insert(0, left); gtChild.Insert(0, right); return --internalNodeId; From 195a08bb1883b2530c42d7f04bf0032bae569a27 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Wed, 19 Dec 2018 05:11:10 -0800 Subject: [PATCH 08/20] Removed parity debugging code --- src/Microsoft.ML.FastTree/GamTrainer.cs | 57 +------------------------ 1 file changed, 1 insertion(+), 56 deletions(-) diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index 231e363895..f076a6ac2c 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -652,8 +652,6 @@ public abstract class GamPredictorBase : PredictorBase, IValueMapper, IFeatureContributionMapper, ICanSaveModel, ICanSaveInTextFormat, ICanSaveSummary, ICanSaveInIniFormat { - private readonly TreeEnsemble _treeEnsemble; - private readonly double[][] _binUpperBounds; private readonly double[][] _binEffects; public readonly double Intercept; @@ -741,8 +739,6 @@ private protected GamPredictorBase(IHostEnvironment env, string name, newBinEffects.Clear(); newBinBoundaries.Clear(); } - - _treeEnsemble = ToTreeEnsemble(); } protected GamPredictorBase(IHostEnvironment env, string name, ModelLoadContext ctx) @@ -794,8 +790,6 @@ protected 
GamPredictorBase(IHostEnvironment env, string name, ModelLoadContext c _inputType = new VectorType(NumberType.Float, _inputLength); _outputType = NumberType.Float; - - _treeEnsemble = ToTreeEnsemble(); } public override void Save(ModelSaveContext ctx) @@ -840,22 +834,13 @@ private void Map(in VBuffer features, ref float response) double value = Intercept; var featuresValues = features.GetValues(); - //Console.WriteLine(string.Join("\t", featuresValues.ToArray())); if (features.IsDense) { for (int i = 0; i < featuresValues.Length; ++i) { if (_inputFeatureToDatasetFeatureMap.TryGetValue(i, out int j)) - { - var gamOutput = GetBinEffect(j, featuresValues[i]); - var treeOutput = _treeEnsemble.TreesArray[i].GetOutput(features); - var delta = Math.Abs(treeOutput - gamOutput); - if (delta > 1e-5) - Console.WriteLine(delta); - value += gamOutput; - - } + value += GetBinEffect(j, featuresValues[i]); } } else @@ -1045,46 +1030,6 @@ private void GetFeatureContributions(in VBuffer features, ref VBuffer (float)x).ToArray(); - var defaultValueForMissing = new float[numInternalNodes]; - var binIndices = Enumerable.Range(0, numInternalNodes).ToArray(); - - // Create a long tree - var lteChild = binIndices.Select(x => ~x).ToArray(); - var gtChild = binIndices.Take(numInternalNodes - 1).Select(x => x + 1).ToList(); - gtChild.Add(~numInternalNodes); - - var leafValues = effects; - - for (int j = 0; j < splitFeature.Length; j++) - splitFeature[j] = i; - - var tree = RegressionTree.Create(numLeaves, splitFeature, splitGain, - rawThresholds, defaultValueForMissing, lteChild, gtChild.ToArray(), leafValues, - categoricalSplitFeatures: new int[numInternalNodes][], - categoricalSplit: new bool[numInternalNodes]); - - ensemble.AddTree(tree); - } - - return ensemble; - } - public void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator calibrator = null) { var ensemble = new TreeEnsemble(); From 344a15a25ed981479c3d2075ef18020480220793 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Wed, 19 Dec 2018 05:15:01 -0800 Subject: [PATCH 09/20] Reverted unnecessary changes --- src/Microsoft.ML.FastTree/GamTrainer.cs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index f076a6ac2c..45b37822f2 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -1136,6 +1136,12 @@ private static RegressionTree CreateRegressionTree( categoricalSplitFeatures: new int[numInternalNodes][], categoricalSplit: new bool[numInternalNodes]); } + + /// + /// The GAM model visualization command. Because the data access commands must access private members of + /// , it is convenient to have the command itself nested within the base + /// predictor class. 
+ /// internal sealed class VisualizationCommand : DataCommand.ImplBase { public const string Summary = "Loads a model trained with a GAM learner, and starts an interactive web session to visualize it."; From 9101a322fd4bb529eb9e21b21ab9e24eadd60683 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Wed, 19 Dec 2018 07:07:15 -0800 Subject: [PATCH 10/20] Failed attempt to use SavePredictorCommand --- .../TestIniModels.cs | 60 +++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs b/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs index 9f8b423de6..7ca83be858 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs @@ -7,7 +7,11 @@ using System.IO; using System.Threading; using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Internal.Utilities; +using Microsoft.ML.Runtime.Tools; +using Xunit; +using Xunit.Abstractions; namespace Microsoft.ML.Runtime.RunTests { @@ -504,4 +508,60 @@ public ProcessDebugInformation RunCommandLine(string commandLine, string dir) } } #endif + + public sealed class TestIniModels : TestDataPipeBase + { + public TestIniModels(ITestOutputHelper output) : base(output) + { + } + + [Fact] + public void TestGamRegressionIni() + { + var mlContext = new MLContext(seed: 0, conc: 0); + + var idv = mlContext.Data.CreateTextReader( + new TextLoader.Arguments() + { + HasHeader = false, + Column = new[] + { + new TextLoader.Column("Label", DataKind.R4, 0), + new TextLoader.Column("Features", DataKind.R4, 1, 9) + } + }).Read(GetDataPath("breast-cancer.txt")); + + var pipeline = mlContext.Regression.Trainers.GeneralizedAdditiveModels(); + var model = pipeline.Fit(idv); + var data = model.Transform(idv); + + //var roleMappedSchema = new RoleMappedSchema(data.Schema, false, + // new KeyValuePair(RoleMappedSchema.ColumnRole.Feature, "Features"), + // new KeyValuePair(RoleMappedSchema.ColumnRole.Label, "Label")); + + string modelZipPath = GetOutputPath(FullTestName + "-model3.zip"); + string modelIniPath = GetOutputPath(FullTestName + "-model3.ini"); + + using (var fs = File.Create(modelZipPath)) + mlContext.Model.Save(model, fs); + + var savePredCommand = new SavePredictorCommand( + mlContext, + new SavePredictorCommand.Arguments + { + InputModelFile = modelZipPath, + IniFile = modelIniPath + }); + + savePredCommand.Run(); + + //string modelIniPath = GetOutputPath(FullTestName + "-model.ini"); + //using (StreamWriter writer = Utils.OpenWriter(modelIniPath)) + //using (StreamWriter writer = new StreamWriter(modelIniPath)) + // model.Model.SaveAsIni(writer, roleMappedSchema); + + //var results = mlContext.Regression.Evaluate(data); + } + } + } From 363bd09d72d315db019cdd1e4d81b9777b879b2b Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Wed, 19 Dec 2018 07:51:21 -0800 Subject: [PATCH 11/20] Regression test parity results: // OVERALL RESULTS // --------------------------------------- // L1(avg): 0.093257 (0.0000) // L2(avg): 0.025707 (0.0000) // RMS(avg): 0.160336 (0.0000) // Loss-fn(avg): 0.025707 (0.0000) // R Squared: 0.886203 (0.0000) --- .../TestIniModels.cs | 59 +++++++++++-------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs b/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs index 7ca83be858..a062a4aede 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs @@ -6,6 +6,7 @@ using 
System.Collections.Generic; using System.IO; using System.Threading; +using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.Internal.Utilities; @@ -531,37 +532,45 @@ public void TestGamRegressionIni() } }).Read(GetDataPath("breast-cancer.txt")); - var pipeline = mlContext.Regression.Trainers.GeneralizedAdditiveModels(); + var pipeline = mlContext.Transforms.ReplaceMissingValues("Features") + .Append(mlContext.Regression.Trainers.GeneralizedAdditiveModels()); var model = pipeline.Fit(idv); var data = model.Transform(idv); - //var roleMappedSchema = new RoleMappedSchema(data.Schema, false, - // new KeyValuePair(RoleMappedSchema.ColumnRole.Feature, "Features"), - // new KeyValuePair(RoleMappedSchema.ColumnRole.Label, "Label")); - - string modelZipPath = GetOutputPath(FullTestName + "-model3.zip"); - string modelIniPath = GetOutputPath(FullTestName + "-model3.ini"); - - using (var fs = File.Create(modelZipPath)) - mlContext.Model.Save(model, fs); - - var savePredCommand = new SavePredictorCommand( - mlContext, - new SavePredictorCommand.Arguments - { - InputModelFile = modelZipPath, - IniFile = modelIniPath - }); + var roleMappedSchema = new RoleMappedSchema(data.Schema, false, + new KeyValuePair(RoleMappedSchema.ColumnRole.Feature, "Features"), + new KeyValuePair(RoleMappedSchema.ColumnRole.Label, "Label")); + + string modelIniPath = GetOutputPath(FullTestName + "-model.ini"); + using (Stream iniStream = File.Create(modelIniPath)) + using (StreamWriter iniWriter = Utils.OpenWriter(iniStream)) + model.LastTransformer.Model.SaveAsIni(iniWriter, roleMappedSchema); + + var results = mlContext.Regression.Evaluate(data); + + // Getting parity results from maml.exe: + // maml.exe ini ini=model.ini out=model_ini.zip data=breast-cancer.txt loader=TextLoader{col=Label:R4:0 col=Features:R4:1-9} xf=NAHandleTransform{col=Features slot=- ind=-} kind=Regression + var expectedResults = new RegressionMetrics( + l1: 0.093256807643323947, + l2: 0.025707474358979077, + rms: 0.16033550560926635, + rSquared: 0.88620288753853549, + lossFunction: 0.025707474380004879); + + Assert.Equal(results.L1, expectedResults.L1); + Assert.Equal(results.L2, expectedResults.L2); + Assert.Equal(results.Rms, expectedResults.Rms); + Assert.Equal(results.RSquared, expectedResults.RSquared); + Assert.Equal(results.LossFn, expectedResults.LossFn); + } - savePredCommand.Run(); + //private void AssertEqual(double v1, double v2) + //{ + // //Assert.Equal(v1, v2, 5); + // Assert.Equal(v1, v2); - //string modelIniPath = GetOutputPath(FullTestName + "-model.ini"); - //using (StreamWriter writer = Utils.OpenWriter(modelIniPath)) - //using (StreamWriter writer = new StreamWriter(modelIniPath)) - // model.Model.SaveAsIni(writer, roleMappedSchema); + //} - //var results = mlContext.Regression.Evaluate(data); - } } } From d94a2796746f4d813ebc41c430387fe8935bb06f Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Wed, 19 Dec 2018 09:22:26 -0800 Subject: [PATCH 12/20] Parity for BinaryClassification: OVERALL RESULTS --------------------------------------- AUC: 0.995452 (0.0000) Accuracy: 0.969957 (0.0000) Positive precision: 0.950820 (0.0000) Positive recall: 0.962656 (0.0000) Negative precision: 0.980220 (0.0000) Negative recall: 0.973799 (0.0000) Log-loss: 0.115940 (0.0000) Log-loss reduction: 87.524160 (0.0000) F1 Score: 0.956701 (0.0000) AUPRC: 0.989809 (0.0000) --- src/Microsoft.ML.FastTree/FastTree.cs | 78 +++++++++++-------- src/Microsoft.ML.FastTree/GamTrainer.cs | 9 +-- 
.../TestIniModels.cs | 67 +++++++++++----- 3 files changed, 95 insertions(+), 59 deletions(-) diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs index 7765535aed..c70926b6e4 100644 --- a/src/Microsoft.ML.FastTree/FastTree.cs +++ b/src/Microsoft.ML.FastTree/FastTree.cs @@ -2985,40 +2985,8 @@ void ICanSaveInTextFormat.SaveAsText(TextWriter writer, RoleMappedSchema schema) /// void ICanSaveInIniFormat.SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator calibrator) { - Host.CheckValue(writer, nameof(writer)); - Host.CheckValue(schema, nameof(schema)); - Host.CheckValueOrNull(calibrator); - string ensembleIni = TrainedEnsemble.ToTreeEnsembleIni(new FeaturesToContentMap(schema), + FastTreeIniFormatUtils.SaveTreeEnsembleAsIni(Host, TrainedEnsemble, writer, schema, calibrator, InnerArgs, appendFeatureGain: true, includeZeroGainFeatures: false); - ensembleIni = AddCalibrationToIni(ensembleIni, calibrator); - writer.WriteLine(ensembleIni); - } - - /// - /// Get the calibration summary in INI format - /// - private string AddCalibrationToIni(string ini, ICalibrator calibrator) - { - Host.AssertValue(ini); - Host.AssertValueOrNull(calibrator); - - if (calibrator == null) - return ini; - - if (calibrator is PlattCalibrator) - { - string calibratorEvaluatorIni = IniFileUtils.GetCalibratorEvaluatorIni(ini, calibrator as PlattCalibrator); - return IniFileUtils.AddEvaluator(ini, calibratorEvaluatorIni); - } - else - { - StringBuilder newSection = new StringBuilder(); - newSection.AppendLine(); - newSection.AppendLine(); - newSection.AppendLine("[TLCCalibration]"); - newSection.AppendLine("Type=" + calibrator.GetType().Name); - return ini + newSection; - } } JToken ISingleCanSavePfa.SaveAsPfa(BoundPfaContext ctx, JToken input) @@ -3404,4 +3372,48 @@ public TreeNode(Dictionary keyValues) public Dictionary KeyValues { get; } } } + internal static class FastTreeIniFormatUtils + { + public static void SaveTreeEnsembleAsIni( + IHost host, TreeEnsemble ensemble, TextWriter writer, RoleMappedSchema schema, ICalibrator calibrator, + string trainingParams, bool appendFeatureGain, bool includeZeroGainFeatures) + { + host.CheckValue(ensemble, nameof(ensemble)); + host.CheckValue(writer, nameof(writer)); + host.CheckValue(schema, nameof(schema)); + host.CheckValueOrNull(calibrator); + + string ensembleIni = ensemble.ToTreeEnsembleIni(new FeaturesToContentMap(schema), + trainingParams, appendFeatureGain, includeZeroGainFeatures); + ensembleIni = AddCalibrationToIni(host, ensembleIni, calibrator); + writer.WriteLine(ensembleIni); + } + + /// + /// Get the calibration summary in INI format + /// + private static string AddCalibrationToIni(IHost host, string ini, ICalibrator calibrator) + { + host.AssertValue(ini); + host.AssertValueOrNull(calibrator); + + if (calibrator == null) + return ini; + + if (calibrator is PlattCalibrator) + { + string calibratorEvaluatorIni = IniFileUtils.GetCalibratorEvaluatorIni(ini, calibrator as PlattCalibrator); + return IniFileUtils.AddEvaluator(ini, calibratorEvaluatorIni); + } + else + { + StringBuilder newSection = new StringBuilder(); + newSection.AppendLine(); + newSection.AppendLine(); + newSection.AppendLine("[TLCCalibration]"); + newSection.AppendLine("Type=" + calibrator.GetType().Name); + return ini + newSection; + } + } + } } diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index 45b37822f2..9dd59463ce 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ 
b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -1062,13 +1062,8 @@ public void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator ca leafValues: new[] { Intercept, Intercept }); ensemble.AddTree(interceptTree); - var ini = ensemble.ToTreeEnsembleIni( - new FeaturesToContentMap(schema), - string.Empty, - appendFeatureGain: true, - includeZeroGainFeatures: false); - - writer.Write(ini); + FastTreeIniFormatUtils.SaveTreeEnsembleAsIni( + Host, ensemble, writer, schema, calibrator, string.Empty, false, false); } // GAM bins should be converted to balanced trees / binary search trees diff --git a/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs b/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs index a062a4aede..74e5be040d 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestIniModels.cs @@ -9,7 +9,9 @@ using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.Internal.Calibration; using Microsoft.ML.Runtime.Internal.Utilities; +using Microsoft.ML.Trainers.FastTree; using Microsoft.ML.Runtime.Tools; using Xunit; using Xunit.Abstractions; @@ -519,8 +521,7 @@ public TestIniModels(ITestOutputHelper output) : base(output) [Fact] public void TestGamRegressionIni() { - var mlContext = new MLContext(seed: 0, conc: 0); - + var mlContext = new MLContext(seed: 0); var idv = mlContext.Data.CreateTextReader( new TextLoader.Arguments() { @@ -550,27 +551,55 @@ public void TestGamRegressionIni() // Getting parity results from maml.exe: // maml.exe ini ini=model.ini out=model_ini.zip data=breast-cancer.txt loader=TextLoader{col=Label:R4:0 col=Features:R4:1-9} xf=NAHandleTransform{col=Features slot=- ind=-} kind=Regression - var expectedResults = new RegressionMetrics( - l1: 0.093256807643323947, - l2: 0.025707474358979077, - rms: 0.16033550560926635, - rSquared: 0.88620288753853549, - lossFunction: 0.025707474380004879); - - Assert.Equal(results.L1, expectedResults.L1); - Assert.Equal(results.L2, expectedResults.L2); - Assert.Equal(results.Rms, expectedResults.Rms); - Assert.Equal(results.RSquared, expectedResults.RSquared); - Assert.Equal(results.LossFn, expectedResults.LossFn); + Assert.Equal(0.093256807643323947, results.L1); + Assert.Equal(0.025707474358979077, results.L2); + Assert.Equal(0.16033550560926635, results.Rms); + Assert.Equal(0.88620288753853549, results.RSquared); } - //private void AssertEqual(double v1, double v2) - //{ - // //Assert.Equal(v1, v2, 5); - // Assert.Equal(v1, v2); + [Fact] + public void TestGamBinaryClassificationIni() + { + var mlContext = new MLContext(seed: 0); + var idv = mlContext.Data.CreateTextReader( + new TextLoader.Arguments() + { + HasHeader = false, + Column = new[] + { + new TextLoader.Column("Label", DataKind.BL, 0), + new TextLoader.Column("Features", DataKind.R4, 1, 9) + } + }).Read(GetDataPath("breast-cancer.txt")); - //} + var pipeline = mlContext.Transforms.ReplaceMissingValues("Features") + .Append(mlContext.BinaryClassification.Trainers.GeneralizedAdditiveModels()); + var model = pipeline.Fit(idv); + var data = model.Transform(idv); + + var roleMappedSchema = new RoleMappedSchema(data.Schema, false, + new KeyValuePair(RoleMappedSchema.ColumnRole.Feature, "Features"), + new KeyValuePair(RoleMappedSchema.ColumnRole.Label, "Label")); + + var calibratedPredictor = model.LastTransformer.Model as CalibratedPredictor; + var predictor = calibratedPredictor.SubPredictor as BinaryClassificationGamPredictor; + string modelIniPath = 
GetOutputPath(FullTestName + "-model.ini"); + using (Stream iniStream = File.Create(modelIniPath)) + using (StreamWriter iniWriter = Utils.OpenWriter(iniStream)) + predictor.SaveAsIni(iniWriter, roleMappedSchema, calibratedPredictor.Calibrator); + + var results = mlContext.BinaryClassification.Evaluate(data); + + // Getting parity results from maml.exe: + // maml.exe ini ini=model.ini out=model_ini.zip data=breast-cancer.txt loader=TextLoader{col=Label:R4:0 col=Features:R4:1-9} xf=NAHandleTransform{col=Features slot=- ind=-} kind=Binary + Assert.Equal(0.99545199224483139, results.Auc); + Assert.Equal(0.96995708154506433, results.Accuracy); + Assert.Equal(0.95081967213114749, results.PositivePrecision); + Assert.Equal(0.96265560165975106, results.PositiveRecall); + Assert.Equal(0.95670103092783509, results.F1Score); + Assert.Equal(0.11594021906091197, results.LogLoss); + } } } From e08ec6c01ff5eb17486278c766e4b7ef89d0e456 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Wed, 19 Dec 2018 11:12:07 -0800 Subject: [PATCH 13/20] Removed SplitGain lines. --- src/Microsoft.ML.FastTree/FastTree.cs | 14 +++++++++----- src/Microsoft.ML.FastTree/GamTrainer.cs | 14 ++++++++++++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs index c70926b6e4..18cbfaeb5d 100644 --- a/src/Microsoft.ML.FastTree/FastTree.cs +++ b/src/Microsoft.ML.FastTree/FastTree.cs @@ -2985,8 +2985,13 @@ void ICanSaveInTextFormat.SaveAsText(TextWriter writer, RoleMappedSchema schema) /// void ICanSaveInIniFormat.SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator calibrator) { - FastTreeIniFormatUtils.SaveTreeEnsembleAsIni(Host, TrainedEnsemble, writer, schema, calibrator, + Host.CheckValue(writer, nameof(writer)); + Host.CheckValue(schema, nameof(schema)); + Host.CheckValueOrNull(calibrator); + + var ini = FastTreeIniFormatUtils.TreeEnsembleToIni(Host, TrainedEnsemble, schema, calibrator, InnerArgs, appendFeatureGain: true, includeZeroGainFeatures: false); + writer.WriteLine(ini); } JToken ISingleCanSavePfa.SaveAsPfa(BoundPfaContext ctx, JToken input) @@ -3374,19 +3379,18 @@ public TreeNode(Dictionary keyValues) } internal static class FastTreeIniFormatUtils { - public static void SaveTreeEnsembleAsIni( - IHost host, TreeEnsemble ensemble, TextWriter writer, RoleMappedSchema schema, ICalibrator calibrator, + public static string TreeEnsembleToIni( + IHost host, TreeEnsemble ensemble, RoleMappedSchema schema, ICalibrator calibrator, string trainingParams, bool appendFeatureGain, bool includeZeroGainFeatures) { host.CheckValue(ensemble, nameof(ensemble)); - host.CheckValue(writer, nameof(writer)); host.CheckValue(schema, nameof(schema)); host.CheckValueOrNull(calibrator); string ensembleIni = ensemble.ToTreeEnsembleIni(new FeaturesToContentMap(schema), trainingParams, appendFeatureGain, includeZeroGainFeatures); ensembleIni = AddCalibrationToIni(host, ensembleIni, calibrator); - writer.WriteLine(ensembleIni); + return ensembleIni; } /// diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index 9dd59463ce..aae0497190 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -1032,6 +1032,10 @@ private void GetFeatureContributions(in VBuffer features, ref VBuffer !line.StartsWith("SplitGain=")); + ini = string.Join("\n", goodLines); + writer.WriteLine(ini); } // GAM bins should be converted to balanced trees / binary search trees From 
30e20be2dac0fd2e01271c8ce1d6854ba1430423 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Wed, 19 Dec 2018 11:14:34 -0800 Subject: [PATCH 14/20] Reverted changes to program.cs --- docs/samples/Microsoft.ML.Samples/Program.cs | 95 +------------------- 1 file changed, 2 insertions(+), 93 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index f53a7c9345..d64bdb55a1 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -1,103 +1,12 @@ -using Microsoft.ML.Runtime; -using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.Training; -using Microsoft.ML.Samples.Dynamic; -using System.Collections.Generic; -using System; -using System.IO; -using Microsoft.ML.Runtime.Tools; -using Microsoft.ML.Runtime.Internal.Utilities; -using System.Linq; +using Microsoft.ML.Samples.Dynamic; namespace Microsoft.ML.Samples { internal static class Program { - static IHostEnvironment Host = new MLContext(); - static void Main(string[] args) { - //MakeBinarySearchTree(10); - TestGam(); - } - - - private static (int[], int[], int[]) MakeBinarySearchTree(int numInternalNodes) - { - var binIndices = Enumerable.Range(0, numInternalNodes).ToArray(); - var bstIndices = new List(); - var lteChild = new List(); - var gtChild = new List(); - var internalNodeId = numInternalNodes; - - MakeBinarySearchTreeRecursive(binIndices, 0, binIndices.Length - 1, bstIndices, lteChild, gtChild, ref internalNodeId); - var ret = (bstIndices.ToArray(), lteChild.ToArray(), gtChild.ToArray()); - return ret; - } - - private static int MakeBinarySearchTreeRecursive( - int[] array, int lower, int upper, - List bstIndices, List lteChild, List gtChild, ref int internalNodeId) - { - if (lower > upper) - { - // Base case: we've reached a leaf node - Assert(lower == upper + 1); - return lower + 100; - } - else - { - var mid = (lower + upper) / 2; - var left = MakeBinarySearchTreeRecursive( - array, lower, mid - 1, bstIndices, lteChild, gtChild, ref internalNodeId); - var right = MakeBinarySearchTreeRecursive( - array, mid + 1, upper, bstIndices, lteChild, gtChild, ref internalNodeId); - bstIndices.Insert(0, array[mid]); - lteChild.Insert(0, left); - gtChild.Insert(0, right); - return --internalNodeId; - } - } - - private static void Assert(bool v) - { - if (!v) - throw new NotImplementedException(); - } - - private static void TestGam() - { - var mlContext = new MLContext(seed: 0, conc: 0); - - var idv = mlContext.Data.CreateTextReader( - new TextLoader.Arguments() - { - HasHeader = false, - Column = new[] - { - new TextLoader.Column("Label", DataKind.R4, 0), - new TextLoader.Column("F1", DataKind.R4, 1), - new TextLoader.Column("F3", DataKind.R4, 3), - new TextLoader.Column("F6", DataKind.R4, 6), - new TextLoader.Column("F7", DataKind.R4, 7), - new TextLoader.Column("F9", DataKind.R4, 9), - } - }).Read(@"F:\temp\ini\data\breast-cancer-noNan.txt"); - - var pipeline = mlContext.Transforms.Concatenate("Features", "F1", "F9", "F7", "F6") - .Append(mlContext.Regression.Trainers.GeneralizedAdditiveModels()); - - var model = pipeline.Fit(idv); - var data = model.Transform(idv); - - var roleMappedSchema = new RoleMappedSchema(data.Schema, false, - new KeyValuePair(RoleMappedSchema.ColumnRole.Feature, "Features"), - new KeyValuePair(RoleMappedSchema.ColumnRole.Label, "Label")); - - using (StreamWriter writer = new StreamWriter(@"F:\temp\ini\model2.ini")) - model.LastTransformer.Model.SaveAsIni(writer, roleMappedSchema); - - var 
results = mlContext.Regression.Evaluate(data); + TensorFlowTransformExample.TensorFlowScoringSample(); } } } From 81fcdec619ecca096f56d1645c075727f4c226ba Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Wed, 19 Dec 2018 11:58:29 -0800 Subject: [PATCH 15/20] Minor touch-ups. --- src/Microsoft.ML.FastTree/GamTrainer.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index aae0497190..bb8fbb3432 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -1049,11 +1049,11 @@ public void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator ca var splitFeatures = Enumerable.Repeat(featureIndex, numInternalNodes).ToArray(); var (treeThresholds, lteChild, gtChild) = CreateBalancedTree(numInternalNodes, binThresholds); - var tree = CreateRegressionTree(numLeaves, splitFeatures, treeThresholds, lteChild, gtChild.ToArray(), effects); + var tree = CreateRegressionTree(numLeaves, splitFeatures, treeThresholds, lteChild, gtChild, effects); ensemble.AddTree(tree); } - // Tried adding the intercept as the bias term for the final ini aggregator, + // Tried adding the intercept as the bias term in the final ini aggregator, // but that didn't seem to have any effects during testing. // Adding the intercept as a dummy tree with the output values being the model intercept, // works for reaching parity. From 39eb42942245c9402ea81a7d3096c1431722f6c6 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Wed, 26 Dec 2018 12:39:06 -0800 Subject: [PATCH 16/20] Fixed the build errors --- src/Microsoft.ML.FastTree/GamTrainer.cs | 4 ++-- test/Microsoft.ML.Predictor.Tests/TestIniModels.cs | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index 9702269510..f965fd2c34 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -1,7 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. - +using Microsoft.ML.Calibrator; using Microsoft.ML.Core.Data; using Microsoft.ML.Data; using Microsoft.ML.Runtime; @@ -1031,7 +1031,7 @@ private void GetFeatureContributions(in VBuffer features, ref VBuffer(RoleMappedSchema.ColumnRole.Label, "Label")); var calibratedPredictor = model.LastTransformer.Model as CalibratedPredictor; - var predictor = calibratedPredictor.SubPredictor as BinaryClassificationGamPredictor; + ICanSaveInIniFormat predictor = calibratedPredictor.SubPredictor as BinaryClassificationGamPredictor; string modelIniPath = GetOutputPath(FullTestName + "-model.ini"); using (Stream iniStream = File.Create(modelIniPath)) From 1e0e3fa5f12221369fa35c2cb0e271318400ec9d Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Fri, 28 Dec 2018 10:33:23 -0800 Subject: [PATCH 17/20] Adressed PR comments about removing extra checks. 
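
The review feedback addressed here is the usual check-once pattern: the public SaveAsIni entry points keep their argument validation, and the shared helper they call does not repeat it. A small standalone illustration of that pattern (hypothetical names, not the ML.NET IHost API):

    using System;
    using System.Diagnostics;
    using System.IO;

    internal static class CheckOnceSketch
    {
        // Public entry point: validate caller-supplied arguments once, up front.
        public static void SaveAsIni(TextWriter writer, string[] ensembleLines)
        {
            if (writer == null) throw new ArgumentNullException(nameof(writer));
            if (ensembleLines == null) throw new ArgumentNullException(nameof(ensembleLines));
            WriteCore(writer, ensembleLines);
        }

        // Shared helper: arguments were already validated at the boundary, so a
        // debug-time assertion replaces the duplicated runtime checks.
        private static void WriteCore(TextWriter writer, string[] ensembleLines)
        {
            Debug.Assert(writer != null && ensembleLines != null);
            foreach (var line in ensembleLines)
                writer.WriteLine(line);
        }
    }
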
--- src/Microsoft.ML.FastTree/FastTree.cs | 8 ++------ src/Microsoft.ML.FastTree/GamTrainer.cs | 5 ----- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs index 0acb600da3..d4f4683021 100644 --- a/src/Microsoft.ML.FastTree/FastTree.cs +++ b/src/Microsoft.ML.FastTree/FastTree.cs @@ -2985,12 +2985,9 @@ void ICanSaveInTextFormat.SaveAsText(TextWriter writer, RoleMappedSchema schema) void ICanSaveInIniFormat.SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator calibrator) { Host.CheckValue(writer, nameof(writer)); - Host.CheckValue(schema, nameof(schema)); - Host.CheckValueOrNull(calibrator); - - var ini = FastTreeIniFormatUtils.TreeEnsembleToIni(Host, TrainedEnsemble, schema, calibrator, + var ensembleIni = FastTreeIniFormatUtils.TreeEnsembleToIni(Host, TrainedEnsemble, schema, calibrator, InnerArgs, appendFeatureGain: true, includeZeroGainFeatures: false); - writer.WriteLine(ini); + writer.WriteLine(ensembleIni); } JToken ISingleCanSavePfa.SaveAsPfa(BoundPfaContext ctx, JToken input) @@ -3384,7 +3381,6 @@ public static string TreeEnsembleToIni( { host.CheckValue(ensemble, nameof(ensemble)); host.CheckValue(schema, nameof(schema)); - host.CheckValueOrNull(calibrator); string ensembleIni = ensemble.ToTreeEnsembleIni(new FeaturesToContentMap(schema), trainingParams, appendFeatureGain, includeZeroGainFeatures); diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index b749fad1e4..d8adb3c364 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -1032,9 +1032,6 @@ private void GetFeatureContributions(in VBuffer features, ref VBuffer Date: Fri, 28 Dec 2018 10:50:06 -0800 Subject: [PATCH 18/20] Moved ini utils to separate file. 
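Note (illustrative sketch, not part of the patch): FastTreeIniFileUtils.TreeEnsembleToIni, the helper moved by this patch, also appends a calibrator evaluator section when the model carries a PlattCalibrator, so a consumer of the ini can turn the raw ensemble score into a probability. The sketch below shows the usual Platt-scaling form of that mapping; the slope and offset values are placeholders for this example, not parameters read from any trained model.

using System;

internal static class PlattCalibrationSketch
{
    // Usual Platt-scaling form: probability = 1 / (1 + exp(slope * score + offset)).
    // With a negative slope, larger raw scores map to probabilities closer to 1.
    private static double PredictProbability(double rawScore, double slope, double offset)
        => 1.0 / (1.0 + Math.Exp(slope * rawScore + offset));

    private static void Main()
    {
        const double slope = -1.5, offset = 0.1;   // placeholder calibration parameters
        foreach (var score in new[] { -2.0, 0.0, 2.0 })
            Console.WriteLine($"score={score}: p={PredictProbability(score, slope, offset):F4}");
    }
}

For calibrators other than Platt, the helper falls back to writing a [TLCCalibration] section that only names the calibrator type, as the moved code below shows.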
--- src/Microsoft.ML.FastTree/FastTree.cs | 44 +-------------- src/Microsoft.ML.FastTree/GamTrainer.cs | 2 +- .../Utils/FastTreeIniFileUtils.cs | 53 +++++++++++++++++++ 3 files changed, 55 insertions(+), 44 deletions(-) create mode 100644 src/Microsoft.ML.FastTree/Utils/FastTreeIniFileUtils.cs diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs index d4f4683021..9c5e7ec4cd 100644 --- a/src/Microsoft.ML.FastTree/FastTree.cs +++ b/src/Microsoft.ML.FastTree/FastTree.cs @@ -2985,7 +2985,7 @@ void ICanSaveInTextFormat.SaveAsText(TextWriter writer, RoleMappedSchema schema) void ICanSaveInIniFormat.SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator calibrator) { Host.CheckValue(writer, nameof(writer)); - var ensembleIni = FastTreeIniFormatUtils.TreeEnsembleToIni(Host, TrainedEnsemble, schema, calibrator, + var ensembleIni = FastTreeIniFileUtils.TreeEnsembleToIni(Host, TrainedEnsemble, schema, calibrator, InnerArgs, appendFeatureGain: true, includeZeroGainFeatures: false); writer.WriteLine(ensembleIni); } @@ -3373,46 +3373,4 @@ public TreeNode(Dictionary keyValues) public Dictionary KeyValues { get; } } } - internal static class FastTreeIniFormatUtils - { - public static string TreeEnsembleToIni( - IHost host, TreeEnsemble ensemble, RoleMappedSchema schema, ICalibrator calibrator, - string trainingParams, bool appendFeatureGain, bool includeZeroGainFeatures) - { - host.CheckValue(ensemble, nameof(ensemble)); - host.CheckValue(schema, nameof(schema)); - - string ensembleIni = ensemble.ToTreeEnsembleIni(new FeaturesToContentMap(schema), - trainingParams, appendFeatureGain, includeZeroGainFeatures); - ensembleIni = AddCalibrationToIni(host, ensembleIni, calibrator); - return ensembleIni; - } - - /// - /// Get the calibration summary in INI format - /// - private static string AddCalibrationToIni(IHost host, string ini, ICalibrator calibrator) - { - host.AssertValue(ini); - host.AssertValueOrNull(calibrator); - - if (calibrator == null) - return ini; - - if (calibrator is PlattCalibrator) - { - string calibratorEvaluatorIni = IniFileUtils.GetCalibratorEvaluatorIni(ini, calibrator as PlattCalibrator); - return IniFileUtils.AddEvaluator(ini, calibratorEvaluatorIni); - } - else - { - StringBuilder newSection = new StringBuilder(); - newSection.AppendLine(); - newSection.AppendLine(); - newSection.AppendLine("[TLCCalibration]"); - newSection.AppendLine("Type=" + calibrator.GetType().Name); - return ini + newSection; - } - } - } } diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index d8adb3c364..cd250b5216 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -1060,7 +1060,7 @@ void ICanSaveInIniFormat.SaveAsIni(TextWriter writer, RoleMappedSchema schema, I leafValues: new[] { Intercept, Intercept }); ensemble.AddTree(interceptTree); - var ini = FastTreeIniFormatUtils.TreeEnsembleToIni( + var ini = FastTreeIniFileUtils.TreeEnsembleToIni( Host, ensemble, schema, calibrator, string.Empty, false, false); // Remove the SplitGain values which are all 0. 
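Note (illustrative sketch, not part of the patch): the comment above refers to post-processing the generated ini so that the SplitGain entries, which are all zero for these single-feature trees, are dropped from the output. The snippet below is only a guess at what such a line filter could look like under that assumption; StripZeroSplitGains is an invented name and this is not the code the patch uses.

using System;
using System.Linq;

internal static class IniPostProcessSketch
{
    // Drop "SplitGain=..." lines whose values are all zero; pass every other line through.
    private static string StripZeroSplitGains(string ini)
    {
        var kept = ini.Split('\n').Where(line =>
        {
            if (!line.StartsWith("SplitGain=", StringComparison.Ordinal))
                return true;
            var values = line.Substring("SplitGain=".Length)
                .Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
            return values.Any(v => double.TryParse(v, out var d) && d != 0);
        });
        return string.Join("\n", kept);
    }

    private static void Main()
    {
        const string ini = "NumInternalNodes=3\nSplitGain=0 0 0\nThreshold=1 2.5 4";
        Console.WriteLine(StripZeroSplitGains(ini));   // the SplitGain line is removed
    }
}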
diff --git a/src/Microsoft.ML.FastTree/Utils/FastTreeIniFileUtils.cs b/src/Microsoft.ML.FastTree/Utils/FastTreeIniFileUtils.cs new file mode 100644 index 0000000000..f6783c1c54 --- /dev/null +++ b/src/Microsoft.ML.FastTree/Utils/FastTreeIniFileUtils.cs @@ -0,0 +1,53 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Microsoft.ML.Calibrator; +using Microsoft.ML.Data; +using Microsoft.ML.Internal.Calibration; +using Microsoft.ML.Internal.Utilities; + +namespace Microsoft.ML.Trainers.FastTree.Internal +{ + internal static class FastTreeIniFileUtils + { + public static string TreeEnsembleToIni( + IHost host, TreeEnsemble ensemble, RoleMappedSchema schema, ICalibrator calibrator, + string trainingParams, bool appendFeatureGain, bool includeZeroGainFeatures) + { + host.CheckValue(ensemble, nameof(ensemble)); + host.CheckValue(schema, nameof(schema)); + + string ensembleIni = ensemble.ToTreeEnsembleIni(new FeaturesToContentMap(schema), + trainingParams, appendFeatureGain, includeZeroGainFeatures); + ensembleIni = AddCalibrationToIni(host, ensembleIni, calibrator); + return ensembleIni; + } + + /// + /// Get the calibration summary in INI format + /// + private static string AddCalibrationToIni(IHost host, string ini, ICalibrator calibrator) + { + host.AssertValue(ini); + host.AssertValueOrNull(calibrator); + + if (calibrator == null) + return ini; + + if (calibrator is PlattCalibrator) + { + string calibratorEvaluatorIni = IniFileUtils.GetCalibratorEvaluatorIni(ini, calibrator as PlattCalibrator); + return IniFileUtils.AddEvaluator(ini, calibratorEvaluatorIni); + } + else + { + StringBuilder newSection = new StringBuilder(); + newSection.AppendLine(); + newSection.AppendLine(); + newSection.AppendLine("[TLCCalibration]"); + newSection.AppendLine("Type=" + calibrator.GetType().Name); + return ini + newSection; + } + } + } +} From 8022e33d4ed2bc208f64377719d68f7da592f859 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Fri, 28 Dec 2018 11:26:35 -0800 Subject: [PATCH 19/20] Addressed PR comments. --- src/Microsoft.ML.FastTree/GamTrainer.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index cd250b5216..d0e431d87b 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -1107,6 +1107,7 @@ private int CreateBalancedTreeRecursive(int lower, int upper, // This is postorder traversal algorithm and populating the internalNodeIndices/lte/gt lists in reverse. // Preorder is the only option, because we need the results of both left/right recursions for populating the lists. // As a result, lists are populated in reverse, because the root node should be the first item on the lists. + // Binary search tree algorithm (recursive splitting to half) is used for creating balanced tree. 
var mid = (lower + upper) / 2; var left = CreateBalancedTreeRecursive( lower, mid - 1, internalNodeIndices, lteChild, gtChild, ref internalNodeId); From 2728c791734be0fe81956603044b1fa81445c0b3 Mon Sep 17 00:00:00 2001 From: Shahab Moradi Date: Wed, 2 Jan 2019 12:16:33 -0800 Subject: [PATCH 20/20] Addressed Artidoro's comments --- src/Microsoft.ML.FastTree/GamTrainer.cs | 1 + src/Microsoft.ML.FastTree/Utils/FastTreeIniFileUtils.cs | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.FastTree/GamTrainer.cs b/src/Microsoft.ML.FastTree/GamTrainer.cs index d0e431d87b..3c6edf0acc 100644 --- a/src/Microsoft.ML.FastTree/GamTrainer.cs +++ b/src/Microsoft.ML.FastTree/GamTrainer.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. + using System; using System.Collections.Generic; using System.IO; diff --git a/src/Microsoft.ML.FastTree/Utils/FastTreeIniFileUtils.cs b/src/Microsoft.ML.FastTree/Utils/FastTreeIniFileUtils.cs index f6783c1c54..1a2a35136b 100644 --- a/src/Microsoft.ML.FastTree/Utils/FastTreeIniFileUtils.cs +++ b/src/Microsoft.ML.FastTree/Utils/FastTreeIniFileUtils.cs @@ -1,5 +1,7 @@ -using System; -using System.Collections.Generic; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + using System.Text; using Microsoft.ML.Calibrator; using Microsoft.ML.Data;