From add0cc8b83eed03581562ebb6dd8855a82ffecd1 Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Tue, 15 Jan 2019 14:56:38 -0800 Subject: [PATCH] Adding XML Docs to public functions; making accessors public and sealing classes; fixing a serialization error; fixing an error in the sparsity calculation. --- .../GamClassification.cs | 13 +++++++--- .../GamModelParameters.cs | 14 ++++++----- src/Microsoft.ML.FastTree/GamRegression.cs | 25 +++++++++++++++---- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/src/Microsoft.ML.FastTree/GamClassification.cs b/src/Microsoft.ML.FastTree/GamClassification.cs index 0bb84597e1b..bd27d5ed5ba 100644 --- a/src/Microsoft.ML.FastTree/GamClassification.cs +++ b/src/Microsoft.ML.FastTree/GamClassification.cs @@ -156,12 +156,15 @@ protected override SchemaShape.Column[] GetOutputColumnsCore(SchemaShape inputSc } } - public class BinaryClassificationGamModelParameters : GamModelParametersBase, IPredictorProducing + /// + /// The model parameters class for Binary Classification GAMs + /// + public sealed class BinaryClassificationGamModelParameters : GamModelParametersBase, IPredictorProducing { internal const string LoaderSignature = "BinaryClassGamPredictor"; public override PredictionKind PredictionKind => PredictionKind.BinaryClassification; - internal BinaryClassificationGamModelParameters(IHostEnvironment env, + public BinaryClassificationGamModelParameters(IHostEnvironment env, double[][] binUpperBounds, double[][] binEffects, double intercept, int[] featureToInputMap) : base(env, LoaderSignature, binUpperBounds, binEffects, intercept, featureToInputMap) { } @@ -172,8 +175,10 @@ private static VersionInfo GetVersionInfo() { return new VersionInfo( modelSignature: "GAM BINP", - verWrittenCur: 0x00010001, - verReadableCur: 0x00010001, + // verWrittenCur: 0x00010001, // Initial + // verWrittenCur: 0x00010001, // Added Intercept but collided from release 0.6-0.9 + verWrittenCur: 0x00020001, // Added Intercept (version 
revved to address collisions) + verReadableCur: 0x00020001, verWeCanReadBack: 0x00010001, loaderSignature: LoaderSignature, loaderAssemblyName: typeof(BinaryClassificationGamModelParameters).Assembly.FullName); diff --git a/src/Microsoft.ML.FastTree/GamModelParameters.cs b/src/Microsoft.ML.FastTree/GamModelParameters.cs index 140c12ad034..0756450ab64 100644 --- a/src/Microsoft.ML.FastTree/GamModelParameters.cs +++ b/src/Microsoft.ML.FastTree/GamModelParameters.cs @@ -21,6 +21,9 @@ namespace Microsoft.ML.Trainers.FastTree { + /// + /// The base class for GAM Model Parameters. + /// public abstract class GamModelParametersBase : ModelParametersBase, IValueMapper, ICalculateFeatureContribution, IFeatureContributionMapper, ICanSaveInTextFormat, ICanSaveSummary, ICanSaveInIniFormat { @@ -62,7 +65,7 @@ private protected GamModelParametersBase(IHostEnvironment env, string name, _numFeatures = binEffects.Length; // For sparse inputs we have a fast lookup - _binsAtAllZero = new int[_numFeatures]; // All 0s at 0 -- bug? + _binsAtAllZero = new int[_numFeatures]; _valueAtAllZero = 0; // Walk through each feature and perform checks / updates @@ -73,7 +76,7 @@ private protected GamModelParametersBase(IHostEnvironment env, string name, Host.CheckParam(binUpperBounds[i].Length == binEffects[i].Length, nameof(binEffects), "Array contained wrong number of effect values"); // Update the value at zero - _valueAtAllZero += _binEffects[i][_binsAtAllZero[i]]; + _valueAtAllZero += GetBinEffect(i, 0, out _binsAtAllZero[i]); } _featureMap = featureToInputMap; @@ -104,6 +107,9 @@ protected GamModelParametersBase(IHostEnvironment env, string name, ModelLoadCon _inputLength = reader.ReadInt32(); Host.CheckDecode(_inputLength >= 0); Intercept = reader.ReadDouble(); + if (ctx.Header.ModelVerWritten == 0x00010001) + using (var ch = env.Start("GamWarningChannel")) + ch.Warning("GAMs models written prior to ML.NET 0.6 are loaded with an incorrect Intercept. 
For these models, subtract the value of the intercept from the prediction."); _binEffects = new double[_numFeatures][]; _binUpperBounds = new double[_numFeatures][]; @@ -116,10 +122,6 @@ protected GamModelParametersBase(IHostEnvironment env, string name, ModelLoadCon for (int i = 0; i < _numFeatures; i++) { _binUpperBounds[i] = reader.ReadDoubleArray(_binEffects[i].Length); - // Ideally should verify that the sum of these matches _baseOutput, - // but due to differences in JIT over time and other considerations, - // it's possible that the sum may change even in the absence of - // model corruption. _valueAtAllZero += GetBinEffect(i, 0, out _binsAtAllZero[i]); } int len = reader.ReadInt32(); diff --git a/src/Microsoft.ML.FastTree/GamRegression.cs b/src/Microsoft.ML.FastTree/GamRegression.cs index edd36ac3264..41addcc9f97 100644 --- a/src/Microsoft.ML.FastTree/GamRegression.cs +++ b/src/Microsoft.ML.FastTree/GamRegression.cs @@ -106,13 +106,26 @@ protected override SchemaShape.Column[] GetOutputColumnsCore(SchemaShape inputSc } } - public class RegressionGamModelParameters : GamModelParametersBase + /// + /// The model parameters class for Regression GAMs + /// + public sealed class RegressionGamModelParameters : GamModelParametersBase { internal const string LoaderSignature = "RegressionGamPredictor"; public override PredictionKind PredictionKind => PredictionKind.Regression; - internal RegressionGamModelParameters(IHostEnvironment env, - double[][] binUpperBounds, double[][] binEffects, double intercept, int[] featureToInputMap) + /// + /// Construct a new Regression GAM with the defined properties. + /// + /// The Host Environment + /// An array of arrays of bin-upper-bounds for each feature. + /// An array of arrays of effect sizes for each bin for each feature. + /// The intercept term for the model. Also referred to as the bias or the mean effect.
+ /// A map from the feature shape functions (as described by the binUpperBounds and BinEffects) + /// to the input feature. Used when multiple input features map to the same shape function. Leave null if all features have + /// a shape function. + public RegressionGamModelParameters(IHostEnvironment env, + double[][] binUpperBounds, double[][] binEffects, double intercept, int[] featureToInputMap = null) : base(env, LoaderSignature, binUpperBounds, binEffects, intercept, featureToInputMap) { } private RegressionGamModelParameters(IHostEnvironment env, ModelLoadContext ctx) @@ -122,8 +135,10 @@ private static VersionInfo GetVersionInfo() { return new VersionInfo( modelSignature: "GAM REGP", - verWrittenCur: 0x00010001, - verReadableCur: 0x00010001, + // verWrittenCur: 0x00010001, // Initial + // verWrittenCur: 0x00010001, // Added Intercept but collided from release 0.6-0.9 + verWrittenCur: 0x00020001, // Added Intercept (version revved to address collisions) + verReadableCur: 0x00020001, verWeCanReadBack: 0x00010001, loaderSignature: LoaderSignature, loaderAssemblyName: typeof(RegressionGamModelParameters).Assembly.FullName);