Public API for Tree predictors #1837

Merged (6 commits) on Dec 11, 2018
52 changes: 52 additions & 0 deletions docs/samples/Microsoft.ML.Samples/Dynamic/FastTreeRegression.cs
@@ -0,0 +1,52 @@
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.Data;
using System;
using System.Collections.Generic;
using System.Linq;

namespace Microsoft.ML.Samples.Dynamic
{
public class FastTreeRegressionExample
{
public static void FastTreeRegression()
{
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
// as well as the source of randomness.
var ml = new MLContext();

// Get a small dataset as an IEnumerable and convert it to an IDataView.
var data = SamplesUtils.DatasetUtils.GetInfertData();
var trainData = ml.CreateStreamingDataView(data);

// Preview of the data.
//
// Age Case Education Induced Parity PooledStratum RowNum ...
// 26 1 0-5yrs 1 6 3 1 ...
// 42 1 0-5yrs 1 1 1 2 ...
// 39 1 0-5yrs 2 6 4 3 ...
// 34 1 0-5yrs 2 4 2 4 ...
// 35 1 6-11yrs 1 3 32 5 ...

// A pipeline for concatenating the Parity and Induced columns together in the Features column.
// We will train a FastTreeRegression model with 1 tree on these two columns to predict Age.
string outputColumnName = "Features";
var pipeline = ml.Transforms.Concatenate(outputColumnName, new[] { "Parity", "Induced" })
.Append(ml.Regression.Trainers.FastTree(labelColumn: "Age", featureColumn: outputColumnName, numTrees: 1, numLeaves: 2, minDatapointsInLeaves: 1));

var model = pipeline.Fit(trainData);

// Get the trained model parameters.
var modelParams = model.LastTransformer.Model;

// Let's see where an example with Parity = 1 and Induced = 1 would end up in the single trained tree.
var testRow = new VBuffer<float>(2, new[] { 1.0f, 1.0f });
            // Use the path object to pass to GetLeaf, which will populate path with the IDs of the nodes from root to leaf.
List<int> path = default;
            // Get the ID of the leaf this example ends up in, in tree 0.
var leafID = modelParams.GetLeaf(0, in testRow, ref path);
// Get the leaf value for this leaf ID in tree 0.
var leafValue = modelParams.GetLeafValue(0, leafID);
Console.WriteLine("The leaf value in tree 0 is: " + leafValue);
}
}
}
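A small follow-on to the sample above (not part of the committed file): the path list that GetLeaf fills in can be inspected as well. This sketch assumes the path, leafID, and leafValue variables from the sample are still in scope.

            // Print the decision path: the internal node IDs visited from the root down to the chosen leaf.
            Console.WriteLine("Nodes visited in tree 0: " + string.Join(" -> ", path));
            Console.WriteLine("Leaf " + leafID + " has value " + leafValue);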
@@ -30,7 +30,7 @@ public static void FastTreeRegression()
var data = reader.Read(dataFile);

// The predictor that gets produced out of training
FastTreeRegressionPredictor pred = null;
FastTreeRegressionModelParameters pred = null;

// Create the estimator
var learningPipeline = reader.MakeNewEstimator()
@@ -30,7 +30,7 @@ public static void LightGbmRegression()
var (trainData, testData) = mlContext.Regression.TrainTestSplit(data, testFraction: 0.1);

// The predictor that gets produced out of training
LightGbmRegressionPredictor pred = null;
LightGbmRegressionModelParameters pred = null;

// Create the estimator
var learningPipeline = reader.MakeNewEstimator()
24 changes: 16 additions & 8 deletions src/Microsoft.ML.Data/Dirty/PredictorInterfaces.cs
@@ -38,7 +38,8 @@ public interface IParameterMixer<TOutput>
/// Predictor that can specialize for quantile regression. It will produce a <see cref="ISchemaBindableMapper"/>, given
/// an array of quantiles.
/// </summary>
public interface IQuantileRegressionPredictor
[BestFriend]
internal interface IQuantileRegressionPredictor
{
ISchemaBindableMapper CreateMapper(Double[] quantiles);
}
@@ -59,7 +60,8 @@ public interface IDistribution<out TResult>
}

// REVIEW: How should this quantile stuff work?
public interface IQuantileValueMapper
[BestFriend]
internal interface IQuantileValueMapper
{
ValueMapper<VBuffer<Float>, VBuffer<Float>> GetMapper(Float[] quantiles);
}
@@ -101,15 +103,17 @@ internal interface ICanSaveInTextFormat
/// <summary>
/// Predictors that can output themselves in the Bing ini format.
/// </summary>
public interface ICanSaveInIniFormat
[BestFriend]
internal interface ICanSaveInIniFormat
{
void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator calibrator = null);
}

/// <summary>
/// Predictors that can output Summary.
/// </summary>
public interface ICanSaveSummary
[BestFriend]
internal interface ICanSaveSummary
{
void SaveSummary(TextWriter writer, RoleMappedSchema schema);
}
@@ -119,15 +123,17 @@ public interface ICanSaveSummary
/// The content of value 'object' can be any type such as integer, float, string or an array of them.
It is up to the caller to check and decide how to consume the values.
/// </summary>
public interface ICanGetSummaryInKeyValuePairs
[BestFriend]
internal interface ICanGetSummaryInKeyValuePairs
{
/// <summary>
/// Gets model summary including model statistics (if exists) in key value pairs.
/// </summary>
IList<KeyValuePair<string, object>> GetSummaryInKeyValuePairs(RoleMappedSchema schema);
}

public interface ICanGetSummaryAsIRow
[BestFriend]
internal interface ICanGetSummaryAsIRow
{
Row GetSummaryIRowOrNull(RoleMappedSchema schema);

@@ -142,7 +148,8 @@ public interface ICanGetSummaryAsIDataView
/// <summary>
/// Predictors that can output themselves in C#/C++ code.
/// </summary>
public interface ICanSaveInSourceCode
[BestFriend]
internal interface ICanSaveInSourceCode
{
void SaveAsCode(TextWriter writer, RoleMappedSchema schema);
}
@@ -178,7 +185,8 @@ public interface IPredictorWithFeatureWeights<out TResult> : IHaveFeatureWeights
/// Interface for mapping input values to corresponding feature contributions.
/// This interface is commonly implemented by predictors.
/// </summary>
public interface IFeatureContributionMapper : IPredictor
[BestFriend]
internal interface IFeatureContributionMapper : IPredictor
{
/// <summary>
/// Get a delegate for mapping Contributions to Features.
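The recurring change in PredictorInterfaces.cs is the same throughout: interfaces that are implementation details move from public to internal, and the [BestFriend] attribute keeps them usable from the designated friend assemblies. A minimal sketch of the pattern is below; the interface name is illustrative and not taken from the PR.

    // Before: public interface ICanSaveInCustomFormat
    // After: hidden from the public API surface, but still reachable from friend assemblies.
    [BestFriend]
    internal interface ICanSaveInCustomFormat
    {
        void SaveAsCustomFormat(TextWriter writer, RoleMappedSchema schema);
    }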
24 changes: 12 additions & 12 deletions src/Microsoft.ML.Data/Prediction/Calibrator.cs
@@ -152,7 +152,7 @@ protected CalibratedPredictorBase(IHostEnvironment env, string name, IPredictorP
Calibrator = calibrator;
}

public void SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator calibrator = null)
void ICanSaveInIniFormat.SaveAsIni(TextWriter writer, RoleMappedSchema schema, ICalibrator calibrator)
{
Host.Check(calibrator == null, "Too many calibrators.");
var saver = SubPredictor as ICanSaveInIniFormat;
@@ -167,15 +167,15 @@ void ICanSaveInTextFormat.SaveAsText(TextWriter writer, RoleMappedSchema schema)
saver.SaveAsText(writer, schema);
}

public void SaveAsCode(TextWriter writer, RoleMappedSchema schema)
void ICanSaveInSourceCode.SaveAsCode(TextWriter writer, RoleMappedSchema schema)
{
// REVIEW: What about the calibrator?
var saver = SubPredictor as ICanSaveInSourceCode;
if (saver != null)
saver.SaveAsCode(writer, schema);
}

public void SaveSummary(TextWriter writer, RoleMappedSchema schema)
void ICanSaveSummary.SaveSummary(TextWriter writer, RoleMappedSchema schema)
{
// REVIEW: What about the calibrator?
var saver = SubPredictor as ICanSaveSummary;
@@ -184,7 +184,7 @@ public void SaveSummary(TextWriter writer, RoleMappedSchema schema)
}

///<inheritdoc/>
public IList<KeyValuePair<string, object>> GetSummaryInKeyValuePairs(RoleMappedSchema schema)
IList<KeyValuePair<string, object>> ICanGetSummaryInKeyValuePairs.GetSummaryInKeyValuePairs(RoleMappedSchema schema)
{
// REVIEW: What about the calibrator?
var saver = SubPredictor as ICanGetSummaryInKeyValuePairs;
@@ -221,9 +221,9 @@ public abstract class ValueMapperCalibratedPredictorBase : CalibratedPredictorBa
private readonly IValueMapper _mapper;
private readonly IFeatureContributionMapper _featureContribution;

public ColumnType InputType => _mapper.InputType;
public ColumnType OutputType => _mapper.OutputType;
public ColumnType DistType => NumberType.Float;
ColumnType IValueMapper.InputType => _mapper.InputType;
ColumnType IValueMapper.OutputType => _mapper.OutputType;
ColumnType IValueMapperDist.DistType => NumberType.Float;
bool ICanSavePfa.CanSavePfa => (_mapper as ICanSavePfa)?.CanSavePfa == true;
bool ICanSaveOnnx.CanSaveOnnx(OnnxContext ctx) => (_mapper as ICanSaveOnnx)?.CanSaveOnnx(ctx) == true;

@@ -239,16 +239,16 @@ protected ValueMapperCalibratedPredictorBase(IHostEnvironment env, string name,
_featureContribution = predictor as IFeatureContributionMapper;
}

public ValueMapper<TIn, TOut> GetMapper<TIn, TOut>()
ValueMapper<TIn, TOut> IValueMapper.GetMapper<TIn, TOut>()
{
return _mapper.GetMapper<TIn, TOut>();
}

public ValueMapper<TIn, TOut, TDist> GetMapper<TIn, TOut, TDist>()
ValueMapper<TIn, TOut, TDist> IValueMapperDist.GetMapper<TIn, TOut, TDist>()
{
Host.Check(typeof(TOut) == typeof(Float));
Host.Check(typeof(TDist) == typeof(Float));
var map = GetMapper<TIn, Float>();
var map = ((IValueMapper)this).GetMapper<TIn, Float>();
ValueMapper<TIn, Float, Float> del =
(in TIn src, ref Float score, ref Float prob) =>
{
@@ -258,7 +258,7 @@ public ValueMapper<TIn, TOut, TDist> GetMapper<TIn, TOut, TDist>()
return (ValueMapper<TIn, TOut, TDist>)(Delegate)del;
}

public ValueMapper<TSrc, VBuffer<Float>> GetFeatureContributionMapper<TSrc, TDst>(int top, int bottom, bool normalize)
ValueMapper<TSrc, VBuffer<Float>> IFeatureContributionMapper.GetFeatureContributionMapper<TSrc, TDst>(int top, int bottom, bool normalize)
{
// REVIEW: checking this a bit too late.
Host.Check(_featureContribution != null, "Predictor does not implement IFeatureContributionMapper");
@@ -682,7 +682,7 @@ public ISchemaBoundMapper Bind(IHostEnvironment env, RoleMappedSchema schema)
return new Bound(Host, this, schema);
}

public ValueMapper<TSrc, VBuffer<float>> GetFeatureContributionMapper<TSrc, TDst>(int top, int bottom, bool normalize)
ValueMapper<TSrc, VBuffer<float>> IFeatureContributionMapper.GetFeatureContributionMapper<TSrc, TDst>(int top, int bottom, bool normalize)
{
// REVIEW: checking this a bit too late.
Host.Check(_featureContribution != null, "Predictor does not implement " + nameof(IFeatureContributionMapper));
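The Calibrator.cs edits follow a second pattern: members that used to satisfy an interface as public methods become explicit interface implementations, so they are reachable only through an interface reference and no longer widen the class's public surface. A minimal, self-contained sketch of that C# pattern follows; the types are illustrative placeholders, not the PR's.

    using System.IO;

    internal interface ISummarizableExample
    {
        void SaveSummary(TextWriter writer);
    }

    internal sealed class ExamplePredictor : ISummarizableExample
    {
        // Before: public void SaveSummary(TextWriter writer) { ... }
        // After: callable only as ((ISummarizableExample)predictor).SaveSummary(writer).
        void ISummarizableExample.SaveSummary(TextWriter writer)
            => writer.WriteLine("model summary");
    }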
@@ -171,7 +171,7 @@ private ValueGetter<TDst> GetValueGetter<TSrc, TDst>(Row input, int colSrc)
};
}

public void SaveSummary(TextWriter writer, RoleMappedSchema schema)
void ICanSaveSummary.SaveSummary(TextWriter writer, RoleMappedSchema schema)
{
var summarySaver = Predictor as ICanSaveSummary;
if (summarySaver == null)
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Ensemble/PipelineEnsemble.cs
@@ -557,7 +557,7 @@ public static SchemaBindablePipelineEnsembleBase Create(IHostEnvironment env, Mo

public abstract ISchemaBoundMapper Bind(IHostEnvironment env, RoleMappedSchema schema);

public void SaveSummary(TextWriter writer, RoleMappedSchema schema)
void ICanSaveSummary.SaveSummary(TextWriter writer, RoleMappedSchema schema)
{
for (int i = 0; i < PredictorModels.Length; i++)
{
@@ -688,7 +688,7 @@ private static bool AreEqual<T>(in VBuffer<T> v1, in VBuffer<T> v2)
/// - If neither of those interfaces is implemented then the value is a string containing the name of the type of model.
/// </summary>
/// <returns></returns>
public IList<KeyValuePair<string, object>> GetSummaryInKeyValuePairs(RoleMappedSchema schema)
IList<KeyValuePair<string, object>> ICanGetSummaryInKeyValuePairs.GetSummaryInKeyValuePairs(RoleMappedSchema schema)
{
Host.CheckValueOrNull(schema);

32 changes: 17 additions & 15 deletions src/Microsoft.ML.Ensemble/Trainer/EnsembleDistributionPredictor.cs
@@ -24,9 +24,9 @@ namespace Microsoft.ML.Runtime.Ensemble
public sealed class EnsembleDistributionPredictor : EnsemblePredictorBase<TDistPredictor, Single>,
TDistPredictor, IValueMapperDist
{
public const string UserName = "Ensemble Distribution Executor";
public const string LoaderSignature = "EnsemDbExec";
public const string RegistrationName = "EnsembleDistributionPredictor";
internal const string UserName = "Ensemble Distribution Executor";
internal const string LoaderSignature = "EnsemDbExec";
internal const string RegistrationName = "EnsembleDistributionPredictor";

private static VersionInfo GetVersionInfo()
{
@@ -45,9 +45,11 @@ private static VersionInfo GetVersionInfo()
private readonly Median _probabilityCombiner;
private readonly IValueMapperDist[] _mappers;

public ColumnType InputType { get; }
public ColumnType OutputType => NumberType.Float;
public ColumnType DistType => NumberType.Float;
private readonly ColumnType _inputType;

ColumnType IValueMapper.InputType => _inputType;
ColumnType IValueMapper.OutputType => NumberType.Float;
ColumnType IValueMapperDist.DistType => NumberType.Float;

public override PredictionKind PredictionKind { get; }

@@ -57,7 +59,7 @@ internal EnsembleDistributionPredictor(IHostEnvironment env, PredictionKind kind
{
PredictionKind = kind;
_probabilityCombiner = new Median(env);
InputType = InitializeMappers(out _mappers);
_inputType = InitializeMappers(out _mappers);
ComputeAveragedWeights(out _averagedWeights);
}

@@ -66,7 +68,7 @@ private EnsembleDistributionPredictor(IHostEnvironment env, ModelLoadContext ctx
{
PredictionKind = (PredictionKind)ctx.Reader.ReadInt32();
_probabilityCombiner = new Median(env);
InputType = InitializeMappers(out _mappers);
_inputType = InitializeMappers(out _mappers);
ComputeAveragedWeights(out _averagedWeights);
}

@@ -101,7 +103,7 @@ private bool IsValid(IValueMapperDist mapper)
&& mapper.DistType == NumberType.Float;
}

public static EnsembleDistributionPredictor Create(IHostEnvironment env, ModelLoadContext ctx)
private static EnsembleDistributionPredictor Create(IHostEnvironment env, ModelLoadContext ctx)
{
Contracts.CheckValue(env, nameof(env));
env.CheckValue(ctx, nameof(ctx));
@@ -119,7 +121,7 @@ private protected override void SaveCore(ModelSaveContext ctx)
ctx.Writer.Write((int)PredictionKind);
}

public ValueMapper<TIn, TOut> GetMapper<TIn, TOut>()
ValueMapper<TIn, TOut> IValueMapper.GetMapper<TIn, TOut>()
{
Host.Check(typeof(TIn) == typeof(VBuffer<Single>));
Host.Check(typeof(TOut) == typeof(Single));
@@ -132,8 +134,8 @@ public ValueMapper<TIn, TOut> GetMapper<TIn, TOut>()
ValueMapper<VBuffer<Single>, Single> del =
(in VBuffer<Single> src, ref Single dst) =>
{
if (InputType.VectorSize > 0)
Host.Check(src.Length == InputType.VectorSize);
if (_inputType.VectorSize > 0)
Host.Check(src.Length == _inputType.VectorSize);

var tmp = src;
Parallel.For(0, maps.Length, i =>
@@ -155,7 +157,7 @@ public ValueMapper<TIn, TOut> GetMapper<TIn, TOut>()
return (ValueMapper<TIn, TOut>)(Delegate)del;
}

public ValueMapper<TIn, TOut, TDist> GetMapper<TIn, TOut, TDist>()
ValueMapper<TIn, TOut, TDist> IValueMapperDist.GetMapper<TIn, TOut, TDist>()
{
Host.Check(typeof(TIn) == typeof(VBuffer<Single>));
Host.Check(typeof(TOut) == typeof(Single));
@@ -170,8 +172,8 @@ public ValueMapper<TIn, TOut, TDist> GetMapper<TIn, TOut, TDist>()
ValueMapper<VBuffer<Single>, Single, Single> del =
(in VBuffer<Single> src, ref Single score, ref Single prob) =>
{
if (InputType.VectorSize > 0)
Host.Check(src.Length == InputType.VectorSize);
if (_inputType.VectorSize > 0)
Host.Check(src.Length == _inputType.VectorSize);

var tmp = src;
Parallel.For(0, maps.Length, i =>
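EnsembleDistributionPredictor.cs combines both patterns and adds one more: the formerly public ColumnType properties are re-exposed only as explicit IValueMapper/IValueMapperDist members, backed by a private _inputType field that the class itself now reads. A short illustrative sketch of the property half of that change; the types below are placeholders, not ML.NET's.

    internal interface IValueMapperExample
    {
        string InputType { get; }
    }

    internal sealed class ExampleEnsemble : IValueMapperExample
    {
        private readonly string _inputType;

        public ExampleEnsemble(string inputType) => _inputType = inputType;

        // Before: public string InputType { get; }
        // After: visible only when the object is viewed through IValueMapperExample.
        string IValueMapperExample.InputType => _inputType;
    }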
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Ensemble/Trainer/EnsemblePredictorBase.cs
@@ -144,7 +144,7 @@ void ICanSaveInTextFormat.SaveAsText(TextWriter writer, RoleMappedSchema schema)
/// <summary>
/// Saves the model summary
/// </summary>
public void SaveSummary(TextWriter writer, RoleMappedSchema schema)
void ICanSaveSummary.SaveSummary(TextWriter writer, RoleMappedSchema schema)
{
for (int i = 0; i < Models.Length; i++)
{