Scrubbing online learners #2892

Merged

@@ -25,7 +25,7 @@ public static void Example()
{
LossFunction = new SmoothedHingeLoss.Options(),
LearningRate = 0.1f,
DoLazyUpdates = false,
LazyUpdates = false,

@wschin (Member), Mar 9, 2019:
Suggested change
LazyUpdates = false,
LazyUpdate = false,
#Resolved

RecencyGain = 0.1f,
NumberOfIterations = 10

@wschin (Member), Mar 9, 2019:
Suggested change
NumberOfIterations = 10
MaximumNumberOfIterations = 10
#ByDesign

Contributor Author:
No, it's the number of iterations.


In reply to: 263972500

};
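For context, the renamed option slots into the sample roughly as follows. This is a minimal sketch, not the exact sample file: the MLContext instance (mlContext) and training IDataView (trainingData) are assumed, and only the option names and values come from the diff above.

```csharp
// Sketch of the options-based AveragedPerceptron sample after the rename.
// mlContext and trainingData are assumed to exist.
var options = new AveragedPerceptronTrainer.Options
{
    LossFunction = new SmoothedHingeLoss.Options(),
    LearningRate = 0.1f,
    LazyUpdates = false,
    RecencyGain = 0.1f,
    NumberOfIterations = 10
};

// Build the trainer from the binary classification catalog and fit it.
var trainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(options);
var model = trainer.Fit(trainingData);
```
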
26 changes: 13 additions & 13 deletions src/Microsoft.ML.StandardLearners/Standard/Online/AveragedLinear.cs
@@ -58,15 +58,15 @@ public abstract class AveragedLinearOptions : OnlineLinearOptions
/// Default is <see langword="true" />.
/// </value>
[Argument(ArgumentType.AtMostOnce, HelpText = "Instead of updating averaged weights on every example, only update when loss is nonzero", ShortName = "lazy")]
public bool DoLazyUpdates = true;
public bool LazyUpdates = true;

@wschin (Member), Mar 9, 2019:
Suggested change
public bool LazyUpdates = true;
public bool LazyUpdate = true;
#Resolved


/// <summary>
/// The L2 weight for <a href='tmpurl_regularization'>regularization</a>.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization Weight", ShortName = "reg", SortOrder = 50)]
[Argument(ArgumentType.AtMostOnce, HelpText = "L2 Regularization Weight", ShortName = "reg,L2RegularizerWeight", SortOrder = 50)]
[TGUI(Label = "L2 Regularization Weight")]
[TlcModule.SweepableFloatParam("L2RegularizerWeight", 0.0f, 0.4f)]
public float L2RegularizerWeight = AveragedDefault.L2RegularizerWeight;
public float L2Regularization = AveragedDefault.L2Regularization;

/// <summary>
/// Extra weight given to more recent updates.
Expand All @@ -86,7 +86,7 @@ public abstract class AveragedLinearOptions : OnlineLinearOptions
/// Default is <see langword="false" />.
/// </value>
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether Recency Gain is multiplicative (vs. additive)", ShortName = "rgm")]
public bool RecencyGainMulti = false;
public bool RecencyGainMultiplicative = false;

/// <summary>
/// Determines whether to do averaging or not.
Expand All @@ -109,7 +109,7 @@ internal class AveragedDefault : OnlineLinearOptions.OnlineDefault
{
public const float LearningRate = 1;
public const bool DecreaseLearningRate = false;
public const float L2RegularizerWeight = 0;
public const float L2Regularization = 0;
}

internal abstract IComponentFactory<IScalarOutputLoss> LossFunctionFactory { get; }
@@ -186,7 +186,7 @@ public override void FinishIteration(IChannel ch)
// Finalize things
if (Averaged)
{
if (_args.DoLazyUpdates && NumNoUpdates > 0)
if (_args.LazyUpdates && NumNoUpdates > 0)
{
// Update the total weights to include the final loss=0 updates
VectorUtils.AddMult(in Weights, NumNoUpdates * WeightsScale, ref TotalWeights);
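
The lazy-update path defers averaging: zero-loss examples leave the weights untouched, so only NumNoUpdates is incremented, and the deferred contribution is later folded in with a single scaled add, as the hunk above does. A simplified standalone sketch of that bookkeeping (plain arrays are assumed here; the real trainer uses VBuffer<float> and VectorUtils):

```csharp
// Simplified sketch of lazy averaging: skip per-example adds while the
// weights are unchanged, then fold all skipped steps in at once.
static void FlushLazyUpdates(float[] weights, float weightsScale, float bias,
    float[] totalWeights, ref float totalBias,
    ref float numWeightUpdates, ref int numNoUpdates)
{
    if (numNoUpdates == 0)
        return;
    // One multiply-add stands in for numNoUpdates identical per-example adds.
    for (int i = 0; i < weights.Length; i++)
        totalWeights[i] += weights[i] * weightsScale * numNoUpdates;
    totalBias += bias * numNoUpdates;
    numWeightUpdates += numNoUpdates;
    numNoUpdates = 0;
}
```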
@@ -221,10 +221,10 @@ public override void ProcessDataInstance(IChannel ch, in VBuffer<float> feat, fl
// REVIEW: Should this be biasUpdate != 0?
// This loss does not incorporate L2 if present, but the chance of that addition to the loss
// exactly cancelling out loss is remote.
if (loss != 0 || _args.L2RegularizerWeight > 0)
if (loss != 0 || _args.L2Regularization > 0)
{
// If doing lazy weights, we need to update the totalWeights and totalBias before updating weights/bias
if (_args.DoLazyUpdates && _args.Averaged && NumNoUpdates > 0 && TotalMultipliers * _args.AveragedTolerance <= PendingMultipliers)
if (_args.LazyUpdates && _args.Averaged && NumNoUpdates > 0 && TotalMultipliers * _args.AveragedTolerance <= PendingMultipliers)
{
VectorUtils.AddMult(in Weights, NumNoUpdates * WeightsScale, ref TotalWeights);
TotalBias += Bias * NumNoUpdates * WeightsScale;
@@ -242,7 +242,7 @@ public override void ProcessDataInstance(IChannel ch, in VBuffer<float> feat, fl

// Perform the update to weights and bias.
VectorUtils.AddMult(in feat, biasUpdate / WeightsScale, ref Weights);
WeightsScale *= 1 - 2 * _args.L2RegularizerWeight; // L2 regularization.
WeightsScale *= 1 - 2 * _args.L2Regularization; // L2 regularization.
ScaleWeightsIfNeeded();
Bias += biasUpdate;
PendingMultipliers += Math.Abs(biasUpdate);
@@ -251,7 +251,7 @@ public override void ProcessDataInstance(IChannel ch, in VBuffer<float> feat, fl
// Add to averaged weights and increment the count.
if (Averaged)
{
if (!_args.DoLazyUpdates)
if (!_args.LazyUpdates)
IncrementAverageNonLazy();
else
NumNoUpdates++;
@@ -282,7 +282,7 @@ private void IncrementAverageNonLazy()
VectorUtils.AddMult(in Weights, Gain * WeightsScale, ref TotalWeights);
TotalBias += Gain * Bias;
NumWeightUpdates += Gain;
Gain = (_args.RecencyGainMulti ? Gain * _args.RecencyGain : Gain + _args.RecencyGain);
Gain = (_args.RecencyGainMultiplicative ? Gain * _args.RecencyGain : Gain + _args.RecencyGain);

// If gains got too big, rescale!
if (Gain > 1000)
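
For intuition, the two recency-gain modes grow the averaging gain very differently. A small standalone sketch with an assumed illustrative RecencyGain (not a trainer default):

```csharp
// Additive mode grows the gain linearly; multiplicative mode grows it
// geometrically when RecencyGain > 1 (and shrinks it when < 1), which is
// why the rescale branch above triggers once Gain exceeds 1000.
float gain = 1f;
const float recencyGain = 1.5f;  // assumed value for illustration
bool multiplicative = true;      // plays the role of RecencyGainMultiplicative
for (int update = 0; update < 20 && gain <= 1000f; update++)
    gain = multiplicative ? gain * recencyGain : gain + recencyGain;
// multiplicative: 1.5, 2.25, 3.38, ...; additive: 2.5, 4.0, 5.5, ...
```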
@@ -303,11 +303,11 @@ private protected AveragedLinearTrainer(AveragedLinearOptions options, IHostEnvi
Contracts.CheckUserArg(!options.ResetWeightsAfterXExamples.HasValue || options.ResetWeightsAfterXExamples > 0, nameof(options.ResetWeightsAfterXExamples), UserErrorPositive);

// Weights are scaled down by 2 * L2 regularization on each update step, so 0.5 would scale all weights to 0, which is not sensible.
Contracts.CheckUserArg(0 <= options.L2RegularizerWeight && options.L2RegularizerWeight < 0.5, nameof(options.L2RegularizerWeight), "must be in range [0, 0.5)");
Contracts.CheckUserArg(0 <= options.L2Regularization && options.L2Regularization < 0.5, nameof(options.L2Regularization), "must be in range [0, 0.5)");
Contracts.CheckUserArg(options.RecencyGain >= 0, nameof(options.RecencyGain), UserErrorNonNegative);
Contracts.CheckUserArg(options.AveragedTolerance >= 0, nameof(options.AveragedTolerance), UserErrorNonNegative);
// Verify user didn't specify parameters that conflict
Contracts.Check(!options.DoLazyUpdates || !options.RecencyGainMulti && options.RecencyGain == 0, "Cannot have both recency gain and lazy updates.");
Contracts.Check(!options.LazyUpdates || !options.RecencyGainMultiplicative && options.RecencyGain == 0, "Cannot have both recency gain and lazy updates.");

AveragedLinearTrainerOptions = options;
}
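
Concretely, each update multiplies WeightsScale by 1 - 2 * L2Regularization (see ProcessDataInstance above), so a value of 0.2 shrinks the weights by a factor of 0.6 per step, while 0.5 would give a factor of 0 and zero the weights out entirely; that is what the [0, 0.5) range check guards against.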
@@ -126,23 +126,23 @@ internal AveragedPerceptronTrainer(IHostEnvironment env, Options options)
/// <param name="featureColumnName">The name of the feature column.</param>
/// <param name="learningRate">The learning rate. </param>
/// <param name="decreaseLearningRate">Whether to decrease learning rate as iterations progress.</param>
/// <param name="l2RegularizerWeight">L2 Regularization Weight.</param>
/// <param name="l2Regularization">Weight of L2 regularization term.</param>
/// <param name="numIterations">The number of training iterations.</param>

Member:
Suggested change
/// <param name="numIterations">The number of training iterations.</param>
/// <param name="maximumNumberOfIterations">The number of training iterations.</param>

Contributor Author:
I don't get your suggestion.


In reply to: 263972642

internal AveragedPerceptronTrainer(IHostEnvironment env,
string labelColumnName = DefaultColumnNames.Label,
string featureColumnName = DefaultColumnNames.Features,
IClassificationLoss lossFunction = null,
float learningRate = Options.AveragedDefault.LearningRate,
bool decreaseLearningRate = Options.AveragedDefault.DecreaseLearningRate,
float l2RegularizerWeight = Options.AveragedDefault.L2RegularizerWeight,
float l2Regularization = Options.AveragedDefault.L2Regularization,
int numIterations = Options.AveragedDefault.NumIterations)
: this(env, new Options
{
LabelColumnName = labelColumnName,
FeatureColumnName = featureColumnName,
LearningRate = learningRate,
DecreaseLearningRate = decreaseLearningRate,
L2RegularizerWeight = l2RegularizerWeight,
L2Regularization = l2Regularization,
NumberOfIterations = numIterations,
LossFunction = new TrivialFactory(lossFunction ?? new HingeLoss())
})
@@ -70,7 +70,7 @@ public sealed class Options : OnlineLinearOptions
/// Column to use for example weight.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Column to use for example weight", ShortName = "weight", SortOrder = 4, Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly)]
public string WeightColumn = null;
public string ExampleWeightColumnName = null;
}

private sealed class TrainState : TrainStateBase
@@ -232,19 +232,19 @@ public override LinearBinaryModelParameters CreatePredictor()
/// <param name="env">The environment to use.</param>
/// <param name="labelColumn">The name of the label column. </param>
/// <param name="featureColumn">The name of the feature column.</param>
/// <param name="weightColumn">The optional name of the weight column.</param>
/// <param name="exampleWeightColumnName">The name of the example weight column (optional).</param>
/// <param name="numIterations">The number of training iteraitons.</param>
[BestFriend]
internal LinearSvmTrainer(IHostEnvironment env,
string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
string weightColumn = null,
string exampleWeightColumnName = null,
int numIterations = Options.OnlineDefault.NumIterations)
: this(env, new Options
{
LabelColumnName = labelColumn,
FeatureColumnName = featureColumn,
WeightColumn = weightColumn,
ExampleWeightColumnName = exampleWeightColumnName,
NumberOfIterations = numIterations,
})
{
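Downstream, the rename surfaces through the catalog extension. A hypothetical usage sketch: it assumes a LinearSvm catalog method exposing this parameter name, plus an existing mlContext and a trainingData view with a "Weight" column.

```csharp
// Routing a per-example weight column through the renamed option.
var svm = mlContext.BinaryClassification.Trainers.LinearSvm(
    labelColumnName: "Label",
    featureColumnName: "Features",
    exampleWeightColumnName: "Weight");
var svmModel = svm.Fit(trainingData);
```
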
@@ -95,22 +95,22 @@ public override LinearRegressionModelParameters CreatePredictor()
/// <param name="featureColumn">Name of the feature column.</param>
/// <param name="learningRate">The learning Rate.</param>
/// <param name="decreaseLearningRate">Decrease learning rate as iterations progress.</param>
/// <param name="l2RegularizerWeight">L2 Regularization Weight.</param>
/// <param name="l2Regularization">Weight of L2 regularization term.</param>
/// <param name="numIterations">Number of training iterations through the data.</param>

Member:
Suggested change
/// <param name="numIterations">Number of training iterations through the data.</param>
/// <param name="maximumNumberOfIterations">Number of training iterations through the data.</param>

Please also check other online learners' APIs.

Contributor Author:
I don't see any stop rule in OnlineLinearTrainer.TrainCore, but maybe it's just me.


In reply to: 263972702

Member:
I am too lazy to dig into the code, but my suggestion just reflects this parameter's description.


In reply to: 263975389

Contributor Author:
But you suggest the same description as I have right now...


In reply to: 264346723

/// <param name="lossFunction">The custom loss functions. Defaults to <see cref="SquaredLoss"/> if not provided.</param>
internal OnlineGradientDescentTrainer(IHostEnvironment env,

@wschin (Member), Mar 9, 2019:
Suggested change
internal OnlineGradientDescentTrainer(IHostEnvironment env,
internal OnlineGradientTrainer(IHostEnvironment env,

A descent algorithm ensures the decrease of function value per iteration. However, this is not true for most stochastic gradient learners. #Pending
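
(A gloss on the comment above: a descent method guarantees f(w_{t+1}) <= f(w_t) at every iteration, while a stochastic update w_{t+1} = w_t - eta * grad f_i(w_t), computed from a single example i, can increase the objective on any given step and only decreases it in expectation; hence the objection to "Descent" in the name.)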

Contributor Author:
I don't get your comment at all.


In reply to: 263972819

string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
float learningRate = Options.OgdDefaultArgs.LearningRate,
bool decreaseLearningRate = Options.OgdDefaultArgs.DecreaseLearningRate,
float l2RegularizerWeight = Options.OgdDefaultArgs.L2RegularizerWeight,
float l2Regularization = Options.OgdDefaultArgs.L2Regularization,
int numIterations = Options.OgdDefaultArgs.NumIterations,
IRegressionLoss lossFunction = null)
: this(env, new Options
{
LearningRate = learningRate,
DecreaseLearningRate = decreaseLearningRate,
L2RegularizerWeight = l2RegularizerWeight,
L2Regularization = l2Regularization,
NumberOfIterations = numIterations,
LabelColumnName = labelColumn,
FeatureColumnName = featureColumn,
12 changes: 6 additions & 6 deletions src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs
@@ -340,7 +340,7 @@ public static class StandardLearnersCatalog
/// <see langword="true" /> to decrease the <paramref name="learningRate"/> as iterations progress; otherwise, <see langword="false" />.
/// Default is <see langword="false" />.
/// </param>
/// <param name="l2RegularizerWeight">The L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
/// <param name="l2Regularization">The L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
/// <param name="numIterations">Number of passes through the training dataset.</param>
/// <example>
/// <format type="text/markdown">
@@ -356,13 +356,13 @@ public static class StandardLearnersCatalog
IClassificationLoss lossFunction = null,
float learningRate = AveragedLinearOptions.AveragedDefault.LearningRate,
bool decreaseLearningRate = AveragedLinearOptions.AveragedDefault.DecreaseLearningRate,
float l2RegularizerWeight = AveragedLinearOptions.AveragedDefault.L2RegularizerWeight,
float l2Regularization = AveragedLinearOptions.AveragedDefault.L2Regularization,
int numIterations = AveragedLinearOptions.AveragedDefault.NumIterations)
{
Contracts.CheckValue(catalog, nameof(catalog));

var env = CatalogUtils.GetEnvironment(catalog);
return new AveragedPerceptronTrainer(env, labelColumnName, featureColumnName, lossFunction ?? new LogLoss(), learningRate, decreaseLearningRate, l2RegularizerWeight, numIterations);
return new AveragedPerceptronTrainer(env, labelColumnName, featureColumnName, lossFunction ?? new LogLoss(), learningRate, decreaseLearningRate, l2Regularization, numIterations);
}
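
A usage sketch of this extension after the rename (mlContext and trainingData are assumed; the parameter names follow the signature above):

```csharp
// The renamed l2Regularization parameter on the simple overload.
var ap = mlContext.BinaryClassification.Trainers.AveragedPerceptron(
    labelColumnName: "Label",
    featureColumnName: "Features",
    l2Regularization: 0f,
    numIterations: 10);
var apModel = ap.Fit(trainingData);
```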

/// <summary>
@@ -411,20 +411,20 @@ public IClassificationLoss CreateComponent(IHostEnvironment env)
/// <param name="lossFunction">The custom loss. Defaults to <see cref="SquaredLoss"/> if not provided.</param>
/// <param name="learningRate">The learning Rate.</param>
/// <param name="decreaseLearningRate">Decrease learning rate as iterations progress.</param>
/// <param name="l2RegularizerWeight">L2 regularization weight.</param>
/// <param name="l2Regularization">The L2 weight for <a href='tmpurl_regularization'>regularization</a>.</param>
/// <param name="numIterations">Number of training iterations through the data.</param>
public static OnlineGradientDescentTrainer OnlineGradientDescent(this RegressionCatalog.RegressionTrainers catalog,
string labelColumnName = DefaultColumnNames.Label,
string featureColumnName = DefaultColumnNames.Features,
IRegressionLoss lossFunction = null,
float learningRate = OnlineGradientDescentTrainer.Options.OgdDefaultArgs.LearningRate,
bool decreaseLearningRate = OnlineGradientDescentTrainer.Options.OgdDefaultArgs.DecreaseLearningRate,
float l2RegularizerWeight = AveragedLinearOptions.AveragedDefault.L2RegularizerWeight,
float l2Regularization = AveragedLinearOptions.AveragedDefault.L2Regularization,
int numIterations = OnlineLinearOptions.OnlineDefault.NumIterations)
{
Contracts.CheckValue(catalog, nameof(catalog));
var env = CatalogUtils.GetEnvironment(catalog);
return new OnlineGradientDescentTrainer(env, labelColumnName, featureColumnName, learningRate, decreaseLearningRate, l2RegularizerWeight,
return new OnlineGradientDescentTrainer(env, labelColumnName, featureColumnName, learningRate, decreaseLearningRate, l2Regularization,
numIterations, lossFunction);
}

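And the regression counterpart, under the same assumptions:

```csharp
// The renamed l2Regularization parameter on the OGD regression trainer.
var ogd = mlContext.Regression.Trainers.OnlineGradientDescent(
    labelColumnName: "Label",
    featureColumnName: "Features",
    l2Regularization: 0f);
var ogdModel = ogd.Fit(trainingData);
```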