Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Auto.ML: Fix issue when parsing float string fails on pl-PL culture set using Regression Experiment #5163

Merged
merged 14 commits into from Oct 30, 2020
Merged
2 changes: 1 addition & 1 deletion src/Microsoft.ML.AutoML/Sweepers/Parameters.cs
Expand Up @@ -83,7 +83,7 @@ public LongParameterValue(string name, long value)
{
_name = name;
_value = value;
_valueText = _value.ToString("D");
_valueText = _value.ToString("D", CultureInfo.InvariantCulture);
}

public bool Equals(IParameterValue other)
Expand Down
11 changes: 9 additions & 2 deletions src/Microsoft.ML.AutoML/Sweepers/SweeperProbabilityUtils.cs
Expand Up @@ -4,6 +4,7 @@

using System;
using System.Collections.Generic;
using System.Globalization;
using Microsoft.ML.Internal.CpuMath;

namespace Microsoft.ML.AutoML
Expand Down Expand Up @@ -98,13 +99,15 @@ public static float[] ParameterSetAsFloatArray(IValueGenerator[] sweepParams, Pa
}
else if (sweepParam is LongValueGenerator lvg)
{
var longValue = GetIfIParameterValueOfT<long>(pset) ?? long.Parse(pset.ValueText, CultureInfo.InvariantCulture);
// Normalizing all numeric parameters to [0,1] range.
result.Add(lvg.NormalizeValue(new LongParameterValue(pset.Name, long.Parse(pset.ValueText))));
result.Add(lvg.NormalizeValue(new LongParameterValue(pset.Name, longValue)));
}
else if (sweepParam is FloatValueGenerator fvg)
{
var floatValue = GetIfIParameterValueOfT<float>(pset) ?? float.Parse(pset.ValueText, CultureInfo.InvariantCulture);
// Normalizing all numeric parameters to [0,1] range.
result.Add(fvg.NormalizeValue(new FloatParameterValue(pset.Name, float.Parse(pset.ValueText))));
antoniovs1029 marked this conversation as resolved.
Show resolved Hide resolved
result.Add(fvg.NormalizeValue(new FloatParameterValue(pset.Name, floatValue)));
}
else
{
Expand All @@ -115,6 +118,10 @@ public static float[] ParameterSetAsFloatArray(IValueGenerator[] sweepParams, Pa
return result.ToArray();
}

private static T? GetIfIParameterValueOfT<T>(IParameterValue parameterValue)
where T : struct =>
parameterValue is IParameterValue<T> pvt ? pvt.Value : default(T?);
antoniovs1029 marked this conversation as resolved.
Show resolved Hide resolved

public static ParameterSet FloatArrayAsParameterSet(IValueGenerator[] sweepParams, float[] array, bool expandedCategoricals = true)
{
Runtime.Contracts.Assert(array.Length == sweepParams.Length);
antoniovs1029 marked this conversation as resolved.
Show resolved Hide resolved
Expand Down
36 changes: 32 additions & 4 deletions test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs
Expand Up @@ -2,7 +2,9 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Globalization;
using System.Linq;
using System.Threading;
using Microsoft.ML.Data;
using Microsoft.ML.TestFramework;
using Microsoft.ML.TestFramework.Attributes;
Expand Down Expand Up @@ -102,9 +104,33 @@ private void Context_Log(object sender, LoggingEventArgs e)
//throw new NotImplementedException();
}

[Fact]
public void AutoFitRegressionTest()
[Theory]
[InlineData("en-US")]
[InlineData("ar-SA")]
[InlineData("pl-PL")]
public void AutoFitRegressionTest(string culture)
{
var originalCulture = Thread.CurrentThread.CurrentCulture;
Thread.CurrentThread.CurrentCulture = new CultureInfo(culture);

uint experimentTime = 0;

if (culture == "ar-SA")
antoniovs1029 marked this conversation as resolved.
Show resolved Hide resolved
{
// If users run AutoML with a different local, sometimes
// the sweeper encounters problems when parsing some strings.
// So testing in another culture is necessary.
// Furthermore, these issues might only occur after several
antoniovs1029 marked this conversation as resolved.
Show resolved Hide resolved
// iterations, so more experiment time is needed for this to
// occur.
experimentTime = 30;

}
else if(culture == "pl-PL")
antoniovs1029 marked this conversation as resolved.
Show resolved Hide resolved
{
experimentTime = 100;
}

var context = new MLContext(1);
var dataPath = DatasetUtil.GetMlNetGeneratedRegressionDataset();
var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.MlNetGeneratedRegressionLabel);
Expand All @@ -113,11 +139,13 @@ public void AutoFitRegressionTest()
var validationData = context.Data.TakeRows(trainData, 20);
trainData = context.Data.SkipRows(trainData, 20);
var result = context.Auto()
.CreateRegressionExperiment(0)
.CreateRegressionExperiment(experimentTime)
.Execute(trainData, validationData,
new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });

Assert.True(result.RunDetails.Max(i => i.ValidationMetrics.RSquared > 0.9));
antoniovs1029 marked this conversation as resolved.
Show resolved Hide resolved

Thread.CurrentThread.CurrentCulture = originalCulture;
antoniovs1029 marked this conversation as resolved.
Show resolved Hide resolved
}

[LightGBMFact]
Expand Down Expand Up @@ -351,4 +379,4 @@ private TextLoader.Options GetLoaderArgsRank(string labelColumnName, string grou
};
}
}
}
}