From 3eca87e40574f9e0894fb6cc627d22bd8dfa6d4f Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Mon, 8 Apr 2019 14:15:39 -0700 Subject: [PATCH 1/9] Normalize documentation --- .../Dynamic/Transforms/NormalizeBinning.cs | 82 +++++++++++++++++ .../Transforms/NormalizeLogMeanVariance.cs | 73 +++++++++++++++ .../Transforms/NormalizeMeanVariance.cs | 74 +++++++++++++++ .../Dynamic/Transforms/NormalizeMinMax.cs | 70 +++++++++++++++ .../Transforms/NormalizeSupervisedBinning.cs | 89 +++++++++++++++++++ .../NormalizerCatalog.cs | 60 ++++++++++++- 6 files changed, 446 insertions(+), 2 deletions(-) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs new file mode 100644 index 0000000000..1e4d1ea89c --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs @@ -0,0 +1,82 @@ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + public class NormalizeBinning + { + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. 
+ var mlContext = new MLContext(); + var samples = new List() + { + new DataPoint(){ Features = new float[4] { 8, 1, 3, 0} }, + new DataPoint(){ Features = new float[4] { 6, 2, 2, 0} }, + new DataPoint(){ Features = new float[4] { 4, 0, 1, 0} }, + new DataPoint(){ Features = new float[4] { 2,-1,-1, 1} } + }; + // Convert training data to IDataView, the general data type used in ML.NET. + var data = mlContext.Data.LoadFromEnumerable(samples); + // NormalizeBinning normalizes the data by constructing equidensity bins and produces output based on + // which bin the original value belongs to. + var normalize = mlContext.Transforms.NormalizeBinning("Features", maximumBinCount: 4, fixZero: false); + + // NormalizeBinning normalizes the data by constructing equidensity bins and produces output based on + // which bin the original value belongs to, while making sure zero values remain zero after normalization. + // Helps preserve sparsity. + var normalizeFixZero = mlContext.Transforms.NormalizeBinning("Features", maximumBinCount: 4, fixZero: true); + + // Now we can transform the data and look at the output to confirm the behavior of the estimator. + // This operation doesn't actually evaluate data until we read the data below. 
+ var normalizeTransform = normalize.Fit(data); + var transformedData = normalizeTransform.Transform(data); + var normalizeFixZeroTransform = normalizeFixZero.Fit(data); + var fixZeroData = normalizeFixZeroTransform.Transform(data); + var column = transformedData.GetColumn("Features").ToArray(); + foreach (var row in column) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // 1.0000, 0.6667, 1.0000, 0.0000 + // 0.6667, 1.0000, 0.6667, 0.0000 + // 0.3333, 0.3333, 0.3333, 0.0000 + // 0.0000, 0.0000, 0.0000, 1.0000 + + var columnFixZero = fixZeroData.GetColumn("Features").ToArray(); + foreach (var row in columnFixZero) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // 1.0000, 0.3333, 1.0000, 0.0000 + // 0.6667, 0.6667, 0.6667, 0.0000 + // 0.3333, 0.0000, 0.3333, 0.0000 + // 0.0000, -0.3333, 0.0000, 1.0000 + + // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for function. + // If case if we have multiple column transformations we need to pass index of InputOutputColumnPair. + var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.BinNormalizerModelParameters>); + Console.WriteLine($"Values for slot 0 would be transfromed by applying y = (Index(x) / {transformParams.Density[0]}) - {(transformParams.Offset.Length == 0 ? 
0 : transformParams.Offset[0])}"); + Console.WriteLine("Where Index(x) is index of bin to which x belongs"); + Console.WriteLine($"Bins upper borders are: {string.Join(" ", transformParams.UpperBounds[0])}"); + // Values for slot 0 would be transfromed by applying y = (Index(x) / 3) - 0 + // Where Index(x) is index of bin to which x belongs + // Bins upper borders are: 3 5 7 ∞ + + var fixZeroParams = (normalizeFixZeroTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.BinNormalizerModelParameters>); + Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (Index(x) / {fixZeroParams.Density[1]}) - {(fixZeroParams.Offset.Length == 0 ? 0 : fixZeroParams.Offset[1])}"); + Console.WriteLine("Where Index(x) is index of bin to which x belongs"); + Console.WriteLine($"Bins upper borders are: {string.Join(" ", fixZeroParams.UpperBounds[1])}"); + // Values for slot 1 would be transfromed by applying y = (Index(x) / 3) - 0.3333333 + // Where Index(x) is index of bin to which x belongs + // Bins upper borders are: -0.5 0.5 1.5 ∞ + } + + private class DataPoint + { + [VectorType(4)] + public float[] Features { get; set; } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs new file mode 100644 index 0000000000..1274568c18 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs @@ -0,0 +1,73 @@ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + public class NormalizeLogMeanVariance + { + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. 
+ var mlContext = new MLContext(); + var samples = new List() + { + new DataPoint(){ Features = new float[4] { 1, 1, 3, 0} }, + new DataPoint(){ Features = new float[4] { 2, 2, 2, 0} }, + new DataPoint(){ Features = new float[4] { 0, 0, 1, 0} }, + new DataPoint(){ Features = new float[4] {-1,-1,-1, 1} } + }; + // Convert training data to IDataView, the general data type used in ML.NET. + var data = mlContext.Data.LoadFromEnumerable(samples); + // NormalizeLogMeanVariance normalizes the data based on the computed mean and variance of the logarithm of the data. + // Uses Cumulative distribution function as output. + var normalize = mlContext.Transforms.NormalizeLogMeanVariance("Features", useCdf: true); + + // NormalizeLogMeanVariance normalizes the data based on the computed mean and variance of the logarithm of the data. + var normalizeNoCdf = mlContext.Transforms.NormalizeLogMeanVariance("Features", useCdf: false); + + // Now we can transform the data and look at the output to confirm the behavior of the estimator. + // This operation doesn't actually evaluate data until we read the data below. 
+ var normalizeTransform = normalize.Fit(data); + var transformedData = normalizeTransform.Transform(data); + var normalizeNoCdfTransform = normalizeNoCdf.Fit(data); + var noCdfData = normalizeNoCdfTransform.Transform(data); + var column = transformedData.GetColumn("Features").ToArray(); + foreach (var row in column) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // 0.1587, 0.1587, 0.8654, 0.0000 + // 0.8413, 0.8413, 0.5837, 0.0000 + // 0.0000, 0.0000, 0.0940, 0.0000 + // 0.0000, 0.0000, 0.0000, 0.0000 + + var columnFixZero = noCdfData.GetColumn("Features").ToArray(); + foreach (var row in columnFixZero) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // 1.8854, 1.8854, 5.2970, 0.0000 + // 4.7708, 4.7708, 3.0925, 0.0000 + //-1.0000,-1.0000, 0.8879, 0.0000 + //-3.8854,-3.8854,-3.5213, 0.0000 + + // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for function. + // If case if we have multiple column transformations we need to pass index of InputOutputColumnPair. + var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.CdfNormalizerModelParameters>); + Console.WriteLine($"Values for slot 1 would be transfromed by applying y= 0.5* (1 + ERF((Math.Log(x)- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))" ); + // ERF is https://en.wikipedia.org/wiki/Error_function. + // Values for slot 1 would be transfromed by applying y= 0.5* (1 + ERF((Math.Log(x)- 0.3465736) / (0.3465736 * sqrt(2))) + + var noCdfParams = (normalizeNoCdfTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.AffineNormalizerModelParameters>); + Console.WriteLine($"Values for slot 1 would be transfromed by applying y= (x - ({(noCdfParams.Offset.Length == 0 ? 
+ 0 : noCdfParams.Offset[1])})) * {noCdfParams.Scale[1]}"); + // Values for slot 1 would be transfromed by applying y= (x - (0.3465736)) * 2.88539 + } + + private class DataPoint + { + [VectorType(4)] + public float[] Features { get; set; } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs new file mode 100644 index 0000000000..6b67e3d678 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs @@ -0,0 +1,74 @@ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + public class NormalizeMeanVariance + { + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var mlContext = new MLContext(); + var samples = new List() + { + new DataPoint(){ Features = new float[4] { 1, 1, 3, 0} }, + new DataPoint(){ Features = new float[4] { 2, 2, 2, 0} }, + new DataPoint(){ Features = new float[4] { 0, 0, 1, 0} }, + new DataPoint(){ Features = new float[4] {-1,-1,-1, 1} } + }; + // Convert training data to IDataView, the general data type used in ML.NET. + var data = mlContext.Data.LoadFromEnumerable(samples); + // NormalizeMeanVariance normalizes the data based on the computed mean and variance of the data. + // Uses Cumulative distribution function as output. + var normalize = mlContext.Transforms.NormalizeMeanVariance("Features", useCdf: true); + + // NormalizeMeanVariance normalizes the data based on the computed mean and variance of the data. + var normalizeNoCdf = mlContext.Transforms.NormalizeMeanVariance("Features", useCdf: false); + + // Now we can transform the data and look at the output to confirm the behavior of the estimator. 
+ // This operation doesn't actually evaluate data until we read the data below. + var normalizeTransform = normalize.Fit(data); + var transformedData = normalizeTransform.Transform(data); + var normalizeNoCdfTransform = normalizeNoCdf.Fit(data); + var noCdfData = normalizeNoCdfTransform.Transform(data); + var column = transformedData.GetColumn("Features").ToArray(); + foreach (var row in column) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // 0.6726, 0.6726, 0.8816, 0.2819 + // 0.9101, 0.9101, 0.6939, 0.2819 + // 0.3274, 0.3274, 0.4329, 0.2819 + // 0.0899, 0.0899, 0.0641, 0.9584 + + + var columnFixZero = noCdfData.GetColumn("Features").ToArray(); + foreach (var row in columnFixZero) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // 0.8165, 0.8165, 1.5492, 0.0000 + // 1.6330, 1.6330, 1.0328, 0.0000 + // 0.0000, 0.0000, 0.5164, 0.0000 + //-0.8165,-0.8165,-0.5164, 2.0000 + + // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for function. + // If case if we have multiple column transformations we need to pass index of InputOutputColumnPair. + var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.CdfNormalizerModelParameters>); + Console.WriteLine($"Values for slot 1 would be transfromed by applying y= 0.5* (1 + ERF((x- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))"); + // ERF is https://en.wikipedia.org/wiki/Error_function. + // Values for slot 1 would be transfromed by applying y = 0.5 * (1 + ERF((x - 0.5) / (1.118034 * sqrt(2))) + + var noCdfParams = (normalizeNoCdfTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.AffineNormalizerModelParameters>); + Console.WriteLine($"Values for slot 1 would be transfromed by applying y= (x - ({(noCdfParams.Offset.Length == 0 ? 
0 : noCdfParams.Offset[1])})) * {noCdfParams.Scale[1]}"); + // Values for slot 1 would be transfromed by applying y = (x - (0)) * 0.8164966 + } + + private class DataPoint + { + [VectorType(4)] + public float[] Features { get; set; } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs new file mode 100644 index 0000000000..80d3bd60e3 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs @@ -0,0 +1,70 @@ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + public class NormalizeMinMax + { + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var mlContext = new MLContext(); + var samples = new List() + { + new DataPoint(){ Features = new float[4] { 1, 1, 3, 0} }, + new DataPoint(){ Features = new float[4] { 2, 2, 2, 0} }, + new DataPoint(){ Features = new float[4] { 0, 0, 1, 0} }, + new DataPoint(){ Features = new float[4] {-1,-1,-1, 1} } + }; + // Convert training data to IDataView, the general data type used in ML.NET. + var data = mlContext.Data.LoadFromEnumerable(samples); + // NormalizeMinMax normalize rows by finding min and max values in each row slot + // and setting projection of min value to 0 and max to 1 and everything else to + // values in between. + var normalize = mlContext.Transforms.NormalizeMinMax("Features", fixZero: false); + + // Normalize rows by finding min and max values in each row slot, but make sure + // zero values would remain zero after normalization. Helps preserve sparsity. 
+ var normalizeFixZero = mlContext.Transforms.NormalizeMinMax("Features", fixZero: true); + + // Now we can transform the data and look at the output to confirm the behavior of the estimator. + // This operation doesn't actually evaluate data until we read the data below. + var normalizeTransform = normalize.Fit(data); + var transformedData = normalizeTransform.Transform(data); + var normalizeFixZeroTransform = normalizeFixZero.Fit(data); + var fixZeroData = normalizeFixZeroTransform.Transform(data); + var column = transformedData.GetColumn("Features").ToArray(); + foreach (var row in column) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // 0.6667, 0.6667, 1.0000, 0.0000 + // 1.0000, 1.0000, 0.7500, 0.0000 + // 0.3333, 0.3333, 0.5000, 0.0000 + // 0.0000, 0.0000, 0.0000, 1.0000 + + var columnFixZero = fixZeroData.GetColumn("Features").ToArray(); + foreach (var row in columnFixZero) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // 0.5000, 0.5000, 1.0000, 0.0000 + // 1.0000, 1.0000, 0.6667, 0.0000 + // 0.0000, 0.0000, 0.3333, 0.0000 + //-0.5000,-0.5000,-0.3333, 1.0000 + + // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for function. + // If case if we have multiple column transformations we need to pass index of InputOutputColumnPair. + var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.AffineNormalizerModelParameters>); + Console.WriteLine($"Values for slot 1 would be transfromed by applying y= (x - ({(transformParams.Offset.Length == 0 ? 
0 : transformParams.Offset[1])})) * {transformParams.Scale[1]}"); + // Values for slot 1 would be transfromed by applying y= (x - (-1)) * 0.3333333 + } + + private class DataPoint + { + [VectorType(4)] + public float[] Features { get; set; } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs new file mode 100644 index 0000000000..0ec2549f1b --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs @@ -0,0 +1,89 @@ +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + public class NormalizeSupervisedBinning + { + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var mlContext = new MLContext(); + var samples = new List() + { + new DataPoint(){ Features = new float[4] { 8, 1, 3, 0}, Bin="Bin1" }, + new DataPoint(){ Features = new float[4] { 6, 2, 2, 1}, Bin="Bin2" }, + new DataPoint(){ Features = new float[4] { 5, 3, 0, 2}, Bin="Bin2" }, + new DataPoint(){ Features = new float[4] { 4,-8, 1, 3}, Bin="Bin3" }, + new DataPoint(){ Features = new float[4] { 2,-5,-1, 4}, Bin="Bin3" } + }; + // Convert training data to IDataView, the general data type used in ML.NET. + var data = mlContext.Data.LoadFromEnumerable(samples); + // Let's transform "Bin" column from string to key. + data = mlContext.Transforms.Conversion.MapValueToKey("Bin").Fit(data).Transform(data); + // NormalizeSupervisedBinning normalizes the data by constructing bins based on correlation with the label column and produce output based on + // to which bin original value belong. 
+ var normalize = mlContext.Transforms.NormalizeSupervisedBinning("Features", labelColumnName: "Bin", mininimumExamplesPerBin: 1, fixZero: false); + + // NormalizeSupervisedBinning normalizes the data by constructing bins based on correlation with the label column and produce output based on + // to which bin original value belong but make sure zero values would remain zero after normalization. + // Helps preserve sparsity. + var normalizeFixZero = mlContext.Transforms.NormalizeSupervisedBinning("Features", labelColumnName: "Bin", mininimumExamplesPerBin: 1, fixZero: true); + + // Now we can transform the data and look at the output to confirm the behavior of the estimator. + // This operation doesn't actually evaluate data until we read the data below. + var normalizeTransform = normalize.Fit(data); + var transformedData = normalizeTransform.Transform(data); + var normalizeFixZeroTransform = normalizeFixZero.Fit(data); + var fixZeroData = normalizeFixZeroTransform.Transform(data); + var column = transformedData.GetColumn("Features").ToArray(); + foreach (var row in column) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // 1.0000, 0.5000, 1.0000, 0.0000 + // 0.5000, 1.0000, 0.0000, 0.5000 + // 0.5000, 1.0000, 0.0000, 0.5000 + // 0.0000, 0.0000, 0.0000, 1.0000 + // 0.0000, 0.0000, 0.0000, 1.0000 + + var columnFixZero = fixZeroData.GetColumn("Features").ToArray(); + foreach (var row in columnFixZero) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // 1.0000, 0.0000, 1.0000, 0.0000 + // 0.5000, 0.5000, 0.0000, 0.5000 + // 0.5000, 0.5000, 0.0000, 0.5000 + // 0.0000, -0.5000, 0.0000, 1.0000 + // 0.0000, -0.5000, 0.0000, 1.0000 + + // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for function. + // If case if we have multiple column transformations we need to pass index of InputOutputColumnPair. 
+ var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.BinNormalizerModelParameters>); + Console.WriteLine($"Values for slot 0 would be transfromed by applying y = (Index(x) / {transformParams.Density[0]}) - {(transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[0])}"); + Console.WriteLine("Where Index(x) is index of bin to which x belongs"); + Console.WriteLine($"Bins upper borders are: {string.Join(" ", transformParams.UpperBounds[0])}"); + // Values for slot 0 would be transfromed by applying y = (Index(x) / 2) - 0 + // Where Index(x) is index of bin to which x belongs + // Bins upper borders are: 4.5 7 ∞ + + var fixZeroParams = (normalizeFixZeroTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.BinNormalizerModelParameters>); + Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (Index(x) / {fixZeroParams.Density[1]}) - {(fixZeroParams.Offset.Length == 0 ? 
0 : fixZeroParams.Offset[1])}"); + Console.WriteLine("Where Index(x) is index of bin to which x belongs"); + Console.WriteLine($"Bins upper borders are: {string.Join(" ", fixZeroParams.UpperBounds[1])}"); + // Values for slot 1 would be transfromed by applying y = (Index(x) / 2) - 0.5 + // Where Index(x) is index of bin to which x belongs + // Bins upper borders are: -2 1.5 ∞ + } + + private class DataPoint + { + [VectorType(4)] + public float[] Features { get; set; } + + public string Bin { get; set; } + } + } +} diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index cfec3f878a..686f6130eb 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -48,7 +48,7 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// /// /// /// /// @@ -68,6 +68,13 @@ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalo /// List of Output and Input column pairs. /// Maximum number of examples used to train the normalizer. /// Whether to map zero to zero, preserving sparsity. + /// + /// + /// + /// + /// public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalog, InputOutputColumnPair[] columns, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched) => @@ -84,6 +91,13 @@ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalo /// Maximum number of examples used to train the normalizer. /// Whether to map zero to zero, preserving sparsity. /// Whether to use CDF as the output. 
+ /// + /// + /// + /// + /// public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, @@ -102,6 +116,13 @@ public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog /// Maximum number of examples used to train the normalizer. /// Whether to map zero to zero, preserving sparsity. /// Whether to use CDF as the output. + /// + /// + /// + /// + /// public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog catalog, InputOutputColumnPair[] columns, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, @@ -118,6 +139,13 @@ public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog /// Name of the column to transform. If set to , the value of the will be used as source. /// Maximum number of examples used to train the normalizer. /// Whether to use CDF as the output. + /// + /// + /// + /// + /// public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, @@ -137,7 +165,7 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatal /// /// /// /// /// @@ -157,6 +185,13 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatal /// Maximum number of examples used to train the normalizer. /// Whether to map zero to zero, preserving sparsity. /// Maximum number of bins (power of 2 recommended). 
+ /// + /// + /// + /// + /// public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, @@ -175,6 +210,13 @@ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catal /// Maximum number of examples used to train the normalizer. /// Whether to map zero to zero, preserving sparsity. /// Maximum number of bins (power of 2 recommended). + /// + /// + /// + /// + /// public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catalog, InputOutputColumnPair[] columns, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, @@ -194,6 +236,13 @@ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catal /// Whether to map zero to zero, preserving sparsity. /// Maximum number of bins (power of 2 recommended). /// Minimum number of examples per bin. + /// + /// + /// + /// + /// public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, string labelColumnName = DefaultColumnNames.Label, @@ -216,6 +265,13 @@ public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCat /// Whether to map zero to zero, preserving sparsity. /// Maximum number of bins (power of 2 recommended). /// Minimum number of examples per bin. 
+ /// + /// + /// + /// + /// public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCatalog catalog, InputOutputColumnPair[] columns, string labelColumnName = DefaultColumnNames.Label, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, From aaec4798a1b0997298fcfe0508d140df82352a9d Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Tue, 9 Apr 2019 09:06:18 -0700 Subject: [PATCH 2/9] address some comments --- .../Dynamic/Normalizer.cs | 90 ------------------- .../Dynamic/Transforms/NormalizeBinning.cs | 24 ++--- .../Transforms/NormalizeLogMeanVariance.cs | 16 ++-- .../Transforms/NormalizeMeanVariance.cs | 12 ++- .../Dynamic/Transforms/NormalizeMinMax.cs | 11 ++- .../Transforms/NormalizeSupervisedBinning.cs | 24 ++--- 6 files changed, 53 insertions(+), 124 deletions(-) delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs deleted file mode 100644 index cf94245aba..0000000000 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs +++ /dev/null @@ -1,90 +0,0 @@ -using System; -using System.Collections.Generic; -using Microsoft.ML.Data; - -namespace Microsoft.ML.Samples.Dynamic -{ - public static class NormalizerTransform - { - public static void Example() - { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. - var ml = new MLContext(); - - // Get a small dataset as an IEnumerable and convert it to an IDataView. - IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); - var trainData = ml.Data.LoadFromEnumerable(data); - - // Preview of the data. - // - // Age Case Education Induced Parity PooledStratum RowNum ... - // 26 1 0-5yrs 1 6 3 1 ... - // 42 1 0-5yrs 1 1 1 2 ... - // 39 1 0-5yrs 2 6 4 3 ... - // 34 1 0-5yrs 2 4 2 4 ... - // 35 1 6-11yrs 1 3 32 5 ... 
- - // A pipeline for normalizing the Induced column. - var pipeline = ml.Transforms.NormalizeMinMax("Induced"); - // The transformed (normalized according to Normalizer.NormalizerMode.MinMax) data. - var transformer = pipeline.Fit(trainData); - - // Normalize the data. - var transformedData = transformer.Transform(trainData); - - // Getting the data of the newly created column, so we can preview it. - var normalizedColumn = transformedData.GetColumn(transformedData.Schema["Induced"]); - - // A small printing utility. - Action> printHelper = (colName, column) => - { - Console.WriteLine($"{colName} column obtained post-transformation."); - foreach (var row in column) - Console.WriteLine($"{row} "); - }; - - printHelper("Induced", normalizedColumn); - - // Induced column obtained post-transformation. - // - // 0.5 - // 0.5 - // 1 - // 1 - // 0.5 - - // Composing a different pipeline if we wanted to normalize more than one column at a time. - // Using log scale as the normalization mode. - var multiColPipeline = ml.Transforms.NormalizeLogMeanVariance(new[] { new InputOutputColumnPair("LogInduced", "Induced"), new InputOutputColumnPair("LogSpontaneous", "Spontaneous") }); - - // The transformed data. - var multiColtransformer = multiColPipeline.Fit(trainData); - var multiColtransformedData = multiColtransformer.Transform(trainData); - - // Getting the newly created columns. - var normalizedInduced = multiColtransformedData.GetColumn(multiColtransformedData.Schema["LogInduced"]); - var normalizedSpont = multiColtransformedData.GetColumn(multiColtransformedData.Schema["LogSpontaneous"]); - - printHelper("LogInduced", normalizedInduced); - - // LogInduced column obtained post-transformation. - // - // 0.2071445 - // 0.2071445 - // 0.889631 - // 0.889631 - // 0.2071445 - - printHelper("LogSpontaneous", normalizedSpont); - - // LogSpontaneous column obtained post-transformation. 
- // - // 0.8413026 - // 0 - // 0 - // 0 - // 0.1586974 - } - } -} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs index 1e4d1ea89c..a6c56c2d48 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs @@ -41,6 +41,7 @@ public static void Example() var column = transformedData.GetColumn("Features").ToArray(); foreach (var row in column) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // Expected output: // 1.0000, 0.6667, 1.0000, 0.0000 // 0.6667, 1.0000, 0.6667, 0.0000 // 0.3333, 0.3333, 0.3333, 0.0000 @@ -49,28 +50,31 @@ public static void Example() var columnFixZero = fixZeroData.GetColumn("Features").ToArray(); foreach (var row in columnFixZero) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // Expected output: // 1.0000, 0.3333, 1.0000, 0.0000 // 0.6667, 0.6667, 0.6667, 0.0000 // 0.3333, 0.0000, 0.3333, 0.0000 // 0.0000, -0.3333, 0.0000, 1.0000 - // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for function. - // If case if we have multiple column transformations we need to pass index of InputOutputColumnPair. + // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. + // If we have multiple column transformations we need to pass index of InputOutputColumnPair. var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.BinNormalizerModelParameters>); Console.WriteLine($"Values for slot 0 would be transfromed by applying y = (Index(x) / {transformParams.Density[0]}) - {(transformParams.Offset.Length == 0 ? 
0 : transformParams.Offset[0])}"); - Console.WriteLine("Where Index(x) is index of bin to which x belongs"); - Console.WriteLine($"Bins upper borders are: {string.Join(" ", transformParams.UpperBounds[0])}"); + Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); + Console.WriteLine($"Bins upper bounds are: {string.Join(" ", transformParams.UpperBounds[0])}"); + // Expected output: // Values for slot 0 would be transfromed by applying y = (Index(x) / 3) - 0 - // Where Index(x) is index of bin to which x belongs - // Bins upper borders are: 3 5 7 ∞ + // Where Index(x) is the index of the bin to which x belongs + // Bins upper bounds are: 3 5 7 ∞ var fixZeroParams = (normalizeFixZeroTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.BinNormalizerModelParameters>); Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (Index(x) / {fixZeroParams.Density[1]}) - {(fixZeroParams.Offset.Length == 0 ? 0 : fixZeroParams.Offset[1])}"); - Console.WriteLine("Where Index(x) is index of bin to which x belongs"); - Console.WriteLine($"Bins upper borders are: {string.Join(" ", fixZeroParams.UpperBounds[1])}"); + Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); + Console.WriteLine($"Bins upper bounds are: {string.Join(" ", fixZeroParams.UpperBounds[1])}"); + // Expected output: // Values for slot 1 would be transfromed by applying y = (Index(x) / 3) - 0.3333333 - // Where Index(x) is index of bin to which x belongs - // Bins upper borders are: -0.5 0.5 1.5 ∞ + // Where Index(x) is the index of the bin to which x belongs + // Bins upper bounds are: -0.5 0.5 1.5 ∞ } private class DataPoint diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs index 1274568c18..9d8ef03cf4 100644 --- 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs @@ -39,6 +39,7 @@ public static void Example() var column = transformedData.GetColumn("Features").ToArray(); foreach (var row in column) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // Expected output: // 0.1587, 0.1587, 0.8654, 0.0000 // 0.8413, 0.8413, 0.5837, 0.0000 // 0.0000, 0.0000, 0.0940, 0.0000 @@ -47,21 +48,24 @@ public static void Example() var columnFixZero = noCdfData.GetColumn("Features").ToArray(); foreach (var row in columnFixZero) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // Expected output: // 1.8854, 1.8854, 5.2970, 0.0000 // 4.7708, 4.7708, 3.0925, 0.0000 //-1.0000,-1.0000, 0.8879, 0.0000 // 3.8854,-3.8854,-3.5213, 0.0000 - // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for function. - // If case if we have multiple column transformations we need to pass index of InputOutputColumnPair. + // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. + // If we have multiple column transformations we need to pass index of InputOutputColumnPair. var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.CdfNormalizerModelParameters>); - Console.WriteLine($"Values for slot 1 would be transfromed by applying y= 0.5* (1 + ERF((Math.Log(x)- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))" ); + Console.WriteLine($"Values for slot 1 would be transfromed by applying y = 0.5* (1 + ERF((Math.Log(x)- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))" ); // ERF is https://en.wikipedia.org/wiki/Error_function. 
- // Values for slot 1 would be transfromed by applying y= 0.5* (1 + ERF((Math.Log(x)- 0.3465736) / (0.3465736 * sqrt(2))) + // Expected output: + // Values for slot 1 would be transfromed by applying y = 0.5* (1 + ERF((Math.Log(x)- 0.3465736) / (0.3465736 * sqrt(2))) var noCdfParams = (normalizeNoCdfTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.AffineNormalizerModelParameters>); - Console.WriteLine($"Values for slot 1 would be transfromed by applying y= (x - ({(noCdfParams.Offset.Length == 0 ? 0 : noCdfParams.Offset[1])})) * {noCdfParams.Scale[1]}"); - // Values for slot 1 would be transfromed by applying y= (x - (2.88539)) * 0.3465736 + Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (x - ({(noCdfParams.Offset.Length == 0 ? 0 : noCdfParams.Offset[1])})) * {noCdfParams.Scale[1]}"); + // Expected output: + // Values for slot 1 would be transfromed by applying y = (x - (2.88539)) * 0.3465736 } private class DataPoint diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs index 6b67e3d678..962a489feb 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs @@ -39,6 +39,7 @@ public static void Example() var column = transformedData.GetColumn("Features").ToArray(); foreach (var row in column) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // Expected output: // 0.6726, 0.6726, 0.8816, 0.2819 // 0.9101, 0.9101, 0.6939, 0.2819 // 0.3274, 0.3274, 0.4329, 0.2819 @@ -48,20 +49,23 @@ public static void Example() var columnFixZero = noCdfData.GetColumn("Features").ToArray(); foreach (var row in columnFixZero) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // Expected output: // 0.8165, 0.8165, 1.5492, 0.0000 // 1.6330, 
1.6330, 1.0328, 0.0000 // 0.0000, 0.0000, 0.5164, 0.0000 //-0.8165,-0.8165,-0.5164, 2.0000 - // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for function. - // If case if we have multiple column transformations we need to pass index of InputOutputColumnPair. + // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. + // If we have multiple column transformations we need to pass index of InputOutputColumnPair. var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.CdfNormalizerModelParameters>); - Console.WriteLine($"Values for slot 1 would be transfromed by applying y= 0.5* (1 + ERF((x- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))"); + Console.WriteLine($"Values for slot 1 would be transfromed by applying y = 0.5* (1 + ERF((x- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))"); // ERF is https://en.wikipedia.org/wiki/Error_function. + // Expected output: // Values for slot 1 would be transfromed by applying y = 0.5 * (1 + ERF((x - 0.5) / (1.118034 * sqrt(2))) var noCdfParams = (normalizeNoCdfTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.AffineNormalizerModelParameters>); - Console.WriteLine($"Values for slot 1 would be transfromed by applying y= (x - ({(noCdfParams.Offset.Length == 0 ? 0 : noCdfParams.Offset[1])})) * {noCdfParams.Scale[1]}"); + Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (x - ({(noCdfParams.Offset.Length == 0 ? 
0 : noCdfParams.Offset[1])})) * {noCdfParams.Scale[1]}"); + // Expected output: // Values for slot 1 would be transfromed by applying y = (x - (0)) * 0.8164966 } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs index 80d3bd60e3..150309e6a4 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs @@ -41,6 +41,7 @@ public static void Example() var column = transformedData.GetColumn("Features").ToArray(); foreach (var row in column) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // Expected output: // 0.6667, 0.6667, 1.0000, 0.0000 // 1.0000, 1.0000, 0.7500, 0.0000 // 0.3333, 0.3333, 0.5000, 0.0000 @@ -49,16 +50,18 @@ public static void Example() var columnFixZero = fixZeroData.GetColumn("Features").ToArray(); foreach (var row in columnFixZero) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // Expected output: // 0.5000, 0.5000, 1.0000, 0.0000 // 1.0000, 1.0000, 0.6667, 0.0000 // 0.0000, 0.0000, 0.3333, 0.0000 //-0.5000,-0.5000,-0.3333, 1.0000 - // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for function. - // If case if we have multiple column transformations we need to pass index of InputOutputColumnPair. + // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. + // If we have multiple column transformations we need to pass index of InputOutputColumnPair. var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.AffineNormalizerModelParameters>); - Console.WriteLine($"Values for slot 1 would be transfromed by applying y= (x - ({(transformParams.Offset.Length == 0 ? 
0 : transformParams.Offset[1])})) * {transformParams.Scale[1]}"); - // Values for slot 1 would be transfromed by applying y= (x - (-1)) * 0.3333333 + Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (x - ({(transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[1])})) * {transformParams.Scale[1]}"); + // Expected output: + // Values for slot 1 would be transfromed by applying y = (x - (-1)) * 0.3333333 } private class DataPoint diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs index 0ec2549f1b..c2c400d415 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs @@ -44,6 +44,7 @@ public static void Example() var column = transformedData.GetColumn("Features").ToArray(); foreach (var row in column) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // Expected output: // 1.0000, 0.5000, 1.0000, 0.0000 // 0.5000, 1.0000, 0.0000, 0.5000 // 0.5000, 1.0000, 0.0000, 0.5000 @@ -53,29 +54,32 @@ public static void Example() var columnFixZero = fixZeroData.GetColumn("Features").ToArray(); foreach (var row in columnFixZero) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + // Expected output: // 1.0000, 0.0000, 1.0000, 0.0000 // 0.5000, 0.5000, 0.0000, 0.5000 // 0.5000, 0.5000, 0.0000, 0.5000 - // 0.0000, -0.5000, 0.0000, 1.0000 - // 0.0000, -0.5000, 0.0000, 1.0000 + // 0.0000,-0.5000, 0.0000, 1.0000 + // 0.0000,-0.5000, 0.0000, 1.0000 - // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for function. - // If case if we have multiple column transformations we need to pass index of InputOutputColumnPair. + // Let's get transformation parameters. 
Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. + // If we have multiple column transformations we need to pass index of InputOutputColumnPair. var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.BinNormalizerModelParameters>); Console.WriteLine($"Values for slot 0 would be transfromed by applying y = (Index(x) / {transformParams.Density[0]}) - {(transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[0])}"); - Console.WriteLine("Where Index(x) is index of bin to which x belongs"); + Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); Console.WriteLine($"Bins upper borders are: {string.Join(" ", transformParams.UpperBounds[0])}"); + // Expected output: // Values for slot 0 would be transfromed by applying y = (Index(x) / 2) - 0 - // Where Index(x) is index of bin to which x belongs - // Bins upper borders are: 4.5 7 ∞ + // Where Index(x) is the index of the bin to which x belongs + // Bins upper bounds are: 4.5 7 ∞ var fixZeroParams = (normalizeFixZeroTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.BinNormalizerModelParameters>); Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (Index(x) / {fixZeroParams.Density[1]}) - {(fixZeroParams.Offset.Length == 0 ? 
0 : fixZeroParams.Offset[1])}"); - Console.WriteLine("Where Index(x) is index of bin to which x belongs"); + Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); Console.WriteLine($"Bins upper borders are: {string.Join(" ", fixZeroParams.UpperBounds[1])}"); + // Expected output: // Values for slot 1 would be transfromed by applying y = (Index(x) / 2) - 0.5 - // Where Index(x) is index of bin to which x belongs - // Bins upper borders are: -2 1.5 ∞ + // Where Index(x) is the index of the bin to which x belongs + // Bins upper bounds are: -2 1.5 ∞ } private class DataPoint From 611172a993b345ceb041f10ea213d981a6924d99 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Tue, 9 Apr 2019 15:59:02 -0700 Subject: [PATCH 3/9] Expected output shifted by one space --- .../Dynamic/Transforms/NormalizeBinning.cs | 28 ++++++++-------- .../Transforms/NormalizeLogMeanVariance.cs | 20 ++++++------ .../Transforms/NormalizeMeanVariance.cs | 20 ++++++------ .../Dynamic/Transforms/NormalizeMinMax.cs | 18 +++++------ .../Transforms/NormalizeSupervisedBinning.cs | 32 +++++++++---------- 5 files changed, 59 insertions(+), 59 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs index a6c56c2d48..0dd0db7a2d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs @@ -42,19 +42,19 @@ public static void Example() foreach (var row in column) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: - // 1.0000, 0.6667, 1.0000, 0.0000 - // 0.6667, 1.0000, 0.6667, 0.0000 - // 0.3333, 0.3333, 0.3333, 0.0000 - // 0.0000, 0.0000, 0.0000, 1.0000 + // 1.0000, 0.6667, 1.0000, 0.0000 + // 0.6667, 1.0000, 0.6667, 0.0000 + // 0.3333, 0.3333, 0.3333, 0.0000 + // 0.0000, 0.0000, 0.0000, 1.0000 var columnFixZero = 
fixZeroData.GetColumn("Features").ToArray(); foreach (var row in columnFixZero) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: - // 1.0000, 0.3333, 1.0000, 0.0000 - // 0.6667, 0.6667, 0.6667, 0.0000 - // 0.3333, 0.0000, 0.3333, 0.0000 - // 0.0000, -0.3333, 0.0000, 1.0000 + // 1.0000, 0.3333, 1.0000, 0.0000 + // 0.6667, 0.6667, 0.6667, 0.0000 + // 0.3333, 0.0000, 0.3333, 0.0000 + // 0.0000, -0.3333, 0.0000, 1.0000 // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. // If we have multiple column transformations we need to pass index of InputOutputColumnPair. @@ -63,18 +63,18 @@ public static void Example() Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); Console.WriteLine($"Bins upper bounds are: {string.Join(" ", transformParams.UpperBounds[0])}"); // Expected output: - // Values for slot 0 would be transfromed by applying y = (Index(x) / 3) - 0 - // Where Index(x) is the index of the bin to which x belongs - // Bins upper bounds are: 3 5 7 ∞ + // Values for slot 0 would be transfromed by applying y = (Index(x) / 3) - 0 + // Where Index(x) is the index of the bin to which x belongs + // Bins upper bounds are: 3 5 7 ∞ var fixZeroParams = (normalizeFixZeroTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.BinNormalizerModelParameters>); Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (Index(x) / {fixZeroParams.Density[1]}) - {(fixZeroParams.Offset.Length == 0 ? 
0 : fixZeroParams.Offset[1])}"); Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); Console.WriteLine($"Bins upper bounds are: {string.Join(" ", fixZeroParams.UpperBounds[1])}"); // Expected output: - // Values for slot 1 would be transfromed by applying y = (Index(x) / 3) - 0.3333333 - // Where Index(x) is the index of the bin to which x belongs - // Bins upper bounds are: -0.5 0.5 1.5 ∞ + // Values for slot 1 would be transfromed by applying y = (Index(x) / 3) - 0.3333333 + // Where Index(x) is the index of the bin to which x belongs + // Bins upper bounds are: -0.5 0.5 1.5 ∞ } private class DataPoint diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs index 9d8ef03cf4..053f7663c3 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs @@ -40,19 +40,19 @@ public static void Example() foreach (var row in column) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: - // 0.1587, 0.1587, 0.8654, 0.0000 - // 0.8413, 0.8413, 0.5837, 0.0000 - // 0.0000, 0.0000, 0.0940, 0.0000 - // 0.0000, 0.0000, 0.0000, 0.0000 + // 0.1587, 0.1587, 0.8654, 0.0000 + // 0.8413, 0.8413, 0.5837, 0.0000 + // 0.0000, 0.0000, 0.0940, 0.0000 + // 0.0000, 0.0000, 0.0000, 0.0000 var columnFixZero = noCdfData.GetColumn("Features").ToArray(); foreach (var row in columnFixZero) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: - // 1.8854, 1.8854, 5.2970, 0.0000 - // 4.7708, 4.7708, 3.0925, 0.0000 - //-1.0000,-1.0000, 0.8879, 0.0000 - // 3.8854,-3.8854,-3.5213, 0.0000 + // 1.8854, 1.8854, 5.2970, 0.0000 + // 4.7708, 4.7708, 3.0925, 0.0000 + // -1.0000,-1.0000, 0.8879, 0.0000 + // 3.8854,-3.8854,-3.5213, 0.0000 // Let's get transformation 
parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. // If we have multiple column transformations we need to pass index of InputOutputColumnPair. @@ -60,12 +60,12 @@ public static void Example() Console.WriteLine($"Values for slot 1 would be transfromed by applying y = 0.5* (1 + ERF((Math.Log(x)- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))" ); // ERF is https://en.wikipedia.org/wiki/Error_function. // Expected output: - // Values for slot 1 would be transfromed by applying y = 0.5* (1 + ERF((Math.Log(x)- 0.3465736) / (0.3465736 * sqrt(2))) + // Values for slot 1 would be transfromed by applying y = 0.5* (1 + ERF((Math.Log(x)- 0.3465736) / (0.3465736 * sqrt(2))) var noCdfParams = (normalizeNoCdfTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.AffineNormalizerModelParameters>); Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (x - ({(noCdfParams.Offset.Length == 0 ? 
0 : noCdfParams.Offset[1])})) * {noCdfParams.Scale[1]}"); // Expected output: - // Values for slot 1 would be transfromed by applying y = (x - (2.88539)) * 0.3465736 + // Values for slot 1 would be transfromed by applying y = (x - (2.88539)) * 0.3465736 } private class DataPoint diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs index 962a489feb..485983dc95 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs @@ -40,20 +40,20 @@ public static void Example() foreach (var row in column) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: - // 0.6726, 0.6726, 0.8816, 0.2819 - // 0.9101, 0.9101, 0.6939, 0.2819 - // 0.3274, 0.3274, 0.4329, 0.2819 - // 0.0899, 0.0899, 0.0641, 0.9584 + // 0.6726, 0.6726, 0.8816, 0.2819 + // 0.9101, 0.9101, 0.6939, 0.2819 + // 0.3274, 0.3274, 0.4329, 0.2819 + // 0.0899, 0.0899, 0.0641, 0.9584 var columnFixZero = noCdfData.GetColumn("Features").ToArray(); foreach (var row in columnFixZero) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: - // 0.8165, 0.8165, 1.5492, 0.0000 - // 1.6330, 1.6330, 1.0328, 0.0000 - // 0.0000, 0.0000, 0.5164, 0.0000 - //-0.8165,-0.8165,-0.5164, 2.0000 + // 0.8165, 0.8165, 1.5492, 0.0000 + // 1.6330, 1.6330, 1.0328, 0.0000 + // 0.0000, 0.0000, 0.5164, 0.0000 + // -0.8165,-0.8165,-0.5164, 2.0000 // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. // If we have multiple column transformations we need to pass index of InputOutputColumnPair. 
@@ -61,12 +61,12 @@ public static void Example() Console.WriteLine($"Values for slot 1 would be transfromed by applying y = 0.5* (1 + ERF((x- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))"); // ERF is https://en.wikipedia.org/wiki/Error_function. // Expected output: - // Values for slot 1 would be transfromed by applying y = 0.5 * (1 + ERF((x - 0.5) / (1.118034 * sqrt(2))) + // Values for slot 1 would be transfromed by applying y = 0.5 * (1 + ERF((x - 0.5) / (1.118034 * sqrt(2))) var noCdfParams = (normalizeNoCdfTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.AffineNormalizerModelParameters>); Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (x - ({(noCdfParams.Offset.Length == 0 ? 0 : noCdfParams.Offset[1])})) * {noCdfParams.Scale[1]}"); // Expected output: - // Values for slot 1 would be transfromed by applying y = (x - (0)) * 0.8164966 + // Values for slot 1 would be transfromed by applying y = (x - (0)) * 0.8164966 } private class DataPoint diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs index 150309e6a4..c8da40f4ba 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs @@ -42,26 +42,26 @@ public static void Example() foreach (var row in column) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: - // 0.6667, 0.6667, 1.0000, 0.0000 - // 1.0000, 1.0000, 0.7500, 0.0000 - // 0.3333, 0.3333, 0.5000, 0.0000 - // 0.0000, 0.0000, 0.0000, 1.0000 + // 0.6667, 0.6667, 1.0000, 0.0000 + // 1.0000, 1.0000, 0.7500, 0.0000 + // 0.3333, 0.3333, 0.5000, 0.0000 + // 0.0000, 0.0000, 0.0000, 1.0000 var columnFixZero = fixZeroData.GetColumn("Features").ToArray(); foreach (var row in columnFixZero) 
Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: - // 0.5000, 0.5000, 1.0000, 0.0000 - // 1.0000, 1.0000, 0.6667, 0.0000 - // 0.0000, 0.0000, 0.3333, 0.0000 - //-0.5000,-0.5000,-0.3333, 1.0000 + // 0.5000, 0.5000, 1.0000, 0.0000 + // 1.0000, 1.0000, 0.6667, 0.0000 + // 0.0000, 0.0000, 0.3333, 0.0000 + // -0.5000,-0.5000,-0.3333, 1.0000 // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. // If we have multiple column transformations we need to pass index of InputOutputColumnPair. var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.AffineNormalizerModelParameters>); Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (x - ({(transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[1])})) * {transformParams.Scale[1]}"); // Expected output: - // Values for slot 1 would be transfromed by applying y = (x - (-1)) * 0.3333333 + // Values for slot 1 would be transfromed by applying y = (x - (-1)) * 0.3333333 } private class DataPoint diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs index c2c400d415..8dec4ca09f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs @@ -45,21 +45,21 @@ public static void Example() foreach (var row in column) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: - // 1.0000, 0.5000, 1.0000, 0.0000 - // 0.5000, 1.0000, 0.0000, 0.5000 - // 0.5000, 1.0000, 0.0000, 0.5000 - // 0.0000, 0.0000, 0.0000, 1.0000 - // 0.0000, 0.0000, 0.0000, 1.0000 + // 1.0000, 0.5000, 1.0000, 0.0000 + // 0.5000, 1.0000, 0.0000, 0.5000 + // 
0.5000, 1.0000, 0.0000, 0.5000 + // 0.0000, 0.0000, 0.0000, 1.0000 + // 0.0000, 0.0000, 0.0000, 1.0000 var columnFixZero = fixZeroData.GetColumn("Features").ToArray(); foreach (var row in columnFixZero) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: - // 1.0000, 0.0000, 1.0000, 0.0000 - // 0.5000, 0.5000, 0.0000, 0.5000 - // 0.5000, 0.5000, 0.0000, 0.5000 - // 0.0000,-0.5000, 0.0000, 1.0000 - // 0.0000,-0.5000, 0.0000, 1.0000 + // 1.0000, 0.0000, 1.0000, 0.0000 + // 0.5000, 0.5000, 0.0000, 0.5000 + // 0.5000, 0.5000, 0.0000, 0.5000 + // 0.0000,-0.5000, 0.0000, 1.0000 + // 0.0000,-0.5000, 0.0000, 1.0000 // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. // If we have multiple column transformations we need to pass index of InputOutputColumnPair. @@ -68,18 +68,18 @@ public static void Example() Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); Console.WriteLine($"Bins upper borders are: {string.Join(" ", transformParams.UpperBounds[0])}"); // Expected output: - // Values for slot 0 would be transfromed by applying y = (Index(x) / 2) - 0 - // Where Index(x) is the index of the bin to which x belongs - // Bins upper bounds are: 4.5 7 ∞ + // Values for slot 0 would be transfromed by applying y = (Index(x) / 2) - 0 + // Where Index(x) is the index of the bin to which x belongs + // Bins upper bounds are: 4.5 7 ∞ var fixZeroParams = (normalizeFixZeroTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.BinNormalizerModelParameters>); Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (Index(x) / {fixZeroParams.Density[1]}) - {(fixZeroParams.Offset.Length == 0 ? 
0 : fixZeroParams.Offset[1])}"); Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); Console.WriteLine($"Bins upper borders are: {string.Join(" ", fixZeroParams.UpperBounds[1])}"); // Expected output: - // Values for slot 1 would be transfromed by applying y = (Index(x) / 2) - 0.5 - // Where Index(x) is the index of the bin to which x belongs - // Bins upper bounds are: -2 1.5 ∞ + // Values for slot 1 would be transfromed by applying y = (Index(x) / 2) - 0.5 + // Where Index(x) is the index of the bin to which x belongs + // Bins upper bounds are: -2 1.5 ∞ } private class DataPoint From 9cffe3a97449b6b56b92e93a060afd8349ccebac Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Tue, 9 Apr 2019 16:05:11 -0700 Subject: [PATCH 4/9] using static Microsoft.ML.Transforms.NormalizingTransformer --- .../Dynamic/Transforms/NormalizeBinning.cs | 5 +++-- .../Dynamic/Transforms/NormalizeLogMeanVariance.cs | 5 +++-- .../Dynamic/Transforms/NormalizeMeanVariance.cs | 5 +++-- .../Dynamic/Transforms/NormalizeMinMax.cs | 3 ++- .../Dynamic/Transforms/NormalizeSupervisedBinning.cs | 5 +++-- docs/samples/Microsoft.ML.Samples/Program.cs | 4 ++-- 6 files changed, 16 insertions(+), 11 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs index 0dd0db7a2d..e212193bff 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs @@ -4,6 +4,7 @@ using System.Linq; using Microsoft.ML; using Microsoft.ML.Data; +using static Microsoft.ML.Transforms.NormalizingTransformer; namespace Samples.Dynamic { @@ -58,7 +59,7 @@ public static void Example() // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. 
// If we have multiple column transformations we need to pass index of InputOutputColumnPair. - var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.BinNormalizerModelParameters>); + var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters>; Console.WriteLine($"Values for slot 0 would be transfromed by applying y = (Index(x) / {transformParams.Density[0]}) - {(transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[0])}"); Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); Console.WriteLine($"Bins upper bounds are: {string.Join(" ", transformParams.UpperBounds[0])}"); @@ -67,7 +68,7 @@ public static void Example() // Where Index(x) is the index of the bin to which x belongs // Bins upper bounds are: 3 5 7 ∞ - var fixZeroParams = (normalizeFixZeroTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.BinNormalizerModelParameters>); + var fixZeroParams = (normalizeFixZeroTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters>); Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (Index(x) / {fixZeroParams.Density[1]}) - {(fixZeroParams.Offset.Length == 0 ? 
0 : fixZeroParams.Offset[1])}"); Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); Console.WriteLine($"Bins upper bounds are: {string.Join(" ", fixZeroParams.UpperBounds[1])}"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs index 053f7663c3..d8606e30b2 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs @@ -4,6 +4,7 @@ using System.Linq; using Microsoft.ML; using Microsoft.ML.Data; +using static Microsoft.ML.Transforms.NormalizingTransformer; namespace Samples.Dynamic { @@ -56,13 +57,13 @@ public static void Example() // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. // If we have multiple column transformations we need to pass index of InputOutputColumnPair. - var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.CdfNormalizerModelParameters>); + var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as CdfNormalizerModelParameters>; Console.WriteLine($"Values for slot 1 would be transfromed by applying y = 0.5* (1 + ERF((Math.Log(x)- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))" ); // ERF is https://en.wikipedia.org/wiki/Error_function. 
// Expected output: // Values for slot 1 would be transfromed by applying y = 0.5* (1 + ERF((Math.Log(x)- 0.3465736) / (0.3465736 * sqrt(2))) - var noCdfParams = (normalizeNoCdfTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.AffineNormalizerModelParameters>); + var noCdfParams = normalizeNoCdfTransform.GetNormalizerModelParameters(0) as AffineNormalizerModelParameters>; Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (x - ({(noCdfParams.Offset.Length == 0 ? 0 : noCdfParams.Offset[1])})) * {noCdfParams.Scale[1]}"); // Expected output: // Values for slot 1 would be transfromed by applying y = (x - (2.88539)) * 0.3465736 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs index 485983dc95..4f9a63ed25 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs @@ -4,6 +4,7 @@ using System.Linq; using Microsoft.ML; using Microsoft.ML.Data; +using static Microsoft.ML.Transforms.NormalizingTransformer; namespace Samples.Dynamic { @@ -57,13 +58,13 @@ public static void Example() // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. // If we have multiple column transformations we need to pass index of InputOutputColumnPair. 
- var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.CdfNormalizerModelParameters>); + var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as CdfNormalizerModelParameters>; Console.WriteLine($"Values for slot 1 would be transfromed by applying y = 0.5* (1 + ERF((x- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))"); // ERF is https://en.wikipedia.org/wiki/Error_function. // Expected output: // Values for slot 1 would be transfromed by applying y = 0.5 * (1 + ERF((x - 0.5) / (1.118034 * sqrt(2))) - var noCdfParams = (normalizeNoCdfTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.AffineNormalizerModelParameters>); + var noCdfParams = normalizeNoCdfTransform.GetNormalizerModelParameters(0) as AffineNormalizerModelParameters>; Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (x - ({(noCdfParams.Offset.Length == 0 ? 0 : noCdfParams.Offset[1])})) * {noCdfParams.Scale[1]}"); // Expected output: // Values for slot 1 would be transfromed by applying y = (x - (0)) * 0.8164966 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs index c8da40f4ba..38d78f7811 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs @@ -4,6 +4,7 @@ using System.Linq; using Microsoft.ML; using Microsoft.ML.Data; +using static Microsoft.ML.Transforms.NormalizingTransformer; namespace Samples.Dynamic { @@ -58,7 +59,7 @@ public static void Example() // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. // If we have multiple column transformations we need to pass index of InputOutputColumnPair. 
- var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.AffineNormalizerModelParameters>); + var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as AffineNormalizerModelParameters>; Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (x - ({(transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[1])})) * {transformParams.Scale[1]}"); // Expected output: // Values for slot 1 would be transfromed by applying y = (x - (-1)) * 0.3333333 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs index 8dec4ca09f..d4424a76bd 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs @@ -4,6 +4,7 @@ using System.Linq; using Microsoft.ML; using Microsoft.ML.Data; +using static Microsoft.ML.Transforms.NormalizingTransformer; namespace Samples.Dynamic { @@ -63,7 +64,7 @@ public static void Example() // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. // If we have multiple column transformations we need to pass index of InputOutputColumnPair. - var transformParams = (normalizeTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.BinNormalizerModelParameters>); + var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters>; Console.WriteLine($"Values for slot 0 would be transfromed by applying y = (Index(x) / {transformParams.Density[0]}) - {(transformParams.Offset.Length == 0 ? 
0 : transformParams.Offset[0])}"); Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); Console.WriteLine($"Bins upper borders are: {string.Join(" ", transformParams.UpperBounds[0])}"); @@ -72,7 +73,7 @@ public static void Example() // Where Index(x) is the index of the bin to which x belongs // Bins upper bounds are: 4.5 7 ∞ - var fixZeroParams = (normalizeFixZeroTransform.GetNormalizerModelParameters(0) as Microsoft.ML.Transforms.NormalizingTransformer.BinNormalizerModelParameters>); + var fixZeroParams = normalizeFixZeroTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters>; Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (Index(x) / {fixZeroParams.Density[1]}) - {(fixZeroParams.Offset.Length == 0 ? 0 : fixZeroParams.Offset[1])}"); Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); Console.WriteLine($"Bins upper borders are: {string.Join(" ", fixZeroParams.UpperBounds[1])}"); diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index ef67739045..6f5a431802 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -1,4 +1,4 @@ -using Microsoft.ML.Samples.Dynamic; +using Samples.Dynamic; namespace Microsoft.ML.Samples { @@ -6,7 +6,7 @@ internal static class Program { static void Main(string[] args) { - ReplaceMissingValues.Example(); + NormalizeSupervisedBinning.Example(); } } } From 5d2da69a3447b2730aa12aadf3f22f44c6bc8b93 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Tue, 9 Apr 2019 16:06:02 -0700 Subject: [PATCH 5/9] revert program.cs --- docs/samples/Microsoft.ML.Samples/Program.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index 6f5a431802..e63964533b 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ 
b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -6,7 +6,7 @@ internal static class Program { static void Main(string[] args) { - NormalizeSupervisedBinning.Example(); + ReplaceMissingValues.Example(); } } -} +} \ No newline at end of file From 215a4ab60db381bb7030bce7a86604701cea10c9 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Tue, 9 Apr 2019 16:08:17 -0700 Subject: [PATCH 6/9] revert one more time --- docs/samples/Microsoft.ML.Samples/Program.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index e63964533b..2456660c91 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -1,4 +1,5 @@ -using Samples.Dynamic; +using Microsoft.ML.Samples.Dynamic; +using Samples.Dynamic; namespace Microsoft.ML.Samples { From 66fe40a1e96db945c97b6c77ae034602f9fa856d Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Tue, 9 Apr 2019 16:11:24 -0700 Subject: [PATCH 7/9] one more time --- docs/samples/Microsoft.ML.Samples/Program.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index 2456660c91..ef67739045 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -1,5 +1,4 @@ using Microsoft.ML.Samples.Dynamic; -using Samples.Dynamic; namespace Microsoft.ML.Samples { @@ -10,4 +9,4 @@ static void Main(string[] args) ReplaceMissingValues.Example(); } } -} \ No newline at end of file +} From ed811fc96a8967d03fcbabbcc7444859e71b40f2 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Thu, 11 Apr 2019 13:42:30 -0700 Subject: [PATCH 8/9] Address comments --- .../Dynamic/Transforms/NormalizeBinning.cs | 12 ++++-- .../Transforms/NormalizeLogMeanVariance.cs | 14 ++++--- .../Transforms/NormalizeMeanVariance.cs | 12 ++++-- 
.../Dynamic/Transforms/NormalizeMinMax.cs | 6 ++- .../Transforms/NormalizeSupervisedBinning.cs | 12 ++++-- .../NormalizerCatalog.cs | 42 ------------------- 6 files changed, 37 insertions(+), 61 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs index e212193bff..7931fe6496 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs @@ -60,20 +60,24 @@ public static void Example() // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. // If we have multiple column transformations we need to pass index of InputOutputColumnPair. var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters>; - Console.WriteLine($"Values for slot 0 would be transfromed by applying y = (Index(x) / {transformParams.Density[0]}) - {(transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[0])}"); + var density = transformParams.Density[0]; + var offset = (transformParams.Offset.Length == 0 ? 
0 : transformParams.Offset[0]); + Console.WriteLine($"The 0-index value in resulting array would be produce by: y = (Index(x) / {density}) - {offset}"); Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); Console.WriteLine($"Bins upper bounds are: {string.Join(" ", transformParams.UpperBounds[0])}"); // Expected output: - // Values for slot 0 would be transfromed by applying y = (Index(x) / 3) - 0 + // The 0-index value in resulting array would be produce by: y = (Index(x) / 3) - 0 // Where Index(x) is the index of the bin to which x belongs // Bins upper bounds are: 3 5 7 ∞ var fixZeroParams = (normalizeFixZeroTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters>); - Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (Index(x) / {fixZeroParams.Density[1]}) - {(fixZeroParams.Offset.Length == 0 ? 0 : fixZeroParams.Offset[1])}"); + density = fixZeroParams.Density[1]; + offset = (fixZeroParams.Offset.Length == 0 ? 0 : fixZeroParams.Offset[1]); + Console.WriteLine($"The 0-index value in resulting array would be produce by: y = (Index(x) / {density}) - {offset}"); Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); Console.WriteLine($"Bins upper bounds are: {string.Join(" ", fixZeroParams.UpperBounds[1])}"); // Expected output: - // Values for slot 1 would be transfromed by applying y = (Index(x) / 3) - 0.3333333 + // The 0-index value in resulting array would be produce by: y = (Index(x) / 3) - 0.3333333 // Where Index(x) is the index of the bin to which x belongs // Bins upper bounds are: -0.5 0.5 1.5 ∞ } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs index d8606e30b2..454335d858 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs +++ 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs @@ -58,15 +58,19 @@ public static void Example() // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. // If we have multiple column transformations we need to pass index of InputOutputColumnPair. var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as CdfNormalizerModelParameters>; - Console.WriteLine($"Values for slot 1 would be transfromed by applying y = 0.5* (1 + ERF((Math.Log(x)- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))" ); + Console.WriteLine("The 1-index value in resulting array would be produce by:"); + Console.WriteLine($"y = 0.5* (1 + ERF((Math.Log(x)- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))"); + // ERF is https://en.wikipedia.org/wiki/Error_function. // Expected output: - // Values for slot 1 would be transfromed by applying y = 0.5* (1 + ERF((Math.Log(x)- 0.3465736) / (0.3465736 * sqrt(2))) - + // The 1-index value in resulting array would be produce by: + // y = 0.5* (1 + ERF((Math.Log(x)- 0.3465736) / (0.3465736 * sqrt(2))) var noCdfParams = normalizeNoCdfTransform.GetNormalizerModelParameters(0) as AffineNormalizerModelParameters>; - Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (x - ({(noCdfParams.Offset.Length == 0 ? 0 : noCdfParams.Offset[1])})) * {noCdfParams.Scale[1]}"); + var offset = noCdfParams.Offset.Length == 0 ? 
0 : noCdfParams.Offset[1]; + var scale = noCdfParams.Scale[1]; + Console.WriteLine($"The 1-index value in resulting array would be produce by: y = (x - ({offset})) * {scale}"); // Expected output: - // Values for slot 1 would be transfromed by applying y = (x - (2.88539)) * 0.3465736 + // The 1-index value in resulting array would be produce by: y = (x - (2.88539)) * 0.3465736 } private class DataPoint diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs index 4f9a63ed25..6c198cc38c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs @@ -59,15 +59,19 @@ public static void Example() // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. // If we have multiple column transformations we need to pass index of InputOutputColumnPair. var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as CdfNormalizerModelParameters>; - Console.WriteLine($"Values for slot 1 would be transfromed by applying y = 0.5* (1 + ERF((x- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))"); + Console.WriteLine($"The 1-index value in resulting array would be produce by:"); + Console.WriteLine($" y = 0.5* (1 + ERF((x- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))"); // ERF is https://en.wikipedia.org/wiki/Error_function. 
// Expected output: - // Values for slot 1 would be transfromed by applying y = 0.5 * (1 + ERF((x - 0.5) / (1.118034 * sqrt(2))) + // The 1-index value in resulting array would be produce by: + // y = 0.5 * (1 + ERF((x - 0.5) / (1.118034 * sqrt(2))) var noCdfParams = normalizeNoCdfTransform.GetNormalizerModelParameters(0) as AffineNormalizerModelParameters>; - Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (x - ({(noCdfParams.Offset.Length == 0 ? 0 : noCdfParams.Offset[1])})) * {noCdfParams.Scale[1]}"); + var offset = noCdfParams.Offset.Length == 0 ? 0 : noCdfParams.Offset[1]; + var scale = noCdfParams.Scale[1]; + Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (x - ({offset})) * {scale}"); // Expected output: - // Values for slot 1 would be transfromed by applying y = (x - (0)) * 0.8164966 + // The 1-index value in resulting array would be produce by: y = (x - (0)) * 0.8164966 } private class DataPoint diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs index 38d78f7811..9dae0304a2 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs @@ -60,9 +60,11 @@ public static void Example() // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. // If we have multiple column transformations we need to pass index of InputOutputColumnPair. var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as AffineNormalizerModelParameters>; - Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (x - ({(transformParams.Offset.Length == 0 ? 
0 : transformParams.Offset[1])})) * {transformParams.Scale[1]}"); + Console.WriteLine($"The 1-index value in resulting array would be produce by:"); + Console.WriteLine($" y = (x - ({(transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[1])})) * {transformParams.Scale[1]}"); // Expected output: - // Values for slot 1 would be transfromed by applying y = (x - (-1)) * 0.3333333 + // The 1-index value in resulting array would be produce by: + // y = (x - (-1)) * 0.3333333 } private class DataPoint diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs index d4424a76bd..55fa9236ec 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs @@ -65,20 +65,24 @@ public static void Example() // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. // If we have multiple column transformations we need to pass index of InputOutputColumnPair. var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters>; - Console.WriteLine($"Values for slot 0 would be transfromed by applying y = (Index(x) / {transformParams.Density[0]}) - {(transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[0])}"); + Console.WriteLine($"The 1-index value in resulting array would be produce by:"); + Console.WriteLine($"y = (Index(x) / {transformParams.Density[0]}) - {(transformParams.Offset.Length == 0 ? 
0 : transformParams.Offset[0])}"); Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); Console.WriteLine($"Bins upper borders are: {string.Join(" ", transformParams.UpperBounds[0])}"); // Expected output: - // Values for slot 0 would be transfromed by applying y = (Index(x) / 2) - 0 + // The 1-index value in resulting array would be produce by: + // y = (Index(x) / 2) - 0 // Where Index(x) is the index of the bin to which x belongs // Bins upper bounds are: 4.5 7 ∞ var fixZeroParams = normalizeFixZeroTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters>; - Console.WriteLine($"Values for slot 1 would be transfromed by applying y = (Index(x) / {fixZeroParams.Density[1]}) - {(fixZeroParams.Offset.Length == 0 ? 0 : fixZeroParams.Offset[1])}"); + Console.WriteLine($"The 1-index value in resulting array would be produce by:"); + Console.WriteLine($" y = (Index(x) / {fixZeroParams.Density[1]}) - {(fixZeroParams.Offset.Length == 0 ? 0 : fixZeroParams.Offset[1])}"); Console.WriteLine("Where Index(x) is the index of the bin to which x belongs"); Console.WriteLine($"Bins upper borders are: {string.Join(" ", fixZeroParams.UpperBounds[1])}"); // Expected output: - // Values for slot 1 would be transfromed by applying y = (Index(x) / 2) - 0.5 + // The 1-index value in resulting array would be produce by: + // y = (Index(x) / 2) - 0.5 // Where Index(x) is the index of the bin to which x belongs // Bins upper bounds are: -2 1.5 ∞ } diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index 686f6130eb..a0ad4b2407 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -20,13 +20,6 @@ public static class NormalizationCatalog /// The transform catalog /// The used to map the old values to the new ones. /// The pairs of input and output columns. 
- /// - /// - /// - /// - /// [BestFriend] internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, NormalizingEstimator.NormalizationMode mode, @@ -68,13 +61,6 @@ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalo /// List of Output and Input column pairs. /// Maximum number of examples used to train the normalizer. /// Whether to map zero to zero, preserving sparsity. - /// - /// - /// - /// - /// public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalog, InputOutputColumnPair[] columns, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched) => @@ -116,13 +102,6 @@ public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog /// Maximum number of examples used to train the normalizer. /// Whether to map zero to zero, preserving sparsity. /// Whether to use CDF as the output. - /// - /// - /// - /// - /// public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog catalog, InputOutputColumnPair[] columns, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, @@ -162,13 +141,6 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatal /// List of Output and Input column pairs. /// Maximum number of examples used to train the normalizer. /// Whether to use CDF as the output. - /// - /// - /// - /// - /// public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog, InputOutputColumnPair[] columns, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, bool useCdf = NormalizingEstimator.Defaults.LogMeanVarCdf) => @@ -210,13 +182,6 @@ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catal /// Maximum number of examples used to train the normalizer. 
/// Whether to map zero to zero, preserving sparsity. /// Maximum number of bins (power of 2 recommended). - /// - /// - /// - /// - /// public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catalog, InputOutputColumnPair[] columns, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, @@ -265,13 +230,6 @@ public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCat /// Whether to map zero to zero, preserving sparsity. /// Maximum number of bins (power of 2 recommended). /// Minimum number of examples per bin. - /// - /// - /// - /// - /// public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCatalog catalog, InputOutputColumnPair[] columns, string labelColumnName = DefaultColumnNames.Label, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, From 43ca3e8e85ff0f156a35a040b692692a036fe772 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Thu, 11 Apr 2019 13:45:26 -0700 Subject: [PATCH 9/9] plurals --- .../Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs | 2 +- .../Dynamic/Transforms/NormalizeLogMeanVariance.cs | 2 +- .../Dynamic/Transforms/NormalizeMeanVariance.cs | 2 +- .../Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs | 2 +- .../Dynamic/Transforms/NormalizeSupervisedBinning.cs | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs index 7931fe6496..f6a3270430 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeBinning.cs @@ -58,7 +58,7 @@ public static void Example() // 0.0000, -0.3333, 0.0000, 1.0000 // Let's get transformation parameters. 
Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. - // If we have multiple column transformations we need to pass index of InputOutputColumnPair. + // If we have multiple columns transformations we need to pass index of InputOutputColumnPair. var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters>; var density = transformParams.Density[0]; var offset = (transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[0]); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs index 454335d858..b577270622 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLogMeanVariance.cs @@ -56,7 +56,7 @@ public static void Example() // 3.8854,-3.8854,-3.5213, 0.0000 // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. - // If we have multiple column transformations we need to pass index of InputOutputColumnPair. + // If we have multiple columns transformations we need to pass index of InputOutputColumnPair. 
var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as CdfNormalizerModelParameters>; Console.WriteLine("The 1-index value in resulting array would be produce by:"); Console.WriteLine($"y = 0.5* (1 + ERF((Math.Log(x)- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs index 6c198cc38c..ad35d43e6f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMeanVariance.cs @@ -57,7 +57,7 @@ public static void Example() // -0.8165,-0.8165,-0.5164, 2.0000 // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. - // If we have multiple column transformations we need to pass index of InputOutputColumnPair. + // If we have multiple columns transformations we need to pass index of InputOutputColumnPair. var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as CdfNormalizerModelParameters>; Console.WriteLine($"The 1-index value in resulting array would be produce by:"); Console.WriteLine($" y = 0.5* (1 + ERF((x- {transformParams.Mean[1]}) / ({transformParams.StandardDeviation[1]} * sqrt(2)))"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs index 9dae0304a2..7b7a60d74e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeMinMax.cs @@ -58,7 +58,7 @@ public static void Example() // -0.5000,-0.5000,-0.3333, 1.0000 // Let's get transformation parameters. 
Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. - // If we have multiple column transformations we need to pass index of InputOutputColumnPair. + // If we have multiple columns transformations we need to pass index of InputOutputColumnPair. var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as AffineNormalizerModelParameters>; Console.WriteLine($"The 1-index value in resulting array would be produce by:"); Console.WriteLine($" y = (x - ({(transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[1])})) * {transformParams.Scale[1]}"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs index 55fa9236ec..63fde50a9e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeSupervisedBinning.cs @@ -63,7 +63,7 @@ public static void Example() // 0.0000,-0.5000, 0.0000, 1.0000 // Let's get transformation parameters. Since we work with only one column we need to pass 0 as parameter for GetNormalizerModelParameters. - // If we have multiple column transformations we need to pass index of InputOutputColumnPair. + // If we have multiple columns transformations we need to pass index of InputOutputColumnPair. var transformParams = normalizeTransform.GetNormalizerModelParameters(0) as BinNormalizerModelParameters>; Console.WriteLine($"The 1-index value in resulting array would be produce by:"); Console.WriteLine($"y = (Index(x) / {transformParams.Density[0]}) - {(transformParams.Offset.Length == 0 ? 0 : transformParams.Offset[0])}");