Skip to content

Commit

Permalink
review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
artidoro committed Mar 18, 2019
1 parent 5ef42ab commit 9021a94
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 22 deletions.
2 changes: 1 addition & 1 deletion docs/code/MlNetCookBook.md
Expand Up @@ -376,7 +376,7 @@ var testData = mlContext.Data.LoadFromTextFile<AdultData>(testDataPath,
separatorChar: ','
);
// Calculate metrics of the model on the test data.
var metrics = mlContext.Regression.Evaluate(model.Transform(testData), label: "Target");
var metrics = mlContext.Regression.Evaluate(model.Transform(testData), labelColumnName: "Target");
```

## How do I save and load the model?
Expand Down
Expand Up @@ -14,7 +14,7 @@ public static void Example()
// Leave out 10% of the dataset for testing. Since this is a ranking problem, we must ensure that the split
// respects the GroupId column, i.e. rows with the same GroupId are either all in the train split or all in
// the test split. The samplingKeyColumn parameter in Data.TrainTestSplit is used for this purpose.
var split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumn: "GroupId");
var split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumnName: "GroupId");

// Create the Estimator pipeline. For simplicity, we will train a small tree with 4 leaves and 2 boosting iterations.
var pipeline = mlContext.Ranking.Trainers.LightGbm(
Expand Down
Expand Up @@ -17,7 +17,7 @@ public static void Example()
// Leave out 10% of the dataset for testing. Since this is a ranking problem, we must ensure that the split
// respects the GroupId column, i.e. rows with the same GroupId are either all in the train split or all in
// the test split. The samplingKeyColumn parameter in Data.TrainTestSplit is used for this purpose.
var split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumn: "GroupId");
var split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumnName: "GroupId");

// Create the Estimator pipeline. For simplicity, we will train a small tree with 4 leaves and 2 boosting iterations.
var pipeline = mlContext.Ranking.Trainers.LightGbm(
Expand Down
14 changes: 7 additions & 7 deletions src/Microsoft.ML.Data/DataLoadSave/DataOperationsCatalog.cs
Expand Up @@ -365,32 +365,32 @@ public IDataView TakeRows(IDataView input, long count)

/// <summary>
/// Split the dataset into the train set and test set according to the given fraction.
/// Respects the <paramref name="samplingKeyColumn"/> if provided.
/// Respects the <paramref name="samplingKeyColumnName"/> if provided.
/// </summary>
/// <param name="data">The dataset to split.</param>
/// <param name="testFraction">The fraction of data to go into the test set.</param>
/// <param name="samplingKeyColumn">Name of a column to use for grouping rows. If two examples share the same value of the <paramref name="samplingKeyColumn"/>,
/// <param name="samplingKeyColumnName">Name of a column to use for grouping rows. If two examples share the same value of the <paramref name="samplingKeyColumnName"/>,
/// they are guaranteed to appear in the same subset (train or test). This can be used to ensure no label leakage from the train to the test set.
/// If <see langword="null"/> no row grouping will be performed.</param>
/// <param name="seed">Seed for the random number generator used to select rows for the train-test split.</param>
public TrainTestData TrainTestSplit(IDataView data, double testFraction = 0.1, string samplingKeyColumn = null, int? seed = null)
public TrainTestData TrainTestSplit(IDataView data, double testFraction = 0.1, string samplingKeyColumnName = null, int? seed = null)
{
_env.CheckValue(data, nameof(data));
_env.CheckParam(0 < testFraction && testFraction < 1, nameof(testFraction), "Must be between 0 and 1 exclusive");
_env.CheckValueOrNull(samplingKeyColumn);
_env.CheckValueOrNull(samplingKeyColumnName);

EnsureGroupPreservationColumn(_env, ref data, ref samplingKeyColumn, seed);
EnsureGroupPreservationColumn(_env, ref data, ref samplingKeyColumnName, seed);

var trainFilter = new RangeFilter(_env, new RangeFilter.Options()
{
Column = samplingKeyColumn,
Column = samplingKeyColumnName,
Min = 0,
Max = testFraction,
Complement = true
}, data);
var testFilter = new RangeFilter(_env, new RangeFilter.Options()
{
Column = samplingKeyColumn,
Column = samplingKeyColumnName,
Min = 0,
Max = testFraction,
Complement = false
Expand Down
8 changes: 4 additions & 4 deletions src/Microsoft.ML.Data/TrainCatalog.cs
Expand Up @@ -572,24 +572,24 @@ public RegressionMetrics Evaluate(IDataView data, string labelColumnName = Defau

/// <summary>
/// Run cross-validation over <paramref name="numberOfFolds"/> folds of <paramref name="data"/>, by fitting <paramref name="estimator"/>,
/// and respecting <paramref name="samplingKeyColumname"/> if provided.
/// and respecting <paramref name="samplingKeyColumnName"/> if provided.
/// Then evaluate each sub-model against <paramref name="labelColumnName"/> and return metrics.
/// </summary>
/// <param name="data">The data to run cross-validation on.</param>
/// <param name="estimator">The estimator to fit.</param>
/// <param name="numberOfFolds">Number of cross-validation folds.</param>
/// <param name="labelColumnName">The label column (for evaluation).</param>
/// <param name="samplingKeyColumname">Name of a column to use for grouping rows. If two examples share the same value of the <paramref name="samplingKeyColumname"/>,
/// <param name="samplingKeyColumnName">Name of a column to use for grouping rows. If two examples share the same value of the <paramref name="samplingKeyColumnName"/>,
/// they are guaranteed to appear in the same subset (train or test). This can be used to ensure no label leakage from the train to the test set.
/// If <see langword="null"/> no row grouping will be performed.</param>
/// <param name="seed">Seed for the random number generator used to select rows for cross-validation folds.</param>
/// <returns>Per-fold results: metrics, models, scored datasets.</returns>
public CrossValidationResult<RegressionMetrics>[] CrossValidate(
IDataView data, IEstimator<ITransformer> estimator, int numberOfFolds = 5, string labelColumnName = DefaultColumnNames.Label,
string samplingKeyColumname = null, int? seed = null)
string samplingKeyColumnName = null, int? seed = null)
{
Environment.CheckNonEmpty(labelColumnName, nameof(labelColumnName));
var result = CrossValidateTrain(data, estimator, numberOfFolds, samplingKeyColumname, seed);
var result = CrossValidateTrain(data, estimator, numberOfFolds, samplingKeyColumnName, seed);
return result.Select(x => new CrossValidationResult<RegressionMetrics>(x.Model,
Evaluate(x.Scores, labelColumnName), x.Scores, x.Fold)).ToArray();
}
Expand Down
12 changes: 6 additions & 6 deletions src/Microsoft.ML.Recommender/RecommenderCatalog.cs
Expand Up @@ -112,26 +112,26 @@ public RegressionMetrics Evaluate(IDataView data, string labelColumnName = Defau

/// <summary>
/// Run cross-validation over <paramref name="numberOfFolds"/> folds of <paramref name="data"/>, by fitting <paramref name="estimator"/>,
/// and respecting <paramref name="samplingKeyColumn"/> if provided.
/// and respecting <paramref name="samplingKeyColumnName"/> if provided.
/// Then evaluate each sub-model against <paramref name="labelColumnName"/> and return metrics.
/// </summary>
/// <param name="data">The data to run cross-validation on.</param>
/// <param name="estimator">The estimator to fit.</param>
/// <param name="numberOfFolds">Number of cross-validation folds.</param>
/// <param name="labelColumnName">The label column (for evaluation).</param>
/// <param name="samplingKeyColumn">Optional name of the column to use as a stratification column. If two examples share the same value of the <paramref name="samplingKeyColumn"/>
/// <param name="samplingKeyColumnName">Optional name of the column to use as a stratification column. If two examples share the same value of the <paramref name="samplingKeyColumnName"/>
/// (if provided), they are guaranteed to appear in the same subset (train or test). Use this to make sure there is no label leakage from train to the test set.
/// If this optional parameter is not provided, a stratification column will be generated, and its values will be random numbers.</param>
/// <param name="seed">Optional parameter used in combination with the <paramref name="samplingKeyColumn"/>.
/// If the <paramref name="samplingKeyColumn"/> is not provided, the random numbers generated to create it, will use this seed as value.
/// <param name="seed">Optional parameter used in combination with the <paramref name="samplingKeyColumnName"/>.
/// If the <paramref name="samplingKeyColumnName"/> is not provided, the random numbers generated to create it will use this seed as their value.
/// If it is not provided, the default value will be used.</param>
/// <returns>Per-fold results: metrics, models, scored datasets.</returns>
public CrossValidationResult<RegressionMetrics>[] CrossValidate(
IDataView data, IEstimator<ITransformer> estimator, int numberOfFolds = 5, string labelColumnName = DefaultColumnNames.Label,
string samplingKeyColumn = null, int? seed = null)
string samplingKeyColumnName = null, int? seed = null)
{
Environment.CheckNonEmpty(labelColumnName, nameof(labelColumnName));
var result = CrossValidateTrain(data, estimator, numberOfFolds, samplingKeyColumn, seed);
var result = CrossValidateTrain(data, estimator, numberOfFolds, samplingKeyColumnName, seed);
return result.Select(x => new CrossValidationResult<RegressionMetrics>(x.Model, Evaluate(x.Scores, labelColumnName), x.Scores, x.Fold)).ToArray();
}
}
Expand Down
4 changes: 2 additions & 2 deletions test/Microsoft.ML.Tests/Scenarios/Api/TestApi.cs
Expand Up @@ -324,7 +324,7 @@ public void TestTrainTestSplit()
// Now let's do same thing but with presence of stratificationColumn.
// Rows with same values in this stratificationColumn should end up in same subset (train or test).
// So let's break dataset by "Workclass" column.
var stratSplit = mlContext.Data.TrainTestSplit(input, samplingKeyColumn: "Workclass");
var stratSplit = mlContext.Data.TrainTestSplit(input, samplingKeyColumnName: "Workclass");
var stratTrainWorkclass = getWorkclass(stratSplit.TrainSet);
var stratTestWorkClass = getWorkclass(stratSplit.TestSet);
// Let's get unique values for "Workclass" column from train subset.
Expand All @@ -336,7 +336,7 @@ public void TestTrainTestSplit()

// Let's do same thing, but this time we will choose different seed.
// Stratification column should still break dataset properly without same values in both subsets.
var stratSeed = mlContext.Data.TrainTestSplit(input, samplingKeyColumn:"Workclass", seed: 1000000);
var stratSeed = mlContext.Data.TrainTestSplit(input, samplingKeyColumnName:"Workclass", seed: 1000000);
var stratTrainWithSeedWorkclass = getWorkclass(stratSeed.TrainSet);
var stratTestWithSeedWorkClass = getWorkclass(stratSeed.TestSet);
// Let's get unique values for "Workclass" column from train subset.
Expand Down

0 comments on commit 9021a94

Please sign in to comment.