diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderStatic.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderStatic.cs
index c6e52b5e27..c70a1158d9 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderStatic.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderStatic.cs
@@ -144,6 +144,15 @@ internal Context(Reconciler rec)
             /// The column representation.
             public Vector LoadBool(int minOrdinal, int? maxOrdinal) => Load(DataKind.BL, minOrdinal, maxOrdinal);
 
+            ///
+            /// Create a representation for a key loaded from TextLoader as an unsigned integer (32 bits).
+            ///
+            /// The zero-based index of the field to read from.
+            /// The smallest value of the loaded key values.
+            /// If specified, it's the largest allowed value of the loaded key values. Use null if key is unbounded.
+            /// The column representation.
+            public Key LoadKey(int ordinal, ulong minKeyValue, ulong? maxKeyValue) => Load(DataKind.U4, ordinal, minKeyValue, maxKeyValue);
+
             ///
             /// Reads a scalar single-precision floating point column from a single field in the text file.
             ///
@@ -209,6 +218,51 @@ private Vector Load(DataKind kind, int minOrdinal, int? maxOrdinal)
                 return new MyVector(_rec, kind, minOrdinal, maxOrdinal);
             }
 
+            private Key Load(DataKind kind, int ordinal, ulong minKeyValue, ulong? maxKeyValue)
+            {
+                Contracts.CheckParam(ordinal >= 0, nameof(ordinal), "Should be non-negative");
+                // minKeyValue is unsigned, so it cannot be negative; only the upper bound needs a check.
+                Contracts.CheckParam(maxKeyValue == null || maxKeyValue >= minKeyValue, nameof(maxKeyValue), "Should be greater than or equal to minimum key value or null");
+                return new MyKey(_rec, kind, ordinal, minKeyValue, maxKeyValue);
+            }
+
+            ///
+            /// A data type that bridges the static pipeline's Key column and TextLoader's Column. It can be used as a Key
+            /// in static-typed pipelines and provides Create() for translating itself into a TextLoader Column.
+            ///
+            private class MyKey : Key, IPipelineArgColumn
+            {
+                // The storage type that the targeted content would be loaded as.
+                private readonly DataKind _kind;
+                // The position where the key value gets read from.
+                private readonly int _ordinal;
+                // The lower bound of the key value.
+                private readonly ulong _minKeyValue;
+                // The upper bound of the key value. Its value is null if unbounded.
+                private readonly ulong? _maxKeyValue;
+
+                // Construct a representation for a key-typed column loaded from a text file. Key values are assumed to be contiguous.
+                public MyKey(Reconciler rec, DataKind kind, int ordinal, ulong minKeyValue, ulong? maxKeyValue = null)
+                    : base(rec, null)
+                {
+                    _kind = kind;
+                    _ordinal = ordinal;
+                    _minKeyValue = minKeyValue;
+                    _maxKeyValue = maxKeyValue;
+                }
+
+                // Translate the internal variable representation to columns of TextLoader.
+                public Column Create()
+                {
+                    return new Column()
+                    {
+                        Type = _kind,
+                        Source = new[] { new Range(_ordinal) },
+                        KeyRange = new KeyRange(_minKeyValue, _maxKeyValue)
+                    };
+                }
+            }
+
             private class MyScalar : Scalar, IPipelineArgColumn
             {
                 private readonly DataKind _kind;
diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs
index 7935e2bd7e..6f740dd559 100644
--- a/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs
+++ b/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs
@@ -14,13 +14,14 @@
 using Microsoft.ML.Runtime.Recommender;
 using Microsoft.ML.Runtime.Recommender.Internal;
 using Microsoft.ML.Trainers;
+using Microsoft.ML.Trainers.Recommender;
 
 [assembly: LoadableClass(typeof(MatrixFactorizationPredictor), null, typeof(SignatureLoadModel), "Matrix Factorization Predictor Executor", MatrixFactorizationPredictor.LoaderSignature)]
 
 [assembly: LoadableClass(typeof(MatrixFactorizationPredictionTransformer), typeof(MatrixFactorizationPredictionTransformer), null, typeof(SignatureLoadModel),
     "", MatrixFactorizationPredictionTransformer.LoaderSignature)]
 
-namespace Microsoft.ML.Runtime.Recommender
+namespace
Microsoft.ML.Trainers.Recommender
 {
     ///
     /// stores two factor matrices, P and Q, for approximating the training matrix, R, by P * Q,
diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationStatic.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationStatic.cs
new file mode 100644
index 0000000000..5d53c1ec8d
--- /dev/null
+++ b/src/Microsoft.ML.Recommender/MatrixFactorizationStatic.cs
@@ -0,0 +1,126 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Core.Data;
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.StaticPipe.Runtime;
+using Microsoft.ML.Trainers;
+using Microsoft.ML.Trainers.Recommender;
+using System;
+using System.Collections.Generic;
+
+namespace Microsoft.ML.StaticPipe
+{
+    public static class MatrixFactorizationExtensions
+    {
+        ///
+        /// Predict matrix entry using matrix factorization
+        ///
+        /// The type of physical value of matrix's row and column index. It must be an integer type such as uint.
+        /// The regression context trainer object.
+        /// The label variable.
+        /// The column index of the considered matrix.
+        /// The row index of the considered matrix.
+        /// Coefficient of the Frobenius-norm regularization of the factor matrices.
+        /// Rank of the two factor matrices whose product is used to approximate the considered matrix
+        /// Initial learning rate.
+        /// Number of training iterations.
+        /// A delegate to set more settings.
+        /// A delegate that is called every time the Fit
+        /// method is called on the estimator
+        /// instance created out of this. This delegate will receive
+        /// the model that was trained. Note that this action cannot change the result in any way; it is only a way for the caller to
+        /// be informed about what was learnt.
+        /// The predicted output.
+        public static Scalar MatrixFactorization(this RegressionContext.RegressionTrainers ctx,
+            Scalar label, Key matrixColumnIndex, Key matrixRowIndex,
+            float regularizationCoefficient = 0.1f,
+            int approximationRank = 8,
+            float learningRate = 0.1f,
+            int numIterations = 20,
+            Action advancedSettings = null,
+            Action onFit = null)
+        {
+            Contracts.CheckValue(label, nameof(label));
+            Contracts.CheckValue(matrixColumnIndex, nameof(matrixColumnIndex));
+            Contracts.CheckValue(matrixRowIndex, nameof(matrixRowIndex));
+
+            Contracts.CheckParam(regularizationCoefficient >= 0, nameof(regularizationCoefficient), "Must be non-negative");
+            Contracts.CheckParam(approximationRank > 0, nameof(approximationRank), "Must be positive");
+            Contracts.CheckParam(learningRate > 0, nameof(learningRate), "Must be positive");
+            Contracts.CheckParam(numIterations > 0, nameof(numIterations), "Must be positive");
+            Contracts.CheckValueOrNull(advancedSettings);
+            Contracts.CheckValueOrNull(onFit);
+
+            var rec = new MatrixFactorizationReconciler((env, labelColName, matrixColumnIndexColName, matrixRowIndexColName) =>
+            {
+                var trainer = new MatrixFactorizationTrainer(env, labelColName, matrixColumnIndexColName, matrixRowIndexColName, advancedSettings:
+                    args =>
+                    {
+                        args.Lambda = regularizationCoefficient;
+                        args.K = approximationRank;
+                        args.Eta = learningRate;
+                        args.NumIterations = numIterations;
+                        // Caller-supplied advanced settings run last, so they may override the values set above.
+                        advancedSettings?.Invoke(args);
+                    });
+                if (onFit != null)
+                    return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
+                else
+                    return trainer;
+            }, label, matrixColumnIndex, matrixRowIndex);
+            return rec.Output;
+        }
+
+        private sealed class MatrixFactorizationReconciler : TrainerEstimatorReconciler
+        {
+            // Output column name of the trained estimator.
+            private static string FixedOutputName => DefaultColumnNames.Score;
+
+            // A function used to create trainer of matrix factorization. It instantiates a trainer by indicating the
+            // expected inputs and output (IDataView's) column names. That trainer has a Fit(IDataView data) for learning
+            // a MatrixFactorizationPredictionTransformer from the data.
+            private readonly Func> _factory;
+
+            ///
+            /// The only output produced by matrix factorization predictor
+            ///
+            public Scalar Output { get; }
+
+            ///
+            /// The output columns.
+            ///
+            protected override IEnumerable Outputs { get; }
+
+            public MatrixFactorizationReconciler(Func> factory,
+                Scalar label, Key matColumnIndex, Key matRowIndex)
+                : base(MakeInputs(Contracts.CheckRef(label, nameof(label)), Contracts.CheckRef(matColumnIndex, nameof(matColumnIndex)), Contracts.CheckRef(matRowIndex, nameof(matRowIndex))),
+                       new string[] { FixedOutputName })
+            {
+                Contracts.AssertValue(factory);
+                _factory = factory;
+
+                Output = new Impl(this);
+                Outputs = new PipelineColumn[] { Output };
+            }
+
+            private static PipelineColumn[] MakeInputs(Scalar label, PipelineColumn matrixColumnIndex, PipelineColumn matrixRowIndex)
+                => new PipelineColumn[] { label, matrixColumnIndex, matrixRowIndex };
+
+            protected override IEstimator ReconcileCore(IHostEnvironment env, string[] inputNames)
+            {
+                Contracts.AssertValue(env);
+
+                // The first, second, third names are label, matrix's column index, and matrix's row index, respectively.
+                return _factory(env, inputNames[0], inputNames[1], inputNames[2]);
+            }
+
+            private sealed class Impl : Scalar
+            {
+                public Impl(MatrixFactorizationReconciler rec) : base(rec, rec.Inputs) { }
+            }
+        }
+    }
+}
diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs
index 56165067ce..5d32a4fd66 100644
--- a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs
+++ b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs
@@ -16,6 +16,7 @@
 using Microsoft.ML.Runtime.Recommender.Internal;
 using Microsoft.ML.Runtime.Training;
 using Microsoft.ML.Trainers;
+using Microsoft.ML.Trainers.Recommender;
 
 [assembly: LoadableClass(MatrixFactorizationTrainer.Summary, typeof(MatrixFactorizationTrainer), typeof(MatrixFactorizationTrainer.Arguments),
     new Type[] { typeof(SignatureTrainer), typeof(SignatureMatrixRecommendingTrainer) },
diff --git a/test/Microsoft.ML.StaticPipelineTesting/Microsoft.ML.StaticPipelineTesting.csproj b/test/Microsoft.ML.StaticPipelineTesting/Microsoft.ML.StaticPipelineTesting.csproj
index d1034c5c1e..532cff72cc 100644
--- a/test/Microsoft.ML.StaticPipelineTesting/Microsoft.ML.StaticPipelineTesting.csproj
+++ b/test/Microsoft.ML.StaticPipelineTesting/Microsoft.ML.StaticPipelineTesting.csproj
@@ -17,7 +17,8 @@ + - \ No newline at end of file +
diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs
index 01c498412c..5c6cb13755 100644
--- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs
+++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs
@@ -6,6 +6,7 @@
 using Microsoft.ML.Runtime.Data;
 using Microsoft.ML.Runtime.Data.IO;
 using Microsoft.ML.Runtime.Internal.Utilities;
+using Microsoft.ML.Runtime.Recommender;
 using Microsoft.ML.Runtime.RunTests;
 using Microsoft.ML.StaticPipe;
 using Microsoft.ML.TestFramework;
@@ -877,5 +878,6 @@ public void TestPcaStatic()
Assert.True(type.IsVector && type.ItemType.RawKind == DataKind.R4);
             Assert.True(type.VectorSize == 5);
         }
+
     }
 }
\ No newline at end of file
diff --git a/test/Microsoft.ML.StaticPipelineTesting/Training.cs b/test/Microsoft.ML.StaticPipelineTesting/Training.cs
index 458f777d2b..ab0df3a834 100644
--- a/test/Microsoft.ML.StaticPipelineTesting/Training.cs
+++ b/test/Microsoft.ML.StaticPipelineTesting/Training.cs
@@ -18,6 +18,7 @@
 using System.Linq;
 using Xunit;
 using Xunit.Abstractions;
+using Microsoft.ML.Trainers.Recommender;
 
 namespace Microsoft.ML.StaticPipelineTesting
 {
@@ -836,5 +837,54 @@ public void HogwildSGDBinaryClassification()
             Assert.InRange(metrics.Auc, 0, 1);
             Assert.InRange(metrics.Auprc, 0, 1);
         }
+
+        [Fact]
+        public void MatrixFactorization()
+        {
+            // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+            // as a catalog of available operations and as the source of randomness.
+            var mlContext = new MLContext(seed: 1, conc: 1);
+
+            // Specify where to find data file
+            var dataPath = GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename);
+            var dataSource = new MultiFileSource(dataPath);
+
+            // Read data file. The file contains 3 columns, label (float value), matrixColumnIndex (unsigned integer key), and matrixRowIndex (unsigned integer key).
+            // More specifically, LoadKey(1, 0, 19) means that the matrixColumnIndex column is read from the 2nd (indexed by 1) column in the data file and as
+            // a key type (stored as 32-bit unsigned integer) ranged from 0 to 19 (aka the training matrix has 20 columns).
+            var reader = mlContext.Data.TextReader(ctx => (label: ctx.LoadFloat(0), matrixColumnIndex: ctx.LoadKey(1, 0, 19), matrixRowIndex: ctx.LoadKey(2, 0, 39)));
+
+            // The variable that the onFit delegate below writes to. The obtained predictor will be assigned to this variable
+            // so that we will be able to touch it.
+            MatrixFactorizationPredictor pred = null;
+
+            // Create a statically-typed matrix factorization estimator. The MatrixFactorization's input and output defined in MatrixFactorizationStatic
+            // tell what (aka a Scalar) is expected. Notice that only one thread is used for deterministic outcome.
+            var matrixFactorizationEstimator = reader.MakeNewEstimator()
+                .Append(r => (r.label, score: mlContext.Regression.Trainers.MatrixFactorization(r.label, r.matrixColumnIndex, r.matrixRowIndex, onFit: p => pred = p,
+                advancedSettings: args => { args.NumThreads = 1; })));
+
+            // Create a pipeline from the reader (the 1st step) and the matrix factorization estimator (the 2nd step).
+            var pipe = reader.Append(matrixFactorizationEstimator);
+
+            // pred will be assigned by the onFit method once the training process is finished, so pred must be null before training.
+            Assert.Null(pred);
+
+            // Train the pipeline on the given data file. Steps in the pipeline are sequentially fitted (by calling their Fit function).
+            var model = pipe.Fit(dataSource);
+
+            // pred got assigned so that one can inspect the predictor trained in pipeline.
+            Assert.NotNull(pred);
+
+            // Feed the data file into the trained pipeline. The data would be loaded by TextLoader (the 1st step) and then the output of the
+            // TextLoader would be fed into MatrixFactorizationEstimator.
+            var estimatedData = model.Read(dataSource);
+
+            // After the training process, the metrics for regression problems can be computed.
+            var metrics = mlContext.Regression.Evaluate(estimatedData, r => r.label, r => r.score);
+
+            // Naive test. Just make sure the pipeline runs.
+            Assert.InRange(metrics.L2, 0, 0.5);
+        }
     }
 }
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs
index acb483bccf..3253c02591 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs
@@ -2,9 +2,13 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using Microsoft.ML.Runtime.Api;
 using Microsoft.ML.Runtime.Data;
 using Microsoft.ML.Runtime.RunTests;
 using Microsoft.ML.Trainers;
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
 using Xunit;
 
 namespace Microsoft.ML.Tests.TrainerEstimators
@@ -43,63 +47,73 @@ public void MatrixFactorization_Estimator()
         [Fact]
         public void MatrixFactorizationSimpleTrainAndPredict()
         {
-            using (var env = new LocalEnvironment(seed: 1, conc: 1))
-            {
-                // Specific column names of the considered data set
-                string labelColumnName = "Label";
-                string userColumnName = "User";
-                string itemColumnName = "Item";
-                string scoreColumnName = "Score";
+            var mlContext = new MLContext(seed: 1, conc: 1);
+
+            // Specific column names of the considered data set
+            string labelColumnName = "Label";
+            string userColumnName = "User";
+            string itemColumnName = "Item";
+            string scoreColumnName = "Score";
 
-                // Create reader for both of training and test data sets
-                var reader = new TextLoader(env, GetLoaderArgs(labelColumnName, userColumnName, itemColumnName));
+            // Create reader for both of training and test data sets
+            var reader = new TextLoader(mlContext, GetLoaderArgs(labelColumnName, userColumnName, itemColumnName));
 
-                // Read training data as an IDataView object
-                var data = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename)));
+            // Read training data as an IDataView object
+            var
data = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename)));
 
-                // Create a pipeline with a single operator.
-                var pipeline = new MatrixFactorizationTrainer(env, labelColumnName, userColumnName, itemColumnName,
-                    advancedSettings:s=>
-                    {
-                        s.NumIterations = 3;
-                        s.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
-                        s.K = 7;
-                    });
+            // Create a pipeline with a single operator.
+            var pipeline = new MatrixFactorizationTrainer(mlContext, labelColumnName, userColumnName, itemColumnName,
+                advancedSettings: s =>
+                {
+                    s.NumIterations = 3;
+                    s.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
+                    s.K = 7;
+                });
 
-                // Train a matrix factorization model.
-                var model = pipeline.Fit(data);
+            // Train a matrix factorization model.
+            var model = pipeline.Fit(data);
 
-                // Read the test data set as an IDataView
-                var testData = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename)));
+            // Read the test data set as an IDataView
+            var testData = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename)));
 
-                // Apply the trained model to the test set
-                var prediction = model.Transform(testData);
+            // Apply the trained model to the test set
+            var prediction = model.Transform(testData);
 
-                // Get output schema and check its column names
-                var outputSchema = model.GetOutputSchema(data.Schema);
-                var expectedOutputNames = new string[] { labelColumnName, userColumnName, itemColumnName, scoreColumnName };
-                foreach (var (i, col) in outputSchema.GetColumns())
-                    Assert.True(col.Name == expectedOutputNames[i]);
+            // Get output schema and check its column names
+            var outputSchema = model.GetOutputSchema(data.Schema);
+            var expectedOutputNames = new string[] { labelColumnName, userColumnName, itemColumnName, scoreColumnName };
+            foreach (var (i, col) in outputSchema.GetColumns())
+                Assert.True(col.Name == expectedOutputNames[i]);
 
-                // Retrieve label column's index from the test IDataView
-                testData.Schema.TryGetColumnIndex(labelColumnName, out int labelColumnId);
+            // The label column must be present in the test IDataView
+            Assert.True(testData.Schema.TryGetColumnIndex(labelColumnName, out int labelColumnId));
 
-                // Retrieve score column's index from the IDataView produced by the trained model
-                prediction.Schema.TryGetColumnIndex(scoreColumnName, out int scoreColumnId);
+            // The score column must be present in the IDataView produced by the trained model
+            Assert.True(prediction.Schema.TryGetColumnIndex(scoreColumnName, out int scoreColumnId));
 
-                // Compute prediction errors
-                var mlContext = new MLContext();
-                var metrices = mlContext.Regression.Evaluate(prediction, label: labelColumnName, score: scoreColumnName);
+            // Compute prediction errors
+            var metrics = mlContext.Regression.Evaluate(prediction, label: labelColumnName, score: scoreColumnName);
 
-                // Determine if the selected metric is reasonable for differen
-                var expectedWindowsL2Error = 0.61528733643754685; // Windows baseline
+            // Determine if the selected metric is reasonable for different platforms
+            double tolerance = Math.Pow(10, -7);
+            if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
+            {
+                // Linux case
+                var expectedUnixL2Error = 0.616821448679879; // Linux baseline
+                Assert.InRange(metrics.L2, expectedUnixL2Error - tolerance, expectedUnixL2Error + tolerance);
+            }
+            else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
+            {
+                // Mac case: the shared 1e-7 tolerance is too tight here, so a looser 5e-3 window is used instead.
                 var expectedMacL2Error = 0.61192207960271; // Mac baseline
-                var expectedLinuxL2Error = 0.616821448679879; // Linux baseline
-                double tolerance = System.Math.Pow(10, -DigitsOfPrecision);
-                bool inWindowsRange = expectedWindowsL2Error - tolerance < metrices.L2 && metrices.L2 < expectedWindowsL2Error + tolerance;
-                bool inMacRange = expectedMacL2Error - tolerance < metrices.L2 && metrices.L2 < expectedMacL2Error + tolerance;
-                bool inLinuxRange = expectedLinuxL2Error - tolerance < metrices.L2 && metrices.L2 < expectedLinuxL2Error + tolerance;
-                Assert.True(inWindowsRange || inMacRange || inLinuxRange);
+                const double macTolerance = 5e-3;
+                Assert.InRange(metrics.L2, expectedMacL2Error - macTolerance, expectedMacL2Error + macTolerance);
+            }
+            else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+            {
+                // Windows case
+                var expectedWindowsL2Error = 0.61528733643754685; // Windows baseline
+                Assert.InRange(metrics.L2, expectedWindowsL2Error - tolerance, expectedWindowsL2Error + tolerance);
             }
         }
 
@@ -117,5 +131,64 @@ private TextLoader.Arguments GetLoaderArgs(string labelColumnName, string matrix
                 }
             };
         }
+
+        // The following variables define the shape of a matrix. Its shape is _synthesizedMatrixRowCount-by-_synthesizedMatrixColumnCount.
+        // The variable _synthesizedMatrixFirstRowIndex indicates the integer that would be mapped to the first row index. If user data uses
+        // 0-based indices for rows, _synthesizedMatrixFirstRowIndex can be set to 0. Similarly, for 1-based indices, _synthesizedMatrixFirstRowIndex
+        // could be 1.
+        const int _synthesizedMatrixFirstColumnIndex = 1;
+        const int _synthesizedMatrixFirstRowIndex = 1;
+        const int _synthesizedMatrixColumnCount = 60;
+        const int _synthesizedMatrixRowCount = 100;
+
+        internal class MatrixElement
+        {
+            // Matrix column index starts from 1 and is at most _synthesizedMatrixColumnCount.
+            // Contiguous=true means that all values from 1 to _synthesizedMatrixColumnCount are allowed keys.
+            [KeyType(Contiguous = true, Count = _synthesizedMatrixColumnCount, Min = _synthesizedMatrixFirstColumnIndex)]
+            public uint MatrixColumnIndex;
+            // Matrix row index starts from 1 and is at most _synthesizedMatrixRowCount.
+            // Contiguous=true means that all values from 1 to _synthesizedMatrixRowCount are allowed keys.
+            [KeyType(Contiguous = true, Count = _synthesizedMatrixRowCount, Min = _synthesizedMatrixFirstRowIndex)]
+            public uint MatrixRowIndex;
+            // The value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row in the considered matrix.
+            public float Value;
+        }
+
+        [Fact]
+        public void MatrixFactorizationInMemoryData()
+        {
+            // Create an in-memory matrix as a list of tuples (column index, row index, value).
+            var dataMatrix = new List();
+            for (uint i = _synthesizedMatrixFirstColumnIndex; i < _synthesizedMatrixFirstColumnIndex + _synthesizedMatrixColumnCount; ++i)
+                for (uint j = _synthesizedMatrixFirstRowIndex; j < _synthesizedMatrixFirstRowIndex + _synthesizedMatrixRowCount; ++j)
+                    dataMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = (i + j) % 5 });
+
+            // Convert the in-memory matrix into an IDataView so that ML.NET components can consume it.
+            var dataView = ComponentCreation.CreateDataView(Env, dataMatrix);
+
+            // Create a matrix factorization trainer which may consume "Value" as the training label, "MatrixColumnIndex" as the
+            // matrix's column index, and "MatrixRowIndex" as the matrix's row index.
+            var mlContext = new MLContext(seed: 1, conc: 1);
+            var pipeline = new MatrixFactorizationTrainer(mlContext, "Value", "MatrixColumnIndex", "MatrixRowIndex",
+                advancedSettings: s =>
+                {
+                    s.NumIterations = 10;
+                    s.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
+                    s.K = 32;
+                });
+
+            // Train a matrix factorization model.
+            var model = pipeline.Fit(dataView);
+
+            // Apply the trained model to the training set
+            var prediction = model.Transform(dataView);
+
+            // Calculate regression metrics for the prediction result
+            var metrics = mlContext.Regression.Evaluate(prediction, label: "Value", score: "Score");
+
+            // Naive test. Just check the pipeline runs.
+            Assert.True(metrics.L2 < 0.1);
+        }
     }
 }