diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderStatic.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderStatic.cs
index c6e52b5e27..c70a1158d9 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderStatic.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderStatic.cs
@@ -144,6 +144,15 @@ internal Context(Reconciler rec)
/// The column representation.
public Vector LoadBool(int minOrdinal, int? maxOrdinal) => Load(DataKind.BL, minOrdinal, maxOrdinal);
+ /// <summary>
+ /// Create a representation for a key loaded from TextLoader as an unsigned integer (32 bits).
+ /// </summary>
+ /// <param name="ordinal">The zero-based index of the field to read from.</param>
+ /// <param name="minKeyValue">The smallest value of the loaded key values.</param>
+ /// <param name="maxKeyValue">If specified, it's the largest allowed value of the loaded key values. Use null if key is unbounded.</param>
+ /// <returns>The column representation.</returns>
+ public Key LoadKey(int ordinal, ulong minKeyValue, ulong? maxKeyValue) => Load(DataKind.U4, ordinal, minKeyValue, maxKeyValue);
+
///
/// Reads a scalar single-precision floating point column from a single field in the text file.
///
@@ -209,6 +218,51 @@ private Vector Load(DataKind kind, int minOrdinal, int? maxOrdinal)
return new MyVector(_rec, kind, minOrdinal, maxOrdinal);
}
+ private Key Load(DataKind kind, int ordinal, ulong minKeyValue, ulong? maxKeyValue)
+ {
+ Contracts.CheckParam(ordinal >= 0, nameof(ordinal), "Should be non-negative");
+ // minKeyValue is unsigned and can never be negative, so only the upper bound is validated against it.
+ Contracts.CheckParam(maxKeyValue == null || maxKeyValue >= minKeyValue, nameof(maxKeyValue), "Should be greater than or equal to minimum key value or null");
+ return new MyKey(_rec, kind, ordinal, minKeyValue, maxKeyValue);
+ }
+
+ /// <summary>
+ /// A data type used to bridge Key and the Column of TextLoader. It can be used as a Key
+ /// in static-typed pipelines and provides Create() for translating itself into a Column.
+ /// </summary>
+ private class MyKey : Key, IPipelineArgColumn
+ {
+ // The storage type that the targeted content would be loaded as.
+ private readonly DataKind _kind;
+ // The position where the key value gets read from.
+ private readonly int _ordinal;
+ // The lower bound of the key value.
+ private readonly ulong _minKeyValue;
+ // The upper bound of the key value. Its value is null if unbounded.
+ private readonly ulong? _maxKeyValue;
+
+ // Construct a representation for a key-typed column loaded from a text file. Key values are assumed to be contiguous.
+ public MyKey(Reconciler rec, DataKind kind, int ordinal, ulong minKeyValue, ulong? maxKeyValue=null)
+ : base(rec, null)
+ {
+ _kind = kind;
+ _ordinal = ordinal;
+ _minKeyValue = minKeyValue;
+ _maxKeyValue = maxKeyValue;
+ }
+
+ // Translate the stored kind, field position and key bounds into a single-field Column of TextLoader.
+ public Column Create()
+ {
+ return new Column()
+ {
+ Type = _kind,
+ Source = new[] { new Range(_ordinal) },
+ KeyRange = new KeyRange(_minKeyValue, _maxKeyValue)
+ };
+ }
+ }
+
private class MyScalar : Scalar, IPipelineArgColumn
{
private readonly DataKind _kind;
diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs
index 7935e2bd7e..6f740dd559 100644
--- a/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs
+++ b/src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs
@@ -14,13 +14,14 @@
using Microsoft.ML.Runtime.Recommender;
using Microsoft.ML.Runtime.Recommender.Internal;
using Microsoft.ML.Trainers;
+using Microsoft.ML.Trainers.Recommender;
[assembly: LoadableClass(typeof(MatrixFactorizationPredictor), null, typeof(SignatureLoadModel), "Matrix Factorization Predictor Executor", MatrixFactorizationPredictor.LoaderSignature)]
[assembly: LoadableClass(typeof(MatrixFactorizationPredictionTransformer), typeof(MatrixFactorizationPredictionTransformer),
null, typeof(SignatureLoadModel), "", MatrixFactorizationPredictionTransformer.LoaderSignature)]
-namespace Microsoft.ML.Runtime.Recommender
+namespace Microsoft.ML.Trainers.Recommender
{
///
/// stores two factor matrices, P and Q, for approximating the training matrix, R, by P * Q,
diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationStatic.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationStatic.cs
new file mode 100644
index 0000000000..5d53c1ec8d
--- /dev/null
+++ b/src/Microsoft.ML.Recommender/MatrixFactorizationStatic.cs
@@ -0,0 +1,126 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Core.Data;
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.StaticPipe.Runtime;
+using Microsoft.ML.Trainers;
+using Microsoft.ML.Trainers.Recommender;
+using System;
+using System.Collections.Generic;
+
+namespace Microsoft.ML.StaticPipe
+{
+ public static class MatrixFactorizationExtensions
+ {
+ /// <summary>
+ /// Predict matrix entry using matrix factorization
+ /// </summary>
+ /// <remarks>The physical value type of the matrix's row and column indices must be an integer type such as uint.</remarks>
+ /// <param name="ctx">The regression context trainer object.</param>
+ /// <param name="label">The label variable.</param>
+ /// <param name="matrixColumnIndex">The column index of the considered matrix.</param>
+ /// <param name="matrixRowIndex">The row index of the considered matrix.</param>
+ /// <param name="regularizationCoefficient">The coefficient applied to the Frobenius norms of the factor matrices. Must be non-negative.</param>
+ /// <param name="approximationRank">Rank of the two factor matrices whose product is used to approximate the considered matrix. Must be positive.</param>
+ /// <param name="learningRate">Initial learning rate. Must be positive.</param>
+ /// <param name="numIterations">Number of training iterations. Must be positive.</param>
+ /// <param name="advancedSettings">A delegate to set more settings. It is invoked last, so it may override the values given by the preceding arguments.</param>
+ /// <param name="onFit">A delegate that is called every time the
+ /// Fit method is called on the
+ /// estimator instance created out of this. This delegate will receive
+ /// the model that was trained. Note that this action cannot change the result in any way; it is only a way for the caller to
+ /// be informed about what was learnt.</param>
+ /// <returns>The predicted output.</returns>
+ public static Scalar MatrixFactorization(this RegressionContext.RegressionTrainers ctx,
+ Scalar label, Key matrixColumnIndex, Key matrixRowIndex,
+ float regularizationCoefficient = 0.1f,
+ int approximationRank = 8,
+ float learningRate = 0.1f,
+ int numIterations = 20,
+ Action advancedSettings = null,
+ Action onFit = null)
+ {
+ Contracts.CheckValue(label, nameof(label));
+ Contracts.CheckValue(matrixColumnIndex, nameof(matrixColumnIndex));
+ Contracts.CheckValue(matrixRowIndex, nameof(matrixRowIndex));
+
+ Contracts.CheckParam(regularizationCoefficient >= 0, nameof(regularizationCoefficient), "Must be non-negative");
+ Contracts.CheckParam(approximationRank > 0, nameof(approximationRank), "Must be positive");
+ Contracts.CheckParam(learningRate > 0, nameof(learningRate), "Must be positive");
+ Contracts.CheckParam(numIterations > 0, nameof(numIterations), "Must be positive");
+ Contracts.CheckValueOrNull(advancedSettings);
+ Contracts.CheckValueOrNull(onFit);
+
+ var rec = new MatrixFactorizationReconciler((env, labelColName, matrixColumnIndexColName, matrixRowIndexColName) =>
+ {
+ var trainer = new MatrixFactorizationTrainer(env, labelColName, matrixColumnIndexColName, matrixRowIndexColName, advancedSettings:
+ args =>
+ {
+ args.Lambda = regularizationCoefficient;
+ args.K = approximationRank;
+ args.Eta = learningRate;
+ args.NumIterations = numIterations;
+ // The caller's delegate runs last, so any of the four settings above may be overwritten by it.
+ advancedSettings?.Invoke(args);
+ });
+ if (onFit != null)
+ return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
+ else
+ return trainer;
+ }, label, matrixColumnIndex, matrixRowIndex);
+ return rec.Output;
+ }
+
+ private sealed class MatrixFactorizationReconciler : TrainerEstimatorReconciler
+ {
+ // Output column name of the trained estimator.
+ private static string FixedOutputName => DefaultColumnNames.Score;
+
+ // A function used to create trainer of matrix factorization. It instantiates a trainer by indicating the
+ // expected inputs and output (IDataView's) column names. That trainer has a Fit(IDataView data) for learning
+ // a MatrixFactorizationPredictionTransformer from the data.
+ private readonly Func> _factory;
+
+ /// <summary>
+ /// The only output produced by matrix factorization predictor
+ /// </summary>
+ public Scalar Output { get; }
+
+ /// <summary>
+ /// The output columns. It holds exactly one element, namely Output.
+ /// </summary>
+ protected override IEnumerable Outputs { get; }
+
+ public MatrixFactorizationReconciler(Func> factory,
+ Scalar label, Key matColumnIndex, Key matRowIndex)
+ : base(MakeInputs(Contracts.CheckRef(label, nameof(label)), Contracts.CheckRef(matColumnIndex, nameof(matColumnIndex)), Contracts.CheckRef(matRowIndex, nameof(matRowIndex))),
+ new string[] { FixedOutputName })
+ {
+ Contracts.AssertValue(factory);
+ _factory = factory;
+
+ Output = new Impl(this);
+ Outputs = new PipelineColumn[] { Output };
+ }
+
+ private static PipelineColumn[] MakeInputs(Scalar label, PipelineColumn matrixColumnIndex, PipelineColumn matrixRowIndex)
+ => new PipelineColumn[] { label, matrixColumnIndex, matrixRowIndex };
+
+ protected override IEstimator ReconcileCore(IHostEnvironment env, string[] inputNames)
+ {
+ Contracts.AssertValue(env);
+
+ // Same order as MakeInputs: the first, second, third names are label, matrix's column index, and matrix's row index, respectively.
+ return _factory(env, inputNames[0], inputNames[1], inputNames[2]);
+ }
+
+ private sealed class Impl : Scalar
+ {
+ public Impl(MatrixFactorizationReconciler rec) : base(rec, rec.Inputs) { }
+ }
+ }
+ }
+}
diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs
index 56165067ce..5d32a4fd66 100644
--- a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs
+++ b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs
@@ -16,6 +16,7 @@
using Microsoft.ML.Runtime.Recommender.Internal;
using Microsoft.ML.Runtime.Training;
using Microsoft.ML.Trainers;
+using Microsoft.ML.Trainers.Recommender;
[assembly: LoadableClass(MatrixFactorizationTrainer.Summary, typeof(MatrixFactorizationTrainer), typeof(MatrixFactorizationTrainer.Arguments),
new Type[] { typeof(SignatureTrainer), typeof(SignatureMatrixRecommendingTrainer) },
diff --git a/test/Microsoft.ML.StaticPipelineTesting/Microsoft.ML.StaticPipelineTesting.csproj b/test/Microsoft.ML.StaticPipelineTesting/Microsoft.ML.StaticPipelineTesting.csproj
index d1034c5c1e..532cff72cc 100644
--- a/test/Microsoft.ML.StaticPipelineTesting/Microsoft.ML.StaticPipelineTesting.csproj
+++ b/test/Microsoft.ML.StaticPipelineTesting/Microsoft.ML.StaticPipelineTesting.csproj
@@ -17,7 +17,8 @@
+
-
\ No newline at end of file
+
diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs
index 01c498412c..5c6cb13755 100644
--- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs
+++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs
@@ -6,6 +6,7 @@
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Data.IO;
using Microsoft.ML.Runtime.Internal.Utilities;
+using Microsoft.ML.Runtime.Recommender;
using Microsoft.ML.Runtime.RunTests;
using Microsoft.ML.StaticPipe;
using Microsoft.ML.TestFramework;
@@ -877,5 +878,6 @@ public void TestPcaStatic()
Assert.True(type.IsVector && type.ItemType.RawKind == DataKind.R4);
Assert.True(type.VectorSize == 5);
}
+
}
}
\ No newline at end of file
diff --git a/test/Microsoft.ML.StaticPipelineTesting/Training.cs b/test/Microsoft.ML.StaticPipelineTesting/Training.cs
index 458f777d2b..ab0df3a834 100644
--- a/test/Microsoft.ML.StaticPipelineTesting/Training.cs
+++ b/test/Microsoft.ML.StaticPipelineTesting/Training.cs
@@ -18,6 +18,7 @@
using System.Linq;
using Xunit;
using Xunit.Abstractions;
+using Microsoft.ML.Trainers.Recommender;
namespace Microsoft.ML.StaticPipelineTesting
{
@@ -836,5 +837,54 @@ public void HogwildSGDBinaryClassification()
Assert.InRange(metrics.Auc, 0, 1);
Assert.InRange(metrics.Auprc, 0, 1);
}
+
+ [Fact]
+ public void MatrixFactorization()
+ {
+ // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+ // as a catalog of available operations and as the source of randomness.
+ var mlContext = new MLContext(seed: 1, conc: 1);
+
+ // Specify where to find data file
+ var dataPath = GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename);
+ var dataSource = new MultiFileSource(dataPath);
+
+ // Read data file. The file contains 3 columns, label (float value), matrixColumnIndex (unsigned integer key), and matrixRowIndex (unsigned integer key).
+ // More specifically, LoadKey(1, 0, 19) means that the matrixColumnIndex column is read from the 2nd (indexed by 1) column in the data file and as
+ // a key type (stored as 32-bit unsigned integer) ranged from 0 to 19 (aka the training matrix has 20 columns).
+ var reader = mlContext.Data.TextReader(ctx => (label: ctx.LoadFloat(0), matrixColumnIndex: ctx.LoadKey(1, 0, 19), matrixRowIndex: ctx.LoadKey(2, 0, 39)));
+
+ // The variable that the onFit delegate below assigns to. The obtained predictor will be assigned to this variable
+ // so that we will be able to touch it.
+ MatrixFactorizationPredictor pred = null;
+
+ // Create a statically-typed matrix factorization estimator. MatrixFactorization, defined in MatrixFactorizationStatic, expects
+ // (label, matrixColumnIndex, matrixRowIndex) in that order. Notice that only one thread is used for deterministic outcome.
+ var matrixFactorizationEstimator = reader.MakeNewEstimator()
+ .Append(r => (r.label, score: mlContext.Regression.Trainers.MatrixFactorization(r.label, r.matrixColumnIndex, r.matrixRowIndex, onFit: p => pred = p,
+ advancedSettings: args => { args.NumThreads = 1; })));
+
+ // Create a pipeline from the reader (the 1st step) and the matrix factorization estimator (the 2nd step).
+ var pipe = reader.Append(matrixFactorizationEstimator);
+
+ // pred will be assigned by the onFit method once the training process is finished, so pred must be null before training.
+ Assert.Null(pred);
+
+ // Train the pipeline on the given data file. Steps in the pipeline are sequentially fitted (by calling their Fit function).
+ var model = pipe.Fit(dataSource);
+
+ // pred got assigned so that one can inspect the predictor trained in pipeline.
+ Assert.NotNull(pred);
+
+ // Feed the data file into the trained pipeline. The data would be loaded by TextLoader (the 1st step) and then the output of the
+ // TextLoader would be fed into MatrixFactorizationEstimator.
+ var estimatedData = model.Read(dataSource);
+
+ // After the training process, the metrics for regression problems can be computed.
+ var metrics = mlContext.Regression.Evaluate(estimatedData, r => r.label, r => r.score);
+
+ // Naive test. Just make sure the pipeline runs.
+ Assert.InRange(metrics.L2, 0, 0.5);
+ }
}
}
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs
index acb483bccf..3253c02591 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs
@@ -2,9 +2,13 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.RunTests;
using Microsoft.ML.Trainers;
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
using Xunit;
namespace Microsoft.ML.Tests.TrainerEstimators
@@ -43,63 +47,73 @@ public void MatrixFactorization_Estimator()
[Fact]
public void MatrixFactorizationSimpleTrainAndPredict()
{
- using (var env = new LocalEnvironment(seed: 1, conc: 1))
- {
- // Specific column names of the considered data set
- string labelColumnName = "Label";
- string userColumnName = "User";
- string itemColumnName = "Item";
- string scoreColumnName = "Score";
+ var mlContext = new MLContext(seed: 1, conc: 1);
+
+ // Specific column names of the considered data set
+ string labelColumnName = "Label";
+ string userColumnName = "User";
+ string itemColumnName = "Item";
+ string scoreColumnName = "Score";
- // Create reader for both of training and test data sets
- var reader = new TextLoader(env, GetLoaderArgs(labelColumnName, userColumnName, itemColumnName));
+ // Create reader for both of training and test data sets
+ var reader = new TextLoader(mlContext, GetLoaderArgs(labelColumnName, userColumnName, itemColumnName));
- // Read training data as an IDataView object
- var data = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename)));
+ // Read training data as an IDataView object
+ var data = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename)));
- // Create a pipeline with a single operator.
- var pipeline = new MatrixFactorizationTrainer(env, labelColumnName, userColumnName, itemColumnName,
- advancedSettings:s=>
- {
- s.NumIterations = 3;
- s.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
- s.K = 7;
- });
+ // Create a pipeline with a single operator.
+ var pipeline = new MatrixFactorizationTrainer(mlContext, labelColumnName, userColumnName, itemColumnName,
+ advancedSettings:s=>
+ {
+ s.NumIterations = 3;
+ s.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
+ s.K = 7;
+ });
- // Train a matrix factorization model.
- var model = pipeline.Fit(data);
+ // Train a matrix factorization model.
+ var model = pipeline.Fit(data);
- // Read the test data set as an IDataView
- var testData = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename)));
+ // Read the test data set as an IDataView
+ var testData = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename)));
- // Apply the trained model to the test set
- var prediction = model.Transform(testData);
+ // Apply the trained model to the test set
+ var prediction = model.Transform(testData);
- // Get output schema and check its column names
- var outputSchema = model.GetOutputSchema(data.Schema);
- var expectedOutputNames = new string[] { labelColumnName, userColumnName, itemColumnName, scoreColumnName };
- foreach (var (i, col) in outputSchema.GetColumns())
- Assert.True(col.Name == expectedOutputNames[i]);
+ // Get output schema and check its column names
+ var outputSchema = model.GetOutputSchema(data.Schema);
+ var expectedOutputNames = new string[] { labelColumnName, userColumnName, itemColumnName, scoreColumnName };
+ foreach (var (i, col) in outputSchema.GetColumns())
+ Assert.True(col.Name == expectedOutputNames[i]);
- // Retrieve label column's index from the test IDataView
- testData.Schema.TryGetColumnIndex(labelColumnName, out int labelColumnId);
+ // Retrieve label column's index from the test IDataView
+ testData.Schema.TryGetColumnIndex(labelColumnName, out int labelColumnId);
- // Retrieve score column's index from the IDataView produced by the trained model
- prediction.Schema.TryGetColumnIndex(scoreColumnName, out int scoreColumnId);
+ // Retrieve score column's index from the IDataView produced by the trained model
+ prediction.Schema.TryGetColumnIndex(scoreColumnName, out int scoreColumnId);
- // Compute prediction errors
- var mlContext = new MLContext();
- var metrices = mlContext.Regression.Evaluate(prediction, label: labelColumnName, score: scoreColumnName);
+ // Compute prediction errors
+ var metrices = mlContext.Regression.Evaluate(prediction, label: labelColumnName, score: scoreColumnName);
- // Determine if the selected metric is reasonable for differen
- var expectedWindowsL2Error = 0.61528733643754685; // Windows baseline
+ // Determine if the selected metric is reasonable for different platforms
+ double tolerance = Math.Pow(10, -7);
+ if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
+ {
+ // Linux case
+ var expectedUnixL2Error = 0.616821448679879; // Linux baseline
+ Assert.InRange(metrices.L2, expectedUnixL2Error - tolerance, expectedUnixL2Error + tolerance);
+ }
+ else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
+
+ {
+ // Mac case
var expectedMacL2Error = 0.61192207960271; // Mac baseline
- var expectedLinuxL2Error = 0.616821448679879; // Linux baseline
- double tolerance = System.Math.Pow(10, -DigitsOfPrecision);
- bool inWindowsRange = expectedWindowsL2Error - tolerance < metrices.L2 && metrices.L2 < expectedWindowsL2Error + tolerance;
- bool inMacRange = expectedMacL2Error - tolerance < metrices.L2 && metrices.L2 < expectedMacL2Error + tolerance;
- bool inLinuxRange = expectedLinuxL2Error - tolerance < metrices.L2 && metrices.L2 < expectedLinuxL2Error + tolerance;
- Assert.True(inWindowsRange || inMacRange || inLinuxRange);
+ Assert.InRange(metrices.L2, expectedMacL2Error - 5e-3, expectedMacL2Error + 5e-3); // 1e-7 is too tight for Mac, so a looser 5e-3 tolerance is used here
+ }
+ else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+ {
+ // Windows case
+ var expectedWindowsL2Error = 0.61528733643754685; // Windows baseline
+ Assert.InRange(metrices.L2, expectedWindowsL2Error - tolerance, expectedWindowsL2Error + tolerance);
}
}
@@ -117,5 +131,64 @@ private TextLoader.Arguments GetLoaderArgs(string labelColumnName, string matrix
}
};
}
+
+ // The following constants define the shape of a matrix. Its shape is _synthesizedMatrixRowCount-by-_synthesizedMatrixColumnCount.
+ // The constant _synthesizedMatrixFirstRowIndex indicates the integer that would be mapped to the first row index. If user data uses
+ // 0-based indices for rows, _synthesizedMatrixFirstRowIndex can be set to 0. Similarly, for 1-based indices, _synthesizedMatrixFirstRowIndex
+ // could be 1. _synthesizedMatrixFirstColumnIndex plays the same role for column indices.
+ const int _synthesizedMatrixFirstColumnIndex = 1;
+ const int _synthesizedMatrixFirstRowIndex = 1;
+ const int _synthesizedMatrixColumnCount = 60;
+ const int _synthesizedMatrixRowCount = 100;
+
+ internal class MatrixElement
+ {
+ // Matrix column index starts from 1 and is at most _synthesizedMatrixColumnCount.
+ // Contiguous=true means that all values from 1 to _synthesizedMatrixColumnCount are allowed keys.
+ [KeyType(Contiguous=true, Count=_synthesizedMatrixColumnCount, Min=_synthesizedMatrixFirstColumnIndex)]
+ public uint MatrixColumnIndex;
+ // Matrix row index starts from 1 and is at most _synthesizedMatrixRowCount.
+ // Contiguous=true means that all values from 1 to _synthesizedMatrixRowCount are allowed keys.
+ [KeyType(Contiguous=true, Count=_synthesizedMatrixRowCount, Min=_synthesizedMatrixFirstRowIndex)]
+ public uint MatrixRowIndex;
+ // The value at the MatrixColumnIndex-th column and the MatrixRowIndex-th row in the considered matrix.
+ public float Value;
+ }
+
+ [Fact]
+ public void MatrixFactorizationInMemoryData()
+ {
+ // Create an in-memory matrix as a list of tuples (column index, row index, value).
+ var dataMatrix = new List();
+ for (uint i = _synthesizedMatrixFirstColumnIndex; i < _synthesizedMatrixFirstColumnIndex + _synthesizedMatrixColumnCount; ++i)
+ for (uint j = _synthesizedMatrixFirstRowIndex; j < _synthesizedMatrixFirstRowIndex + _synthesizedMatrixRowCount; ++j)
+ dataMatrix.Add(new MatrixElement() { MatrixColumnIndex = i, MatrixRowIndex = j, Value = (i + j) % 5 });
+
+ // Convert the in-memory matrix into an IDataView so that ML.NET components can consume it.
+ var dataView = ComponentCreation.CreateDataView(Env, dataMatrix);
+
+ // Create a matrix factorization trainer which may consume "Value" as the training label, "MatrixColumnIndex" as the
+ // matrix's column index, and "MatrixRowIndex" as the matrix's row index.
+ var mlContext = new MLContext(seed: 1, conc: 1);
+ var pipeline = new MatrixFactorizationTrainer(mlContext, "Value", "MatrixColumnIndex", "MatrixRowIndex",
+ advancedSettings:s=>
+ {
+ s.NumIterations = 10;
+ s.NumThreads = 1; // To eliminate randomness, # of threads must be 1.
+ s.K = 32;
+ });
+
+ // Train a matrix factorization model.
+ var model = pipeline.Fit(dataView);
+
+ // Apply the trained model to the training set
+ var prediction = model.Transform(dataView);
+
+ // Calculate regression metrics for the prediction result
+ var metrics = mlContext.Regression.Evaluate(prediction, label: "Value", score: "Score");
+
+ // Naive test. Just check the pipeline runs and the L2 error on the training set is small.
+ Assert.True(metrics.L2 < 0.1);
+ }
}
}