Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderStatic.cs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,15 @@ internal Context(Reconciler rec)
/// <returns>The column representation.</returns>
public Vector<bool> LoadBool(int minOrdinal, int? maxOrdinal) => Load<bool>(DataKind.BL, minOrdinal, maxOrdinal);

/// <summary>
/// Declares a key-typed column, stored as a 32-bit unsigned integer, that is read from a single field of the text file.
/// </summary>
/// <param name="ordinal">The zero-based index of the field that holds the key values.</param>
/// <param name="minKeyValue">The smallest value of the loaded key values.</param>
/// <param name="maxKeyValue">The largest allowed value of the loaded key values, or <c>null</c> if the key is unbounded.</param>
/// <returns>The column representation.</returns>
public Key<uint> LoadKey(int ordinal, ulong minKeyValue, ulong? maxKeyValue)
{
    // Keys loaded through this entry point always use U4 as their underlying storage kind.
    return Load<uint>(DataKind.U4, ordinal, minKeyValue, maxKeyValue);
}

/// <summary>
/// Reads a scalar single-precision floating point column from a single field in the text file.
/// </summary>
Expand Down Expand Up @@ -209,6 +218,51 @@ private Vector<T> Load<T>(DataKind kind, int minOrdinal, int? maxOrdinal)
return new MyVector<T>(_rec, kind, minOrdinal, maxOrdinal);
}

/// <summary>
/// Validates the arguments and creates the representation of a key-typed column read from a single field.
/// </summary>
/// <typeparam name="T">The physical type used to store the key values.</typeparam>
/// <param name="kind">The storage kind the field is loaded as.</param>
/// <param name="ordinal">The zero-based index of the field to read from; must be non-negative.</param>
/// <param name="minKeyValue">The smallest value of the loaded key values.</param>
/// <param name="maxKeyValue">The largest allowed key value, or <c>null</c> if the key is unbounded.
/// When specified it must not be smaller than <paramref name="minKeyValue"/>.</param>
/// <returns>The column representation.</returns>
private Key<T> Load<T>(DataKind kind, int ordinal, ulong minKeyValue, ulong? maxKeyValue)
{
    Contracts.CheckParam(ordinal >= 0, nameof(ordinal), "Should be non-negative");
    // minKeyValue is an unsigned integer, so a non-negativity check on it would always pass and is omitted.
    Contracts.CheckParam(maxKeyValue == null || maxKeyValue >= minKeyValue, nameof(maxKeyValue), "Should be greater than or equal to minimum key value or null");
    return new MyKey<T>(_rec, kind, ordinal, minKeyValue, maxKeyValue);
}

/// <summary>
/// A data type used to bridge <see cref="PipelineColumn"/> and <see cref="TextLoader.Column"/>. It can be used as <see cref="PipelineColumn"/>
/// in static-typed pipelines and provides <see cref="MyKey{T}.Create"/> for translating itself into <see cref="TextLoader.Column"/>.
/// </summary>
private class MyKey<T> : Key<T>, IPipelineArgColumn
{
    // The storage type that the targeted content would be loaded as.
    private readonly DataKind _kind;
    // The zero-based position of the field where the key value gets read from.
    private readonly int _ordinal;
    // The lower bound of the key value.
    private readonly ulong _minKeyValue;
    // The upper bound of the key value. Its value is null if unbounded.
    private readonly ulong? _maxKeyValue;

    // Construct a representation for a key-typed column loaded from a text file. Key values are assumed to be contiguous.
    // The arguments are stored as given; no validation is performed here.
    public MyKey(Reconciler rec, DataKind kind, int ordinal, ulong minKeyValue, ulong? maxKeyValue = null)
        : base(rec, null)
    {
        _kind = kind;
        _ordinal = ordinal;
        _minKeyValue = minKeyValue;
        _maxKeyValue = maxKeyValue;
    }

    // Translate the internal variable representation to a column of TextLoader: a single-field source
    // at the stored ordinal, loaded as the stored kind and restricted to the stored key range.
    public Column Create()
    {
        return new Column()
        {
            Type = _kind,
            Source = new[] { new Range(_ordinal) },
            KeyRange = new KeyRange(_minKeyValue, _maxKeyValue)
        };
    }
}

private class MyScalar<T> : Scalar<T>, IPipelineArgColumn
{
private readonly DataKind _kind;
Expand Down
3 changes: 2 additions & 1 deletion src/Microsoft.ML.Recommender/MatrixFactorizationPredictor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@
using Microsoft.ML.Runtime.Recommender;
using Microsoft.ML.Runtime.Recommender.Internal;
using Microsoft.ML.Trainers;
using Microsoft.ML.Trainers.Recommender;

[assembly: LoadableClass(typeof(MatrixFactorizationPredictor), null, typeof(SignatureLoadModel), "Matrix Factorization Predictor Executor", MatrixFactorizationPredictor.LoaderSignature)]

[assembly: LoadableClass(typeof(MatrixFactorizationPredictionTransformer), typeof(MatrixFactorizationPredictionTransformer),
null, typeof(SignatureLoadModel), "", MatrixFactorizationPredictionTransformer.LoaderSignature)]

namespace Microsoft.ML.Runtime.Recommender
namespace Microsoft.ML.Trainers.Recommender
{
/// <summary>
/// <see cref="MatrixFactorizationPredictor"/> stores two factor matrices, P and Q, for approximating the training matrix, R, by P * Q,
Expand Down
126 changes: 126 additions & 0 deletions src/Microsoft.ML.Recommender/MatrixFactorizationStatic.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Core.Data;
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.StaticPipe.Runtime;
using Microsoft.ML.Trainers;
using Microsoft.ML.Trainers.Recommender;
using System;
using System.Collections.Generic;

namespace Microsoft.ML.StaticPipe
{
/// <summary>
/// Extension methods that make the matrix factorization trainer available in statically-typed pipelines.
/// </summary>
public static class MatrixFactorizationExtensions
{
    /// <summary>
    /// Predict matrix entry using matrix factorization
    /// </summary>
    /// <typeparam name="T">The type of physical value of matrix's row and column index. It must be an integer type such as uint.</typeparam>
    /// <param name="ctx">The regression context trainer object.</param>
    /// <param name="label">The label variable.</param>
    /// <param name="matrixColumnIndex">The column index of the considered matrix.</param>
    /// <param name="matrixRowIndex">The row index of the considered matrix.</param>
    /// <param name="regularizationCoefficient">The coefficient applied to the Frobenius norms of the factor matrices. Must be non-negative.</param>
    /// <param name="approximationRank">Rank of the two factor matrices whose product is used to approximate the considered matrix. Must be positive.</param>
    /// <param name="learningRate">Initial learning rate. Must be positive.</param>
    /// <param name="numIterations">Number of training iterations. Must be positive.</param>
    /// <param name="advancedSettings">A delegate to set more settings. It is invoked after the arguments above have been applied,
    /// so any value it sets takes precedence over them.</param>
    /// <param name="onFit">A delegate that is called every time the
    /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
    /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
    /// the model that was trained. Note that this action cannot change the result in any way; it is only a way for the caller to
    /// be informed about what was learnt.</param>
    /// <returns>The predicted output.</returns>
    public static Scalar<float> MatrixFactorization<T>(this RegressionContext.RegressionTrainers ctx,
        Scalar<float> label, Key<T> matrixColumnIndex, Key<T> matrixRowIndex,
        float regularizationCoefficient = 0.1f,
        int approximationRank = 8,
        float learningRate = 0.1f,
        int numIterations = 20,
        Action<MatrixFactorizationTrainer.Arguments> advancedSettings = null,
        Action<MatrixFactorizationPredictor> onFit = null)
    {
        Contracts.CheckValue(label, nameof(label));
        Contracts.CheckValue(matrixColumnIndex, nameof(matrixColumnIndex));
        Contracts.CheckValue(matrixRowIndex, nameof(matrixRowIndex));

        Contracts.CheckParam(regularizationCoefficient >= 0, nameof(regularizationCoefficient), "Must be non-negative");
        Contracts.CheckParam(approximationRank > 0, nameof(approximationRank), "Must be positive");
        Contracts.CheckParam(learningRate > 0, nameof(learningRate), "Must be positive");
        Contracts.CheckParam(numIterations > 0, nameof(numIterations), "Must be positive");
        Contracts.CheckValueOrNull(advancedSettings);
        Contracts.CheckValueOrNull(onFit);

        // The factory is invoked by the reconciler once the actual column names are known.
        var rec = new MatrixFactorizationReconciler<T>((env, labelColName, matrixColumnIndexColName, matrixRowIndexColName) =>
        {
            var trainer = new MatrixFactorizationTrainer(env, labelColName, matrixColumnIndexColName, matrixRowIndexColName, advancedSettings:
                args =>
                {
                    args.Lambda = regularizationCoefficient;
                    args.K = approximationRank;
                    args.Eta = learningRate;
                    args.NumIterations = numIterations;
                    // The previous settings may be overwritten by the line below.
                    advancedSettings?.Invoke(args);
                });
            if (onFit != null)
                return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
            else
                return trainer;
        }, label, matrixColumnIndex, matrixRowIndex);
        return rec.Output;
    }

    /// <summary>
    /// Reconciler that ties the three statically-typed inputs (label, matrix column index, matrix row index)
    /// to the single score output produced by the matrix factorization estimator.
    /// </summary>
    private sealed class MatrixFactorizationReconciler<T> : TrainerEstimatorReconciler
    {
        // Output column name of the trained estimator.
        private static string FixedOutputName => DefaultColumnNames.Score;

        // A function used to create trainer of matrix factorization. It instantiates a trainer by indicating the
        // expected inputs and output (IDataView's) column names. That trainer has a Fit(IDataView data) for learning
        // a MatrixFactorizationPredictionTransformer from the data.
        private readonly Func<IHostEnvironment, string, string, string, IEstimator<ITransformer>> _factory;

        /// <summary>
        /// The only output produced by matrix factorization predictor
        /// </summary>
        public Scalar<float> Output { get; }

        /// <summary>
        /// The output columns.
        /// </summary>
        protected override IEnumerable<PipelineColumn> Outputs { get; }

        // Registers the inputs in the order (label, column index, row index); ReconcileCore relies on that order.
        public MatrixFactorizationReconciler(Func<IHostEnvironment, string, string, string, IEstimator<ITransformer>> factory,
            Scalar<float> label, Key<T> matColumnIndex, Key<T> matRowIndex)
            : base(MakeInputs(Contracts.CheckRef(label, nameof(label)), Contracts.CheckRef(matColumnIndex, nameof(matColumnIndex)), Contracts.CheckRef(matRowIndex, nameof(matRowIndex))),
                  new string[] { FixedOutputName })
        {
            Contracts.AssertValue(factory);
            _factory = factory;

            Output = new Impl(this);
            Outputs = new PipelineColumn[] { Output };
        }

        // Packs the inputs into an array in the order (label, column index, row index). The parameter names
        // follow the order in which the constructor passes its arguments.
        private static PipelineColumn[] MakeInputs(Scalar<float> label, PipelineColumn matrixColumnIndex, PipelineColumn matrixRowIndex)
            => new PipelineColumn[] { label, matrixColumnIndex, matrixRowIndex };

        protected override IEstimator<ITransformer> ReconcileCore(IHostEnvironment env, string[] inputNames)
        {
            Contracts.AssertValue(env);

            // The first, second, third names are label, matrix's column index, and matrix's row index, respectively.
            return _factory(env, inputNames[0], inputNames[1], inputNames[2]);
        }

        // The statically-typed score column; it depends on all of the reconciler's inputs.
        private sealed class Impl : Scalar<float>
        {
            public Impl(MatrixFactorizationReconciler<T> rec) : base(rec, rec.Inputs) { }
        }
    }
}
}
1 change: 1 addition & 0 deletions src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
using Microsoft.ML.Runtime.Recommender.Internal;
using Microsoft.ML.Runtime.Training;
using Microsoft.ML.Trainers;
using Microsoft.ML.Trainers.Recommender;

[assembly: LoadableClass(MatrixFactorizationTrainer.Summary, typeof(MatrixFactorizationTrainer), typeof(MatrixFactorizationTrainer.Arguments),
new Type[] { typeof(SignatureTrainer), typeof(SignatureMatrixRecommendingTrainer) },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@

<NativeAssemblyReference Include="CpuMathNative" />
<NativeAssemblyReference Include="FactorizationMachineNative" />
<NativeAssemblyReference Include="MatrixFactorizationNative" />
<NativeAssemblyReference Include="FastTreeNative" />
<NativeAssemblyReference Include="LdaNative" />
</ItemGroup>
</Project>
</Project>
2 changes: 2 additions & 0 deletions test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Data.IO;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Runtime.Recommender;
using Microsoft.ML.Runtime.RunTests;
using Microsoft.ML.StaticPipe;
using Microsoft.ML.TestFramework;
Expand Down Expand Up @@ -877,5 +878,6 @@ public void TestPcaStatic()
Assert.True(type.IsVector && type.ItemType.RawKind == DataKind.R4);
Assert.True(type.VectorSize == 5);
}

}
}
50 changes: 50 additions & 0 deletions test/Microsoft.ML.StaticPipelineTesting/Training.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
using System.Linq;
using Xunit;
using Xunit.Abstractions;
using Microsoft.ML.Trainers.Recommender;

namespace Microsoft.ML.StaticPipelineTesting
{
Expand Down Expand Up @@ -836,5 +837,54 @@ public void HogwildSGDBinaryClassification()
Assert.InRange(metrics.Auc, 0, 1);
Assert.InRange(metrics.Auprc, 0, 1);
}

[Fact]
public void MatrixFactorization()
{
    // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
    // as a catalog of available operations and as the source of randomness.
    var mlContext = new MLContext(seed: 1, conc: 1);

    // Specify where to find data file
    var dataPath = GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename);
    var dataSource = new MultiFileSource(dataPath);

    // Read data file. The file contains 3 columns, label (float value), matrixColumnIndex (unsigned integer key), and matrixRowIndex (unsigned integer key).
    // More specifically, LoadKey(1, 0, 19) means that the matrixColumnIndex column is read from the 2nd (indexed by 1) column in the data file and as
    // a key type (stored as 32-bit unsigned integer) ranged from 0 to 19 (aka the training matrix has 20 columns).
    var reader = mlContext.Data.TextReader(ctx => (label: ctx.LoadFloat(0), matrixColumnIndex: ctx.LoadKey(1, 0, 19), matrixRowIndex: ctx.LoadKey(2, 0, 39)));

    // The variable that the onFit delegate below assigns the trained predictor to, so that the test can inspect it
    // after training.
    MatrixFactorizationPredictor pred = null;

    // Create a statically-typed matrix factorization estimator. The MatrixFactorization's input and output defined in MatrixFactorizationStatic
    // tell what (aka a Scalar<float>) is expected. Notice that only one thread is used for deterministic outcome.
    // The index columns are passed by name so that the column index and the row index cannot be silently swapped.
    var matrixFactorizationEstimator = reader.MakeNewEstimator()
        .Append(r => (r.label, score: mlContext.Regression.Trainers.MatrixFactorization(r.label,
            matrixColumnIndex: r.matrixColumnIndex, matrixRowIndex: r.matrixRowIndex, onFit: p => pred = p,
            advancedSettings: args => { args.NumThreads = 1; })));

    // Create a pipeline from the reader (the 1st step) and the matrix factorization estimator (the 2nd step).
    var pipe = reader.Append(matrixFactorizationEstimator);

    // pred will be assigned by the onFit method once the training process is finished, so pred must be null before training.
    Assert.Null(pred);

    // Train the pipeline on the given data file. Steps in the pipeline are sequentially fitted (by calling their Fit function).
    var model = pipe.Fit(dataSource);

    // pred got assigned so that one can inspect the predictor trained in pipeline.
    Assert.NotNull(pred);

    // Feed the data file into the trained pipeline. The data would be loaded by TextLoader (the 1st step) and then the output of the
    // TextLoader would be fed into MatrixFactorizationEstimator.
    var estimatedData = model.Read(dataSource);

    // After the training process, the metrics for regression problems can be computed.
    var metrics = mlContext.Regression.Evaluate(estimatedData, r => r.label, r => r.score);

    // Naive test. Just make sure the pipeline runs.
    Assert.InRange(metrics.L2, 0, 0.5);
}
}
}
Loading