Skip to content

Commit

Permalink
Remove some advanced parameters in simple API
Browse files Browse the repository at this point in the history
  • Loading branch information
wschin committed Mar 9, 2019
1 parent 5839e66 commit eac9227
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 21 deletions.
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public static void Example()

// A pipeline for featurizing the "Review" column
var pipeline = ml.Transforms.Text.ProduceWordBags(review).
Append(ml.Transforms.Text.LatentDirichletAllocation(review, ldaFeatures, numberOfTopics:3));
Append(ml.Transforms.Text.LatentDirichletAllocation(review, ldaFeatures, numberOfTopics: 3));

// The transformed data
var transformer = pipeline.Fit(trainData);
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Transforms/Text/LdaTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -945,10 +945,10 @@ internal static class Defaults
/// <param name="beta">Dirichlet prior on vocab-topic vectors.</param>
/// <param name="samplingStepCount">Number of Metropolis-Hastings steps.</param>
/// <param name="maximumNumberOfIterations">Number of iterations.</param>
/// <param name="likelihoodInterval">Compute log likelihood over local dataset on this iteration interval.</param>
/// <param name="numberOfThreads">The number of training threads. Default value depends on number of logical processors.</param>
/// <param name="maximumTokenCountPerDocument">The threshold of maximum count of tokens per doc.</param>
/// <param name="numberOfSummaryTermsPerTopic">The number of words to summarize the topic.</param>
/// <param name="likelihoodInterval">Compute log likelihood over local dataset on this iteration interval.</param>
/// <param name="numberOfBurninIterations">The number of burn-in iterations.</param>
/// <param name="resetRandomGenerator">Reset the random number generator for each document.</param>
internal LatentDirichletAllocationEstimator(IHostEnvironment env,
Expand All @@ -958,10 +958,10 @@ internal static class Defaults
float beta = Defaults.Beta,
int samplingStepCount = Defaults.SamplingStepCount,
int maximumNumberOfIterations = Defaults.MaximumNumberOfIterations,
int likelihoodInterval = Defaults.LikelihoodInterval,
int numberOfThreads = Defaults.NumberOfThreads,
int maximumTokenCountPerDocument = Defaults.MaximumTokenCountPerDocument,
int numberOfSummaryTermsPerTopic = Defaults.NumberOfSummaryTermsPerTopic,
int likelihoodInterval = Defaults.LikelihoodInterval,
int numberOfBurninIterations = Defaults.NumberOfBurninIterations,
bool resetRandomGenerator = Defaults.ResetRandomGenerator)
: this(env, new[] { new ColumnOptions(outputColumnName, inputColumnName ?? outputColumnName,
Expand Down
29 changes: 12 additions & 17 deletions src/Microsoft.ML.Transforms/Text/TextCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -580,16 +580,9 @@ public static class TextCatalog
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="numberOfTopics">The number of topics.</param>
/// <param name="alphaSum">Dirichlet prior on document-topic vectors.</param>
/// <param name="beta">Dirichlet prior on vocab-topic vectors.</param>
/// <param name="samplingStepCount">Number of Metropolis-Hastings steps.</param>
/// <param name="maximumNumberOfIterations">Number of iterations.</param>
/// <param name="likelihoodInterval">Compute log likelihood over local dataset on this iteration interval.</param>
/// <param name="numberOfThreads">The number of training threads. Default value depends on number of logical processors.</param>
/// <param name="maximumTokenCountPerDocument">The threshold of maximum count of tokens per doc.</param>
/// <param name="numberOfSummaryTermsPerTopic">The number of words to summarize the topic.</param>
/// <param name="numberOfBurninIterations">The number of burn-in iterations.</param>
/// <param name="resetRandomGenerator">Reset the random number generator for each document.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
Expand All @@ -601,19 +594,21 @@ public static class TextCatalog
string outputColumnName,
string inputColumnName = null,
int numberOfTopics = LatentDirichletAllocationEstimator.Defaults.NumberOfTopics,
float alphaSum = LatentDirichletAllocationEstimator.Defaults.AlphaSum,
float beta = LatentDirichletAllocationEstimator.Defaults.Beta,
int samplingStepCount = LatentDirichletAllocationEstimator.Defaults.SamplingStepCount,
int maximumNumberOfIterations = LatentDirichletAllocationEstimator.Defaults.MaximumNumberOfIterations,
int likelihoodInterval = LatentDirichletAllocationEstimator.Defaults.LikelihoodInterval,
int numberOfThreads = LatentDirichletAllocationEstimator.Defaults.NumberOfThreads,
int maximumTokenCountPerDocument = LatentDirichletAllocationEstimator.Defaults.MaximumTokenCountPerDocument,
int numberOfSummaryTermsPerTopic = LatentDirichletAllocationEstimator.Defaults.NumberOfSummaryTermsPerTopic,
int numberOfBurninIterations = LatentDirichletAllocationEstimator.Defaults.NumberOfBurninIterations,
bool resetRandomGenerator = LatentDirichletAllocationEstimator.Defaults.ResetRandomGenerator)
int numberOfSummaryTermsPerTopic = LatentDirichletAllocationEstimator.Defaults.NumberOfSummaryTermsPerTopic)
=> new LatentDirichletAllocationEstimator(CatalogUtils.GetEnvironment(catalog),
outputColumnName, inputColumnName, numberOfTopics, alphaSum, beta, samplingStepCount, maximumNumberOfIterations, likelihoodInterval, numberOfThreads,
maximumTokenCountPerDocument, numberOfSummaryTermsPerTopic, numberOfBurninIterations, resetRandomGenerator);
outputColumnName, inputColumnName, numberOfTopics,
LatentDirichletAllocationEstimator.Defaults.AlphaSum,
LatentDirichletAllocationEstimator.Defaults.Beta,
LatentDirichletAllocationEstimator.Defaults.SamplingStepCount,
maximumNumberOfIterations,
LatentDirichletAllocationEstimator.Defaults.NumberOfThreads,
maximumTokenCountPerDocument,
numberOfSummaryTermsPerTopic,
LatentDirichletAllocationEstimator.Defaults.LikelihoodInterval,
LatentDirichletAllocationEstimator.Defaults.NumberOfBurninIterations,
LatentDirichletAllocationEstimator.Defaults.ResetRandomGenerator);

/// <summary>
/// Uses <a href="https://arxiv.org/abs/1412.1576">LightLDA</a> to transform a document (represented as a vector of floats)
Expand Down
5 changes: 4 additions & 1 deletion test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Runtime;
using Microsoft.ML.Transforms;
using Microsoft.ML.Transforms.Text;
using Xunit;

namespace Microsoft.ML.RunTests
Expand Down Expand Up @@ -1318,7 +1319,9 @@ public void TestLDATransform()
builder.AddColumn("F1V", NumberDataViewType.Single, data);
var srcView = builder.GetDataView();

var est = ML.Transforms.Text.LatentDirichletAllocation("F1V", numberOfTopics: 3, numberOfSummaryTermsPerTopic: 3, alphaSum: 3, numberOfThreads: 1, resetRandomGenerator: true);
var opt = new LatentDirichletAllocationEstimator.ColumnOptions(name: "F1V", numberOfTopics: 3,
numberOfSummaryTermsPerTopic: 3, alphaSum: 3, numberOfThreads: 1, resetRandomGenerator: true);
var est = ML.Transforms.Text.LatentDirichletAllocation(opt);
var ldaTransformer = est.Fit(srcView);
var transformedData = ldaTransformer.Transform(srcView);

Expand Down

0 comments on commit eac9227

Please sign in to comment.