Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
using Microsoft.ML.Data;
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.Data;
using System;
using System.Collections.Generic;

namespace Microsoft.ML.Samples.Dynamic
{
/// <summary>
/// Sample demonstrating text featurization with the Latent Dirichlet Allocation (LDA) transform.
/// </summary>
public class LdaTransformExample
{
    /// <summary>
    /// Builds a word-bag + LDA pipeline over the sample topics data, fits it, applies it,
    /// and writes the resulting feature vectors to the console.
    /// </summary>
    public static void LdaTransform()
    {
        // Column names: the input comes from the sample data class, the output is named here.
        string reviewColumn = nameof(SamplesUtils.DatasetUtils.SampleTopicsData.Review);
        string ldaColumn = "LdaFeatures";

        // The ML context is the entry point for ML.NET operations. It can be used for
        // exception tracking and logging, and it is also the source of randomness.
        var mlContext = new MLContext();

        // Load a small in-memory dataset and wrap it in a data view.
        //
        // Preview of one of the columns of the topics data.
        // The Review column contains the keys associated with a particular body of text.
        //
        // Review
        // "animals birds cats dogs fish horse"
        // "horse birds house fish duck cats"
        // "car truck driver bus pickup"
        // "car truck driver bus pickup horse"
        IEnumerable<SamplesUtils.DatasetUtils.SampleTopicsData> topics = SamplesUtils.DatasetUtils.GetTopicsData();
        var trainingView = mlContext.CreateStreamingDataView(topics);

        // Featurization pipeline for the "Review" column: word bags first, then LDA with three topics.
        var wordBags = mlContext.Transforms.Text.ProduceWordBags(reviewColumn);
        var lda = mlContext.Transforms.Text.LatentDirichletAllocation(reviewColumn, ldaColumn, numTopic: 3);
        var pipeline = wordBags.Append(lda);

        // Fit the pipeline on the data, then apply it to that same data.
        var model = pipeline.Fit(trainingView);
        var transformedData = model.Transform(trainingView);

        // Read back the column produced by the LDA step.
        var featureRows = transformedData.GetColumn<VBuffer<float>>(mlContext, ldaColumn);

        Console.WriteLine($"{ldaColumn} column obtained post-transformation.");
        foreach (var row in featureRows)
        {
            // Print every component of the row on a single line, separated by spaces.
            foreach (var component in row.GetValues())
            {
                Console.Write($"{component} ");
            }

            Console.WriteLine("");
        }

        Console.WriteLine("===================================================");

        // LdaFeatures column obtained post-transformation.
        // For LDA, numTopic:3 was specified, so each row of text is featurized as a vector of floats with length 3.
        //
        // 0.1818182 0.4545455 0.3636364
        // 0.3636364 0.1818182 0.4545455
        // 0.2222222 0.2222222 0.5555556
        // 0.2727273 0.09090909 0.6363636
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
<NativeAssemblyReference Include="CpuMathNative" />
<NativeAssemblyReference Include="FastTreeNative" />
<NativeAssemblyReference Include="MatrixFactorizationNative" />
<NativeAssemblyReference Include="LdaNative" />

<ProjectReference Include="..\..\..\src\Microsoft.ML.Analyzer\Microsoft.ML.Analyzer.csproj">
<ReferenceOutputAssembly>false</ReferenceOutputAssembly>
Expand Down
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.Samples/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ internal static class Program
{
// Program entry point: runs the sample call(s) listed in the body. The args parameter is not used.
// NOTE(review): this listing comes from a diff hunk (1 addition & 1 deletion), so the first call
// below is presumably the removed line and the second its replacement — confirm against the file.
static void Main(string[] args)
{
MatrixFactorizationExample.MatrixFactorizationInMemoryData();
LdaTransformExample.LdaTransform();
}
}
}
7 changes: 7 additions & 0 deletions src/Microsoft.ML.Transforms/Text/TextCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,13 @@ public static NgramHashEstimator ProduceHashedNgrams(this TransformsCatalog.Text
/// <param name="numSummaryTermPerTopic">The number of words to summarize the topic.</param>
/// <param name="numBurninIterations">The number of burn-in iterations.</param>
/// <param name="resetRandomGenerator">Reset the random number generator for each document.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[LatentDirichletAllocation](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/LdaTransform.cs)]
/// ]]>
/// </format>
/// </example>
public static LatentDirichletAllocationEstimator LatentDirichletAllocation(this TransformsCatalog.TextTransforms catalog,
string inputColumn,
string outputColumn = null,
Expand Down