Skip to content

Commit

Permalink
Rename ProduceCharacterTokens to ProduceCharactersAsKeys
Browse files: browse the repository at this point in the history
  • Loading branch information
wschin committed Mar 13, 2019
1 parent 8e5c515 commit 883784a
Show file tree
Hide file tree
Showing 7 changed files with 8 additions and 8 deletions.
2 changes: 1 addition & 1 deletion docs/code/MlNetCookBook.md
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,7 @@ var pipeline =
ngramLength: 2, useAllLengths: false))

// NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting.
.Append(mlContext.Transforms.Text.ProduceCharacterTokens("MessageChars", "Message"))
.Append(mlContext.Transforms.Text.ProduceCharactersAsKeys("MessageChars", "Message"))
.Append(new NgramExtractingEstimator(mlContext, "BagOfTrichar", "MessageChars",
ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public static void NgramTransform()
// A pipeline that tokenizes text into characters and then combines them into n-grams.
// The pipeline uses the default settings to featurize.

var charsPipeline = ml.Transforms.Text.ProduceCharacterTokens("Chars", "SentimentText", useMarkerCharacters: false);
var charsPipeline = ml.Transforms.Text.ProduceCharactersAsKeys("Chars", "SentimentText", useMarkerCharacters: false);
var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams("CharsUnigrams", "Chars", ngramLength: 1);
var ngramTwpPipeline = ml.Transforms.Text.ProduceNgrams("CharsTwograms", "Chars");
var oneCharsPipeline = charsPipeline.Append(ngramOnePipeline);
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ public bool Equals(Reconciler other)
/// </summary>
/// <param name="input">The column to apply to.</param>
/// <param name="useMarkerCharacters">Whether to use marker characters to separate words.</param>
public static VarVector<Key<ushort, string>> ProduceCharacterTokens(this Scalar<string> input, bool useMarkerCharacters = true) => new OutPipelineColumn(input, useMarkerCharacters);
public static VarVector<Key<ushort, string>> ProduceCharactersAsKeys(this Scalar<string> input, bool useMarkerCharacters = true) => new OutPipelineColumn(input, useMarkerCharacters);
}

/// <summary>
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Transforms/Text/TextCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public static class TextCatalog
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="useMarkerCharacters">Whether to prepend a marker character, <c>0x02</c>, to the beginning,
/// and append another marker character, <c>0x03</c>, to the end of the output vector of characters.</param>
public static TokenizingByCharactersEstimator ProduceCharacterTokens(this TransformsCatalog.TextTransforms catalog,
public static TokenizingByCharactersEstimator ProduceCharactersAsKeys(this TransformsCatalog.TextTransforms catalog,
string outputColumnName,
string inputColumnName = null,
bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters)
Expand All @@ -72,7 +72,7 @@ public static class TextCatalog
/// and append another marker character, <c>0x03</c>, to the end of the output vector of characters.</param>
/// <param name="columns">Pairs of columns to run the tokenization on.</param>

public static TokenizingByCharactersEstimator ProduceCharacterTokens(this TransformsCatalog.TextTransforms catalog,
public static TokenizingByCharactersEstimator ProduceCharactersAsKeys(this TransformsCatalog.TextTransforms catalog,
bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters,
params ColumnOptions[] columns)
=> new TokenizingByCharactersEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), useMarkerCharacters, ColumnOptions.ConvertToValueTuples(columns));
Expand Down
2 changes: 1 addition & 1 deletion test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ public void Tokenize()
.Append(r => (
r.label,
tokens: r.text.ProduceWordTokens(),
chars: r.text.ProduceCharacterTokens()));
chars: r.text.ProduceCharactersAsKeys()));

var tdata = est.Fit(data).Transform(data);
var schema = tdata.AsDynamic.Schema;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,7 @@ private void TextFeaturizationOn(string dataPath)
BagOfBigrams: r.Message.NormalizeText().ProduceHashedWordBags(ngramLength: 2, useAllLengths: false),
// NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting.
BagOfTrichar: r.Message.ProduceCharacterTokens().ProduceNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf),
BagOfTrichar: r.Message.ProduceCharactersAsKeys().ProduceNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf),
// NLP pipeline 4: word embeddings.
// PretrainedModelKind.Sswe is used here for performance of the test. In a real
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ private void TextFeaturizationOn(string dataPath)
ngramLength: 2, useAllLengths: false))

// NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting.
.Append(mlContext.Transforms.Text.ProduceCharacterTokens("MessageChars", "Message"))
.Append(mlContext.Transforms.Text.ProduceCharactersAsKeys("MessageChars", "Message"))
.Append(new NgramExtractingEstimator(mlContext, "BagOfTrichar", "MessageChars",
ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf))

Expand Down

0 comments on commit 883784a

Please sign in to comment.