Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
36ffe1c
Addition of the ValueMappingEstimator and ValueMappingTransform.
singlis Nov 21, 2018
f31c7e6
Merge remote-tracking branch 'origin/master' into singlis/term
singlis Nov 21, 2018
7335dd4
Re-enabled workout test, fixed build errors
singlis Nov 21, 2018
fc39679
Merge remote-tracking branch 'origin/master' into singlis/term
singlis Nov 26, 2018
03950c8
- Added command line bindings
singlis Nov 27, 2018
b6d17e7
Merge remote-tracking branch 'origin/master' into singlis/term
singlis Nov 27, 2018
ec593b6
- Fix for ColumnCopyingTransformer
singlis Nov 28, 2018
74c700c
- Adding support for missing value.
singlis Nov 28, 2018
430e5ac
- Removed array types for keys
singlis Nov 29, 2018
03c0143
- Support for treat values as key types, and support for back-compat…
singlis Dec 3, 2018
68ec5bf
- TermLookup compatibility with tests
singlis Dec 4, 2018
1e6b09d
Merge remote-tracking branch 'origin/master' into singlis/term
singlis Dec 4, 2018
c210916
...updating...
singlis Dec 14, 2018
cf00b81
Merge remote-tracking branch 'origin/master' into singlis/term
singlis Dec 14, 2018
5445667
...updating...
singlis Dec 14, 2018
cefe1a8
- Added support for the values to be represented as key types
singlis Dec 19, 2018
6718322
- Renaming ValueMappingTransform to ValueMappingTransformer
singlis Dec 19, 2018
3711406
Merge remote-tracking branch 'origin/master' into singlis/term
singlis Dec 19, 2018
47b605d
- Updates after merging to master
singlis Dec 20, 2018
6588644
- Fixing release build.
singlis Dec 20, 2018
ce8c95b
- Updating based upon feedback."
singlis Dec 21, 2018
06d4dd2
Merge branch 'master' of https://github.com/dotnet/machinelearning in…
singlis Dec 21, 2018
88f759e
- Updating from further feedback
singlis Dec 21, 2018
3624f5d
Merge remote-tracking branch 'origin/master' into singlis/term
singlis Dec 21, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions src/Microsoft.ML.Data/DataView/ArrayDataViewBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,18 @@ public void AddColumn<T>(string name, PrimitiveType type, params T[] values)
/// Constructs a new key column from an array where values are copied to output simply
/// by being assigned.
/// </summary>
public void AddColumn(string name, ValueGetter<VBuffer<ReadOnlyMemory<char>>> getKeyValues, ulong keyMin, int keyCount, params uint[] values)
/// <param name="name">The name of the column.</param>
/// <param name="getKeyValues">The delegate that does a reverse lookup based upon the given key. This is used for metadata creation.</param>
/// <param name="keyMin">The <see cref="KeyType"/> minimum to use.</param>
/// <param name="keyCount">The count of unique keys specified in <paramref name="values"/>.</param>
/// <param name="values">The values to add to the column. Note that since this is creating a <see cref="KeyType"/> column, the values will be offset by 1.</param>
public void AddColumn<T1>(string name, ValueGetter<VBuffer<ReadOnlyMemory<char>>> getKeyValues, ulong keyMin, int keyCount, params T1[] values)
{
_host.CheckValue(getKeyValues, nameof(getKeyValues));
_host.CheckParam(keyCount > 0, nameof(keyCount));
CheckLength(name, values);
_columns.Add(new AssignmentColumn<uint>(new KeyType(DataKind.U4, keyMin, keyCount), values));
values.GetType().GetElementType().TryGetDataKind(out DataKind kind);
_columns.Add(new AssignmentColumn<T1>(new KeyType(kind, keyMin, keyCount), values));
_getKeyValues.Add(name, getKeyValues);
_names.Add(name);
}
Expand Down
20 changes: 20 additions & 0 deletions src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Transforms;
using Microsoft.ML.Transforms.Conversions;
using System.Collections.Generic;

namespace Microsoft.ML
{
Expand Down Expand Up @@ -125,5 +127,23 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co
string termsColumn = null,
IComponentFactory<IMultiStreamSource, IDataLoader> loaderFactory = null)
=> new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns, file, termsColumn, loaderFactory);

/// <summary>
/// Maps the specified keys to the specified values.
/// </summary>
/// <typeparam name="TInputType">The key type.</typeparam>
/// <typeparam name="TOutputType">The value type.</typeparam>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="keys">The list of keys to use for the mapping. The mapping is 1-1 with values. This list must be the same length as values and
/// cannot contain duplicate keys.</param>
/// <param name="values">The list of values to pair with the keys for the mapping. This list must be the same length as keys.</param>
/// <param name="columns">The columns to apply this transform on.</param>
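/// <returns>A <see cref="ValueMappingEstimator{TInputType, TOutputType}"/> created from the given keys, values, and columns.</returns>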
public static ValueMappingEstimator<TInputType, TOutputType> ValueMap<TInputType, TOutputType>(
Copy link
Contributor

@Ivanidzo4ka Ivanidzo4ka Dec 20, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ValueMappingEstimator [](start = 22, length = 21)

we like to have summary comments for our public code. #Resolved

this TransformsCatalog.ConversionTransforms catalog,
IEnumerable<TInputType> keys,
IEnumerable<TOutputType> values,
params (string source, string name)[] columns)
=> new ValueMappingEstimator<TInputType, TOutputType>(CatalogUtils.GetEnvironment(catalog), keys, values, columns);
}
}
975 changes: 975 additions & 0 deletions src/Microsoft.ML.Data/Transforms/ValueMappingTransformer.cs

Large diffs are not rendered by default.

Loading