Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
4d7609e
Fixed problem when saving and loading normalizer transformers that wo…
antoniovs1029 Oct 8, 2019
89a54ef
Added test of normalizer loaded from disk for a multidimensional vector
antoniovs1029 Oct 9, 2019
7217cbf
Update test case
antoniovs1029 Oct 9, 2019
50baf55
Updated test
antoniovs1029 Oct 9, 2019
0406afd
Cleaning up
antoniovs1029 Oct 9, 2019
0699296
Added test for backward compatibility with normalizer transformer wit…
antoniovs1029 Oct 10, 2019
0982fb4
Added TODO comment to test
antoniovs1029 Oct 11, 2019
7f53c82
Changed the order of saving and loading ItemKind byte
antoniovs1029 Oct 11, 2019
d5639e7
Updating dependency on models repo to use one model in a backwardcomp…
antoniovs1029 Oct 11, 2019
da59714
Fixed test that checks backward compatibility of normalizer transformer
antoniovs1029 Oct 12, 2019
bc52016
Moved assert statement and removed redundant cast
antoniovs1029 Oct 12, 2019
5d61c22
Avoid loading column 'float0' which was actually not used by the tran…
antoniovs1029 Oct 14, 2019
662bb9d
Fixed indentation
antoniovs1029 Oct 14, 2019
0628770
Added test reproducing the original scenario
antoniovs1029 Oct 14, 2019
8975755
Changed visibility of classes made for test
antoniovs1029 Oct 14, 2019
58916b8
Minor fixes in the test case
antoniovs1029 Oct 15, 2019
87364c2
Changed "Fact" to "OnnxFact" on test from DnnImageFeaturizerTest.cs
antoniovs1029 Oct 17, 2019
2ac5b8e
Merge branch 'master' of https://github.com/dotnet/machinelearning in…
antoniovs1029 Oct 24, 2019
db96aaa
Merge remote-tracking branch 'upstream/master' into is04normalizer
antoniovs1029 Oct 25, 2019
740f267
Use WriteIntArray and ReadIntArray extension methods to make code mor…
antoniovs1029 Oct 28, 2019
499bf91
Merge remote-tracking branch 'upstream/master' into is04normalizer
antoniovs1029 Oct 28, 2019
6614c5f
Merge remote-tracking branch 'upstream/master' into is04normalizer
antoniovs1029 Oct 28, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build/Dependencies.props
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
<BenchmarkDotNetVersion>0.11.3</BenchmarkDotNetVersion>
<MicrosoftCodeAnalysisTestingVersion>1.0.0-beta1-63812-02</MicrosoftCodeAnalysisTestingVersion>
<MicrosoftMLTestDatabasesPackageVersion>0.0.5-test</MicrosoftMLTestDatabasesPackageVersion>
<MicrosoftMLTestModelsPackageVersion>0.0.5-test</MicrosoftMLTestModelsPackageVersion>
<MicrosoftMLTestModelsPackageVersion>0.0.6-test</MicrosoftMLTestModelsPackageVersion>
<MicrosoftMLTensorFlowTestModelsVersion>0.0.11-test</MicrosoftMLTensorFlowTestModelsVersion>
<MicrosoftMLOnnxTestModelsVersion>0.0.5-test</MicrosoftMLOnnxTestModelsVersion>
<SystemDataSqlClientVersion>4.6.1</SystemDataSqlClientVersion>
Expand Down
48 changes: 36 additions & 12 deletions src/Microsoft.ML.Data/Transforms/Normalizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Model.OnnxConverter;
using Microsoft.ML.Model.Pfa;
using Microsoft.ML.Runtime;
Expand Down Expand Up @@ -357,7 +358,8 @@ private static VersionInfo GetVersionInfo()
{
return new VersionInfo(
modelSignature: "NORMALZR",
verWrittenCur: 0x00010001, // Initial
// verWrittenCur: 0x00010001 // Initial
verWrittenCur: 0x00010002, // Support for multidimensional vectors
verReadableCur: 0x00010001,
verWeCanReadBack: 0x00010001,
loaderSignature: LoaderSignature,
Expand Down Expand Up @@ -385,39 +387,61 @@ internal ColumnOptions(string name, string inputColumnName, DataViewType inputTy
/// <summary>
/// Reads the type of a normalizer input column from <paramref name="ctx"/>.
/// Models written before version 0x00010002 only stored a flat vector size;
/// later models store every dimension so multidimensional vectors round-trip.
/// </summary>
/// <param name="ctx">The load context whose reader is positioned at the serialized type.</param>
/// <returns>The R4/R8 scalar type, or a vector type built on top of it.</returns>
internal static DataViewType LoadType(ModelLoadContext ctx)
{
    Contracts.AssertValue(ctx);

    if (ctx.Header.ModelVerWritten < 0x00010002)
    {
        // *** Previous Binary format ***
        // - bool: is vector
        // - int: vector size
        // - byte: ItemKind of input column (only R4 and R8 are valid)
        bool isVectorOld = ctx.Reader.ReadBoolean();
        int vectorSize = ctx.Reader.ReadInt32();
        Contracts.CheckDecode(vectorSize >= 0);
        Contracts.CheckDecode(vectorSize > 0 || !isVectorOld);
        InternalDataKind itemKindOld = (InternalDataKind)ctx.Reader.ReadByte();
        Contracts.CheckDecode(itemKindOld == InternalDataKind.R4 || itemKindOld == InternalDataKind.R8);
        var itemTypeOld = ColumnTypeExtensions.PrimitiveTypeFromKind(itemKindOld);
        return isVectorOld ? (DataViewType)(new VectorDataViewType(itemTypeOld, vectorSize)) : itemTypeOld;
    }

    // *** Binary format ***
    // - bool: is vector
    // - byte: ItemKind of input column (only R4 and R8 are valid)
    // If it is a vector:
    // - int: number of dimensions
    // - ints: as many as dimensions, each one represents the size of that dimension
    bool isVector = ctx.Reader.ReadBoolean();
    InternalDataKind itemKind = (InternalDataKind)ctx.Reader.ReadByte();
    Contracts.CheckDecode(itemKind == InternalDataKind.R4 || itemKind == InternalDataKind.R8);

    var itemType = ColumnTypeExtensions.PrimitiveTypeFromKind(itemKind);
    if (!isVector)
        return itemType;

    // Mirror the legacy branch's validation: a serialized vector must have at
    // least one dimension and every dimension must be of known (positive) size.
    int[] dimensions = ctx.Reader.ReadIntArray();
    Contracts.CheckDecode(dimensions != null && dimensions.Length > 0 && dimensions.All(d => d > 0));
    return new VectorDataViewType(itemType, dimensions);
}

/// <summary>
/// Writes the type of a normalizer input column to <paramref name="ctx"/> using the
/// layout that stores every vector dimension (see the format comment below).
/// </summary>
/// <param name="ctx">The save context to write to.</param>
/// <param name="type">A scalar or known-size vector type whose item kind is R4 or R8.</param>
internal static void SaveType(ModelSaveContext ctx, DataViewType type)
{
    Contracts.AssertValue(ctx);
    // *** Binary format ***
    // - bool: is vector
    // - byte: ItemKind of input column (only R4 and R8 are valid)
    // If it is a vector:
    // - int: number of dimensions of the vector
    // - ints: as many as dimensions, each one represents the size of each dimension

    VectorDataViewType vectorType = type as VectorDataViewType;
    ctx.Writer.Write(vectorType != null);

    // For a scalar column the item type is the column type itself.
    DataViewType itemType = vectorType?.ItemType ?? type;
    itemType.RawType.TryGetDataKind(out InternalDataKind itemKind);
    Contracts.Assert(itemKind == InternalDataKind.R4 || itemKind == InternalDataKind.R8);
    ctx.Writer.Write((byte)itemKind);

    Contracts.Assert(vectorType == null || vectorType.IsKnownSize);
    if (vectorType != null)
        ctx.Writer.WriteIntArray(vectorType.Dimensions.ToArray());
}
}

Expand Down
57 changes: 57 additions & 0 deletions test/Microsoft.ML.OnnxTransformerTest/DnnImageFeaturizerTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.Model;
using Microsoft.ML.RunTests;
Expand Down Expand Up @@ -173,5 +174,61 @@ public void TestOldSavingAndLoading()
}
}
}

/// <summary>
/// Row type used to load the image list: an image path read from column 0
/// and its label read from column 1.
/// </summary>
internal sealed class ModelInput
{
    [LoadColumn(0)]
    [ColumnName("ImagePath")]
    public string ImagePath { get; set; }

    [LoadColumn(1)]
    [ColumnName("Label")]
    public string Label { get; set; }
}

/// <summary>
/// Row type produced by the prediction engine in the test below.
/// </summary>
internal sealed class ModelOutput
{
    // Without the ColumnName attribute the column would be named after the
    // property ("Prediction"); the attribute binds it to "PredictedLabel".
    [ColumnName("PredictedLabel")]
    public string Prediction { get; set; }

    public float[] Score { get; set; }
}

// Trains an image classification pipeline that ends in a normalizer, saves it to
// disk, loads it back and checks that a prediction engine built from the loaded
// model still predicts the expected label for the first row.
[OnnxFact]
public void TestLoadFromDiskAndPredictionEngine()
{
    const string imageColumn = "ImagePath_featurized";
    const string featuresColumn = "Features";
    const string labelColumn = "Label";

    string imagesTsv = GetDataPath("images/images.tsv");
    string imagesDir = Path.GetDirectoryName(imagesTsv);

    var trainingData = ML.Data.LoadFromTextFile<ModelInput>(
        path: imagesTsv,
        hasHeader: false,
        separatorChar: '\t',
        allowQuoting: true,
        allowSparse: false);

    // Featurization: key-encode the label, load/resize/extract the images, run
    // them through ResNet18, then normalize the resulting feature vector.
    var featurization = ML.Transforms.Conversion.MapValueToKey(labelColumn, labelColumn)
        .Append(ML.Transforms.LoadImages(imageColumn, imagesDir, "ImagePath"))
        .Append(ML.Transforms.ResizeImages(imageColumn, 224, 224, imageColumn))
        .Append(ML.Transforms.ExtractPixels(imageColumn, imageColumn))
        .Append(ML.Transforms.DnnFeaturizeImage(imageColumn, m => m.ModelSelector.ResNet18(m.Environment, m.OutputColumn, m.InputColumn), imageColumn))
        .Append(ML.Transforms.Concatenate(featuresColumn, new[] { imageColumn }))
        .Append(ML.Transforms.NormalizeMinMax(featuresColumn, featuresColumn))
        .AppendCacheCheckpoint(ML);

    var binaryLearner = ML.BinaryClassification.Trainers.AveragedPerceptron(
        labelColumnName: labelColumn,
        numberOfIterations: 10,
        featureColumnName: featuresColumn);
    var classifier = ML.MulticlassClassification.Trainers.OneVersusAll(binaryLearner, labelColumnName: labelColumn)
        .Append(ML.Transforms.Conversion.MapKeyToValue("PredictedLabel", "PredictedLabel"));

    var fittedModel = featurization.Append(classifier).Fit(trainingData);

    // Round-trip the fitted model through a file on disk.
    string savedModelPath = GetOutputPath("TestSaveToDiskAndPredictionEngine-model.zip");
    ML.Model.Save(fittedModel, trainingData.Schema, savedModelPath);
    var reloadedModel = ML.Model.Load(savedModelPath, out _);

    var engine = ML.Model.CreatePredictionEngine<ModelInput, ModelOutput>(reloadedModel);
    ModelInput firstRow = ML.Data.CreateEnumerable<ModelInput>(trainingData, reuseRowObject: false).First();
    ModelOutput prediction = engine.Predict(firstRow);
    Assert.Equal("tomato", prediction.Prediction);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
</ItemGroup>

<ItemGroup>
<NativeAssemblyReference Include="CpuMathNative" />
<NativeAssemblyReference Include="MklImports" />
<NativeAssemblyReference Condition="'$(OS)' == 'Windows_NT'" Include="libiomp5md" />
</ItemGroup>
Expand Down
87 changes: 87 additions & 0 deletions test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
using Microsoft.ML.TestFramework.Attributes;
using Microsoft.ML.Tools;
using Microsoft.ML.Transforms;
using System.Linq;
using Xunit;
using Xunit.Abstractions;
using static Microsoft.ML.Transforms.NormalizingTransformer;
Expand Down Expand Up @@ -886,5 +887,91 @@ void TestNormalizeLogMeanVarianceFixZeroVec()
Assert.Equal(0f, transformedDataArray[2].Features[1]);
Assert.Equal(0f, transformedDataArray[2].Features[4]);
}

// Backward-compatibility check: a normalizing transformer that was saved with
// "verWrittenCur: 0x00010001" must still load and produce the baseline output.
[Fact]
public void TestNormalizeBackCompatibility2()
{
    string irisPath = GetDataPath(TestDatasets.iris.trainFilename);

    var loaderOptions = new TextLoader.Options
    {
        HasHeader = true,
        Columns = new[]
        {
            new TextLoader.Column("float1", DataKind.Single, 1),
            new TextLoader.Column("float4", DataKind.Single, new[] { new TextLoader.Range(1, 4) }),
            new TextLoader.Column("double1", DataKind.Double, 1),
            new TextLoader.Column("double4", DataKind.Double, new[] { new TextLoader.Range(1, 4) }),
            new TextLoader.Column("int1", DataKind.Int32, 0),
        },
    };
    var textLoader = new TextLoader(Env, loaderOptions, new MultiFileSource(irisPath));
    var irisData = textLoader.Load(irisPath);

    // The model file was produced by the older writer version.
    string legacyModelPath = Path.Combine("TestModels", "normalizer_verwrit-00010001.zip");
    var legacyNormalizer = ML.Model.Load(legacyModelPath, out _);

    string normalizedPath = GetOutputPath("NormalizerEstimator", "normalized2.tsv");
    using (var channel = Env.Start("save"))
    {
        var textSaver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
        using (var outputStream = File.Create(normalizedPath))
        {
            DataSaverUtils.SaveDataView(channel, textSaver, legacyNormalizer.Transform(irisData), outputStream, keepHidden: true);
        }
    }

    // The output of the old model is compared against the "normalized.tsv" baseline.
    CheckEquality("NormalizerEstimator", "normalized2.tsv", "normalized.tsv");

    Done();
}

/// <summary>
/// Sample row holding a single float vector declared with three dimensions (2 x 3 x 4).
/// </summary>
public class TensorData
{
    private const int Dim1 = 2;
    private const int Dim2 = 3;
    private const int Dim3 = 4;
    private const int Size = Dim1 * Dim2 * Dim3;

    [VectorType(Dim1, Dim2, Dim3)]
    public float[] input { get; set; }

    /// <summary>
    /// Builds two rows: one with values 0..Size-1 and one with the same values shifted by 10000.
    /// </summary>
    public static TensorData[] GetTensorData()
    {
        return new[]
        {
            new TensorData { input = Ramp(0) },
            new TensorData { input = Ramp(10000) },
        };
    }

    // Returns Size consecutive values starting at offset, converted to float.
    private static float[] Ramp(int offset)
    {
        var values = new float[Size];
        for (int i = 0; i < values.Length; i++)
            values[i] = (float)(i + offset);
        return values;
    }
}

// Verifies that a normalizer fitted on a multidimensional vector column keeps
// the column's dimensions after being saved to disk and loaded back.
[Fact]
void TestSavingNormalizerWithMultidimensionalVectorInput()
{
    var samples = TensorData.GetTensorData();
    var data = ML.Data.LoadFromEnumerable(samples);
    var model = ML.Transforms.NormalizeMinMax("output", "input").Fit(data);
    var transformedData = model.Transform(data);

    var modelAndSchemaPath = GetOutputPath("TestSavingNormalizerWithMultidimensionalVectorInput.zip");
    ML.Model.Save(model, data.Schema, modelAndSchemaPath);
    var loadedModel = ML.Model.Load(modelAndSchemaPath, out _);
    var transformedData2 = loadedModel.Transform(data);

    // Assert.IsType fails with a clear message if the column is not a vector,
    // instead of a NullReferenceException from an unchecked 'as' cast.
    var dimensions1 = Assert.IsType<VectorDataViewType>(transformedData.Schema["output"].Type).Dimensions;
    var dimensions2 = Assert.IsType<VectorDataViewType>(transformedData2.Schema["output"].Type).Dimensions;

    // TensorData declares three dimensions; make sure they were not flattened on reload.
    Assert.Equal(3, dimensions2.Length);
    Assert.True(dimensions1.SequenceEqual(dimensions2));
}
}
}