Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 1 addition & 13 deletions test/Microsoft.ML.FSharp.Tests/Microsoft.ML.FSharp.Tests.fsproj
Original file line number Diff line number Diff line change
Expand Up @@ -22,25 +22,13 @@
<ProjectReference Include="..\..\src\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.CpuMath\Microsoft.ML.CpuMath.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.Ensemble\Microsoft.ML.Ensemble.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.EntryPoints\Microsoft.ML.EntryPoints.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.FastTree\Microsoft.ML.FastTree.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.KMeansClustering\Microsoft.ML.KMeansClustering.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.LightGBM\Microsoft.ML.LightGBM.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.Maml\Microsoft.ML.Maml.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.Onnx\Microsoft.ML.Onnx.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.Parquet\Microsoft.ML.Parquet.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.PCA\Microsoft.ML.PCA.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.ResultProcessor\Microsoft.ML.ResultProcessor.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.StandardLearners\Microsoft.ML.StandardLearners.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.Sweeper\Microsoft.ML.Sweeper.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.Transforms\Microsoft.ML.Transforms.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.Legacy\Microsoft.ML.Legacy.csproj" />
</ItemGroup>
<ItemGroup>
<NativeAssemblyReference Include="FastTreeNative" />
<NativeAssemblyReference Include="CpuMathNative" />
<NativeAssemblyReference Include="FactorizationMachineNative" />
</ItemGroup>

</Project>
</Project>
177 changes: 45 additions & 132 deletions test/Microsoft.ML.FSharp.Tests/SmokeTests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,16 @@ namespace Microsoft.ML.FSharp.Tests
#nowarn "44"
open System
open Microsoft.ML
open Microsoft.ML.Legacy.Data
open Microsoft.ML.Legacy.Trainers
open Microsoft.ML.Legacy.Transforms
open Microsoft.ML.Data
open Xunit

module SmokeTest1 =

type SentimentData() =
[<LoadColumn(columnIndex = 0); DefaultValue>]
[<LoadColumn(columnIndex = 0); ColumnName("Label"); DefaultValue>]
val mutable Sentiment : bool
[<LoadColumn(columnIndex = 1); DefaultValue>]
val mutable SentimentText : string
[<LoadColumn(columnIndex = 1); ColumnName("Label"); DefaultValue>]
val mutable Sentiment : float32

type SentimentPrediction() =
[<ColumnName("PredictedLabel"); DefaultValue>]
Expand All @@ -77,51 +74,23 @@ module SmokeTest1 =
[<Fact>]
let ``FSharp-Sentiment-Smoke-Test`` () =

// See https://github.com/dotnet/machinelearning/issues/401: forces the loading of ML.NET component assemblies
let _load =
[ typeof<Microsoft.ML.Transforms.Text.TextNormalizingEstimator>;
typeof<Microsoft.ML.Trainers.FastTree.FastTree>;
typeof<Microsoft.ML.EntryPoints.CVSplit>] // ML.EntryPoints

let testDataPath = __SOURCE_DIRECTORY__ + @"/../data/wikipedia-detox-250-line-data.tsv"

let pipeline = Legacy.LearningPipeline()

pipeline.Add(
Microsoft.ML.Legacy.Data.TextLoader(testDataPath).CreateFrom<SentimentData>(
Arguments =
TextLoaderArguments(
HasHeader = true,
Column = [| TextLoaderColumn(Name = "Label",
Source = [| TextLoaderRange(0) |],
Type = Nullable (Legacy.Data.DataKind.Num))
TextLoaderColumn(Name = "SentimentText",
Source = [| TextLoaderRange(1) |],
Type = Nullable (Legacy.Data.DataKind.Text)) |]
)))

pipeline.Add(
TextFeaturizer(
"Features", [| "SentimentText" |],
KeepPunctuations = false,
OutputTokens = true,
VectorNormalizer = TextFeaturizingEstimatorTextNormKind.L2
))

pipeline.Add(
FastTreeBinaryClassifier(
NumLeaves = 5,
NumTrees = 5,
MinDocumentsInLeafs = 2
))

let model = pipeline.Train<SentimentData, SentimentPrediction>()
let ml = MLContext(seed = new System.Nullable<int>(1), conc = 1)
let data = ml.Data.ReadFromTextFile<SentimentData>(testDataPath, hasHeader = true)

let pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is MinDocumentsInLeafs no longer available in the new APIs?

Copy link
Contributor Author

@artidoro artidoro Jan 4, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By default it is set to 10 (instead of 2 in the old test). I am not sure how to create an Action object in F#, and I believe this will not change the code coverage. We have tests specific to FastTree for binary classification that set MinDocumentsInLeafs to a different value.

.Append(ml.BinaryClassification.Trainers.FastTree(numLeaves = 5, numTrees = 5))

let model = pipeline.Fit(data)

let engine = model.CreatePredictionEngine<SentimentData, SentimentPrediction>(ml)

let predictions =
[ SentimentData(SentimentText = "This is a gross exaggeration. Nobody is setting a kangaroo court. There was a simple addition.")
SentimentData(SentimentText = "Sort of ok")
SentimentData(SentimentText = "Joe versus the Volcano Coffee Company is a great film.") ]
|> model.Predict
|> List.map engine.Predict

let predictionResults = [ for p in predictions -> p.Sentiment ]
Assert.Equal<bool list>(predictionResults, [ false; true; true ])
Expand All @@ -131,11 +100,11 @@ module SmokeTest2 =

[<CLIMutable>]
type SentimentData =
{ [<LoadColumn(columnIndex = 0)>]
SentimentText : string

[<LoadColumn(columnIndex = 1); ColumnName("Label")>]
Sentiment : float32 }
{ [<LoadColumn(columnIndex = 0); ColumnName("Label")>]
Sentiment : bool
[<LoadColumn(columnIndex = 1)>]
SentimentText : string }

[<CLIMutable>]
type SentimentPrediction =
Expand All @@ -145,63 +114,35 @@ module SmokeTest2 =
[<Fact>]
let ``FSharp-Sentiment-Smoke-Test`` () =

// See https://github.com/dotnet/machinelearning/issues/401: forces the loading of ML.NET component assemblies
let _load =
[ typeof<Microsoft.ML.Transforms.Text.TextNormalizingEstimator>;
typeof<Microsoft.ML.Trainers.FastTree.FastTree>;
typeof<Microsoft.ML.EntryPoints.CVSplit>] // ML.EntryPoints

let testDataPath = __SOURCE_DIRECTORY__ + @"/../data/wikipedia-detox-250-line-data.tsv"

let ml = MLContext(seed = new System.Nullable<int>(1), conc = 1)
let data = ml.Data.ReadFromTextFile<SentimentData>(testDataPath, hasHeader = true)

let pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features")
.Append(ml.BinaryClassification.Trainers.FastTree(numLeaves = 5, numTrees = 5))

let model = pipeline.Fit(data)

let pipeline = Legacy.LearningPipeline()

pipeline.Add(
Microsoft.ML.Legacy.Data.TextLoader(testDataPath).CreateFrom<SentimentData>(
Arguments =
TextLoaderArguments(
HasHeader = true,
Column = [| TextLoaderColumn(Name = "Label",
Source = [| TextLoaderRange(0) |],
Type = Nullable (Legacy.Data.DataKind.Num))
TextLoaderColumn(Name = "SentimentText",
Source = [| TextLoaderRange(1) |],
Type = Nullable (Legacy.Data.DataKind.Text)) |]
)))

pipeline.Add(
TextFeaturizer(
"Features", [| "SentimentText" |],
KeepPunctuations = false,
OutputTokens = true,
VectorNormalizer = TextFeaturizingEstimatorTextNormKind.L2
))

pipeline.Add(
FastTreeBinaryClassifier(
NumLeaves = 5,
NumTrees = 5,
MinDocumentsInLeafs = 2
))

let model = pipeline.Train<SentimentData, SentimentPrediction>()
let engine = model.CreatePredictionEngine<SentimentData, SentimentPrediction>(ml)

let predictions =
[ { SentimentText = "This is a gross exaggeration. Nobody is setting a kangaroo court. There was a simple addition."; Sentiment = 0.0f }
{ SentimentText = "Sort of ok"; Sentiment = 0.0f }
{ SentimentText = "Joe versus the Volcano Coffee Company is a great film."; Sentiment = 0.0f } ]
|> model.Predict
[ { SentimentText = "This is a gross exaggeration. Nobody is setting a kangaroo court. There was a simple addition."; Sentiment = false }
{ SentimentText = "Sort of ok"; Sentiment = false }
{ SentimentText = "Joe versus the Volcano Coffee Company is a great film."; Sentiment = false } ]
|> List.map engine.Predict

let predictionResults = [ for p in predictions -> p.Sentiment ]
Assert.Equal<bool list>(predictionResults, [ false; true; true ])

module SmokeTest3 =

type SentimentData() =
[<LoadColumn(columnIndex = 0)>]
member val SentimentText = "".AsMemory() with get, set
[<LoadColumn(columnIndex = 0); ColumnName("Label")>]
member val Sentiment = false with get, set

[<LoadColumn(columnIndex = 1); ColumnName("Label")>]
member val Sentiment = 0.0 with get, set
[<LoadColumn(columnIndex = 1)>]
member val SentimentText = "".AsMemory() with get, set

type SentimentPrediction() =
[<ColumnName("PredictedLabel")>]
Expand All @@ -210,51 +151,23 @@ module SmokeTest3 =
[<Fact>]
let ``FSharp-Sentiment-Smoke-Test`` () =

// See https://github.com/dotnet/machinelearning/issues/401: forces the loading of ML.NET component assemblies
let _load =
[ typeof<Microsoft.ML.Transforms.Text.TextNormalizingEstimator>;
typeof<Microsoft.ML.Trainers.FastTree.FastTree>;
typeof<Microsoft.ML.EntryPoints.CVSplit>] // ML.EntryPoints

let testDataPath = __SOURCE_DIRECTORY__ + @"/../data/wikipedia-detox-250-line-data.tsv"

let pipeline = Legacy.LearningPipeline()

pipeline.Add(
Microsoft.ML.Legacy.Data.TextLoader(testDataPath).CreateFrom<SentimentData>(
Arguments =
TextLoaderArguments(
HasHeader = true,
Column = [| TextLoaderColumn(Name = "Label",
Source = [| TextLoaderRange(0) |],
Type = Nullable (Legacy.Data.DataKind.Num))
TextLoaderColumn(Name = "SentimentText",
Source = [| TextLoaderRange(1) |],
Type = Nullable (Legacy.Data.DataKind.Text)) |]
)))

pipeline.Add(
TextFeaturizer(
"Features", [| "SentimentText" |],
KeepPunctuations = false,
OutputTokens = true,
VectorNormalizer = TextFeaturizingEstimatorTextNormKind.L2
))

pipeline.Add(
FastTreeBinaryClassifier(
NumLeaves = 5,
NumTrees = 5,
MinDocumentsInLeafs = 2
))

let model = pipeline.Train<SentimentData, SentimentPrediction>()
let ml = MLContext(seed = new System.Nullable<int>(1), conc = 1)
let data = ml.Data.ReadFromTextFile<SentimentData>(testDataPath, hasHeader = true)

let pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features")
.Append(ml.BinaryClassification.Trainers.FastTree(numLeaves = 5, numTrees = 5))

let model = pipeline.Fit(data)

let engine = model.CreatePredictionEngine<SentimentData, SentimentPrediction>(ml)

let predictions =
[ SentimentData(SentimentText = "This is a gross exaggeration. Nobody is setting a kangaroo court. There was a simple addition.".AsMemory())
SentimentData(SentimentText = "Sort of ok".AsMemory())
SentimentData(SentimentText = "Joe versus the Volcano Coffee Company is a great film.".AsMemory()) ]
|> model.Predict
|> List.map engine.Predict

let predictionResults = [ for p in predictions -> p.Sentiment ]
Assert.Equal<bool list>(predictionResults, [ false; true; true ])
Expand Down