Skip to content

Commit

Permalink
Added exception for NaN eigenvectors on PCA (#5349)
Browse files Browse the repository at this point in the history
Added exception for NaN eigenvectors on PCA
  • Loading branch information
antoniovs1029 committed Aug 17, 2020
1 parent 2933216 commit 1b1872f
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 4 deletions.
3 changes: 3 additions & 0 deletions src/Microsoft.ML.PCA/PcaTrainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,9 @@ internal PcaModelParameters(IHostEnvironment env, int rank, float[][] eigenVecto
{
_eigenVectors[i] = new VBuffer<float>(eigenVectors[i].Length, eigenVectors[i]);
_meanProjected[i] = VectorUtils.DotProduct(in _eigenVectors[i], in mean);
Host.CheckParam(_eigenVectors[i].GetValues().All(FloatUtils.IsFinite),
nameof(eigenVectors),
"The learnt eigenvectors contained NaN values, consider modifying the dataset or lower the rank or oversampling parameters");
}

_mean = mean;
Expand Down
47 changes: 43 additions & 4 deletions test/Microsoft.ML.Tests/AnomalyDetectionTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -174,10 +174,10 @@ private static void ExecutePipelineWithGivenRandomizedPcaTrainer(MLContext mlCon
}


/// <summary>
/// Help function used to execute trainers defined in <see cref="RandomizedPcaInMemory"/>.
/// </summary>
private static void ExecuteRandomizedPcaTrainerChangeThreshold(MLContext mlContext, Trainers.RandomizedPcaTrainer trainer)
/// <summary>
/// Help function used to execute trainers defined in <see cref="RandomizedPcaInMemory"/>.
/// </summary>
private static void ExecuteRandomizedPcaTrainerChangeThreshold(MLContext mlContext, Trainers.RandomizedPcaTrainer trainer)
{
var samples = new List<DataPoint>()
{
Expand Down Expand Up @@ -251,5 +251,44 @@ private IDataView DetectAnomalyInMnistOneClass(string trainPath, string testPath
var model = trainer.Fit(trainData);
return model.Transform(testData);
}

/// <summary>
/// Checks that when PCA training produces invalid eigenvectors, the failure surfaces as an
/// <see cref="ArgumentOutOfRangeException"/> carrying a readable message.
/// </summary>
[Fact]
public void PcaTrainerInvalidEigenvectorsException()
{
    var mlContext = new MLContext(seed: 0);

    var trainer = mlContext.AnomalyDetection.Trainers.RandomizedPca(
        featureColumnName: nameof(DataPoint.Features), rank: 3);

    // Every row is a scalar multiple of {1, 0, 2}, so all rows are linearly dependent.
    var samples = new List<DataPoint>()
    {
        new DataPoint() { Features = new float[3] { 1, 0, 2 } },
        new DataPoint() { Features = new float[3] { 2, 0, 4 } },
        new DataPoint() { Features = new float[3] { 4, 0, 8 } },
        new DataPoint() { Features = new float[3] { 8, 0, 16 } }
    };

    var data = mlContext.Data.LoadFromEnumerable(samples);

    // Since we provided a dataset where all rows are linearly dependent,
    // the PCA algorithm will likely fail when extracting 3 eigenvectors
    // and produce eigenvectors with NaN.
    // ThrowsAny is used (rather than Throws) so that derived exception types are
    // accepted as well, which is what a catch clause of this type would accept.
    var ex = Assert.ThrowsAny<ArgumentOutOfRangeException>(() => { trainer.Fit(data); });

    Assert.Contains("The learnt eigenvectors contained NaN values", ex.Message);
}
}
}

0 comments on commit 1b1872f

Please sign in to comment.