Skip to content

Commit

Permalink
Modify ImageClassification API to use a workspace for saving data (#4410)
Browse files Browse the repository at this point in the history

Originally, this API saved data to the same directory as the DLL, which
could cause issues if the DLL was in a read-only path. Instead, the API now
defaults to a temporary workspace path, which the user can override in the
options. This allows all the data to be saved in one
path.
  • Loading branch information
bpstark authored and codemzs committed Oct 30, 2019
1 parent 9215ba9 commit a40df86
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 32 deletions.
6 changes: 6 additions & 0 deletions src/Microsoft.ML.TensorFlow/TensorflowUtils.cs
Expand Up @@ -561,5 +561,11 @@ public Tensor[] Run()
}

}
/// <summary>
/// Creates a new, randomly named directory beneath the system temp path.
/// </summary>
/// <returns>The path of the directory that was created.</returns>
internal static string GetTemporaryDirectory()
{
    // Build the path in two steps: temp root first, then a random leaf name.
    string tempRoot = Path.GetTempPath();
    string randomLeaf = Path.GetRandomFileName();
    string workspace = Path.Combine(tempRoot, randomLeaf);

    // Create the directory on disk before handing the path to the caller.
    Directory.CreateDirectory(workspace);
    return workspace;
}
}
}
75 changes: 49 additions & 26 deletions src/Microsoft.ML.Vision/ImageClassificationTrainer.cs
Expand Up @@ -392,10 +392,10 @@ public sealed class Options : TrainerInputBaseWithLabel
public Action<ImageClassificationMetrics> MetricsCallback = null;

/// <summary>
/// Indicates the path where the newly retrained model should be saved.
/// Indicates the path where the models get downloaded to and cache files saved, default is a new temporary directory
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Indicates the path where the newly retrained model should be saved.", SortOrder = 15)]
public string ModelSavePath = null;
[Argument(ArgumentType.AtMostOnce, HelpText = "Indicates the path where the models get downloaded to and cache files saved, default is a new temporary directory.", SortOrder = 15)]
public string WorkspacePath = null;

/// <summary>
/// Indicates to evaluate the model on train set after every epoch.
Expand All @@ -422,16 +422,16 @@ public sealed class Options : TrainerInputBaseWithLabel
public IDataView ValidationSet;

/// <summary>
/// Indicates the file path to store trainset bottleneck values for caching.
/// Indicates the file name within the workspace to store trainset bottleneck values for caching.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Indicates the file path to store trainset bottleneck values for caching.", SortOrder = 15)]
public string TrainSetBottleneckCachedValuesFilePath = "trainSetBottleneckFile.csv";
[Argument(ArgumentType.AtMostOnce, HelpText = "Indicates the file name to store trainset bottleneck values for caching.", SortOrder = 15)]
public string TrainSetBottleneckCachedValuesFileName = "trainSetBottleneckFile.csv";

/// <summary>
/// Indicates the file path to store validationset bottleneck values for caching.
/// Indicates the file name within the workspace to store validationset bottleneck values for caching.
/// </summary>
[Argument(ArgumentType.AtMostOnce, HelpText = "Indicates the file path to store validationset bottleneck values for caching.", SortOrder = 15)]
public string ValidationSetBottleneckCachedValuesFilePath = "validationSetBottleneckFile.csv";
[Argument(ArgumentType.AtMostOnce, HelpText = "Indicates the file name to store validationset bottleneck values for caching.", SortOrder = 15)]
public string ValidationSetBottleneckCachedValuesFileName = "validationSetBottleneckFile.csv";

/// <summary>
/// A class that performs learning rate scheduling.
Expand Down Expand Up @@ -515,10 +515,26 @@ internal ImageClassificationTrainer(IHostEnvironment env, Options options)
Host.CheckNonEmpty(options.ScoreColumnName, nameof(options.ScoreColumnName));
Host.CheckNonEmpty(options.PredictedLabelColumnName, nameof(options.PredictedLabelColumnName));

if (string.IsNullOrEmpty(options.WorkspacePath))
{
options.WorkspacePath = GetTemporaryDirectory();
}

if (string.IsNullOrEmpty(options.TrainSetBottleneckCachedValuesFileName))
{
//If the user decided to set to null reset back to default value
options.TrainSetBottleneckCachedValuesFileName = _options.TrainSetBottleneckCachedValuesFileName;
}

if (string.IsNullOrEmpty(options.ValidationSetBottleneckCachedValuesFileName))
{
//If the user decided to set to null reset back to default value
options.ValidationSetBottleneckCachedValuesFileName = _options.ValidationSetBottleneckCachedValuesFileName;
}

_options = options;
_useLRScheduling = _options.LearningRateScheduler != null;
_checkpointPath = _options.ModelSavePath ??
Path.Combine(Directory.GetCurrentDirectory(), _options.FinalModelPrefix +
_checkpointPath = Path.Combine(_options.WorkspacePath, _options.FinalModelPrefix +
ModelFileName[_options.Arch]);

// Configure bottleneck tensor based on the model.
Expand Down Expand Up @@ -558,7 +574,7 @@ private void InitializeTrainingGraph(IDataView input)

_classCount = labelCount == 1 ? 2 : (int)labelCount;
var imageSize = ImagePreprocessingSize[_options.Arch];
_session = LoadTensorFlowSessionFromMetaGraph(Host, _options.Arch).Session;
_session = LoadTensorFlowSessionFromMetaGraph(Host, _options.Arch, _options.WorkspacePath).Session;
(_jpegData, _resizedImage) = AddJpegDecoding(imageSize.Item1, imageSize.Item2, 3);
_jpegDataTensorName = _jpegData.name;
_resizedImageTensorName = _resizedImage.name;
Expand Down Expand Up @@ -604,12 +620,14 @@ private protected override ImageClassificationModelParameters TrainModelCore(Tra
var validationSet = trainContext.ValidationSet?.Data ?? _options.ValidationSet;
var imageProcessor = new ImageProcessor(_session, _jpegDataTensorName, _resizedImageTensorName);
int trainingsetSize = -1;
string trainSetBottleneckCachedValuesFilePath = Path.Combine(_options.WorkspacePath, _options.TrainSetBottleneckCachedValuesFileName);
string validationSetBottleneckCachedValuesFilePath = Path.Combine(_options.WorkspacePath, _options.ValidationSetBottleneckCachedValuesFileName);
if (!_options.ReuseTrainSetBottleneckCachedValues ||
!File.Exists(_options.TrainSetBottleneckCachedValuesFilePath))
!File.Exists(trainSetBottleneckCachedValuesFilePath))
{
trainingsetSize = CacheFeaturizedImagesToDisk(trainContext.TrainingSet.Data, _options.LabelColumnName,
_options.FeatureColumnName, imageProcessor,
_inputTensorName, _bottleneckTensor.name, _options.TrainSetBottleneckCachedValuesFilePath,
_inputTensorName, _bottleneckTensor.name, trainSetBottleneckCachedValuesFilePath,
ImageClassificationMetrics.Dataset.Train, _options.MetricsCallback);

// Write training set size to a file for use during training
Expand All @@ -618,16 +636,16 @@ private protected override ImageClassificationModelParameters TrainModelCore(Tra

if (validationSet != null &&
(!_options.ReuseTrainSetBottleneckCachedValues ||
!File.Exists(_options.ValidationSetBottleneckCachedValuesFilePath)))
!File.Exists(validationSetBottleneckCachedValuesFilePath)))
{
CacheFeaturizedImagesToDisk(validationSet, _options.LabelColumnName,
_options.FeatureColumnName, imageProcessor, _inputTensorName, _bottleneckTensor.name,
_options.ValidationSetBottleneckCachedValuesFilePath,
validationSetBottleneckCachedValuesFilePath,
ImageClassificationMetrics.Dataset.Validation, _options.MetricsCallback);
}

TrainAndEvaluateClassificationLayer(_options.TrainSetBottleneckCachedValuesFilePath, _options,
_options.ValidationSetBottleneckCachedValuesFilePath, trainingsetSize);
TrainAndEvaluateClassificationLayer(trainSetBottleneckCachedValuesFilePath, _options,
validationSetBottleneckCachedValuesFilePath, trainingsetSize);

// Leave the ownership of _session so that it is not disposed/closed when this object goes out of scope
// since it will be used by ImageClassificationModelParameters class (new owner that will take care of
Expand Down Expand Up @@ -858,7 +876,7 @@ private int GetNumSamples(string path)
Saver trainSaver = null;
FileWriter trainWriter = null;
Tensor merged = tf.summary.merge_all();
trainWriter = tf.summary.FileWriter(Path.Combine(Directory.GetCurrentDirectory(), "train"),
trainWriter = tf.summary.FileWriter(Path.Combine(_options.WorkspacePath, "train"),
_session.graph);

trainSaver = tf.train.Saver();
Expand Down Expand Up @@ -1109,7 +1127,7 @@ private int GetNumSamples(string path)

private (Session, Tensor, Tensor, Tensor) BuildEvaluationSession(int classCount)
{
var evalGraph = LoadMetaGraph(ModelFileName[_options.Arch]);
var evalGraph = LoadMetaGraph(Path.Combine(_options.WorkspacePath, ModelFileName[_options.Arch]));
var evalSess = tf.Session(graph: evalGraph);
Tensor evaluationStep = null;
Tensor prediction = null;
Expand Down Expand Up @@ -1267,20 +1285,25 @@ private void VariableSummaries(RefVariable var)

}

private static TensorFlowSessionWrapper LoadTensorFlowSessionFromMetaGraph(IHostEnvironment env, Architecture arch)
private static TensorFlowSessionWrapper LoadTensorFlowSessionFromMetaGraph(IHostEnvironment env, Architecture arch, string path)
{
if (string.IsNullOrEmpty(path))
{
path = GetTemporaryDirectory();
}

var modelFileName = ModelFileName[arch];
var modelFilePath = Path.Combine(path, modelFileName);
int timeout = 10 * 60 * 1000;
string currentDirectory = Directory.GetCurrentDirectory();
DownloadIfNeeded(env, modelFileName, currentDirectory, modelFileName, timeout);
DownloadIfNeeded(env, modelFileName, path, modelFileName, timeout);
if (arch == Architecture.InceptionV3)
{
DownloadIfNeeded(env, @"tfhub_modules.zip", currentDirectory, @"tfhub_modules.zip", timeout);
DownloadIfNeeded(env, @"tfhub_modules.zip", path, @"tfhub_modules.zip", timeout);
if (!Directory.Exists(@"tfhub_modules"))
ZipFile.ExtractToDirectory(Path.Combine(currentDirectory, @"tfhub_modules.zip"), @"tfhub_modules");
ZipFile.ExtractToDirectory(Path.Combine(path, @"tfhub_modules.zip"), @"tfhub_modules");
}

return new TensorFlowSessionWrapper(GetSession(env, modelFileName, true), modelFileName);
return new TensorFlowSessionWrapper(GetSession(env, modelFilePath, true), modelFilePath);
}

~ImageClassificationTrainer()
Expand Down
6 changes: 2 additions & 4 deletions test/Microsoft.ML.Benchmarks/ImageClassificationBench.cs
Expand Up @@ -20,7 +20,6 @@ namespace Microsoft.ML.Benchmarks
[Config(typeof(TrainConfig))]
public class ImageClassificationBench
{
private string assetsPath;
private MLContext mlContext;
private IDataView trainDataset;
private IDataView testDataset;
Expand All @@ -36,7 +35,7 @@ public void SetupData()
* level up to prevent issues with saving data.
*/
string assetsRelativePath = @"../../../../assets";
assetsPath = GetAbsolutePath(assetsRelativePath);
string assetsPath = GetAbsolutePath(assetsRelativePath);

var outputMlNetModelFilePath = Path.Combine(assetsPath, "outputs",
"imageClassifier.zip");
Expand Down Expand Up @@ -87,8 +86,7 @@ public TransformerChain<KeyToValueMappingTransformer> TrainResnetV250()
BatchSize = 10,
LearningRate = 0.01f,
EarlyStoppingCriteria = new ImageClassificationTrainer.EarlyStopping(minDelta: 0.001f, patience: 20, metric: ImageClassificationTrainer.EarlyStoppingMetric.Loss),
ValidationSet = testDataset,
ModelSavePath = assetsPath
ValidationSet = testDataset
};
var pipeline = mlContext.MulticlassClassification.Trainers.ImageClassification(options)
.Append(mlContext.Transforms.Conversion.MapKeyToValue(
Expand Down
Expand Up @@ -1493,11 +1493,12 @@ internal void TensorFlowImageClassificationWithLRScheduling(LearningRateSchedule
// Using Exponential Decay for learning rate scheduling
// You can also try other types of Learning rate scheduling methods
// available in LearningRateScheduler.cs
LearningRateScheduler = learningRateScheduler
LearningRateScheduler = learningRateScheduler,
WorkspacePath = GetTemporaryDirectory()
};

var pipeline = mlContext.Transforms.LoadRawImageBytes("Image", fullImagesetFolderPath, "ImagePath")
.Append(mlContext.MulticlassClassification.Trainers.ImageClassification(options))
.Append(mlContext.MulticlassClassification.Trainers.ImageClassification(options))
.Append(mlContext.Transforms.Conversion.MapKeyToValue(
outputColumnName: "PredictedLabel",
inputColumnName: "PredictedLabel"));
Expand Down Expand Up @@ -1577,6 +1578,11 @@ internal void TensorFlowImageClassificationWithLRScheduling(LearningRateSchedule
Assert.Equal("roses", predictionSecond.PredictedLabel);
Assert.True(Array.IndexOf(labels, predictionFirst.PredictedLabel) > -1);
Assert.True(Array.IndexOf(labels, predictionSecond.PredictedLabel) > -1);

Assert.True(File.Exists(Path.Combine(options.WorkspacePath, options.TrainSetBottleneckCachedValuesFileName)));
Assert.True(File.Exists(Path.Combine(options.WorkspacePath, options.ValidationSetBottleneckCachedValuesFileName)));
Assert.True(File.Exists(Path.Combine(options.WorkspacePath, ImageClassificationTrainer.ModelFileName[options.Arch])));
Directory.Delete(options.WorkspacePath, true);
}

[TensorFlowFact]
Expand Down Expand Up @@ -1952,5 +1958,11 @@ public class ImagePrediction
public string PredictedLabel;
}

/// <summary>
/// Test helper: creates a randomly named directory under the system temp path
/// and returns its path.
/// </summary>
/// <returns>The path of the newly created directory.</returns>
private static string GetTemporaryDirectory()
{
    // Random leaf name placed under the temp root.
    string scratchPath = Path.Combine(
        Path.GetTempPath(),
        Path.GetRandomFileName());

    Directory.CreateDirectory(scratchPath);

    return scratchPath;
}
}
}

0 comments on commit a40df86

Please sign in to comment.