### Image classification

**Rationale**: Predict the product type based on images of merchandise.


In [None]:
// ML.NET Nuget packages 
#r "nuget:Microsoft.ML,1.4.0"
#r "nuget:Microsoft.ML.Vision,1.4.0"
#r "nuget:Microsoft.ML.ImageAnalytics,1.4.0"
#r "nuget:SciSharp.TensorFlow.Redist,1.14.0"
    
//CSV Helper
#r "nuget:CsvHelper,12.1.2"
    
//Install XPlot package
#r "nuget:XPlot.Plotly,2.0.0"

using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;
using Microsoft.ML.Vision;
using XPlot.Plotly;
using CsvHelper;
using System.IO;

In [None]:
/// <summary>
/// One training example: an image file on disk together with its class label.
/// Instances are created by <c>GetImages</c> and loaded into an ML.NET data view.
/// </summary>
public class ImageData
{
    /// <summary>Path to the image file (image folder + article id + ".jpg").</summary>
    public string ImagePath { get; set; }

    /// <summary>Class label of the image; taken from the article's <c>ArticleType</c>.</summary>
    public string Label { get; set; }
}

In [None]:
/// <summary>
/// One record of the articles CSV file, as read by CsvHelper in <c>GetImages</c>.
/// Only <see cref="Id"/> and <see cref="ArticleType"/> are consumed by the code in this notebook;
/// the remaining properties mirror the other CSV fields.
/// </summary>
public class Article
{
    /// <summary>Article identifier; also used as the image file name (without extension).</summary>
    public string Id { get; set; }
    public string Gender { get; set; }
    public string MasterCategory { get; set; }
    public string SubCategory { get; set; }
    /// <summary>Type of the article; used as the classification label.</summary>
    public string ArticleType { get; set; }
    public string BaseColor { get; set; }
    public string Season { get; set; }
    public string Year { get; set; }
    public string Usage { get; set; }
    public string ProductDisplayName { get; set; }
}

In [None]:
// Change imagePath to the location of the repository on your machine.
// TensorFlow needs an absolute path here; a relative path will not work.
// imagePath: folder containing the "<id>.jpg" image files.
var imagePath = @"C:\Git\mlnet-jupyter\src\Datasets\Computer Vision\fashion-product-images-small\images";
// csvPath: CSV file listing the articles (relative path, unlike imagePath).
var csvPath = @"..\Datasets\Computer Vision\fashion-product-images-small\testtrain.csv";

In [None]:
/// <summary>
/// Reads the article CSV and builds one <see cref="ImageData"/> entry per article
/// whose image file actually exists on disk.
/// </summary>
/// <param name="path">Path of the CSV file containing the <see cref="Article"/> records.</param>
/// <returns>
/// A fully materialised list of image entries; the label is the article type and the
/// image path is the notebook-level <c>imagePath</c> folder combined with "&lt;Id&gt;.jpg".
/// </returns>
public IEnumerable<ImageData> GetImages(string path)
{
    var existingImages = new List<ImageData>();

    using (var reader = new StreamReader(path))
    using (var csv = new CsvReader(reader))
    {
        // Records are consumed while the reader is still open, so nothing lazy escapes the using blocks.
        foreach (var article in csv.GetRecords<Article>())
        {
            var candidate = new ImageData
            {
                Label = article.ArticleType,
                ImagePath = Path.Combine(imagePath, article.Id) + ".jpg"
            };

            // Articles without a matching image file are skipped.
            if (File.Exists(candidate.ImagePath))
            {
                existingImages.Add(candidate);
            }
        }
    }

    return existingImages;
}

In [None]:
// Read the article CSV and keep only the entries whose image file exists on disk.
IEnumerable<ImageData> images = GetImages(csvPath);

In [None]:
// A fixed seed is passed to the ML.NET context.
var mlContext = new MLContext(seed: 1);

// Load the in-memory image list into a data view and shuffle its rows in one step.
var dataView = mlContext.Data.ShuffleRows(
    mlContext.Data.LoadFromEnumerable(images));

#### Load images into memory

In [None]:
// Stage 1: turn the string "Label" column into a key column "LabelAsKey" (keys ordered by value).
var labelToKey = mlContext.Transforms.Conversion.MapValueToKey(
    outputColumnName: "LabelAsKey",
    inputColumnName: "Label",
    keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue);

// Stage 2: read the raw bytes of every image referenced by "ImagePath" into the "Image" column.
var loadImageBytes = mlContext.Transforms.LoadRawImageBytes(
    outputColumnName: "Image",
    imageFolder: imagePath,
    inputColumnName: "ImagePath");

// Fit both stages on the shuffled data and apply them to that same data.
var preprocessingPipeline = labelToKey.Append(loadImageBytes);
var fittedPreprocessing = preprocessingPipeline.Fit(dataView);
IDataView shuffledFullImagesDataset = fittedPreprocessing.Transform(dataView);

#### Data exploration

#### Plotting

In [None]:
// Label of every row in the dataset; reused later when plotting the confusion matrix.
var categories = shuffledFullImagesDataset.GetColumn<string>("Label");

// Histogram of how many images each label has.
var histogramTrace = new Graph.Histogram { x = categories };
var categoriesHistogram = Chart.Plot(histogramTrace);

var histogramLayout = new Layout.Layout { title = "Types of images" };
categoriesHistogram.WithLayout(histogramLayout);

display(categoriesHistogram);

#### Split our data

In [None]:
// Split the prepared dataset into a training part and a held-out test part
// (no test fraction is passed, so the library default applies).
DataOperationsCatalog.TrainTestData trainTestSplit =
    mlContext.Data.TrainTestSplit(shuffledFullImagesDataset);

IDataView trainSet = trainTestSplit.TrainSet;
IDataView testSet = trainTestSplit.TestSet;

#### Data Transformation

In [None]:
// Trainer configuration for the image classifier.
var options = new ImageClassificationTrainer.Options
{
    // Input columns produced by the preprocessing step.
    LabelColumnName = "LabelAsKey",
    FeatureColumnName = "Image",

    // Network architecture and training hyper-parameters.
    Arch = ImageClassificationTrainer.Architecture.ResnetV250,
    LearningRate = 0.01f,
    BatchSize = 10,
    Epoch = 30,

    // Print the training progress reported by the trainer.
    MetricsCallback = progress => Console.WriteLine(progress),

    // NOTE(review): the validation set is the same testSet that is scored in the
    // evaluation cell, so the reported test metrics may be optimistic - consider a separate split.
    ValidationSet = testSet
};

// The trainer emits "PredictedLabel" as a key; map it back to the original label value.
var imageClassifier = mlContext.MulticlassClassification.Trainers.ImageClassification(options);
var predictedKeyToLabel = mlContext.Transforms.Conversion.MapKeyToValue(
    outputColumnName: "PredictedLabel",
    inputColumnName: "PredictedLabel");

var trainingPipeline = imageClassifier.Append(predictedKeyToLabel);

#### Train the model

In [None]:
// Fit the training pipeline (image classifier + key-to-label mapping) on the training set.
ITransformer model = trainingPipeline.Fit(trainSet);

#### Evaluate the model

In [None]:
// Score the held-out test set with the trained model.
var predicitions = model.Transform(testSet);
// Compute multiclass metrics, using "LabelAsKey" as the true label column.
var metrics = mlContext.MulticlassClassification.Evaluate(predicitions, labelColumnName: "LabelAsKey", predictedLabelColumnName: "PredictedLabel");

In [None]:
// The rows/columns of the confusion matrix follow the key order of "LabelAsKey"
// (created with KeyOrdinality.ByValue), NOT the order in which labels first appear
// in the shuffled data. Read the axis labels from the key-value annotation of that
// column so every label lines up with its row/column.
VBuffer<ReadOnlyMemory<char>> labelKeyValues = default;
shuffledFullImagesDataset.Schema["LabelAsKey"].GetKeyValues(ref labelKeyValues);
string[] classLabels = labelKeyValues.DenseValues()
    .Select(keyValue => keyValue.ToString())
    .ToArray();

// y labels and z rows are reversed together, so each row keeps its own label.
var confusionMatrix = Chart.Plot(new Graph.Heatmap 
    {
        x = classLabels,
        y = Enumerable.Reverse(classLabels).ToArray(),
        z = metrics.ConfusionMatrix.Counts.Reverse(),   
        zmin = 0,
        zmax = 5,
        colorscale = "off"
    });

confusionMatrix.WithLayout(new Layout.Layout { title="Confusion Matrix" });
confusionMatrix

#### Save the model

In [None]:
// Persist the trained model together with the schema of the training data
// to "imageClassifier.zip" (relative file name).
mlContext.Model.Save(model, trainSet.Schema, "imageClassifier.zip");