### Image classification

**Rationale**: Predict the type product based on images of merchandise


In [1]:
// ML.NET Nuget packages 
#r "nuget:Microsoft.ML,1.4.0"
#r "nuget:Microsoft.ML.ImageAnalytics,1.4.0"
#r "nuget:Microsoft.ML.Dnn,0.16.0-preview2"
    
//CSV Helper
#r "nuget:CsvHelper"
    
//Install XPlot package
#r "nuget:XPlot.Plotly,2.0.0"

using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;
using XPlot.Plotly;
using CsvHelper;
using System.IO;

In [2]:
public class ImageData
{
    public string ImagePath { get; set; }

    public string Label { get; set; }
}

In [3]:
public class Article
{
    public string Id { get; set; }
    public string Gender { get; set; }
    public string MasterCategory { get; set; }
    public string SubCategory { get; set; }
    public string ArticleType { get; set; }
    public string BaseColor { get; set; }
    public string Season { get; set; }
    public string Year { get; set; }
    public string Usage { get; set; }
    public string ProductDisplayName { get; set; }
}

In [4]:
var imagePath = @".\Datasets\Computer Vision\fashion-product-images-small\images";
var csvPath = @".\Datasets\Computer Vision\fashion-product-images-small\testtrain.csv";

In [5]:
public IEnumerable<ImageData> GetImages(string path)
{
    using (var reader = new StreamReader(path))
    using (var csv = new CsvReader(reader))
    {
        return csv.GetRecords<Article>()
            .Select(x => new ImageData
            {
                Label = x.ArticleType,
                ImagePath = Path.Combine(imagePath, x.Id) + ".jpg"
            })
            .Where(y => File.Exists(y.ImagePath))
            .ToList();
    }
}

In [6]:
IEnumerable<ImageData> images = GetImages(csvPath);

In [7]:
var mlContext = new MLContext(seed: 1);

var dataView = mlContext.Data.LoadFromEnumerable(images);
dataView = mlContext.Data.ShuffleRows(dataView);

#### Load images in to memory

In [8]:
IDataView shuffledFullImagesDataset = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: "LabelAsKey",
                                        inputColumnName: "Label",
                                        keyOrdinality: ValueToKeyMappingEstimator.KeyOrdinality.ByValue)
                                        .Append(mlContext.Transforms.LoadImages(outputColumnName: "Image",
                                                                    imageFolder: imagePath,
                                                                    inputColumnName: "ImagePath"))
                                        .Fit(dataView)
                                        .Transform(dataView);

Unhandled Exception: (6,69): error CS1739: The best overload for 'LoadImages' does not have a parameter named 'useImageType'

#### Data exploration

#### Plotting

In [9]:
var categories = shuffledFullImagesDataset.GetColumn<string>("Label");

var categoriesHistogram = Chart.Plot(new Graph.Histogram 
    {
        x = categories,  
    });

categoriesHistogram.WithLayout(new Layout.Layout { title="Types of images" });
display(categoriesHistogram);

Unhandled Exception: (1,18): error CS0103: The name 'shuffledFullImagesDataset' does not exist in the current context

#### Split our data

In [10]:
var trainTestSplit = mlContext.Data.TrainTestSplit(shuffledFullImagesDataset);

var testSet = trainTestSplit.TestSet;
var trainSet = trainTestSplit.TrainSet;

Unhandled Exception: (1,52): error CS0103: The name 'shuffledFullImagesDataset' does not exist in the current context

#### Data Transformation

In [11]:
var trainingPipeline = mlContext.Model.ImageClassification("Image", "LabelAsKey",
                        arch: ImageClassificationEstimator.Architecture.InceptionV3,
                        epoch: 30,
                        batchSize: 10,
                        metricsCallback: (metrics) => Console.WriteLine(metrics),
                        validationSet: testSet)
                .Append(mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName: "PredictedLabel", inputColumnName: "PredictedLabel"));

Unhandled Exception: (6,40): error CS0103: The name 'testSet' does not exist in the current context

##### Train the model

In [12]:
ITransformer model = trainingPipeline.Fit(trainSet);

Unhandled Exception: (1,22): error CS0103: The name 'trainingPipeline' does not exist in the current context
(1,43): error CS0103: The name 'trainSet' does not exist in the current context

#### Evaluate the model

In [13]:
var predicitions = model.Transform(testSet);
var metrics = mlContext.MulticlassClassification.Evaluate(predictionsDataView, labelColumnName: "LabelAsKey", predictedLabelColumnName: "PredictedLabel");


Unhandled Exception: (1,20): error CS0103: The name 'model' does not exist in the current context
(1,36): error CS0103: The name 'testSet' does not exist in the current context
(2,59): error CS0103: The name 'predictionsDataView' does not exist in the current context

In [14]:
var confusionMatrix = Chart.Plot(new Graph.Heatmap 
    {
        x = categories.Distinct(),
        y = categories.Distinct().Reverse(),
        z = metrics.ConfusionMatrix.Counts.Reverse(),   
        zmin = 0,
        zmax = 5,
        colorscale = "off"
    });

confusionMatrix.WithLayout(new Layout.Layout { title="Confusion Matrix" });
confusionMatrix

Unhandled Exception: (3,13): error CS0103: The name 'categories' does not exist in the current context
(4,13): error CS0103: The name 'categories' does not exist in the current context
(5,13): error CS0103: The name 'metrics' does not exist in the current context

#### Save the model

In [15]:
mlContext.Model.Save(model, trainSet.Schema, "imageClassifier.zip");

Unhandled Exception: (1,22): error CS0103: The name 'model' does not exist in the current context
(1,29): error CS0103: The name 'trainSet' does not exist in the current context