### ML.NET Binary Classification
Creates a binary classification model that predicts whether a white wine is of good quality (quality score above 5) from 11 physicochemical features.

### NuGet package installation

In [1]:
#r "nuget:Microsoft.ML"
#r "nuget:XPlot.Plotly"

### Namespaces

In [2]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms;
using XPlot.Plotly;

### Input class definition

In [3]:
/// <summary>
/// One row of the wine-quality text file: eleven physicochemical measurements
/// followed by the quality score. The index in each <c>LoadColumn</c> attribute
/// is the position of the value within a row of the file.
/// </summary>
public class BinaryClassificationData
{
    // --- Feature columns (0-10) ---
    [LoadColumn(0)] public float FixedAcidity;
    [LoadColumn(1)] public float VolatileAcidity;
    [LoadColumn(2)] public float CitricAcid;
    [LoadColumn(3)] public float ResidualSugar;
    [LoadColumn(4)] public float Chlorides;
    [LoadColumn(5)] public float FreeSulfurDioxide;
    [LoadColumn(6)] public float TotalSulfurDioxide;
    [LoadColumn(7)] public float Density;
    [LoadColumn(8)] public float Ph;
    [LoadColumn(9)] public float Sulphates;
    [LoadColumn(10)] public float Alcohol;

    // --- Target column (11) ---
    // Quality score; RichBinaryClassificationData derives the boolean label from it.
    [LoadColumn(11)] public float Quality;
}

/// <summary>
/// Extends <see cref="BinaryClassificationData"/> with a boolean label that is
/// computed in C# from the quality score.
/// </summary>
public class RichBinaryClassificationData : BinaryClassificationData
{
    /// <summary>
    /// <c>true</c> when <c>Quality</c> is strictly greater than 5; otherwise <c>false</c>.
    /// </summary>
    public bool Label
    {
        get { return Quality > 5; }
    }
}

### Output class definition

In [4]:
/// <summary>
/// Shape of a single prediction: the actual label and the label predicted by the model.
/// </summary>
public class BinaryClassificationPrediction
{
    /// <summary>The actual label of the row.</summary>
    public bool Label;

    /// <summary>The label predicted by the model (bound to the "PredictedLabel" column).</summary>
    [ColumnName("PredictedLabel")]
    public bool PredictedLabel;

    /// <summary>
    /// The predicted label as a number: 1 when <see cref="PredictedLabel"/> is
    /// <c>true</c>, 0 otherwise. Note that this reflects the prediction, not <see cref="Label"/>.
    /// </summary>
    public int LabelAsNumber
    {
        get
        {
            if (PredictedLabel)
            {
                return 1;
            }

            return 0;
        }
    }
}

### Read the raw data

In [5]:
// Shared ML.NET context for all following cells; no seed is supplied.
var mlContext = new MLContext(seed: null);

// Read the training rows: semicolon-separated values, first row is a header.
var trainingData = mlContext.Data.LoadFromTextFile<BinaryClassificationData>(
    path: "./WineQuality_White_Train.csv",
    hasHeader: true,
    separatorChar: ';');

### Prepare the data

In [6]:
// Workaround: BinaryClassificationData has no Label member, so the data view is
// round-tripped through a typed enumerable (IDataView -> Enumerable -> IDataView)
// to let RichBinaryClassificationData.Label compute the label in C#.
var stronglyTypedTrainingData = mlContext.Data.CreateEnumerable<RichBinaryClassificationData>(
    trainingData,
    reuseRowObject: false);
trainingData = mlContext.Data.LoadFromEnumerable(stronglyTypedTrainingData);

// The eleven input columns that are concatenated into the "Features" column.
var featureColumns = new[]
{
    "FixedAcidity",
    "VolatileAcidity",
    "CitricAcid",
    "ResidualSugar",
    "Chlorides",
    "FreeSulfurDioxide",
    "TotalSulfurDioxide",
    "Density",
    "Ph",
    "Sulphates",
    "Alcohol"
};

// Pipeline steps, in order:
//   1. replace missing FixedAcidity values using the Mean replacement mode
//      (only this one column is imputed),
//   2. concatenate the feature columns into "Features",
//   3. train an L-BFGS logistic regression binary classifier.
var pipeline = mlContext.Transforms
    .ReplaceMissingValues(
        outputColumnName: "FixedAcidity",
        replacementMode: MissingValueReplacingEstimator.ReplacementMode.Mean)
    .Append(mlContext.Transforms.Concatenate("Features", featureColumns))
    .Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression());

### Train the model

In [None]:
// Fit every step of the pipeline on the label-enriched training data;
// the resulting model is used by the evaluation cell.
var model = pipeline.Fit(trainingData);

### Evaluate the model

In [None]:
// Load the test file; it uses the same layout as the training file
// (semicolon-separated values, first row is a header).
var testData = mlContext.Data.LoadFromTextFile<BinaryClassificationData>(
    "./WineQuality_White_Test.csv",
    separatorChar: ';',
    hasHeader: true);

// Same hack as for the training data: IDataView to Enumerable to IDataView,
// so that RichBinaryClassificationData.Label is computed for every row.
// The rows must come from testData (not trainingData); otherwise the model
// would be evaluated on the very rows it was trained on.
var stronglyTypedTestData = mlContext.Data.CreateEnumerable<RichBinaryClassificationData>(testData, false);
testData = mlContext.Data.LoadFromEnumerable(stronglyTypedTestData);

// Score the test rows with the trained model.
var scoredData = model.Transform(testData);

// Compute the binary classification metrics on the scored test rows and show them.
var qualityMetrics = mlContext.BinaryClassification.Evaluate(scoredData);
display(qualityMetrics);

### TODO: Plot the quality metrics

In [None]:
// ...

### TODO: Plot the confusion matrix

In [None]:
// ...