## ML.NET Regression
Creates a regression model that predicts the quality of white wine from 11 physicochemical features.

### NuGet package installation

In [20]:
#r "nuget:Microsoft.ML, 1.4.0"
#r "nuget:XPlot.Plotly, 3.0.1"

### Namespaces

In [21]:
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms;
using XPlot.Plotly;

### Input Class Definition

In [22]:
/// <summary>
/// One row of the wine-quality data set as read from the delimited text file.
/// Each field is bound to a column of the file by the index given to
/// <c>LoadColumn</c>: indices 0-10 are the eleven input measurements and
/// index 11 is the value the model is trained to predict.
/// </summary>
public class RegressionData
{
    // Input measurements (file columns 0 through 10).
    [LoadColumn(0)] public float FixedAcidity;
    [LoadColumn(1)] public float VolatileAcidity;
    [LoadColumn(2)] public float CitricAcid;
    [LoadColumn(3)] public float ResidualSugar;
    [LoadColumn(4)] public float Chlorides;
    [LoadColumn(5)] public float FreeSulfurDioxide;
    [LoadColumn(6)] public float TotalSulfurDioxide;
    [LoadColumn(7)] public float Density;
    [LoadColumn(8)] public float Ph;
    [LoadColumn(9)] public float Sulphates;
    [LoadColumn(10)] public float Alcohol;

    // Regression target (file column 11).
    [LoadColumn(11)] public float Label;
}


### Output Class Definition

In [23]:
/// <summary>
/// Result of scoring a single row: the actual target value carried over from
/// the input next to the value produced by the model.
/// </summary>
public class RegressionPrediction
{
    // Bound to the "Label" column of the scored data (the known target value).
    [ColumnName("Label")] public float Label;

    // Bound to the "Score" column of the scored data (the model's output).
    [ColumnName("Score")] public float PredictedLabel;
}

### Read the Raw Data

In [24]:
// Shared ML.NET entry point. No seed is supplied (seed: null), so no fixed
// random seed is imposed on operations that use randomness.
var mlContext = new MLContext(seed: null);

// The training file is semicolon-delimited and its first line is a header row.
var trainingData = mlContext.Data.LoadFromTextFile<RegressionData>(
    "./WineQuality_White_Train.csv",
    hasHeader: true,
    separatorChar: ';');

### Prepare the Data

In [25]:
// Training pipeline, stage by stage:
//   1. Fill missing FixedAcidity values with the column mean, writing the
//      result to PreparedFixedAcidity.
//   2. Drop the original FixedAcidity column.
//   3. Combine the eleven inputs into a single "Features" vector column.
//   4. Mean-variance normalize "Features" in place.
//   5. Train an SDCA regression model (called with no arguments, so it uses
//      the trainer's default label and feature column names).
var pipeline = mlContext.Transforms
    .ReplaceMissingValues(
        "PreparedFixedAcidity",
        "FixedAcidity",
        replacementMode: MissingValueReplacingEstimator.ReplacementMode.Mean)
    .Append(mlContext.Transforms.DropColumns("FixedAcidity"))
    .Append(mlContext.Transforms.Concatenate(
        "Features",
        "PreparedFixedAcidity",
        "VolatileAcidity",
        "CitricAcid",
        "ResidualSugar",
        "Chlorides",
        "FreeSulfurDioxide",
        "TotalSulfurDioxide",
        "Density",
        "Ph",
        "Sulphates",
        "Alcohol"))
    .Append(mlContext.Transforms.NormalizeMeanVariance("Features"))
    .Append(mlContext.Regression.Trainers.Sdca());

### Train the Model

In [26]:
// Fit every stage of the pipeline (data preparation and the trainer) on the training data.
var model = pipeline.Fit(trainingData);

### Evaluate the Model

In [27]:
// Read the test set; it uses the same layout as the training file
// (semicolon-delimited, header on the first line).
var testData = mlContext.Data.LoadFromTextFile<RegressionData>(
    "./WineQuality_White_Test.csv",
    hasHeader: true,
    separatorChar: ';');

// Run the trained model over the test rows, then compute the regression
// metrics from the scored output.
var scoredData = model.Transform(testData);
var qualityMetrics = mlContext.Regression.Evaluate(scoredData);

display(qualityMetrics);

MeanAbsoluteError,MeanSquaredError,RootMeanSquaredError,LossFunction,RSquared
0.5470906873969371,0.5076144395121397,0.7124706587026162,0.5076144403309368,0.1560359423672352


### Visualize the Quality Metrics

In [28]:
// Bar labels and bar lengths, kept in the same order so that entry i of one
// array describes entry i of the other.
var metricNames = new string[]
{
    "Mean Absolute Error",
    "Mean Squared Error",
    "Root Mean Squared Error",
    "Loss Function",
    "R Squared"
};
var metricValues = new double[]
{
    qualityMetrics.MeanAbsoluteError,
    qualityMetrics.MeanSquaredError,
    qualityMetrics.RootMeanSquaredError,
    qualityMetrics.LossFunction,
    qualityMetrics.RSquared
};

// Horizontal bar trace: values run along x, metric names label the y axis.
var graph = new Graph.Bar
{
    orientation = "h",
    x = metricValues,
    y = metricNames,
    marker = new Graph.Marker { color = "darkred" }
};

// Build the chart, attach a titled layout, and render it in the notebook.
var chart = Chart.Plot(graph);
var layout = new Layout.Layout { title = "Quality Metrics" };
chart.WithLayout(layout);

display(chart);

### Calculate and Visualize the Feature Contributions

In [29]:
// The final transformer in the fitted chain is expected to be the linear
// regression predictor produced by the trainer at the end of the pipeline.
// Fail with a descriptive error instead of a NullReferenceException on the
// next line if the pipeline is ever changed to end with something else.
var regressionModel =
    model.Last() as RegressionPredictionTransformer<LinearRegressionModelParameters>
    ?? throw new System.InvalidOperationException(
        "The last transformer in the model is not a " +
        "RegressionPredictionTransformer<LinearRegressionModelParameters>; " +
        "feature weights cannot be read from it.");

// Weights of the linear model, plotted below as one bar per feature.
var contributions = regressionModel.Model.Weights;

// Display names for the bars. The order must match the order in which the
// columns were concatenated into "Features" when the pipeline was built;
// the first entry is shown under its original name rather than the name of
// the prepared (missing-values-replaced) column.
string[] featureNames =
    {
          "FixedAcidity",
          "VolatileAcidity",
          "CitricAcid",
          "ResidualSugar",
          "Chlorides",
          "FreeSulfurDioxide",
          "TotalSulfurDioxide",
          "Density",
          "Ph",
          "Sulphates",
          "Alcohol"
    };

// Horizontal bar trace: weights run along x, feature names label the y axis.
var graph2 = new Graph.Bar()
{
    x = contributions,
    y = featureNames,
    orientation = "h",
    marker = new Graph.Marker { color = "darkblue" }
};

var chart2 = Chart.Plot(graph2);

var layout2 = new Layout.Layout(){ title="Feature Contributions" };
chart2.WithLayout(layout2);

display(chart2);

### Create a Prediction Engine and Use It on a Random Sample

In [35]:
// Build an engine that scores one RegressionData row at a time with the trained model.
var predictionEngine =
    mlContext.Model.CreatePredictionEngine<RegressionData, RegressionPrediction>(model);

// Pick one row at random: shuffle the training data, keep the first row,
// and materialize it as a RegressionData object.
var shuffledData = mlContext.Data.ShuffleRows(trainingData);
var rawSample = mlContext.Data.TakeRows(shuffledData, count: 1);
var sample = mlContext.Data
    .CreateEnumerable<RegressionData>(rawSample, reuseRowObject: false)
    .First();
display(sample);

// Score the sampled row and show its actual label next to the predicted value.
var prediction = predictionEngine.Predict(sample);
display(prediction);

FixedAcidity,VolatileAcidity,CitricAcid,ResidualSugar,Chlorides,FreeSulfurDioxide,TotalSulfurDioxide,Density,Ph,Sulphates,Alcohol,Label
7.2,0.23,0.38,14.3,0.058,55,194,0.9979,3.09,0.44,9,6


Label,PredictedLabel
6,5.5824947
