# Regression model for Taxi fares using ML.NET

Regression is a supervised machine learning task. 
A regression model predicts a continuous numeric value (such as a price or an amount). 
For instance, predicting the fare of a taxi trip or the price of a car is a regression problem.

# Install the NuGet packages you use in the notebook

In [None]:
#r "nuget:Microsoft.ML,1.4.0"
#r "nuget:Microsoft.Data.Analysis,0.1.0"
#r "nuget:XPlot.Plotly,2.0.0"
#r "nuget:Microsoft.ML.Mkl.Components,1.4.0"
using XPlot.Plotly;
using Microsoft.Data.Analysis;
using Microsoft.ML;
using Microsoft.ML.Data;

# Declare data-classes for input data and predictions

In [None]:
display(h4("Declaring data-classes to use across the code in this notebook."));

/// <summary>
/// One row of the taxi-fare dataset. Each field is bound to a column of the
/// loaded text file through the index given to <c>LoadColumn</c>.
/// </summary>
public class TaxiTrip
{
    /// <summary>Vendor identifier, read from column 0 as text.</summary>
    [LoadColumn(0)]
    public string VendorId;

    /// <summary>Rate code, read from column 1 as text.</summary>
    [LoadColumn(1)]
    public string RateCode;

    /// <summary>Passenger count, read from column 2 as a float.</summary>
    [LoadColumn(2)]
    public float PassengerCount;

    /// <summary>Trip time, read from column 3 (unit not stated here; presumably seconds, confirm against the dataset).</summary>
    [LoadColumn(3)]
    public float TripTime;

    /// <summary>Trip distance, read from column 4 (unit not stated here; confirm against the dataset).</summary>
    [LoadColumn(4)]
    public float TripDistance;

    /// <summary>Payment type, read from column 5 as text.</summary>
    [LoadColumn(5)]
    public string PaymentType;

    /// <summary>Fare amount, read from column 6; used as the label column when training and evaluating.</summary>
    [LoadColumn(6)]
    public float FareAmount;
}

/// <summary>
/// Prediction output shape: carries the value of the "Score" column.
/// </summary>
public class TaxiTripFarePrediction
{
    /// <summary>Value bound to the "Score" column (the predicted fare).</summary>
    [ColumnName("Score")]
    public float Score;
}

In [None]:
// this is just boilerplate code to visualize tables neatly,
// try removing it, training still works, but output is very unreadable...

using Microsoft.AspNetCore.Html;
using System.Collections;
// Registers an HTML formatter for DataDebuggerPreview so that previews are
// rendered as a table: one header cell per visible column (preceded by an
// "index" column) and one table row per previewed data row.
Formatter<DataDebuggerPreview>.Register((data, writer) =>
{
    // Upper bound on the number of rows rendered into the table.
    const int maxRows = 100;

    // Collect, in a single pass, the header cell and the positional index of
    // every visible column; the index is needed to pick the matching entry
    // out of each row's Values array.
    var headers = new List<IHtmlContent>();
    headers.Add(th(i("index")));
    var colIndices = new List<int>();
    int index = 0;
    foreach (var col in data.ColumnView)
    {
        if (!col.Column.IsHidden)
        {
            headers.Add((IHtmlContent) th(col.Column.Name));
            colIndices.Add(index);
        }

        index++;
    }

    var rows = new List<List<IHtmlContent>>();
    var rowCount = Math.Min(maxRows, data.RowView.Length);
    for (var rowIndex = 0; rowIndex < rowCount; rowIndex++)
    {
        var cells = new List<IHtmlContent>();
        cells.Add(td(rowIndex));
        foreach (var localIndex in colIndices)
        {
            object cellValue = data.RowView[rowIndex].Values[localIndex].Value;

            // A null value has no runtime type to inspect; render it as an
            // empty cell instead of throwing a NullReferenceException.
            if (cellValue == null)
            {
                cells.Add(td(string.Empty));
                continue;
            }

            Type cellType = cellValue.GetType();
            bool isVector = cellType.IsGenericType
                && cellType.GetGenericTypeDefinition() == typeof(VBuffer<>);

            if (isVector)
            {
                // VBuffer<T> is generic, so DenseValues() is invoked through
                // reflection and its items are joined into a comma-separated string.
                var denseVals = (IEnumerable) cellType.GetMethod("DenseValues").Invoke(cellValue, null);
                var innerValList = new List<string>();
                foreach (var innerVal in denseVals)
                {
                    innerValList.Add(Convert.ToString(innerVal));
                }

                var joined = string.Join(",", innerValList);
                cells.Add(td(joined));
            }
            else
            {
                cells.Add(td(cellValue));
            }
        }

        rows.Add(cells);
    }

    var t = table(
        thead(
            headers),
        tbody(
            rows.Select(
                r => tr(r))));

    writer.Write(t);
}, "text/html");

# Load datasets into IDataView and display the schema 

In [None]:
display(h1("Code for loading the data into IDataViews: training dataset and test dataset"));

// Locations of the comma-separated training and test files (relative to the notebook).
string TrainDataPath = "../datasets/taxi/taxi-fare-train.csv";
string TestDataPath = "../datasets/taxi/taxi-fare-test.csv";

// Fixed seed so that repeated runs of the notebook behave the same way.
var mlContext = new MLContext(seed: 0);

// Both files have a header row and use ',' as the field separator;
// columns are mapped onto TaxiTrip through its LoadColumn attributes.
IDataView trainDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(
    TrainDataPath,
    separatorChar: ',',
    hasHeader: true);
IDataView testDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(
    TestDataPath,
    separatorChar: ',',
    hasHeader: true);

display(h4("Schema of training DataView:"));
display(trainDataView.Schema);


## Show a few rows of loaded data 

In [None]:
// Number of rows to materialize and show; the caption below is built from the
// same value so the text always matches what is actually displayed.
int rowsToShow = 20;

display($"DataView: Showing {rowsToShow} rows with the columns");

// reuseRowObject: false gives every row its own TaxiTrip instance, which is
// required because the rows are collected into a list.
display(mlContext.Data.CreateEnumerable<TaxiTrip>(trainDataView, reuseRowObject: false).Take(rowsToShow).ToList());

# Data transformations pipeline for ML.NET model

In [None]:
display(h1("Apply Data Transformations pipeline"));

// Pipeline stages, in order:
//   1. one-hot encode the three text columns into new "*Encoded" columns,
//   2. mean-variance normalize the three numeric columns in place
//      (only the output name is given, so the same column is the input),
//   3. concatenate everything into the single "Features" column.
var dataProcessPipeline =
    mlContext.Transforms.Categorical.OneHotEncoding("VendorIdEncoded", nameof(TaxiTrip.VendorId))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding("RateCodeEncoded", nameof(TaxiTrip.RateCode)))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding("PaymentTypeEncoded", nameof(TaxiTrip.PaymentType)))
        .Append(mlContext.Transforms.NormalizeMeanVariance(nameof(TaxiTrip.PassengerCount)))
        .Append(mlContext.Transforms.NormalizeMeanVariance(nameof(TaxiTrip.TripTime)))
        .Append(mlContext.Transforms.NormalizeMeanVariance(nameof(TaxiTrip.TripDistance)))
        .Append(mlContext.Transforms.Concatenate(
            "Features",
            "VendorIdEncoded",
            "RateCodeEncoded",
            "PaymentTypeEncoded",
            nameof(TaxiTrip.PassengerCount),
            nameof(TaxiTrip.TripTime),
            nameof(TaxiTrip.TripDistance)));

display(h3("Show transformed data..."));

// Fit the transformations on the training data, then apply them to that same
// data so the resulting columns can be inspected.
var transformation = dataProcessPipeline
    .Fit(trainDataView)
    .Transform(trainDataView);

// Last expression of the cell (no semicolon): its value is rendered as the cell output.
transformation.Preview()


# Append the trainer/algorithm to pipeline and train the model

In [None]:
%%time
display(h1("Build Training Pipeline and Train the model"));
display(h4("Creating the Training Pipeline with trainer/algorithm"));

// STEP 3: Set the training algorithm - Selected Trainer (Ols, i.e. ordinary least squares regression).
// The trainer learns to predict the "FareAmount" label from the "Features" column
// produced by the data processing pipeline.
var trainer = mlContext.Regression.Trainers.Ols(labelColumnName: "FareAmount", featureColumnName: "Features");
var trainingPipeline = dataProcessPipeline.Append(trainer);

// STEP 4: Train the model by fitting the full pipeline to the training dataset.
// Fit runs the data transformations and then the trainer on trainDataView.
display("=============== Training the model ===============");
var trainedModel = trainingPipeline.Fit(trainDataView);

## Make predictions in bulk from the TestDataset to be used for the metrics

In [None]:
// Make predictions to plot against actual values
display(h3("===== Making predictions in bulk for the whole Test Dataset ====="));
// Make predictions in bulk (Transformed IDataView will have the predictions plus the actual/true values)
IDataView predictionsDataView = trainedModel.Transform(testDataView);
// Last expression of the cell (no semicolon): the preview is rendered as the cell output.
predictionsDataView.Preview()

## Display the metrics (Model quality evaluation)

In [None]:
display(h3("===== Evaluating Model's accuracy with Test dataset ====="));

// Compare the predicted "Score" column against the true "FareAmount" label
// over the bulk predictions, then show the resulting regression metrics.
var metrics = mlContext.Regression.Evaluate(
    data: predictionsDataView,
    labelColumnName: "FareAmount",
    scoreColumnName: "Score");

display(metrics);

## Bar chart showing 'Actual fares vs. Predicted fares Comparison' 

In [None]:
// Maximum number of rows to use for the bar chart
int totalNumberForBarChart  = 20;

// Take up to totalNumberForBarChart actual and predicted fares from the predictions.
float[] actualFares = predictionsDataView.GetColumn<float>("FareAmount").Take(totalNumberForBarChart).ToArray();
float[] predictionFares = predictionsDataView.GetColumn<float>("Score").Take(totalNumberForBarChart).ToArray();

// Build the x-axis from the number of rows actually fetched, so x and y stay
// the same length even when the test set holds fewer rows than requested.
int[] elements = Enumerable.Range(0, actualFares.Length).ToArray();

// Define group for Actual values 
var ActualValuesGroupBarGraph = new Graph.Bar()
{
    x = elements,
    y = actualFares,
    name = "Actual"
};

// Define group for Prediction values 
var PredictionValuesGroupBarGraph = new Graph.Bar()
{
    x = elements,
    y = predictionFares,
    name = "Predicted"
};

var chart = Chart.Plot(new[] {ActualValuesGroupBarGraph, PredictionValuesGroupBarGraph});

// Grouped bars place the actual and predicted fare of each case side by side.
var layout = new Layout.Layout(){barmode = "group", title="Actual fares vs. Predicted fares Comparison"};
chart.WithLayout(layout);
chart.WithXTitle("Cases");
chart.WithYTitle("Fare");
chart.WithLegend(true);
chart.Width = 700;
chart.Height = 400;

display(chart);

# Save the ML model as a file

In [None]:
display(h1("Saving the ML.NET Model as a file..."));

string modelFilePath = "models/ml_net_taxi_csharp.zip";

// The target folder may not exist yet on a fresh checkout; create it first so
// that saving does not fail. CreateDirectory is a no-op when it already exists.
string modelDirectory = System.IO.Path.GetDirectoryName(modelFilePath);
if (!string.IsNullOrEmpty(modelDirectory))
{
    System.IO.Directory.CreateDirectory(modelDirectory);
}

// Persist the trained pipeline together with the input schema of the training data.
mlContext.Model.Save(trainedModel, trainDataView.Schema, modelFilePath);

display(h3($"The model was saved to: {modelFilePath}"));
