# ML.NET demo

## Chuẩn bị môi trường cho ML.NET

**1. Cài đặt dotnet try**
```bash
dotnet tool install -g --add-source "https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json" Microsoft.dotnet-try

```

**2. Cài đặt Jupyter Notebook**

https://jupyter.org/install

**3. Cài đặt .NET Interactive**

```bash
dotnet tool install -g --add-source "https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json" Microsoft.dotnet-interactive
```

## Demo 1 - Xin chào ML.NET

### Cài đặt nuget packages

In [None]:
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet6/nuget/v3/index.json" 
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json" 

#r "nuget:Microsoft.ML, 1.7.0"
#r "nuget:Microsoft.ML.AutoML, 0.19.0"
#r "nuget:Microsoft.Data.Analysis, 0.19.0"
#r "nuget:XPlot.Plotly.Interactive, 4.0.4"


### Chuẩn bị Dữ liệu

In [None]:
using Microsoft.ML;
using Microsoft.ML.Data;

/// <summary>
/// One sample for the house-price regression demo: the house size, three
/// historical prices and the current price used as the training label.
/// </summary>
public class HousingData
{
    /// <summary>Size of the house; mapped to source column 0.</summary>
    [LoadColumn(0)]
    public float Size { get; set; }

    /// <summary>Three historical prices; mapped to source columns 1 through 3 as a vector of length 3.</summary>
    [LoadColumn(1, 3), VectorType(3)]
    public float[] HistoricalPrices { get; set; }

    /// <summary>Current price; mapped to source column 4 and exposed under the column name "Label".</summary>
    [LoadColumn(4), ColumnName("Label")]
    public float CurrentPrice { get; set; }
}

/// <summary>
/// Typed view of a model output row: binds the "Score" column to
/// <see cref="PredictedPrice"/>.
/// </summary>
public class Prediction
{
    /// <summary>Value read from the "Score" column.</summary>
    [ColumnName("Score")] public float PredictedPrice { get; set; }
}


Giả sử ta có dữ liệu như sau:

In [None]:

// In-memory sample set: six houses, each with a size, three historical prices
// and the current price (the label).
HousingData[] housingData =
{
    new HousingData { Size = 600f,  HistoricalPrices = new[] { 100000f, 125000f, 122000f }, CurrentPrice = 170000f },
    new HousingData { Size = 1000f, HistoricalPrices = new[] { 200000f, 250000f, 230000f }, CurrentPrice = 225000f },
    new HousingData { Size = 1000f, HistoricalPrices = new[] { 126000f, 130000f, 200000f }, CurrentPrice = 195000f },
    new HousingData { Size = 850f,  HistoricalPrices = new[] { 150000f, 175000f, 210000f }, CurrentPrice = 205000f },
    new HousingData { Size = 900f,  HistoricalPrices = new[] { 155000f, 190000f, 220000f }, CurrentPrice = 210000f },
    new HousingData { Size = 550f,  HistoricalPrices = new[] { 99000f, 98000f, 130000f },   CurrentPrice = 180000f }
};


### Huấn luyện model

In [None]:
MLContext mlContext = new MLContext();

// Load the in-memory samples into an IDataView.
IDataView trainingData = mlContext.Data.LoadFromEnumerable(housingData);

// Split the data into a training set and a test set (20% held out for testing).
var dataSplit = mlContext.Data.TrainTestSplit(trainingData, testFraction: 0.2);

IDataView trainData = dataSplit.TrainSet;
IDataView testData = dataSplit.TestSet;

// Data preparation: concatenate the inputs into a single "Features" vector,
// then min-max normalize that vector.
var pipeline = mlContext.Transforms.Concatenate("Features", new[] { "Size", "HistoricalPrices" })
                    .Append(mlContext.Transforms.NormalizeMinMax("Features"));

// Fit the preparation steps on their own. These two values are kept available
// for inspecting or reusing the prepared data; they are not fed to the trainer.
ITransformer dataPrepTransformer = pipeline.Fit(trainData);
IDataView transformedTrainingData = dataPrepTransformer.Transform(trainData);

// Train the model. sdcaEstimator already contains the preparation steps, so it
// is fitted on the raw trainData; fitting it on transformedTrainingData would
// run Concatenate + NormalizeMinMax a second time on already-prepared data.
// Keeping the preparation inside the trained model also means the model can be
// applied directly to raw HousingData rows.
var sdcaEstimator = pipeline.Append(mlContext.Regression.Trainers.Sdca());
var trainedModel = sdcaEstimator.Fit(trainData);


### Đánh giá model

In [None]:
// Score the held-out test set. trainedModel was built from the preparation
// pipeline plus the trainer, so it is applied to the raw testData directly;
// pre-transforming the test set first would only repeat the same preparation.
IDataView testDataPredictions = trainedModel.Transform(testData);

// Compute the regression metrics on the scored test rows.
RegressionMetrics trainedModelMetrics = mlContext.Regression.Evaluate(testDataPredictions);
double rSquared = trainedModelMetrics.RSquared;

// Coefficient of Determination
display(rSquared)

### Lưu model

In [None]:
// Persist the trained model to ./model.zip together with the schema of the
// data it was built from.
DataViewSchema inputSchema = trainingData.Schema;
mlContext.Model.Save(trainedModel, inputSchema, "./model.zip");

### Sử dụng model

Có thể deploy ở bất kỳ đâu: on-premise, cloud (Azure Functions, AKS, ...)

In [None]:
// The house to price: only the input columns are set, the label is left at its default.
var newHouse = new HousingData
{
    Size = 750f,
    HistoricalPrices = new float[] { 100000f ,125000f ,122000f }
};

// Load the saved model; modelSchema receives the schema stored with it.
DataViewSchema modelSchema;
ITransformer trainedModel = mlContext.Model.Load("./model.zip", out modelSchema);

// Build a prediction engine for the loaded model and score the single sample.
var predictionEngine = mlContext.Model.CreatePredictionEngine<HousingData, Prediction>(trainedModel);
var prediction = predictionEngine.Predict(newHouse);

Console.WriteLine($"Predicted price for size: {newHouse.Size} sq ft= {prediction.PredictedPrice}k$");

Predicted price for size: 750 sq ft= 174941.47k$


## Demo 2 - Thực hành xây dựng một model với AutoML

In [None]:
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json" 
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json" 

#r "nuget:Microsoft.ML, 1.7.0"
#r "nuget:Microsoft.ML.AutoML, 0.19.0"
#r "nuget:Microsoft.Data.Analysis, 0.19.0"
#r "nuget:XPlot.Plotly.Interactive, 4.0.4"

### Cấu hình cho DataFrame

In [None]:
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;
using Microsoft.DotNet.Interactive.Formatting;
using Microsoft.Data.Analysis;
using XPlot.Plotly;
using Microsoft.AspNetCore.Html;
using Microsoft.DotNet.Interactive.Formatting;
using static Microsoft.DotNet.Interactive.Formatting.PocketViewTags;

// Registers an HTML formatter for DataFrame: renders a table with an "index"
// column followed by one column per DataFrame column, showing at most the
// first 20 rows.
Formatter.Register<DataFrame>((df, writer) =>
{
    const int maxRows = 20;

    // Header row: an italic "index" cell, then one cell per column name.
    var headerCells = new List<IHtmlContent>();
    headerCells.Add(th(i("index")));
    foreach (var column in df.Columns)
    {
        headerCells.Add((IHtmlContent) th(column.Name));
    }

    // Body rows: the row number followed by each value of that row.
    var bodyRows = new List<List<IHtmlContent>>();
    var shownRows = Math.Min(maxRows, df.Rows.Count);
    for (var rowIndex = 0; rowIndex < shownRows; rowIndex++)
    {
        var rowCells = new List<IHtmlContent>();
        rowCells.Add(td(rowIndex));
        foreach (var value in df.Rows[rowIndex])
        {
            rowCells.Add(td(value));
        }
        bodyRows.Add(rowCells);
    }

    var htmlTable = table(
        thead(headerCells),
        tbody(bodyRows.Select(rowCells => tr(rowCells))));

    writer.Write(htmlTable);
}, "text/html");

### Load & hiển thị dữ liệu

In [None]:
// Read ./housing.csv into a DataFrame; the trailing expression makes the
// notebook render it.
DataFrame housingData = DataFrame.LoadCsv("./housing.csv");
housingData

In [None]:
// Display the result of DataFrame.Description() for the loaded housing data.
housingData.Description()

### Xem dưới dạng Histogram

In [None]:
// Histogram of the "median_house_value" column, using 20 bins on the x axis.
var houseValueHistogram = new Histogram()
{
    x = housingData.Columns["median_house_value"],
    nbinsx = 20
};

Chart.Plot(houseValueHistogram)

### Xem dưới dạng Scatter

In [None]:
// Colour each point by its "median_house_value" using the "Jet" colour scale.
var valueMarker = new Marker()
{
    color = housingData.Columns["median_house_value"],
    colorscale = "Jet"
};

// One marker per row, positioned by longitude (x) and latitude (y).
var locationTrace = new Scattergl()
{
    x = housingData.Columns["longitude"],
    y = housingData.Columns["latitude"],
    mode = "markers",
    marker = valueMarker
};

var chart = Chart.Plot(locationTrace);
chart.Width = 600;
chart.Height = 600;
chart.Display();

### Trộn và chia tập dữ liệu train, test

In [None]:
/// <summary>
/// Shuffles <paramref name="array"/> in place by swapping each position with a
/// randomly chosen position at or after it, and returns the same array instance.
/// </summary>
/// <typeparam name="T">Element type of the array.</typeparam>
/// <param name="array">Array to shuffle; it is modified in place.</param>
/// <param name="seed">
/// Optional seed for the random number generator. Pass a value to get the same
/// ordering on every run; when omitted, a default-seeded generator is used.
/// </param>
/// <returns>The same array instance, with its elements reordered.</returns>
/// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
static T[] Shuffle<T>(T[] array, int? seed = null)
{
    if (array == null)
    {
        throw new ArgumentNullException(nameof(array));
    }

    Random rand = seed.HasValue ? new Random(seed.Value) : new Random();
    for (int i = 0; i < array.Length; i++)
    {
        // Pick a partner index in [i, array.Length) and swap it into position i.
        int r = i + rand.Next(array.Length - i);
        (array[i], array[r]) = (array[r], array[i]);
    }
    return array;
}

// Shuffle every row index, then hold out the first 10% as the test set and
// keep the remainder for training.
int rowCount = (int)housingData.Rows.Count;
int[] randomIndices = Shuffle(Enumerable.Range(0, rowCount).ToArray());
int testSize = (int)(housingData.Rows.Count * .1);

int[] testRows = randomIndices.Take(testSize).ToArray();
int[] trainRows = randomIndices.Skip(testSize).ToArray();

DataFrame housing_test = housingData[testRows];
DataFrame housing_train = housingData[trainRows];

// Show the row counts of both partitions (train first, then test).
housing_train.Rows.Count.Display();
housing_test.Rows.Count.Display();

### Tạo Experiment với AutoML

In [None]:

using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.AutoML;

In [None]:
#!time

// Fresh ML context for the AutoML demo.
MLContext mlContext = new MLContext();

// Regression experiment limited to 20 seconds of experiment time.
var autoCatalog = mlContext.Auto();
var experiment = autoCatalog.CreateRegressionExperiment(maxExperimentTimeInSeconds: 20);

// Run the experiment on the training partition, predicting "median_house_value".
var result = experiment.Execute(
    housing_train,
    labelColumnName: "median_house_value");

### Hiển thị xem thuật toán nào tốt nhất

In [None]:
// One scatter series per trainer: runtime (x) against mean absolute error (y),
// skipping runs that produced no validation metrics.
var scatters = result.RunDetails
    .Where(run => run.ValidationMetrics != null)
    .GroupBy(run => run.TrainerName)
    .Select(trainerRuns => new Scattergl()
    {
        name = trainerRuns.Key,
        x = trainerRuns.Select(run => run.RuntimeInSeconds),
        y = trainerRuns.Select(run => run.ValidationMetrics.MeanAbsoluteError),
        mode = "markers",
        marker = new Marker() { size = 12 }
    });

var chart = Chart.Plot(scatters);
chart.WithXTitle("Training Time");
chart.WithYTitle("Error");
chart.Display();

// Report the trainer of the best run found by the experiment.
Console.WriteLine($"Best Trainer:{result.BestRun.TrainerName}");

### Đánh giá best model

In [None]:
// Score the held-out test partition with the best model found by AutoML.
var testResults = result.BestRun.Model.Transform(housing_test);

// Materialize both columns once. They are read twice below (by Max() and by
// the plot), so without ToArray() each column would be enumerated from
// testResults more than once.
var trueValues = testResults.GetColumn<float>("median_house_value").ToArray();
var predictedValues = testResults.GetColumn<float>("Score").ToArray();

// Predicted value (y) against true value (x), one marker per test row.
var predictedVsTrue = new Scattergl()
{
    x = trueValues,
    y = predictedValues,
    mode = "markers",
};

// Largest value on either axis; used as the end point of the reference line.
var maximumValue = Math.Max(trueValues.Max(), predictedValues.Max());

// Diagonal y = x from the origin: points on this line are perfect predictions.
var perfectLine = new Scattergl()
{
    x = new[] {0, maximumValue},
    y = new[] {0, maximumValue},
    mode = "lines",
};

var chart = Chart.Plot(new[] {predictedVsTrue, perfectLine });
chart.WithXTitle("True Values");
chart.WithYTitle("Predicted Values");
chart.WithLegend(false);
chart.Width = 600;
chart.Height = 600;
chart.Display();