### Multi-class classification using a DataFrame

**Rationale**: Predict the product category from the product name, brand, and description.

In [None]:
// ML.NET Nuget packages 
#r "nuget:Microsoft.ML,1.4.0"  
    
//Install XPlot package
#r "nuget:XPlot.Plotly,2.0.0"

// Install DataFrame
#r "nuget:Microsoft.Data.Analysis"
    
using Microsoft.ML;
using Microsoft.ML.Data;
using XPlot.Plotly;
using Microsoft.Data.Analysis; 

In [None]:
// Boilerplate: register a "text/html" formatter so that a displayed DataFrame
// is written out as an HTML table of its first rows.

using Microsoft.AspNetCore.Html;
Formatter<DataFrame>.Register((df, writer) =>
{
    // Render at most this many rows, or fewer when the frame is shorter.
    var maxRows = 20;
    var rowCount = Math.Min(maxRows, df.Rows.Count);

    // Header: an "index" cell followed by one cell per column name.
    var headerCells = new List<IHtmlContent>();
    headerCells.Add(th(i("index")));
    foreach (var column in df.Columns)
    {
        headerCells.Add((IHtmlContent) th(column.Name));
    }

    // Body: for each rendered row, the row number followed by every value in the row.
    var bodyRows = new List<List<IHtmlContent>>();
    for (var rowIndex = 0; rowIndex < rowCount; rowIndex++)
    {
        var rowCells = new List<IHtmlContent>();
        rowCells.Add(td(rowIndex));
        foreach (var value in df.Rows[rowIndex])
        {
            rowCells.Add(td(value));
        }
        bodyRows.Add(rowCells);
    }

    var tableHead = thead(headerCells);
    var tableBody = tbody(bodyRows.Select(cells => tr(cells)));

    writer.Write(table(tableHead, tableBody));
}, "text/html");

In [None]:
// Relative path to the Flipkart e-commerce sample CSV.
string dataPath = "../Datasets/Multi-Class-Classification Category/flipkart_com-ecommerce_sample.csv";

// Load the CSV into a DataFrame and show the summary returned by Info().
DataFrame df = DataFrame.LoadCsv(dataPath);
display(df.Info());

#### Data exploration

In [None]:
// ML.NET context used by the pipeline, training and saving cells; created with a fixed seed of 1.
MLContext mlContext = new MLContext(seed: 1);

// Show the first row of the loaded data.
display(df.Head(1));

#### Plotting

In [None]:
// The "category" column is the label to predict; it is reused for the confusion-matrix axes.
var categories = df["category"];

// Histogram over the category values.
PlotlyChart categoriesHistogram = Chart.Plot(new Graph.Histogram { x = categories });

categoriesHistogram.WithLayout(new Layout.Layout { title = "Distribution of categories" });
display(categoriesHistogram);

#### Data Transformation

In [None]:
// Data preparation pipeline:
//   1. map the "category" label to a key (in place),
//   2. one-hot hash encode "product_name" and "brand",
//   3. featurize the "description" text,
//   4. concatenate the three columns into "Features" and min-max normalize it (in place).
var dataProcessPipeline = mlContext.Transforms.Conversion.MapValueToKey("category")
    .Append(mlContext.Transforms.Categorical.OneHotHashEncoding("product_name"))
    .Append(mlContext.Transforms.Categorical.OneHotHashEncoding("brand"))
    .Append(mlContext.Transforms.Text.FeaturizeText("description"))
    .Append(mlContext.Transforms.Concatenate("Features", "product_name", "brand", "description"))
    .Append(mlContext.Transforms.NormalizeMinMax("Features"));

In [None]:
// SDCA maximum-entropy multiclass trainer on "category" / "Features", followed by a
// key-to-value mapping of "PredictedLabel" (in place).
var trainer = mlContext.MulticlassClassification.Trainers
    .SdcaMaximumEntropy(labelColumnName: "category", featureColumnName: "Features")
    .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

// Full pipeline: data preparation followed by the trainer.
var trainingPipeline = dataProcessPipeline.Append(trainer);

##### Cross-validate the model (5-fold)

In [None]:
// Run 5-fold cross-validation of the full training pipeline with "category" as the label.
var crossValidationResults = mlContext.MulticlassClassification.CrossValidate(df, trainingPipeline, numberOfFolds: 5, labelColumnName: "category");

// Mean micro-accuracy over all folds.
var avgAccuracy = crossValidationResults.Average(x => x.Metrics.MicroAccuracy);

// Metrics of the fold with the highest micro-accuracy; the confusion-matrix cell reads `metrics`.
// First() is used instead of FirstOrDefault() so that an empty result fails with an explicit
// InvalidOperationException rather than a NullReferenceException on `.Metrics`.
var metrics = crossValidationResults.OrderByDescending(x => x.Metrics.MicroAccuracy).First().Metrics;

// Terminated with ';' like the other display calls, so the cell does not also return the call's value.
display($"Accuracy: {avgAccuracy}");

##### Train the model

In [None]:
// Fit the complete pipeline on the loaded data. The DataFrame `df` is passed directly, exactly as
// it is passed to CrossValidate; no `dataView` variable is defined anywhere in this notebook.
ITransformer model = trainingPipeline.Fit(df);

#### Evaluate the model

In [None]:
// Heatmap of the confusion matrix held in `metrics` (the cross-validation fold with the
// highest micro-accuracy). Both axes are labelled with the distinct values of the
// "category" column; the y labels and the rows of counts are reversed together.
// NOTE(review): assumes the order of categories.Distinct() matches the class order used by
// ConfusionMatrix.Counts — confirm.
var confusionMatrix = Chart.Plot(new Graph.Heatmap 
    {
        x = categories.Distinct(),
        y = categories.Distinct().Reverse(),
        z = metrics.ConfusionMatrix.Counts.Reverse(),   
        // NOTE(review): the color range is fixed to 0..10; presumably larger counts all map to
        // the top color — confirm this is intended for this dataset.
        zmin = 0,
        zmax = 10,
        // NOTE(review): "off" does not look like a named colorscale — confirm the intended value.
        colorscale = "off"
    });

confusionMatrix.WithLayout(new Layout.Layout { title="Confusion Matrix" });
// Trailing expression without ';': presumably rendered by the notebook as the cell's output.
confusionMatrix

#### Save the model

In [None]:
// Save the trained model together with the schema of its input data to a zip file.
// The schema is read from `df` (no `dataView` variable is defined in this notebook); the cast
// goes through the IDataView interface, as `df` is already used as an IDataView by CrossValidate.
mlContext.Model.Save(model, ((IDataView)df).Schema, "productCategoryClassifier.zip");