### Binary classification using a DataFrame

**Rationale**: Predict the sentiment of customer reviews

In [None]:
// ML.NET Nuget packages 
#r "nuget:Microsoft.ML,1.4.0"  
    
// Install XPlot package
#r "nuget:XPlot.Plotly,2.0.0"
    
// Install DataFrame
#r "nuget:Microsoft.Data.Analysis"
    
using Microsoft.ML;
using Microsoft.ML.Data;
using XPlot.Plotly;
using Microsoft.Data.Analysis;    

In [None]:
// Boilerplate: registers a "text/html" formatter so that a DataFrame is rendered as an HTML table.

using Microsoft.AspNetCore.Html;
Formatter<DataFrame>.Register((df, writer) =>
{
    // Upper bound on how many rows are rendered.
    const int maxRows = 20;

    // Header row: an "index" cell followed by one cell per column name.
    var headerCells = new List<IHtmlContent>();
    headerCells.Add(th(i("index")));
    headerCells.AddRange(df.Columns.Select(column => (IHtmlContent) th(column.Name)));

    // Body rows: the row number followed by every value in that row.
    var bodyRows = new List<List<IHtmlContent>>();
    var rowCount = Math.Min(maxRows, df.Rows.Count);
    for (var rowIndex = 0; rowIndex < rowCount; rowIndex++)
    {
        var rowCells = new List<IHtmlContent>();
        rowCells.Add(td(rowIndex));
        foreach (var value in df.Rows[rowIndex])
        {
            rowCells.Add(td(value));
        }
        bodyRows.Add(rowCells);
    }

    var htmlTable = table(
        thead(headerCells),
        tbody(bodyRows.Select(cells => tr(cells))));

    writer.Write(htmlTable);
}, "text/html");

In [None]:
// Load the product reviews CSV into a DataFrame.
var dataPath = "../Datasets/Binary-Classification Sentiment/product_reviews.csv";
var df = DataFrame.LoadCsv(dataPath);

// Show the summary that DataFrame.Info() produces for the loaded data.
var dataFrameInfo = df.Info();
display(dataFrameInfo);

#### Data exploration

In [None]:
// Peek at the first three rows of the loaded data.
var firstRows = df.Head(3);
display(firstRows);

#### Plotting

In [None]:
// Group the rows by sentiment (0 or 1), count each group, order the result by sentiment,
// and keep the counts reported for the "Summary" column.
var countsPerSentiment = df.GroupBy("Sentiment").Count();
var groupedBySentiment = countsPerSentiment.Sort("Sentiment")["Summary"];

// The DataFrame hands the counts back as object, so convert them explicitly.
// After sorting, position 0 is sentiment 0 (negative) and position 1 is sentiment 1 (positive).
var countNegative = Convert.ToInt32(groupedBySentiment[0]);
var countPositive = Convert.ToInt32(groupedBySentiment[1]);

var labels = new List<string> { "Negative", "Positive" };
var values = new List<int> { countNegative, countPositive };

var sentimentPie = new Graph.Pie
{
    labels = labels,
    values = values
};
var pieChart = Chart.Plot(sentimentPie);

pieChart

#### Split our data

In [None]:
// We need to convert the Sentiment column's 0s and 1s to a boolean type.
// The column is looked up by name rather than by position, so the conversion does not
// depend on "Sentiment" being the first column of the CSV.
var sentimentColumn = df["Sentiment"];
var sentimentValues = new List<bool>();
for (long rowIndex = 0; rowIndex < sentimentColumn.Length; rowIndex++)
{
    // Values are exposed as object; Convert.ToBoolean maps 0 to false and non-zero numbers to true.
    sentimentValues.Add(Convert.ToBoolean(sentimentColumn[rowIndex]));
}

// Swap the original column for a boolean column with the same name.
df.Columns.Remove(sentimentColumn);
df.Columns.Add(new PrimitiveDataFrameColumn<bool>("Sentiment", sentimentValues));

In [None]:
// A fixed seed is passed to the ML.NET context.
var mlContext = new MLContext(seed: 1);

// Split the DataFrame into train and test sets; 0.1 is the default test fraction, spelled out here.
var trainTestSplit = mlContext.Data.TrainTestSplit(df, testFraction: 0.1);

#### Data Transformation

In [None]:
// Featurize the "Review" text directly into the "Features" column, then min-max normalize it in place.
// Writing to "Features" leaves the "Review" text column intact and avoids a separate column-copy stage.
var dataProcessPipeline = mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Features", inputColumnName: "Review")
        .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"));

In [None]:
// L-BFGS logistic regression that reads its label from "Sentiment" and its inputs from "Features".
var trainer = mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(
    labelColumnName: "Sentiment",
    featureColumnName: "Features");

// Full pipeline: the data transformations followed by the trainer.
var trainingPipeline = dataProcessPipeline.Append(trainer);

#### Train the model

In [None]:
// Fit the whole pipeline (transformations + trainer) on the training portion of the split.
var trainingData = trainTestSplit.TrainSet;
ITransformer model = trainingPipeline.Fit(trainingData);

#### Evaluate the model

In [None]:
// Run the trained model over the test set and evaluate the result using "Sentiment" as the label column.
var predictions = model.Transform(trainTestSplit.TestSet);
var metrics = mlContext.BinaryClassification.Evaluate(predictions, labelColumnName: "Sentiment");

// Report the headline metrics.
display($"Recall: {metrics.PositiveRecall}");
display($"Accuracy: {metrics.Accuracy}");
display($"F1Score: {metrics.F1Score}");

#### Save the model

In [None]:
// Save the trained model, together with the training set's schema, to a zip file.
var modelPath = "sentimentClassifier.zip";
var inputSchema = trainTestSplit.TrainSet.Schema;
mlContext.Model.Save(model, inputSchema, modelPath);