### Binary classification

**Rationale**: Predict the sentiment of customer reviews

In [1]:
// ML.NET Nuget packages 
#r "nuget:Microsoft.ML,1.4.0-preview2"  
    
//Install XPlot package
#r "nuget:XPlot.Plotly,2.0.0"

//Install Databaseloader
#r "nuget:Microsoft.ML.Experimental,0.16.0-preview"
#r "nuget:System.Data.SqlClient,4.6.0"

using Microsoft.ML;
using Microsoft.ML.Data;
using XPlot.Plotly;
using System.Data.SqlClient;

In [5]:
using Microsoft.ML.Data;

public class ProductReview 
{   
    [LoadColumn(0)]
    public bool Sentiment;
       
    [LoadColumn(1)]
    public string Review;     
}

In [3]:
var mlContext = new MLContext(seed: 1);

var loader = mlContext.Data.CreateDatabaseLoader<ProductReview>();

string connectionString = @"Data Source=DESKTOP-H8AH0LO\SQLEXPRESS;Initial Catalog=mlnet;Integrated Security=True";
string sqlCommand = "SELECT Sentiment, Review FROM Product";

var dbSource = new DatabaseSource(SqlClientFactory.Instance, connectionString, sqlCommand);

IDataView dataView = loader.Load(dbSource);

Unhandled Exception: Could not load file or assembly 'System.Data.SqlClient, Version=4.5.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a'. Reference assemblies should not be loaded for execution.  They can only be loaded in the Reflection-only loader context. (0x80131058)

In [6]:
var dataPath = "./Datasets/Binary-Classification Sentiment/product_reviews.csv";

var mlContext = new MLContext(seed: 1);

IDataView dataView = mlContext.Data.LoadFromTextFile<ProductReview>(dataPath, hasHeader: true, separatorChar: ',');

#### Data exploration

In [7]:
var rows = mlContext.Data.CreateEnumerable<ProductReview>(dataView, reuseRowObject: false)
                    .Take(5)
                    .ToList();

display(rows);

index,Sentiment,Review
0,True,I like the item pricing. My granddaughter wanted to mark on it but I wanted it just for the letters.
1,True,Love the magnet easel... great for moving to different areas... Wish it had some sort of non skid pad on bottom though...
2,True,"""Both sides are magnetic. A real plus when you're entertaining more than one child. The four-year old can find the letters for the words"
3,True,"""Bought one a few years ago for my daughter and she loves it"
4,True,I have a stainless steel refrigerator therefore there are not much space for my son to play with his magnet. Brought this for him to put his magnet on. He enjoys sticking his magnet on it. Great to have so he can play with his alphabet magnets.


#### Plotting

In [8]:
var countPositiveSentiment = dataView.GetColumn<bool>("Sentiment").Count(x => x == true);
var countNegativeSentiment = dataView.GetColumn<bool>("Sentiment").Count(x => x == false);

var values = new List<int> { countPositiveSentiment, countNegativeSentiment};
var labels = new List<string> { "Positive", "Negative" };

var pieChart = Chart.Plot(new Graph.Pie 
{
    labels = labels,
    values = values
});

pieChart

#### Split our data

In [9]:
var trainTestSplit = mlContext.Data.TrainTestSplit(dataView);

#### Data Transformation

In [10]:
var dataProcessPipeline = mlContext.Transforms.Text.FeaturizeText("Review")
        .Append(mlContext.Transforms.CopyColumns("Features", "Review"))
        .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"));

In [12]:
var trainer = mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(labelColumnName: "Sentiment", featureColumnName: "Features");

var trainingPipeline = dataProcessPipeline.Append(trainer);

##### Train the model

In [13]:
ITransformer model = trainingPipeline.Fit(trainTestSplit.TrainSet);

#### Evaluate the model

In [17]:
var predicitions = model.Transform(trainTestSplit.TestSet);
var metrics = mlContext.BinaryClassification.Evaluate(predicitions, labelColumnName: "Sentiment");

display("Accuracy:" + " " + metrics.Accuracy);
display("F1Score:" + " " + metrics.F1Score); 

Accuracy: 0.9378064824781724

F1Score: 0.9675790261238231

#### Save the model

In [18]:
mlContext.Model.Save(model, trainTestSplit.TrainSet.Schema, "sentimentClassifier.zip");