In [1]:
#r "nuget:Microsoft.ML"

In [2]:
using System;
using System.IO;
using Microsoft.ML;
using Microsoft.ML.Data;
using static Microsoft.ML.DataOperationsCatalog;

In [3]:
/// <summary>
/// Input row for the sentiment model. The text loader fills it from
/// column indices 0 and 2 of the source file.
/// </summary>
public class SentimentIssue
{
    /// <summary>Boolean label, read from column index 0.</summary>
    [LoadColumn(0)] public bool Label { get; set; }

    /// <summary>Comment text, read from column index 2.</summary>
    [LoadColumn(2)] public string Text { get; set; }
}

In [4]:
/// <summary>
/// Prediction output row: the inherited input columns plus the
/// PredictedLabel, Probability and Score columns.
/// </summary>
public class SentimentPrediction : SentimentIssue
{
    // Bound to the "PredictedLabel" column; without the attribute the column
    // name would default to the property name, "Prediction".
    [ColumnName("PredictedLabel")] public bool Prediction { get; set; }

    // The property names below already equal the column names we want,
    // so no ColumnName attribute is needed on them.
    public float Probability { get; set; }
    public float Score { get; set; }
}

In [5]:
// Shared ML.NET context used by every later cell (loading, splitting, training, evaluation).
// A fixed seed makes the context's randomized components repeatable, so Restart & Run All
// can reproduce the same results; an unseeded context may give different numbers per run.
var mlContext = new MLContext(seed: 1);

In [6]:
// Load the dataset into an IDataView, mapping file columns through the LoadColumn
// attributes on SentimentIssue; the first line of the file is treated as a header.
IDataView dataView = mlContext.Data.LoadFromTextFile<SentimentIssue>(
    path: "wikiDetoxAnnotated40kRows.tsv",
    hasHeader: true);

In [7]:
// Peek at the first 10 loaded rows to confirm Label and Text were parsed.
display(dataView.Preview(maxRows: 10).RowView);

index,Values
0,"KeyValuePair<String,Object>[] Label: False Text: "" ==He is a Rapist!!!!!== Please edit the article to include this important fact. Thank You. — Preceding unsigned comment added by • """
1,"KeyValuePair<String,Object>[] Label: False Text: The other two films Hitch and Magnolia are also directly related to the community in question, and may be of interest to those who see those films. So why not link to them?"
2,"KeyValuePair<String,Object>[] Label: False Text: == blocking people == how can i block lambs12 from my page and editing my stuff she's saying bad things she's my sister"
3,"KeyValuePair<String,Object>[] Label: True Text: == Are you a female????? == mootmootmootmootmootmootmootmootmootmoot"
4,"KeyValuePair<String,Object>[] Label: False Text: :::::Good enough for me; thanks for the info. I'll check the Sheffield references later. -"
5,"KeyValuePair<String,Object>[] Label: False Text: == Motherjane concert location == Hi, Motherjane, being a popular band, has headlined a lot of college rock festivals in India. But quoting the name of every college that they have performed is not quite right. So please refrain from adding the names of any more colleges/institutes. Thanks! —"
6,"KeyValuePair<String,Object>[] Label: False Text: == warning == don't vandalize"
7,"KeyValuePair<String,Object>[] Label: False Text: "" :There were reviews from critics in her song? """
8,"KeyValuePair<String,Object>[] Label: False Text: "" Jack, is it a misrepresentation for you to write more than 500,000 nucleotide base pairs, or should you write 582 base pair. Lets start watching """"RNA world"""" page too. """
9,"KeyValuePair<String,Object>[] Label: False Text: "":Thank you, I will use these tool responsibly. (Editor Review) """


In [8]:
// Split the loaded rows into training and test sets; testFraction: 0.2 requests
// about one fifth of the rows for the test set.
var trainTestSplit = mlContext.Data.TrainTestSplit(data: dataView, testFraction: 0.2);
IDataView testData = trainTestSplit.TestSet;
IDataView trainingData = trainTestSplit.TrainSet;

In [9]:
// Peek at the first 10 rows of the training split.
display(trainingData.Preview(maxRows: 10).RowView);

index,Values
0,"KeyValuePair<String,Object>[] Label: False Text: "" ==He is a Rapist!!!!!== Please edit the article to include this important fact. Thank You. — Preceding unsigned comment added by • """
1,"KeyValuePair<String,Object>[] Label: False Text: The other two films Hitch and Magnolia are also directly related to the community in question, and may be of interest to those who see those films. So why not link to them?"
2,"KeyValuePair<String,Object>[] Label: False Text: == blocking people == how can i block lambs12 from my page and editing my stuff she's saying bad things she's my sister"
3,"KeyValuePair<String,Object>[] Label: True Text: == Are you a female????? == mootmootmootmootmootmootmootmootmootmoot"
4,"KeyValuePair<String,Object>[] Label: False Text: :::::Good enough for me; thanks for the info. I'll check the Sheffield references later. -"
5,"KeyValuePair<String,Object>[] Label: False Text: "" :There were reviews from critics in her song? """
6,"KeyValuePair<String,Object>[] Label: False Text: "" Jack, is it a misrepresentation for you to write more than 500,000 nucleotide base pairs, or should you write 582 base pair. Lets start watching """"RNA world"""" page too. """
7,"KeyValuePair<String,Object>[] Label: False Text: "":Thank you, I will use these tool responsibly. (Editor Review) """
8,"KeyValuePair<String,Object>[] Label: False Text: HELLO SITUSH ARE YOU THERE?? PEOPLE HAVE SOME QUESTIONS"
9,"KeyValuePair<String,Object>[] Label: False Text: -FREE MYANMAR!!! STOP THE MILITARY REGIME!!!! PATS1 DOESN't CARE!!! HE ONLY CARES ABOUT FOOTBALL AND ORDER!!!!!!!!!!!!!!!!"


In [10]:
// Peek at the first 10 rows of the held-out test split.
display(testData.Preview(maxRows: 10).RowView);

index,Values
0,"KeyValuePair<String,Object>[] Label: False Text: == Motherjane concert location == Hi, Motherjane, being a popular band, has headlined a lot of college rock festivals in India. But quoting the name of every college that they have performed is not quite right. So please refrain from adding the names of any more colleges/institutes. Thanks! —"
1,"KeyValuePair<String,Object>[] Label: False Text: == warning == don't vandalize"
2,"KeyValuePair<String,Object>[] Label: False Text: ""uranium""""] Inhaled Particles, vol. 2, pp. 819-38, at p. 836."""
3,"KeyValuePair<String,Object>[] Label: False Text: Welcome Hello and welcome to Wikipedia. We appreciate encyclopedic contributions, but some of your recent contributions seem to be advertising or for promotional purposes. Wikipedia does not allow advertising in articles. For more information on this, see *Policy on neutral point of view *Guideline on spam *Guideline on external links *Guideline on conflict of interest If you still have questions, there is a new contributor's help page, or you can write {{helpme}} below this message along with a question and someone will be along to answer it shortly. You may also find the following pages useful for a general introduction to Wikipedia. *The five pillars of Wikipedia *How to edit a page *Help pages *Tutorial *How to write a great article *Manual of Style I hope you enjoy editing Wikipedia! Please sign your name on talk pages using four tildes (~~~~); this will automatically produce your name and the date. Feel free to write a note on the bottom of my talk page if you want to get in touch with me. Again, welcome!"
4,"KeyValuePair<String,Object>[] Label: False Text: racisim. Obviously it has failed.]]"
5,"KeyValuePair<String,Object>[] Label: True Text: FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. 
JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. 
JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING BITCHES THAT ARE READING THIS. JASENM222 SUCKS FAT DICK.FUCKING"
6,"KeyValuePair<String,Object>[] Label: False Text: :Yes. I'm doing it often enough, I'll have to create a redirect. —"
7,"KeyValuePair<String,Object>[] Label: False Text: So... the proof there was a massacre rather than a lack of planning as indicated by overwhelming evidence is that Joachim Pieper's troops killed other people? By that definition all armies in the world are responsible for every massacre throughout history. Also, IHR is respecable as a source, it publishes a peer reviewed magazine which is sometimes used under google scholar, just look it up @ scholar.google.com."
8,"KeyValuePair<String,Object>[] Label: False Text: Mccready, I strongly advise you to take Jim Butler's words to heart. He's pointing out the very same problems with your editing that others have pointed out before. If someone disagrees with your edit, do not just keep putting it back. Continued biased editing and excessive reverts can be grounds for a disruption block. We have a high degree of tolerance here, but continuing to try the community's patience is a bad thing."
9,"KeyValuePair<String,Object>[] Label: False Text: == Please help me to stop my page being deleted == Hi I do not want my wiki page (The Celestial hunt) to be deleted. Please help me and tell me fixes about the page."


In [11]:
// STEP 2: Data-processing pipeline. Featurize the Text column into the
// "Features" column that the trainer reads.
var dataProcessPipeline = mlContext.Transforms.Text.FeaturizeText(outputColumnName: "Features", inputColumnName: nameof(SentimentIssue.Text));

In [12]:
// STEP 3: Pick the learner (SDCA logistic regression) and append it to the
// data-processing pipeline to form the full training pipeline.
var trainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
    labelColumnName: "Label",
    featureColumnName: "Features");
var trainingPipeline = dataProcessPipeline.Append(trainer);

In [13]:
// STEP 4: Fit the whole pipeline (featurizer + trainer) on the training split.
var trainedModel = trainingPipeline.Fit(input: trainingData);

In [14]:
// Show the fitted transformer chain: the text featurizer followed by the
// binary prediction transformer.
display(trainedModel);

index,type,FeatureColumnName,FeatureColumnType,Model
Dimensions,IsKnownSize,ItemType,Size,RawType
SubModel,Calibrator,SubModel,Calibrator,Unnamed: 4_level_2
0,Microsoft.ML.Transforms.Text.TextFeaturizingEstimator+Transformer,,,
1,"Microsoft.ML.Data.BinaryPredictionTransformer<Microsoft.ML.Calibrators.CalibratedModelParametersBase<Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator>>",Features,DimensionsIsKnownSizeItemTypeSizeRawType[ 1018635 ]TrueNumberDataViewType  RawType: System.Single1018635Microsoft.ML.Data.VBuffer<System.Single>,"SubModelCalibratorSubModelCalibratorMicrosoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]"
Dimensions,IsKnownSize,ItemType,Size,RawType
[ 1018635 ],True,NumberDataViewType  RawType: System.Single,1018635,Microsoft.ML.Data.VBuffer<System.Single>
SubModel,Calibrator,SubModel,Calibrator,
"Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]","Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]","Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]","Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]",

Dimensions,IsKnownSize,ItemType,Size,RawType
[ 1018635 ],True,NumberDataViewType  RawType: System.Single,1018635,Microsoft.ML.Data.VBuffer<System.Single>

SubModel,Calibrator,SubModel.1,Calibrator.1
"Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]","Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]","Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]","Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]"


In [15]:
// STEP 5: Score the held-out test rows with the trained model, then compute
// binary-classification metrics from the Label and Score columns.
var predictions = trainedModel.Transform(input: testData);
var metrics = mlContext.BinaryClassification.Evaluate(
    data: predictions,
    labelColumnName: "Label",
    scoreColumnName: "Score");

In [16]:
// Show which trainer was used; ToString() here yields the trainer's type name.
display(trainer.ToString())

Microsoft.ML.Trainers.SdcaLogisticRegressionBinaryTrainer

In [17]:
// Show the evaluation metrics computed on the test split.
display(metrics);

LogLoss,LogLossReduction,Entropy,AreaUnderRocCurve,Accuracy,PositivePrecision,PositiveRecall,NegativePrecision,NegativeRecall,F1Score,AreaUnderPrecisionRecallCurve,ConfusionMatrix
0.2224249069615786,0.532475826253963,0.4757506016841851,0.9465779892260632,0.9461883408071748,0.9369369369369368,0.5073170731707317,0.9467299578059072,0.9961154273029966,0.6582278481012658,0.7916849357061797,"ConfusionMatrix  PerClassPrecision: [ 0.9369369369369369, 0.9467299578059072 ]  PerClassRecall: [ 0.5073170731707317, 0.9961154273029966 ]  Counts: ReadOnlyCollection<IReadOnlyList<Double>> [ 416, 404 ] [ 28, 7180 ]  NumberOfClasses: 2"


In [22]:
// A single hand-written input to score; only Text is set, Label keeps its default.
var sampleStatement = new SentimentIssue
{
    Text = "Your service is wonderful.  Thank you."
};

In [19]:
// Wrap the trained model for single-example prediction:
// SentimentIssue in, SentimentPrediction out.
var predictionEngine = mlContext.Model
    .CreatePredictionEngine<SentimentIssue, SentimentPrediction>(transformer: trainedModel);

In [23]:
// Score the sample statement with the trained model.
var prediction = predictionEngine.Predict(example: sampleStatement);

In [24]:
// Show the predicted label, probability and raw score for the sample statement.
display(prediction);

Prediction,Probability,Score,Label,Text
False,0.16967155,-1.5879568,False,Your service is wonderful. Thank you.
