In [2]:
#r "nuget:Microsoft.ML,1.3.1"
#r "nuget:XPlot.Plotly,2.0.0"
#r "D:\Development\VisualStudio\ITMO\ML\Bayes\bin\Release\netstandard2.1\Bayes.dll"

using XPlot.Plotly;
using System.IO;
using System.Linq;
using Bayes;

Installed package XPlot.Plotly version 2.0.0

Installed package Microsoft.ML version 1.3.1

In [3]:
/// <summary>
/// Reads every file in <paramref name="partFolderPath"/> and builds one <see cref="Message"/> per file.
/// </summary>
/// <remarks>
/// Each file is split into space-separated tokens; the first non-blank token is dropped
/// and the remaining tokens are parsed as integers. A file is labelled as spam when its
/// file name (not its full path) contains "spmsg".
/// </remarks>
/// <param name="partFolderPath">Folder whose files are parsed.</param>
/// <param name="n">
/// Forwarded unchanged to the <see cref="Message"/> constructor
/// (presumably the n-gram size; confirm against <see cref="Message"/>).
/// </param>
/// <returns>The parsed messages, in the order the files were enumerated.</returns>
private static List<Message> ParsePart(String partFolderPath, int n)
{
    List<Message> messages = new List<Message>();

    foreach (String dataFile in Directory.EnumerateFiles(partFolderPath))
    {
        // ReadAllLines already strips line terminators, and blank tokens produced by
        // leading/trailing/repeated spaces are removed by the Where filter below.
        var data = File
            .ReadAllLines(dataFile)
            .SelectMany(line => line.Split(' '))
            .Where(token => !String.IsNullOrWhiteSpace(token))
            .Skip(1) // the first token of the file is not part of the numeric data
            // Invariant culture: the data files are machine-written, so parsing must not
            // depend on the regional settings of the machine running the notebook.
            .Select(token => Int32.Parse(token, System.Globalization.CultureInfo.InvariantCulture))
            .ToArray();

        // Look at the file name only: testing the full path would mark every file as
        // spam whenever some parent folder happens to contain "spmsg".
        bool isSpam = Path.GetFileName(dataFile).Contains("spmsg");

        messages.Add(new Message(data, isSpam, n));
    }

    return messages;
}

In [4]:
// Every sub-folder of the data folder is parsed into one part of the data set.
string dataFolderPath = @"D:\RandomTrash\Bayes";
var dataPartPaths = Directory.GetDirectories(dataFolderPath);
List<List<Message>> dataSet = dataPartPaths
    .Select(partPath => ParsePart(partPath, 2))
    .ToList();

// Class totals over all parts; each curve step is the reciprocal of one of them.
int spamCount = dataSet.Sum(part => part.Count(message => message.IsSpam));
int normalCount = dataSet.Sum(part => part.Count(message => !message.IsSpam));
double rightStep = 1.0 / spamCount;
double upStep = 1.0 / normalCount;

// NOTE(review): the meaning of the numeric arguments is defined by CrossValidation.Validate,
// which is not visible here — confirm against the Bayes library.
var confusionMatrix = CrossValidation.Validate(dataSet, 10, 0.015, 2.5, 1, out var rocData);
rocData = rocData.OrderBy(entry => entry.Item1).ToList();

// The curve starts at the origin and gains exactly one point per rocData entry.
var xValues = new List<double> { 0 };
var yValues = new List<double> { 0 };

foreach (var entry in rocData)
{
    // Entries whose Item2 equals 0 move the curve up by upStep; all others move it right
    // by rightStep (presumably Item2 is the class label — confirm against Validate).
    bool movesUp = entry.Item2 == 0;
    double previousX = xValues[^1];
    double previousY = yValues[^1];

    xValues.Add(movesUp ? previousX : previousX + rightStep);
    yValues.Add(movesUp ? previousY + upStep : previousY);
}

In [5]:
// Reference diagonal y = x, sampled at 0.001, 0.002, ..., 1.0.
var diagonal = Enumerable.Range(1, 1000).Select(step => step / 1000.0);

// Curve built from xValues / yValues.
var naiveBayesTrace = new Graph.Scatter { name = "Naive Bayes", y = yValues, x = xValues };
// Baseline trace: both axes use the same diagonal samples.
var randomTrace = new Graph.Scatter { name = "Random", y = diagonal, x = diagonal };

var scatters = new List<Graph.Scatter> { naiveBayesTrace, randomTrace };

// Plot both traces on a 400x400 chart and render it in the notebook output.
var chart = Chart.Plot(scatters);
chart.WithHeight(400);
chart.WithWidth(400);
display(chart);

In [7]:
// Sweeps the fourth argument of CrossValidation.Validate over [1, 2.5) in steps of 0.05
// and records the per-class precision for each value.
// NOTE(review): what that argument controls is defined by CrossValidation.Validate,
// which is not visible here — confirm against the Bayes library.
List<Double> x = new List<Double>();
List<Double> y = new List<Double>();
List<Double> y2 = new List<Double>();

// The counter is a decimal on purpose: 0.05 has no exact binary representation, so
// accumulating it in a double makes the sampled values drift and lets rounding decide
// whether the last iteration (just below 2.5) runs. Decimal arithmetic is exact here,
// giving exactly 30 samples: 1.00, 1.05, ..., 2.45.
for (decimal sweep = 1.0m; sweep < 2.5m; sweep += 0.05m)
{
    double sweepValue = (double)sweep;

    // The ROC output is not needed for this plot, so it is discarded.
    var sweepMatrix = CrossValidation.Validate(dataSet, 10, 0.015, sweepValue, 1, out _);

    x.Add(sweepValue);
    y.Add(sweepMatrix.Precision(0));   // class 0: plotted as "Normal messages precision"
    y2.Add(sweepMatrix.Precision(1));  // class 1: plotted as "Spam messages precision"
}

// Reuses the notebook-level `scatters` variable declared by the earlier chart cell.
scatters = new List<Graph.Scatter>
{
    new Graph.Scatter{y = y, x = x, y0 = 0, dy = 0.1, name = "Normal messages precision"},
    new Graph.Scatter{y = y2, x = x, y0 = 0, dy = 0.1, name = "Spam messages precision"},
};
var chart2 = Chart.Plot(scatters);
display(chart2);