In [None]:
#r "nuget:Microsoft.ML,1.4.0-preview"
#r "nuget:Microsoft.ML.FastTree, 1.4.0-preview"
#r "nuget:Microsoft.ML.AutoML"
#r "nuget:Microsoft.Data.DataFrame,0.1.1-e190920-1"

#### Register an HTML formatter for the DataFrame

In [None]:
using Microsoft.Data;
using XPlot.Plotly;

In [None]:
using Microsoft.AspNetCore.Html;
// Registers a "text/html" formatter for DataFrame values. The frame is written
// as an HTML table: an "index" header followed by one header per column, and
// one table row for each of (at most) the first 20 data rows.
Formatter<DataFrame>.Register((frame, output) =>
{
    // Upper bound on the number of rows that are rendered.
    var maxRows = 20;

    // Header cells: the italic "index" label, then one <th> per column.
    var headerCells = new List<IHtmlContent>();
    headerCells.Add(th(i("index")));
    headerCells.AddRange(frame.Columns.Select(column => (IHtmlContent) th(column)));

    // Body rows: the row index first, then one <td> per value in that row.
    var bodyRows = new List<List<IHtmlContent>>();
    var rowCount = Math.Min(maxRows, frame.RowCount);
    for (var rowIndex = 0; rowIndex < rowCount; rowIndex++)
    {
        var rowCells = new List<IHtmlContent>();
        rowCells.Add(td(rowIndex));
        rowCells.AddRange(frame[rowIndex].Select(value => (IHtmlContent) td(value)));
        bodyRows.Add(rowCells);
    }

    var head = thead(headerCells);
    var body = tbody(bodyRows.Select(cells => tr(cells)));

    output.Write(table(head, body));
}, "text/html");

#### Fetch the data

In [None]:
using System.IO;
using System.Net.Http;

// Local path of the CSV data set; the following cells load the data from this file.
string dataPath = "data.csv";

// Download the data set only when no local copy exists yet.
if (!File.Exists(dataPath))
{
    // Dispose the client once the download is done, and await the request
    // instead of blocking the calling thread on .Result.
    using (var httpClient = new HttpClient())
    {
        var contents = await httpClient
            .GetStringAsync("https://aslottepublic.blob.core.windows.net/public/data-small.csv");

        // Write to dataPath (not a repeated literal) so the existence check
        // above and the file that is written always refer to the same path.
        File.WriteAllText(dataPath, contents);
    }
}

In [None]:
// Read the CSV file at dataPath into a DataFrame.
var dataFrame = DataFrame.ReadCsv(dataPath);
// Trailing expression without a semicolon: the value of the cell, which the
// notebook displays (presumably through the HTML formatter registered for DataFrame).
dataFrame

#### Explore the data

In [None]:
// Cell value: the result of Description() on the loaded frame
// (presumably per-column summary statistics — confirm against the DataFrame API).
dataFrame.Description()

In [None]:
// Histogram trace over the "amount" column, with nbinsx set to 100.
var amountHistogram = new Graph.Histogram
{
    nbinsx = 100,
    x = dataFrame["amount"]
};

// Cell value: the plotted chart.
Chart.Plot(amountHistogram)

In [None]:
// Histogram trace over the "newbalanceOrig" column, with nbinsx set to 100.
var newBalanceOrigHistogram = new Graph.Histogram
{
    nbinsx = 100,
    x = dataFrame["newbalanceOrig"]
};

// Cell value: the plotted chart.
Chart.Plot(newBalanceOrigHistogram)

#### Train your model

In [None]:
using Microsoft.ML;
using Microsoft.ML.Trainers.FastTree;
using Microsoft.ML.Data;

In [None]:
/// <summary>
/// Input schema for one row of the transaction data file. Every property is
/// bound to a file column by index (<c>LoadColumn</c>) and is exposed to the
/// pipeline under the original column name (<c>ColumnName</c>).
/// </summary>
internal sealed class Transaction
{
    /// <summary>Column 0, named "step".</summary>
    [LoadColumn(0)]
    [ColumnName("step")]
    public float Step { get; set; }

    /// <summary>Column 1, named "type".</summary>
    [LoadColumn(1)]
    [ColumnName("type")]
    public string Type { get; set; }

    /// <summary>Column 2, named "amount".</summary>
    [LoadColumn(2)]
    [ColumnName("amount")]
    public float Amount { get; set; }

    /// <summary>Column 3, named "nameOrig".</summary>
    [LoadColumn(3)]
    [ColumnName("nameOrig")]
    public string NameOrig { get; set; }

    /// <summary>Column 4, named "oldbalanceOrg".</summary>
    [LoadColumn(4)]
    [ColumnName("oldbalanceOrg")]
    public float OldbalanceOrg { get; set; }

    /// <summary>Column 5, named "newbalanceOrig".</summary>
    [LoadColumn(5)]
    [ColumnName("newbalanceOrig")]
    public float NewbalanceOrig { get; set; }

    /// <summary>Column 6, named "nameDest".</summary>
    [LoadColumn(6)]
    [ColumnName("nameDest")]
    public string NameDest { get; set; }

    /// <summary>Column 7, named "oldbalanceDest".</summary>
    [LoadColumn(7)]
    [ColumnName("oldbalanceDest")]
    public float OldbalanceDest { get; set; }

    /// <summary>Column 8, named "newbalanceDest".</summary>
    [LoadColumn(8)]
    [ColumnName("newbalanceDest")]
    public float NewbalanceDest { get; set; }

    /// <summary>Column 9, named "isFraud"; used as the label column by the training cells.</summary>
    [LoadColumn(9)]
    [ColumnName("isFraud")]
    public bool IsFraud { get; set; }

    /// <summary>Column 10, named "isFlaggedFraud".</summary>
    [LoadColumn(10)]
    [ColumnName("isFlaggedFraud")]
    public float IsFlaggedFraud { get; set; }
}

#### Load the data

In [None]:
// ML.NET context created with a fixed seed of 1.
var mlContext = new MLContext(seed: 1);

// Load the comma-separated file (first line is a header) using the
// Transaction class as the row schema.
var data = mlContext.Data.LoadFromTextFile<Transaction>(
    dataPath,
    separatorChar: ',',
    hasHeader: true);

// Split into train and test sets using the library's default split settings.
var testTrainData = mlContext.Data.TrainTestSplit(data);

#### Create a data processing pipeline

In [None]:
// Columns that are concatenated, in this order, into the "Features" column.
var featureColumns = new[]
{
    "type",
    "nameDest",
    "amount",
    "oldbalanceOrg",
    "oldbalanceDest",
    "newbalanceOrig",
    "newbalanceDest"
};

// One-hot encode "type", one-hot hash encode "nameDest", then append a
// sub-chain that builds "Features" and applies min-max normalization to it.
var dataProcessingPipeline = mlContext.Transforms.Categorical.OneHotEncoding("type")
    .Append(mlContext.Transforms.Categorical.OneHotHashEncoding("nameDest"))
    .Append(
        mlContext.Transforms.Concatenate("Features", featureColumns)
            .Append(mlContext.Transforms.NormalizeMinMax("Features")));

#### Create a training pipeline

In [None]:
// FastTree binary trainer settings: 10 trees with 10 leaves each, predicting
// the "isFraud" label from the "Features" column.
var fastTreeOptions = new FastTreeBinaryTrainer.Options
{
    LabelColumnName = "isFraud",
    FeatureColumnName = "Features",
    NumberOfTrees = 10,
    NumberOfLeaves = 10
};

// Full training pipeline: the data processing steps followed by the trainer.
var trainingPipeline = dataProcessingPipeline.Append(
    mlContext.BinaryClassification.Trainers.FastTree(fastTreeOptions));

#### Train our model

In [None]:
// Fit the whole training pipeline (data processing + FastTree) on the train split.
var trainedModel = trainingPipeline.Fit(testTrainData.TrainSet);

#### Evaluate performance

In [None]:
// Apply the trained model to the test split.
var predictions = trainedModel.Transform(testTrainData.TestSet);

// Evaluate the predictions against the "isFraud" label column and display the result.
var metrics = mlContext.BinaryClassification.Evaluate(predictions, labelColumnName: "isFraud");
display(metrics)

## AutoML

In [None]:
%%time 

using Microsoft.ML.AutoML;

// AutoML binary classification experiment with a budget of 50 seconds.
var experiment = mlContext.Auto()
    .CreateBinaryClassificationExperiment(maxExperimentTimeInSeconds: 50);

// Run the experiment on the train split with "isFraud" as the label column.
var result = experiment.Execute(testTrainData.TrainSet, labelColumnName: "isFraud");

In [None]:
// One scatter trace per trainer name: run time on the x axis and validation
// accuracy on the y axis. Runs without validation metrics are left out.
var scatters = result.RunDetails
    .Where(run => run.ValidationMetrics != null)
    .GroupBy(run => run.TrainerName)
    .Select(runsByTrainer => new Graph.Scatter
    {
        name = runsByTrainer.Key,
        x = runsByTrainer.Select(run => run.RuntimeInSeconds),
        y = runsByTrainer.Select(run => run.ValidationMetrics.Accuracy),
        mode = "markers",
        marker = new Graph.Marker { size = 12 }
    });

// Plot all traces in one chart, label the axes and show it.
var chart = Chart.Plot(scatters);
chart.WithXTitle("Training Time");
chart.WithYTitle("Accuracy");
display(chart);

// Report the trainer of the best run found by the experiment.
Console.WriteLine($"Best Trainer:{result.BestRun.TrainerName}");

#### Evaluate AutoML

In [None]:
// Apply the model of the best AutoML run to the test split.
var predictions = result.BestRun.Model.Transform(testTrainData.TestSet);
// Evaluate against the "isFraud" label column and display the result, in the
// same way as for the hand-built FastTree model.
var metrics = mlContext.BinaryClassification.Evaluate(predictions, labelColumnName: "isFraud");
display(metrics)

#### Save

In [None]:
// Save trainedModel (the model fitted from trainingPipeline, not the AutoML
// best run) together with the schema of the loaded data to "MLModel.zip".
mlContext.Model.Save(trainedModel, data.Schema, "MLModel.zip");