# Sensors

### Install packages

In [1]:
#r "nuget:Microsoft.ML,1.4.0"
#r "nuget:Octokit, 0.32.0"
#r "nuget:MathNet.Numerics, 4.9.1"

In [3]:
using Octokit;
using XPlot.Plotly;
using System;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;
using MathNet.Numerics.Statistics;
using System.Collections.Immutable;

In [4]:
private const string DATASET_PATH = "./sensors_data.csv";

private static readonly MLContext mlContext = new MLContext(2020);

### Declare data models

In [5]:
public class ModelInput
{
    [ColumnName("Temperature"), LoadColumn(0)]
    public float Temperature { get; set; }

    [ColumnName("Luminosity"), LoadColumn(1)]
    public float Luminosity { get; set; }

    [ColumnName("Infrared"), LoadColumn(2)]
    public float Infrared { get; set; }

    [ColumnName("Distance"), LoadColumn(3)]
    public float Distance { get; set; }

    [ColumnName("CreatedAt"), LoadColumn(4)]
    public string CreatedAt { get; set; }

    [ColumnName("Label"), LoadColumn(5)]
    public string Label { get; set; }
}

public class ModelOutput
{
    [ColumnName("PredictedLabel")]
    public string PredictedLabel;

    [ColumnName("Score")]
    public float[] Score;
}

### Load data

In [6]:
IDataView data = mlContext.Data.LoadFromTextFile<ModelInput>(
    path: DATASET_PATH,
    hasHeader: true,
    separatorChar: ',');

### Shuffle and split data to train and test by a fraction of 0.2

In [7]:
var shuffledData = mlContext.Data.ShuffleRows(data, seed: 2020);
var split = mlContext.Data.TrainTestSplit(shuffledData, testFraction: 0.2);
var trainingData = split.TrainSet;
var testingData = split.TestSet;

In [8]:
public class CustomInputRow
{
    public string CreatedAt;
}

public class CustomOutputRow
{
    public string Day;
    public string Month;
}

In [9]:
Action<CustomInputRow, CustomOutputRow> parseDateTime = (input, output) =>
{
    output.Day = (DateTime.Parse(input.CreatedAt)).Date.DayOfWeek.ToString();
    output.Month = (DateTime.Parse(input.CreatedAt)).Date.Month.ToString();
};

### Concatenate features

In [10]:
var featureColumns = new string[] { "Temperature", "Luminosity", "Infrared", "Distance" };
var preprocessingPipeline = mlContext.Transforms.Conversion.MapValueToKey("Label")
    .Append(mlContext.Transforms.CustomMapping(parseDateTime, null))
    .Append(mlContext.Transforms.Concatenate("Features", featureColumns))
    .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"));

### Plot diagram

In [26]:
var normalizedData = preprocessingPipeline.Fit(trainingData).Transform(trainingData);
var features = normalizedData.GetColumn<float[]>(normalizedData.Schema["Features"]).ToArray();
var featuresTemperatures = features.Select(f => f[0]);
var featuresLuminosities = features.Select(f => f[1]);
var featuresInfrareds = features.Select(f => f[2]);
var featuresDistances = features.Select(f => f[3]);

var categoriesDiagram = Chart.Plot(new[] {
    new Graph.Box { y = featuresTemperatures, name = "Temperature" },
    new Graph.Box { y = featuresLuminosities, name = "Luminosity" },
    new Graph.Box { y = featuresInfrareds, name = "Infrared" },
    new Graph.Box { y = featuresDistances, name = "Distance" }
});
var layout = new Layout.Layout()
{
    title = "Segmentation box plot - normalized"
};
categoriesDiagram.WithLayout(layout);
display(categoriesDiagram);

### Build model pipeline

In [12]:
var modelPipeline = preprocessingPipeline
    .Append(mlContext.MulticlassClassification.Trainers.SdcaNonCalibrated("Label", "Features"));

### Map key to value

In [13]:
var postprocessingPipeline = modelPipeline.Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

### Train the model

In [14]:
var model = postprocessingPipeline.Fit(trainingData);

### Evaluate the model

In [15]:
var predictions = model.Transform(testingData);
var metrics = mlContext.MulticlassClassification.Evaluate(predictions, "Label", "Score", "PredictedLabel");

display($"MacroAccuracy: {metrics.MacroAccuracy:0.000} (the closer to 1, the better)");
display($"MicroAccuracy: {metrics.MicroAccuracy:0.000} (the closer to 1, the better)");
display($"LogLoss:       {metrics.LogLoss:0.000} (the closer to 0, the better)");
display($"LogLoss by class:");
for (int i = 0; i < metrics.PerClassLogLoss.Count; i++)
{
  display($"            {i + 1}: {metrics.PerClassLogLoss[i]:#0.000}");
}

MacroAccuracy: 0.912 (the closer to 1, the better)

MicroAccuracy: 0.947 (the closer to 1, the better)

LogLoss:       0.004 (the closer to 0, the better)

LogLoss by class:

            1: 0.000

            2: 0.000

            3: 0.000

            4: 0.039

### Get weights and biases

In [16]:
var modelForContributions = modelPipeline.Fit(trainingData);
var modelParameters = modelForContributions.Last() as MulticlassPredictionTransformer<LinearMulticlassModelParameters>;
VBuffer<float>[] weights = default;
modelParameters.Model.GetWeights(ref weights, out int numClasses);
var weightsArray = weights.Select(w => (w as VBuffer<float>?).Value.DenseValues());
display(weightsArray);
var biases = modelParameters.Model.GetBiases();
display(biases);

index,Unnamed: 1
0,"[ -5.916448, -12.011368, -14.804254, 1.0057353 ]"
1,"[ 8.418186, 10.075042, 15.954171, -0.3886034 ]"
2,"[ -3.20351, 4.3624053, -16.292542, -1.000746 ]"
3,"[ -0.59928787, -3.5768526, 15.043341, 0.5018154 ]"


index,value
0,6.452884
1,-11.257763
2,2.09163
3,1.1378928


In [25]:
var transformedData1 = modelForContributions.Transform(trainingData);
var linearPredictor1 = modelForContributions.LastTransformer;
var pfi1 = mlContext.MulticlassClassification.PermutationFeatureImportance(linearPredictor1, transformedData1, permutationCount: 3);
var sortedMetrics = pfi1.Select((metrics, index) => new { index, metrics.MacroAccuracy })
    .OrderBy(feature => Math.Abs(feature.MacroAccuracy.Mean)).Select(feature => feature.MacroAccuracy.Mean);

var pfiDiagram = Chart.Plot(new[] {
    new Graph.Bar { x = sortedMetrics, y = featureColumns.Reverse(), orientation = "h", name = "PFI" }
});
var layout = new Layout.Layout()
{
    title = "PFI diagram (Permutation Feature Importance)"
};
pfiDiagram.WithLayout(layout);
display(pfiDiagram);

In [23]:
var correlationMatrix = new List<List<double>>();
correlationMatrix.Add(featuresTemperatures.Select(x => (double)x).ToList());
correlationMatrix.Add(featuresLuminosities.Select(x => (double)x).ToList());
correlationMatrix.Add(featuresInfrareds.Select(x => (double)x).ToList());
correlationMatrix.Add(featuresDistances.Select(x => (double)x).ToList());

var length = featureColumns.Length;

var z = new double[length, length];
for (int x = 0; x < length; ++x)
{
    for (int y = 0; y < length - 1 - x; ++y)
    {
        var seriesA = correlationMatrix[x];
        var seriesB = correlationMatrix[length - 1 - y];

        var value = Correlation.Pearson(seriesA, seriesB);

        z[x, y] = value;
        z[length - 1 - y, length - 1 - x] = value;
    }

    z[x, length - 1 - x] = 1;
}

var histogramCorrelationMatrix = Chart.Plot(
    new Graph.Heatmap 
    {
        x = featureColumns,
        y = featureColumns.Reverse(),
        z = z,
        zmin = -1,
        zmax = 1
    }
);

var layout = new Layout.Layout(){ title = "Correlation Matrix", margin { left = 100, right = 100, top = 100, bottom = 100 }};
histogramCorrelationMatrix.WithLayout(layout);
display(histogramCorrelationMatrix);

Unhandled exception: (37,72): error CS1003: Syntax error, ',' expected

### Confusion Matrix

In [None]:
display(metrics.ConfusionMatrix.GetFormattedConfusionTable());

In [None]:
display(metrics.ConfusionMatrix);

In [None]:
#r "nuget:Microsoft.Data.Analysis,0.3.0"
using Microsoft.Data.Analysis;
using Microsoft.AspNetCore.Html;

In [None]:
Formatter<ConfusionMatrix>.Register((df, writer) =>
{
    var rows = new List<IHtmlContent>();

    var cells = new List<IHtmlContent>();
    var n = df.Counts[0][0] + df.Counts[0][1] + df.Counts[1][0] + df.Counts[1][1];
    cells.Add(td[rowspan: 2, colspan: 2, style: "text-align: center; background-color: transparent"]("n = " + n));
    cells.Add(td[colspan: 2, style: "border: 1px solid black; text-align: center; padding: 24px; background-color: lightsteelblue"](b("Predicted")));
    rows.Add(tr[style: "background-color: transparent"](cells));
    
    cells = new List<IHtmlContent>();
    cells.Add(td[style:"border: 1px solid black; padding: 24px; background-color: #E3EAF3"](b("True")));
    cells.Add(td[style:"border: 1px solid black; padding: 24px; background-color: #E3EAF3"](b("False")));
    rows.Add(tr[style: "background-color: transparent"](cells));
    
    cells = new List<IHtmlContent>();
    cells.Add(td[rowspan: 2, style:"border: 1px solid black; text-align: center; padding: 24px;  background-color: lightsteelblue"](b("Actual")));
    cells.Add(td[style:"border: 1px solid black; text-align: center; padding: 24px; background-color: #E3EAF3"](b("True")));    
    cells.Add(td[style:"border: 1px solid black; padding: 24px"](df.Counts[0][0]));
    cells.Add(td[style:"border: 1px solid black; padding: 24px"](df.Counts[0][1]));
    rows.Add(tr[style: "background-color: transparent"](cells));
    
    cells = new List<IHtmlContent>();
    cells.Add(td[style:"border: 1px solid black; text-align: center; padding: 24px; background-color: #E3EAF3"](b("False")));
    cells.Add(td[style:"border: 1px solid black; padding: 24px"](df.Counts[1][0]));
    cells.Add(td[style:"border: 1px solid black; padding: 24px"](df.Counts[1][1]));
    rows.Add(tr(cells));

    var t = table(
        tbody(
            rows));

    writer.Write(t);
}, "text/html");

In [None]:
display(metrics.ConfusionMatrix);

In [None]:
var scatterMatrixDistanceTemperature = Chart.Plot(
    new Graph.Scatter 
    {
        x = featuresTemperatures,
        y = featuresLuminosities,
        mode = "markers",
        marker = new Graph.Marker()
        {
            color = "red",
            colorscale = "Jet"
        }
    }
);
display(scatterMatrixDistanceTemperature);