# Sensors

### Install packages

In [60]:
#r "nuget:Microsoft.ML,1.4.0"
using XPlot.Plotly;
using System;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;

### Declare data models

In [61]:
public class ModelInput
{
    [ColumnName("Temperature"), LoadColumn(0)]
    public float Temperature { get; set; }

    [ColumnName("Luminosity"), LoadColumn(1)]
    public float Luminosity { get; set; }

    [ColumnName("Infrared"), LoadColumn(2)]
    public float Infrared { get; set; }

    [ColumnName("Distance"), LoadColumn(3)]
    public float Distance { get; set; }

    [ColumnName("CreatedAt"), LoadColumn(4)]
    public string CreatedAt { get; set; }

    [ColumnName("Label"), LoadColumn(5)]
    public string Source { get; set; }
}

public class ModelOutput
{
    [ColumnName("PredictedLabel")]
    public string PredictedLabel;

    [ColumnName("Score")]
    public float[] Score;
}

### Instantiate the ML context

In [62]:
private static readonly MLContext mlContext = new MLContext(2020);

### Load data

In [63]:
private const string DATASET_PATH = "./sensors_data.csv";
IDataView data = mlContext.Data.LoadFromTextFile<ModelInput>(
    path: DATASET_PATH,
    hasHeader: true,
    separatorChar: ',');

### Shuffle and split data to train and test by a fraction of 0.2

In [64]:
var shuffledData = mlContext.Data.ShuffleRows(data, seed: 2020);
var split = mlContext.Data.TrainTestSplit(shuffledData, testFraction: 0.2);
var trainingData = split.TrainSet;
var testingData = split.TestSet;

In [65]:
var features = mlContext.Data.CreateEnumerable<ModelInput>(trainingData, true);
display(features.Take(10)); // be carefull here not to display the entire dataset

index,Temperature,Luminosity,Infrared,Distance,CreatedAt,Source
0,32.18,6.84,0.0,28.69,01/03/2020 10:31:27,Day
1,115.0,100.0,43.85,195.19,01/03/2020 10:26:04,Lighter
2,30.89,73.44,0.0,24.32,01/03/2020 10:33:01,FlashLight
3,81.48,100.0,93.46,400.0,01/03/2020 10:27:54,Lighter
4,32.18,6.64,0.0,76.97,01/03/2020 10:31:06,Day
5,24.28,14.45,0.0,17.49,01/03/2020 10:23:42,FlashLight
6,27.34,70.02,0.0,193.76,01/03/2020 10:25:27,FlashLight
7,23.48,87.6,0.0,343.57,01/03/2020 10:22:23,FlashLight
8,34.11,100.0,92.38,7.16,01/03/2020 10:28:07,Lighter
9,31.86,57.52,0.0,400.0,01/03/2020 10:33:25,FlashLight


### Categories

In [66]:
var categories = trainingData.GetColumn<string>("Label");
var categoriesHistogram = Chart.Plot(
    new Graph.Histogram 
    {
        x = categories
    }
);
display(categoriesHistogram);

### Plot diagram

In [67]:
var featuresTemperatures = features.Select(f => f.Temperature);
var featuresLuminosities = features.Select(f => f.Luminosity);
var featuresInfrareds = features.Select(f => f.Infrared);
var featuresDistances = features.Select(f => f.Distance);

var categoriesDiagram = Chart.Plot(new[] {
    new Graph.Box { y = featuresTemperatures, name = "Temperature" },
    new Graph.Box { y = featuresLuminosities, name = "Luminosity" },
    new Graph.Box { y = featuresInfrareds, name = "Infrared" },
    new Graph.Box { y = featuresDistances, name = "Distance" }
});
var layout = new Layout.Layout()
{
    title = "Segmentation box plot"
};
categoriesDiagram.WithLayout(layout);
display(categoriesDiagram);

In [68]:
#r "nuget:MathNet.Numerics, 4.9.0"
using MathNet.Numerics.Statistics;

In [69]:
var featureColumns = new string[] { "Temperature", "Luminosity", "Infrared", "Distance" };

var correlationMatrix = new List<List<double>>();
correlationMatrix.Add(featuresTemperatures.Select(x => (double)x).ToList());
correlationMatrix.Add(featuresLuminosities.Select(x => (double)x).ToList());
correlationMatrix.Add(featuresInfrareds.Select(x => (double)x).ToList());
correlationMatrix.Add(featuresDistances.Select(x => (double)x).ToList());

var length = featureColumns.Length;

var z = new double[length, length];
for (int x = 0; x < length; ++x)
{
    for (int y = 0; y < length - 1 - x; ++y)
    {
        var seriesA = correlationMatrix[x];
        var seriesB = correlationMatrix[length - 1 - y];

        var value = Correlation.Pearson(seriesA, seriesB);

        z[x, y] = value;
        z[length - 1 - y, length - 1 - x] = value;
    }

    z[x, length - 1 - x] = 1;
}

var correlationMatrixHeatmap = Chart.Plot(
    new Graph.Heatmap 
    {
        x = featureColumns,
        y = featureColumns.Reverse(),
        z = z,
        zmin = -1,
        zmax = 1
    }
);

var layout = new Layout.Layout()
{
    autosize = "true", 
    margin =  new Graph.Margin{ l = 90 },
    title = "Correlation Matrix"
};
correlationMatrixHeatmap.WithLayout(layout);
display(correlationMatrixHeatmap);

### Pre-processing pipeline

In [70]:
var preprocessingPipeline = mlContext.Transforms.Conversion.MapValueToKey("Label")
    .Append(mlContext.Transforms.Concatenate("Features", featureColumns))
    .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"));

### Model builder pipeline

In [71]:
var modelPipeline = preprocessingPipeline
    .Append(mlContext.MulticlassClassification.Trainers.SdcaNonCalibrated("Label", "Features"));

### Post-processing pipeline

In [72]:
var postprocessingPipeline = modelPipeline
    .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

### Train the model

In [73]:
var model = postprocessingPipeline.Fit(trainingData);

In [74]:
var normalizedData = preprocessingPipeline.Fit(trainingData).Transform(trainingData);
var features = normalizedData.GetColumn<float[]>(normalizedData.Schema["Features"]).ToArray();
var featuresTemperatures = features.Select(f => f[0]);
var featuresLuminosities = features.Select(f => f[1]);
var featuresInfrareds = features.Select(f => f[2]);
var featuresDistances = features.Select(f => f[3]);

var histogramFeatures = Chart.Plot(new[] {
    new Graph.Box { y = featuresTemperatures, name = "Temperature" },
    new Graph.Box { y = featuresLuminosities, name = "Luminosity" },
    new Graph.Box { y = featuresInfrareds, name = "Infrared" },
    new Graph.Box { y = featuresDistances, name = "Distance" }
});
display(histogramFeatures);

### Cross validate

In [75]:
var crossValidationResults = mlContext.MulticlassClassification.CrossValidate(trainingData, postprocessingPipeline, numberOfFolds: 5, labelColumnName: "Label");
var metricsInMultipleFolds = crossValidationResults.Select(r => r.Metrics);

In [76]:
var accuracyValues = metricsInMultipleFolds.Select(m => m.MicroAccuracy);
var average = accuracyValues.Average();
//var stdDev = CalculateStandardDeviation(accuracyValues);
//var confInt = CalculateConfidenceInterval95(accuracyValues);
//var microAccuracyValues = ExtractMetrics(metricsInMultipleFolds.Select(m => m.MicroAccuracy));

//var macroAccuracyValues = ExtractMetrics(metricsInMultipleFolds.Select(m => m.MacroAccuracy));

//var logLossValues = ExtractMetrics(metricsInMultipleFolds.Select(m => m.LogLoss));

//var logLossReductionValues = ExtractMetrics(metricsInMultipleFolds.Select(m => m.LogLossReduction));

In [77]:
display(metricsInMultipleFolds);

index,LogLoss,LogLossReduction,MacroAccuracy,MicroAccuracy,TopKAccuracy,TopKPredictionCount,PerClassLogLoss,ConfusionMatrix
0,0.306801717107583,0.7713771429996593,0.975,0.9672131147540984,0,0,"[ 0, 0, 0.9357452371781282, 0 ]","{ Microsoft.ML.Data.ConfusionMatrix: PerClassPrecision: [ 0.9069767441860465, 1, 1, 1 ], PerClassRecall: [ 1, 1, 0.9, 1 ], Counts: [ [ 39, 0, 0, 0 ], [ 0, 22, 0, 0 ], [ 4, 0, 36, 0 ], [ 0, 0, 0, 21 ] ], NumberOfClasses: 4 }"
1,0.0091096421120688,0.9929946277353782,0.9708538587848932,0.9664429530201344,0,0,"[ 0, 0, 0, 0.0646350797475363 ]","{ Microsoft.ML.Data.ConfusionMatrix: PerClassPrecision: [ 0.9019607843137255, 1, 1, 1 ], PerClassRecall: [ 1, 1, 0.9310344827586207, 0.9523809523809523 ], Counts: [ [ 46, 0, 0, 0 ], [ 0, 24, 0, 0 ], [ 4, 0, 54, 0 ], [ 1, 0, 0, 20 ] ], NumberOfClasses: 4 }"
2,0.0094611091401838,0.9926541969872464,0.968121693121693,0.9603174603174603,0,0,"[ 0, 0, 0.028383327420551437, 0 ]","{ Microsoft.ML.Data.ConfusionMatrix: PerClassPrecision: [ 0.9767441860465116, 1, 0.9111111111111111, 1 ], PerClassRecall: [ 0.9333333333333333, 1, 0.9761904761904762, 0.9629629629629629 ], Counts: [ [ 42, 0, 3, 0 ], [ 0, 12, 0, 0 ], [ 1, 0, 41, 0 ], [ 0, 0, 1, 26 ] ], NumberOfClasses: 4 }"
3,0.0,1.0,0.9951923076923076,0.992,0,0,"[ 0, 0, 0, 0 ]","{ Microsoft.ML.Data.ConfusionMatrix: PerClassPrecision: [ 0.9666666666666667, 1, 1, 1 ], PerClassRecall: [ 1, 1, 0.9807692307692307, 1 ], Counts: [ [ 29, 0, 0, 0 ], [ 0, 19, 0, 0 ], [ 1, 0, 51, 0 ], [ 0, 0, 0, 25 ] ], NumberOfClasses: 4 }"
4,0.8315964831403551,0.3843805661716329,0.9563301282051282,0.9618320610687024,0,0,"[ 0, 0, 0.04064323727289541, 4.1149332270110595 ]","{ Microsoft.ML.Data.ConfusionMatrix: PerClassPrecision: [ 0.9117647058823529, 1, 0.9591836734693877, 1 ], PerClassRecall: [ 1, 1, 0.9791666666666666, 0.8461538461538461 ], Counts: [ [ 31, 0, 0, 0 ], [ 0, 26, 0, 0 ], [ 1, 0, 47, 0 ], [ 2, 0, 2, 22 ] ], NumberOfClasses: 4 }"


### Evaluate the model against the testing data (measure the model performance)

In [78]:
var predictions = model.Transform(testingData);
var metrics = mlContext.MulticlassClassification.Evaluate(predictions, "Label", "Score", "PredictedLabel");
var metricsColumns = new List<string> { "MacroAccuracy", "MicroAccuracy", "LogLoss", "LogLossReduction" };
for (var i = 0; i < metrics.PerClassLogLoss.Count; i++)
{
    metricsColumns.Add($"{featureColumns[i]}LL");
}
var metricsValues = new List<double>
{ 
    metrics.MacroAccuracy, 
    metrics.MicroAccuracy, 
    metrics.LogLoss, 
    metrics.LogLossReduction
};
metricsValues.AddRange(metrics.PerClassLogLoss);

var metricsDiagram = Chart.Plot(new Graph.Bar()
{
    x = metricsValues,
    y = metricsColumns,
    orientation = "h", name = ""    
});
display(metricsDiagram);

In [79]:
display(metrics.ConfusionMatrix.GetFormattedConfusionTable());


Confusion table
PREDICTED     ||     0 |     1 |     2 |     3 | Recall
0.        Day ||    47 |     0 |     0 |     0 | 1.0000
1.    Lighter ||     0 |    29 |     0 |     1 | 0.9667
2. FlashLight ||     3 |     0 |    55 |     0 | 0.9483
3.   Infrared ||     2 |     0 |     2 |    11 | 0.7333
Precision     ||0.9038 |1.0000 |0.9649 |0.9167 |


In [80]:
//var categories = trainingData.GetColumn<string>("Label");


List<string> categories = new List<string> {"Temperature", "Luminosity", "Infrared", "Distance"};
display(categories);

index,value
0,Temperature
1,Luminosity
2,Infrared
3,Distance


In [130]:
using Microsoft.AspNetCore.Html;
using System.Collections.Generic;
using Microsoft.DotNet.Interactive.Formatting;
using System.Linq;

Formatter<ConfusionMatrix>.Register((df, writer) =>
{
    var cssFirstColor = "background-color: lightsteelblue; ";
    var cssSecondColor = "background-color: #E3EAF3; ";
    var cssTransparent = "background-color: transparent";
    var cssBold = "font-weight: bold; ";
    var cssPadding = "padding: 8px; ";
    var cssCenterAlign = "text-align: center; ";
    var cssTable = "margin: 50px; ";
    var cssTitle  = cssPadding +  cssFirstColor;
    var cssHeader = cssPadding + cssBold + cssSecondColor;
    var cssCount = cssPadding;
    var cssFormula = cssPadding + cssSecondColor;
    
    var rows = new List<IHtmlContent>();
    
    // header
    var cells = new List<IHtmlContent>();
    cells.Add(td[rowspan: 2, colspan: 2, style: cssTitle + cssCenterAlign]("Confusion Matrix"));
    cells.Add(td[colspan: df.Counts.Count, style: cssTitle + cssCenterAlign]("Predicted"));
    cells.Add(td[style: cssTitle](""));
    rows.Add(tr[style: cssTransparent](cells));

    // features header
    cells = new List<IHtmlContent>();
    for (int j = 0; j < df.Counts.Count; j++)
    {
        cells.Add(td[style: cssHeader](categories.ToList()[j]));
    }
    rows.Add(tr[style: cssTransparent](cells));
    cells.Add(td[style: cssTitle]("Recall"));

    // values
    for (int i = 0; i < df.NumberOfClasses; i++)
    {
        cells = new List<IHtmlContent>();
        if (i == 0)
        {
            cells.Add(td[rowspan: df.Counts.Count, style: cssTitle]("Truth"));
        }
        cells.Add(td[style: cssHeader](categories.ToList()[i]));
        for (int j = 0; j < df.NumberOfClasses; j++)
        {
            cells.Add(td[style: cssCount](df.Counts[i][j]));
        }
        cells.Add(td[style: cssFormula](Math.Round(df.PerClassRecall[i], 4)));
        rows.Add(tr[style: cssTransparent](cells));
    }

    //footer
    cells = new List<IHtmlContent>();
    cells.Add(td[colspan: 2, style: cssTitle]("Precision"));
    for (int j = 0; j < df.Counts.Count; j++)
    {
        cells.Add(td[style: cssFormula](Math.Round(df.PerClassPrecision[j], 4)));
    }
    cells.Add(td[style: cssFormula]("total = " + df.Counts.Sum(x => x.Sum())));
    rows.Add(tr[style: cssTransparent](cells));

    writer.Write(table[style: cssTable](tbody(rows)));
}, "text/html");

display(metrics.ConfusionMatrix);

0,1,2,3,4,5,6
Confusion Matrix,Confusion Matrix,Predicted,Predicted,Predicted,Predicted,
Confusion Matrix,Confusion Matrix,Temperature,Luminosity,Infrared,Distance,Recall
Truth,Temperature,47,0,0,0,1
Truth,Luminosity,0,29,0,1,0.9667
Truth,Infrared,3,0,55,0,0.9483
Truth,Distance,2,0,2,11,0.7333
Precision,Precision,0.9038,1,0.9649,0.9167,total = 150


In [82]:
var scatterMatrixDistanceTemperature = Chart.Plot(
    new Graph.Scatter 
    {
        x = featuresTemperatures,
        y = featuresLuminosities,
        mode = "markers",
        marker = new Graph.Marker()
        {
            color = "red",
            colorscale = "Jet"
        }
    }
);
display(scatterMatrixDistanceTemperature);