# ANALYZE DATA
(numerical and categorical distribution, box plot segmentation and correlation matrix)

In [1]:
#r "nuget:Microsoft.ML,1.5.2"
using XPlot.Plotly;
using Microsoft.ML;
using Microsoft.ML.Data;

// ML.NET context used by the data-loading and feature-extraction cells below.
// It is created with a fixed seed (1) so seeded operations repeat between runs.
var mlContext = new MLContext(seed: 1);

Installed package Microsoft.ML version 1.5.2

In [1]:
#load "C:\Users\dcost\source\repos\SmartFireAlarm\SmartFireAlarm\Jupyter\Models.csx"

// CSV file with the sensor readings (relative path).
const string DATASET_PATH = "./sensors_data.csv";

// Load the comma-separated file, skipping its header row, into a data view
// whose columns are described by ModelInput.
var data = mlContext.Data.LoadFromTextFile<ModelInput>(
    DATASET_PATH,
    separatorChar: ',',
    hasHeader: true);

// Shuffle with a fixed seed, then hold out 30% of the rows for testing.
IDataView shuffledData = mlContext.Data.ShuffleRows(data, seed: 1);
var split = mlContext.Data.TrainTestSplit(shuffledData, testFraction: 0.3);

IDataView trainingData = split.TrainSet;
IDataView testingData = split.TestSet;

### Extract features

In [1]:
// Read the training rows into memory exactly once.
// CreateEnumerable returns a lazy sequence: without ToList(), every enumeration
// of a column projection below (one per chart trace, plus the correlation
// matrix) would re-read the training data view from scratch.
// reuseRowObject must be false when materializing: with true, the same
// ModelInput instance is handed back for every row, so the list would end up
// holding many references to a single (last-row) object.
// NOTE(review): this keeps the whole training set in memory — assumed to be
// small enough for a notebook; confirm for large datasets.
var features = mlContext.Data
    .CreateEnumerable<ModelInput>(trainingData, reuseRowObject: false)
    .ToList();

// Per-column projections over the in-memory rows, consumed by the plots below.
var sources = features.Select(f => f.Source);
var temperatures = features.Select(f => f.Temperature);
var luminosities = features.Select(f => f.Luminosity);
var infrareds = features.Select(f => f.Infrared);
var distances = features.Select(f => f.Distance);
var pirs = features.Select(f => f.PIR);
var humidities = features.Select(f => f.Humidity);

## 1. Numerical Distribution

In [1]:
// One histogram trace per numeric sensor column; all traces go on one chart.
var histogramTraces = new[]
{
    new Graph.Histogram { name = "Temperature", x = temperatures },
    new Graph.Histogram { name = "Luminosity", x = luminosities },
    new Graph.Histogram { name = "Infrared", x = infrareds },
    new Graph.Histogram { name = "Distance", x = distances },
    new Graph.Histogram { name = "PIR", x = pirs },
    new Graph.Histogram { name = "Humidity", x = humidities },
};

var histogramsDiagram = Chart.Plot(histogramTraces);

// Attach the chart title, then render the chart in the cell output.
var layout = new Layout.Layout { title = "Numerical distribution" };
histogramsDiagram.WithLayout(layout);

display(histogramsDiagram);

## 2. Categorical Distribution

In [1]:
// Histogram of the Source values taken from the training rows.
var trainingSourceTrace = new Graph.Histogram { x = sources };
var categoriesHistogram = Chart.Plot(trainingSourceTrace);

// Attach the chart title, then render the chart in the cell output.
var layout = new Layout.Layout { title = "Categorical distribution (training data)" };
categoriesHistogram.WithLayout(layout);

display(categoriesHistogram);

In [1]:
// Same categorical histogram as for the training data, but built from the
// held-out testing rows. The sequence stays lazy: it is only enumerated once,
// when the chart reads it.
var testingFeatures = mlContext.Data.CreateEnumerable<ModelInput>(testingData, reuseRowObject: true);
var testingSources = testingFeatures.Select(row => row.Source);

var testingSourceTrace = new Graph.Histogram { x = testingSources };
var categoriesHistogram = Chart.Plot(testingSourceTrace);

// Attach the chart title, then render the chart in the cell output.
var layout = new Layout.Layout { title = "Categorical distribution (testing data)" };
categoriesHistogram.WithLayout(layout);

display(categoriesHistogram);

## 3. Box Plot Segmentation

In [1]:
// One box trace per numeric sensor column; all traces go on one chart.
var boxTraces = new[]
{
    new Graph.Box { name = "Temperature", y = temperatures },
    new Graph.Box { name = "Luminosity", y = luminosities },
    new Graph.Box { name = "Infrared", y = infrareds },
    new Graph.Box { name = "Distance", y = distances },
    new Graph.Box { name = "PIR", y = pirs },
    new Graph.Box { name = "Humidity", y = humidities },
};

var segmentationDiagram = Chart.Plot(boxTraces);

// Attach the chart title, then render the chart in the cell output.
var layout = new Layout.Layout { title = "Box plot segmentation" };
segmentationDiagram.WithLayout(layout);

display(segmentationDiagram);

## 4. Correlation Matrix

In [1]:
#r "nuget:MathNet.Numerics"
#load "C:\Users\dcost\source\repos\SmartFireAlarm\SmartFireAlarm\Jupyter\Helpers.csx"

// Axis labels for the heatmap, in the same order as the rows of featureMatrix.
string[] featureColumns = { "Temperature", "Luminosity", "Infrared", "Distance", "PIR", "Humidity" };

// One list of doubles per feature column, passed to the correlation helper.
var featureMatrix = new List<List<double>>
{
    temperatures.Select(Convert.ToDouble).ToList(),
    luminosities.Select(Convert.ToDouble).ToList(),
    infrareds.Select(Convert.ToDouble).ToList(),
    distances.Select(Convert.ToDouble).ToList(),
    pirs.Select(Convert.ToDouble).ToList(),
    humidities.Select(Convert.ToDouble).ToList(),
};

// Heatmap of the pairwise correlations on a fixed [-1, 1] colour scale.
// NOTE(review): the y labels are reversed — presumably to match the row order
// returned by Helpers.GetPearsonCorrelation; verify against Helpers.csx.
var correlationTrace = new Graph.Heatmap
{
    x = featureColumns,
    y = featureColumns.Reverse(),
    z = Helpers.GetPearsonCorrelation(featureMatrix),
    zmin = -1,
    zmax = 1
};
var correlationMatrix = Chart.Plot(correlationTrace);

var layout = new Layout.Layout
{
    title = "Features Correlation Matrix",
    autosize = "true",
    // Fixed left margin to accommodate the longer feature labels.
    margin = new Graph.Margin { l = 90 }
};
correlationMatrix.WithLayout(layout);
display(correlationMatrix);

Installed package MathNet.Numerics version 4.12.0