In [124]:
// ML.NET Nuget packages installation
#r "nuget:Microsoft.ML,1.5.0-preview2"
#r "nuget:Microsoft.ML.Mkl.Components,1.5.0-preview2"
#r "nuget:Microsoft.ML.TimeSeries,1.5.0-preview2"
    
//Install XPlot package
#r "nuget:XPlot.Plotly,2.0.0"

// Install data analysis package
#r "nuget:Microsoft.Data.Analysis,0.2.0"
    

In [109]:
using Microsoft.ML;
using Microsoft.ML.Data;
/// <summary>
/// One raw row of the input text file, as bound by the ML.NET text loader.
/// </summary>
public class TemperaturePoint
{
    // Column 0 of the file. Kept as text here; it is parsed into a DateTime by a later cell.
    [LoadColumn(0)]
    public string Date;

    // Column 1 of the file. The ColumnName attribute makes this field appear in the
    // IDataView under the name "Label" rather than "MinTemp".
    [LoadColumn(1)]
    [ColumnName("Label")]
    public float MinTemp;

}

/// <summary>
/// Output row type of the custom mapping: the temperature plus fields derived from the parsed date.
/// </summary>
public class TemperatureParsed
{
    // Copied unchanged from the input row.
    public float MinTemp;
    // Month, year and day-of-month components of the parsed date.
    public int Month;
    public int Year;
    public int Day;
    // The parsed date itself.
    public DateTime Date;
    // Whole days between this row's date and the notebook's minDate.
    public float DaysSinceStart;
    // cos(2*pi/365 * (DaysSinceStart - 30)), computed by the mapping.
    public float Cos;
}

In [110]:
// Create the ML.NET context with a fixed seed and load the CSV (header row, comma-separated)
// into a data view typed by TemperaturePoint.
MLContext mlContext = new MLContext(seed: 0);
IDataView trainDataView = mlContext.Data.LoadFromTextFile<TemperaturePoint>("daily-minimum-temperatures-in-me.csv", hasHeader: true, separatorChar: ',');

// Extract the earliest date for later processing.
// The dates must be parsed BEFORE taking the minimum: Min() over the raw strings compares
// text, not time (e.g. "10/1/1981" sorts before "2/1/1981"), and only happens to be right
// when the file starts on "1/1/...". The file uses month-first M/d/yyyy text, so the
// invariant culture is used to keep the result independent of regional settings.
IEnumerable<string> dateColumn = trainDataView.GetColumn<string>("Date").ToList();
DateTime minDate = dateColumn
    .Select(text => DateTime.Parse(text, System.Globalization.CultureInfo.InvariantCulture))
    .Min();

int totalRows = dateColumn.Count();
display(minDate);
display(totalRows);

In [111]:
/// <summary>
/// Announces how many rows are being shown and returns the leading rows of a data view
/// as <see cref="TemperaturePoint"/> objects.
/// </summary>
/// <param name="mlContext">Context used to enumerate the data view.</param>
/// <param name="dataView">Data view to read from.</param>
/// <param name="numberOfRows">Maximum number of rows to return.</param>
/// <returns>Up to <paramref name="numberOfRows"/> rows, each a distinct object.</returns>
public static List<TemperaturePoint> Head(MLContext mlContext, IDataView dataView, int numberOfRows = 4)
{
    display(string.Format("DataView: Showing {0} rows with the columns", numberOfRows));

    // reuseRowObject: false so every returned element is its own instance.
    IEnumerable<TemperaturePoint> allRows =
        mlContext.Data.CreateEnumerable<TemperaturePoint>(dataView, reuseRowObject: false);

    List<TemperaturePoint> firstRows = allRows.Take(numberOfRows).ToList();
    return firstRows;
}

// Render a heading, then preview the first five rows of the raw training data.
display(h4("Showing a few rows from training DataView:"));

// fewRows holds the materialized preview rows returned by Head.
var fewRows = Head(mlContext, trainDataView, 5);
display(fewRows);

DataView: Showing 5 rows with the columns

index,Date,MinTemp
0,1/1/1981,20.7
1,1/2/1981,17.9
2,1/3/1981,18.8
3,1/4/1981,14.6
4,1/5/1981,15.8


In [112]:
using System;
using System.Collections.Generic;

// Row-level mapping used by the custom transform: copies the temperature and derives
// date parts, a day offset from minDate, and a yearly cosine feature.
Action<TemperaturePoint, TemperatureParsed> mapping = (input, output) =>
    {
        output.MinTemp = input.MinTemp;

        // The file stores dates as month-first text without zero padding (e.g. "1/2/1981"
        // is 2 January). Parse with the invariant culture so the month/day order does not
        // depend on the machine's regional settings.
        DateTime result = DateTime.Parse(input.Date, System.Globalization.CultureInfo.InvariantCulture);
        output.Day = result.Day;
        output.Month = result.Month;
        output.Year = result.Year;
        output.Date = result;

        // Whole days elapsed since the earliest date in the data set.
        output.DaysSinceStart = (result - minDate).Days;

        // Cosine with a 365-day period, shifted by 30 days.
        // NOTE(review): the 30-day shift looks like a hand-picked phase offset — confirm.
        double radiansPerDay = (2 * Math.PI) / 365;
        output.Cos = (float) Math.Cos(radiansPerDay * (output.DaysSinceStart - 30));
    };

// Transform pipeline:
//   1. CustomMapping applies `mapping` to every row. The second argument is null.
//      NOTE(review): presumably this is the contract name and null is fine because the
//      model is never saved in this notebook — confirm.
//   2. Each Concatenate takes a single scalar column and produces a one-element vector
//      column ("DaysSince" from "DaysSinceStart", "CosVector" from "Cos"). "CosVector"
//      is the feature column used by the regression trainer later in the notebook.
//   3. AppendCacheCheckpoint adds a cache checkpoint at the end of the pipeline.
var estimator = mlContext.Transforms.CustomMapping(mapping, null)
                .Append(mlContext.Transforms.Concatenate(outputColumnName: "DaysSince",
                                                         inputColumnNames: new[] { "DaysSinceStart" }))
                .Append(mlContext.Transforms.Concatenate(outputColumnName: "CosVector",
                                                         inputColumnNames: new[] { "Cos" }))
                .AppendCacheCheckpoint(mlContext);

/// <summary>
/// Announces how many rows are being shown and returns the leading rows of a data view
/// as <see cref="TemperatureParsed"/> objects.
/// </summary>
/// <param name="mlContext">Context used to enumerate the data view.</param>
/// <param name="dataView">Data view to read from.</param>
/// <param name="numberOfRows">Maximum number of rows to return.</param>
/// <returns>Up to <paramref name="numberOfRows"/> rows, each a distinct object.</returns>
public static List<TemperatureParsed> Head(MLContext mlContext, IDataView dataView, int numberOfRows = 4)
{
    display(string.Format("DataView: Showing {0} rows with the columns", numberOfRows));

    // reuseRowObject: false so every returned element is its own instance.
    IEnumerable<TemperatureParsed> parsedRows =
        mlContext.Data.CreateEnumerable<TemperatureParsed>(dataView, reuseRowObject: false);

    List<TemperatureParsed> preview = parsedRows.Take(numberOfRows).ToList();
    return preview;
}

// Fit the transform pipeline on the loaded data, apply it to the same data to get the
// derived columns, and preview the first five transformed rows.
var model = estimator.Fit(trainDataView);
var transformedData = model.Transform(trainDataView);
display(Head(mlContext, transformedData, 5));

DataView: Showing 5 rows with the columns

index,MinTemp,Month,Year,Day,Date,DaysSinceStart,Cos
0,20.7,1,1981,1,1981-01-01 00:00:00Z,0,0.8695894
1,17.9,1,1981,2,1981-01-02 00:00:00Z,1,0.8779601
2,18.8,1,1981,3,1981-01-03 00:00:00Z,2,0.8860706
3,14.6,1,1981,4,1981-01-04 00:00:00Z,3,0.8939186
4,15.8,1,1981,5,1981-01-05 00:00:00Z,4,0.90150166


In [113]:
// Chronological split: numTrain is 80% of the row count (truncated to an int).
int numTrain = (int) (0.8 * totalRows);
display(numTrain);
// Rows are routed by their "DaysSinceStart" value, using numTrain as the cut-off.
// NOTE(review): numTrain is a row count while DaysSinceStart is a day offset; the two
// only coincide if the file has exactly one row per calendar day — confirm.
// NOTE(review): presumably lowerBound is inclusive and upperBound exclusive, so the
// two sets do not overlap — verify against the FilterRowsByColumn documentation.
IDataView trainData = mlContext.Data.FilterRowsByColumn(transformedData, "DaysSinceStart", upperBound: numTrain);
IDataView testData = mlContext.Data.FilterRowsByColumn(transformedData, "DaysSinceStart", lowerBound: numTrain);

In [114]:
// Show the column schema of the training split (names, types, hidden flags).
display(trainData.Schema);

index,Name,Index,IsHidden,Type,Annotations
0,Date,0,True,{ Microsoft.ML.Data.TextDataViewType: RawType: System.ReadOnlyMemory<System.Char> },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
1,Date,1,False,{ Microsoft.ML.Data.DateTimeDataViewType: RawType: System.DateTime },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
2,Label,2,False,{ Microsoft.ML.Data.NumberDataViewType: RawType: System.Single },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
3,MinTemp,3,False,{ Microsoft.ML.Data.NumberDataViewType: RawType: System.Single },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
4,Month,4,False,{ Microsoft.ML.Data.NumberDataViewType: RawType: System.Int32 },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
5,Year,5,False,{ Microsoft.ML.Data.NumberDataViewType: RawType: System.Int32 },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
6,Day,6,False,{ Microsoft.ML.Data.NumberDataViewType: RawType: System.Int32 },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
7,DaysSinceStart,7,False,{ Microsoft.ML.Data.NumberDataViewType: RawType: System.Single },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
8,Cos,8,False,{ Microsoft.ML.Data.NumberDataViewType: RawType: System.Single },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
9,DaysSince,9,False,"{ Microsoft.ML.Data.VectorDataViewType: Dimensions: [ 1 ], IsKnownSize: True, ItemType: { Microsoft.ML.Data.NumberDataViewType: RawType: System.Single }, Size: 1, RawType: Microsoft.ML.Data.VBuffer<System.Single> }","{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ { Microsoft.ML.DataViewSchema+Column: Name: SlotNames, Index: 0, IsHidden: False, Type: { Microsoft.ML.Data.VectorDataViewType: Dimensions: [ 1 ], IsKnownSize: True, ItemType: { Microsoft.ML.Data.TextDataViewType: RawType: System.ReadOnlyMemory`1[System.Char] }, Size: 1, RawType: Microsoft.ML.Data.VBuffer`1[System.ReadOnlyMemory`1[System.Char]] }, Annotations: { Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] } } ] }"


In [115]:
using XPlot.Plotly;

// Extract the first numberOfRows rows of the transformed data into arrays for plotting.
 
int numberOfRows = 730;
float[] temps = transformedData.GetColumn<float>("MinTemp").Take(numberOfRows).ToArray();
DateTime[] dates = transformedData.GetColumn<DateTime>("Date").Take(numberOfRows).ToArray();
float[] cos = transformedData.GetColumn<float>("Cos").Take(numberOfRows).ToArray();

// Two traces over the same dates. Each trace is named so the legend identifies it,
// matching how PlotPredictions labels its traces.
Graph.Scattergl[] scatters = {
    new Graph.Scattergl()
    {
        x = dates,
        y = temps,
        name = "MinTemp"
    },
    new Graph.Scattergl()
    {
        x = dates,
        y = cos,
        name = "Cos"
    }
};
var chart = Chart.Plot(
    scatters
);

chart.Width = 600;
chart.Height = 600;
display(chart);

Notice that the data has a sinusoidal pattern, similar to a cosine function with a period of 365 days (1 year).
Let's try fitting a linear regression model.

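A linear regression model has a formula of `y=mx+b`, where `x` is the independent variable and `y` is the dependent variable. Here `x` is the cosine feature computed above (the `CosVector` column), which stands in for the time of year, and `y` is the minimum temperature (`MinTemp`).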

In [140]:
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;

// Ordinary least squares regression: predict "MinTemp" from the one-element
// "CosVector" feature column.
var forecastingPipeline = mlContext.Regression.Trainers.Ols(
    labelColumnName: "MinTemp", 
    featureColumnName: "CosVector");
    
// Train on the training split only.
var forecaster = forecastingPipeline.Fit(trainData);

// Use trained model to make inferences on test data
IDataView testDataPredictions = forecaster.Transform(testData);

// Evaluate the predictions against "MinTemp"; the mean absolute error and root mean
// squared error are read from the resulting metrics below.
RegressionMetrics trainedModelMetrics = mlContext.Regression.Evaluate(
    testDataPredictions,
    labelColumnName: "MinTemp",
    scoreColumnName: "Score");

double rmse = trainedModelMetrics.RootMeanSquaredError;
double mae = trainedModelMetrics.MeanAbsoluteError;
display($"Mean Absolute Error:{mae:F3}\n Root Mean Squared Error: {rmse:F3}");

Mean Absolute Error:1.997
 Root Mean Squared Error: 2.574

In [162]:
using XPlot.Plotly;

/// <summary>
/// Plots predicted against actual minimum temperatures for the leading rows of a data view.
/// </summary>
/// <param name="testDataPredictions">Data view containing "MinTemp", "Date" and the prediction column.</param>
/// <param name="outputColumn">Name of the prediction column.</param>
/// <param name="isVector">
/// True when the prediction column holds float vectors; only the first element of each
/// vector is plotted.
/// </param>
/// <param name="numberOfRows">Maximum number of leading rows to plot.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="outputColumn"/> is not a column of <paramref name="testDataPredictions"/>.
/// </exception>
static void PlotPredictions(IDataView testDataPredictions, String outputColumn = "Score", bool isVector = false, int numberOfRows = 730) {
    // Fail early and say which columns ARE available. In a notebook the predictions
    // variable is easily left pointing at another model's output, and the bare
    // "column not found" error gives no hint of that.
    if (testDataPredictions.Schema.GetColumnOrNull(outputColumn) == null) {
        string available = string.Join(", ",
            testDataPredictions.Schema.Where(column => !column.IsHidden).Select(column => column.Name));
        throw new ArgumentOutOfRangeException(
            nameof(outputColumn),
            $"Column '{outputColumn}' not found. Available columns: {available}");
    }

    float[] temps = testDataPredictions.GetColumn<float>("MinTemp").Take(numberOfRows).ToArray();
    DateTime[] dates = testDataPredictions.GetColumn<DateTime>("Date").Take(numberOfRows).ToArray();

    // Vector-valued predictions contribute their first element; scalar predictions are used as-is.
    float[] predictions = isVector
        ? testDataPredictions.GetColumn<float[]>(outputColumn).Take(numberOfRows).Select(vector => vector[0]).ToArray()
        : testDataPredictions.GetColumn<float>(outputColumn).Take(numberOfRows).ToArray();

    Graph.Scattergl[] scatters = {
        new Graph.Scattergl()
        {
            x = dates,
            y = predictions,
            name = "Predicted"
        },
        new Graph.Scattergl()
        {
            x = dates,
            y = temps,
            name = "Actual"
        }
    };

    var chart = Chart.Plot(scatters);
    chart.Width = 600;
    chart.Height = 600;
    display(chart);
}

// Plot using the default output column "Score", so testDataPredictions must contain a
// "Score" column when this cell runs.
// NOTE(review): the recorded exception below suggests the variable held predictions
// without a "Score" column at the time (presumably from a cell run out of order) — confirm.
PlotPredictions(testDataPredictions);

Unhandled exception: System.ArgumentOutOfRangeException: Column 'Score' not found (Parameter 'name')
   at Microsoft.ML.DataViewSchema.get_Item(String name)
   at Microsoft.ML.Data.ColumnCursorExtensions.GetColumn[T](IDataView data, String columnName)
   at Submission#146.PlotPredictions(IDataView testDataPredictions)
   at Submission#171.<<Initialize>>d__0.MoveNext()
--- End of stack trace from previous location where exception was thrown ---
   at Microsoft.CodeAnalysis.Scripting.ScriptExecutionState.RunSubmissionsAsync[TResult](ImmutableArray`1 precedingExecutors, Func`2 currentExecutor, StrongBox`1 exceptionHolderOpt, Func`2 catchExceptionOpt, CancellationToken cancellationToken)

In [160]:
// Scratch check: read the "ForecastTemp" column as float[] rows, limited to numberOfRows.
// Relies on testDataPredictions and numberOfRows defined in other cells.
var tmp = testDataPredictions.GetColumn<float[]>("ForecastTemp").Take(numberOfRows).ToArray();

In [126]:
/// <summary>
/// Row type used to read forecasting output back from a data view. The property names
/// match the output and confidence-bound column names passed to the SSA forecaster
/// elsewhere in this notebook.
/// </summary>
public class ModelOutput
{
    // Forecast values; the evaluation code reads element 0.
    public float[] ForecastTemp { get; set; }

    // Lower confidence bound column.
    public float[] LowerBoundTemp { get; set; }

    // Upper confidence bound column.
    public float[] UpperBoundTemp { get; set; }
}

In [167]:
using Microsoft.ML.Transforms.TimeSeries;

// See explanation of parameters: https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.timeseriescatalog.forecastbyssa?view=ml-dotnet
// SSA forecaster over "MinTemp": a window of 30, series length and train size both set
// to numTrain, one step ahead (horizon 1), with 95% confidence bounds written to
// "LowerBoundTemp" / "UpperBoundTemp" and the forecast to "ForecastTemp".
// This cell re-declares forecastingPipeline, forecaster and testDataPredictions, which
// an earlier cell also declares for the regression model.
var forecastingPipeline = mlContext.Forecasting.ForecastBySsa(
    outputColumnName: "ForecastTemp",
    inputColumnName: "MinTemp",
    windowSize: 30,
    seriesLength: numTrain,
    trainSize: numTrain,
    horizon: 1,
    confidenceLevel: 0.95f,
    confidenceLowerBoundColumn: "LowerBoundTemp",
    confidenceUpperBoundColumn: "UpperBoundTemp");

// Train on the training split only.
var forecaster = forecastingPipeline.Fit(trainData);

// Use trained model to make inferences on test data
IDataView testDataPredictions = forecaster.Transform(testData);
// The forecast column holds vectors, so plot element 0 of each.
PlotPredictions(testDataPredictions, outputColumn: "ForecastTemp", isVector: true);

// Evaluate is declared further down in this same cell.
Evaluate(testData, forecaster, mlContext);

/// <summary>
/// Applies <paramref name="model"/> to <paramref name="testData"/>, compares the first
/// forecast value of each row with the observed "MinTemp", and prints the mean absolute
/// error and root mean squared error to the console.
/// </summary>
/// <param name="testData">Rows readable as <see cref="TemperatureParsed"/>.</param>
/// <param name="model">Transformer whose output is readable as <see cref="ModelOutput"/>.</param>
/// <param name="mlContext">Context used to enumerate the data views.</param>
static void Evaluate(IDataView testData, ITransformer model, MLContext mlContext)
{
    IDataView predictions = model.Transform(testData);

    // Row objects are reused (second argument true); that is safe here because each
    // row is projected to a float immediately.
    IEnumerable<float> actual =
        mlContext.Data.CreateEnumerable<TemperatureParsed>(testData, true)
            .Select(observed => observed.MinTemp);
    IEnumerable<float> forecast =
        mlContext.Data.CreateEnumerable<ModelOutput>(predictions, true)
            .Select(prediction => prediction.ForecastTemp[0]);

    // Materialize the errors once. Zip is lazy, so averaging it twice would enumerate
    // both data views (and re-run the model) twice.
    float[] errors = actual
        .Zip(forecast, (actualValue, forecastValue) => actualValue - forecastValue)
        .ToArray();

    // Average throws on an empty sequence; report the situation instead.
    if (errors.Length == 0)
    {
        Console.WriteLine("No rows to evaluate.");
        return;
    }

    var mae = errors.Average(error => Math.Abs(error)); // Mean Absolute Error
    var rmse = Math.Sqrt(errors.Average(error => Math.Pow(error, 2))); // Root Mean Squared Error

    Console.WriteLine("Evaluation Metrics");
    Console.WriteLine("---------------------");
    Console.WriteLine($"Mean Absolute Error: {mae:F3}");
    Console.WriteLine($"Root Mean Squared Error: {rmse:F3}\n");
}



Evaluation Metrics
---------------------
Mean Absolute Error: 2.986
Root Mean Squared Error: 3.731

