In [None]:
#r "nuget: Microsoft.ML.AutoML, 0.21.0-preview.23266.6"

#r "nuget: Microsoft.Data.Analysis, 0.23.0-preview.1.25127.4"

In [94]:
using System.Threading;
using System.IO;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.AutoML;

using Microsoft.Data.Analysis;
using System.Globalization;

In [95]:
public class CarSalesData
{
    public float Year { get; set; }
    public float Month { get; set; }
    public float Sales { get; set; }
}

In [96]:
// Initialize MLContext
MLContext ctx = new MLContext();

In [97]:
var dataPath = Path.GetFullPath(@"./formated.csv");

In [98]:
// Define data path
var lines = File.ReadAllLines(@"./bcdata.sgs.1378.csv");
IEnumerable<CarSalesData> carSalesData = new List<CarSalesData>();
for (int i = 1; i < lines.Length; i++) // pula o cabeçalho
{
    var carSale = new CarSalesData();
    var parts = lines[i].Split(';');
    parts[0] = parts[0].Replace("\"",""); // remove aspas
    var date= DateTime.ParseExact(parts[0], "dd/MM/yyyy", CultureInfo.InvariantCulture);
    carSale.Year = date.Year;
    carSale.Month = date.Month;
    carSale.Sales = float.Parse(parts[1].Replace(",", ".").Replace("\"",""));
    carSalesData = carSalesData.Append(carSale);
}
Console.WriteLine($"Dados carregados: {carSalesData.Count()} registros.");

Dados carregados: 529 registros.


In [None]:
// Load data as IDataView
var data = ctx.Data.LoadFromEnumerable(carSalesData);

In [100]:
var pipeline = ctx.Transforms.Concatenate("Features", "Year", "Month")
    .Append(ctx.Transforms.NormalizeMinMax("Features"))
    .AppendCacheCheckpoint(ctx);

In [None]:
// Transforms data
var processedData = pipeline.Fit(data).Transform(data);
Console.WriteLine($"Total de linhas: {processedData.GetRowCount()}");
var trainValidationData = ctx.Data.TrainTestSplit(processedData, testFraction: 0.2);

Total de linhas: 


In [None]:
// Configurates the experiment
var experimentSettings = new RegressionExperimentSettings
{
    MaxExperimentTimeInSeconds = 200,
    OptimizingMetric = RegressionMetric.RSquared
};
var experiment = ctx.Auto().CreateRegressionExperiment(experimentSettings);
var result = experiment.Execute(trainValidationData.TrainSet, labelColumnName: "Sales");

In [None]:
// Avaliation
var testMetrics = result.BestRun.Model.Transform(trainValidationData.TestSet);
var metrics = ctx.Regression.Evaluate(testMetrics, labelColumnName: "Sales");

In [None]:
Console.WriteLine($"Best Model: {result.BestRun.TrainerName}");
Console.WriteLine($"R²: {metrics.RSquared:0.###}");
Console.WriteLine($"MAE: {metrics.MeanAbsoluteError:#.###}");
Console.WriteLine($"RMSE: {metrics.RootMeanSquaredError:#.###}");   

Melhor modelo: ReplaceMissingValues=>Concatenate=>LightGbmRegression
R²: 0.908
MAE: 17980.202
RMSE: 23414.955


In [106]:
ctx.Model.Save(model, data.Schema, "model.mlnet");