## Demand Forecasting with Azure AutoML

In [2]:
# Load Azure Libraries

from azureml.core import Workspace, Dataset, Datastore
from azureml.core import Experiment
from azureml.core.compute import ComputeTarget
from azureml.train.automl import AutoMLConfig
from azureml.widgets import RunDetails
from azureml.opendatasets import OjSalesSimulated
from azureml.automl.core.forecasting_parameters import ForecastingParameters

In [3]:
# Load non-azure libraries

import pandas as pd
import numpy as np
import os
from pathlib import Path

In [5]:
# Attach this notebook to the AMLS workspace and look up the compute cluster
# that the AutoML run will be submitted to.

compute_name = 'compute-cluster'
ws = Workspace.from_config()
compute_target = ComputeTarget(workspace=ws, name=compute_name)

In [6]:
# Use the workspace's default datastore as the upload target for the dataset

datastore = Datastore.get_default(workspace=ws)

In [7]:
# Get the simulated OJ sales file dataset and keep only the first 10 files

oj_sales_files = OjSalesSimulated.get_file_dataset()
oj_sales = oj_sales_files.take(count=10)

In [8]:
# Create a local folder to download the files into (no error if it already exists)

folder_name = "OJ_Sales"
Path(folder_name).mkdir(parents=True, exist_ok=True)

In [9]:
# Download the selected files into the local folder, replacing any existing copies.
# Left as a bare expression so the cell displays the list of downloaded paths.

oj_sales.download(target_path=folder_name, overwrite=True)

['/mnt/batch/tasks/shared/LS_root/mounts/clusters/compute-instance-oj/code/OJ_Sales/https%3A/%2Fazureopendatastorage.azurefd.net/ojsales-simulatedcontainer/oj_sales_data/Store1000_dominicks.csv',
 '/mnt/batch/tasks/shared/LS_root/mounts/clusters/compute-instance-oj/code/OJ_Sales/https%3A/%2Fazureopendatastorage.azurefd.net/ojsales-simulatedcontainer/oj_sales_data/Store1001_minute.maid.csv',
 '/mnt/batch/tasks/shared/LS_root/mounts/clusters/compute-instance-oj/code/OJ_Sales/https%3A/%2Fazureopendatastorage.azurefd.net/ojsales-simulatedcontainer/oj_sales_data/Store1002_tropicana.csv',
 '/mnt/batch/tasks/shared/LS_root/mounts/clusters/compute-instance-oj/code/OJ_Sales/https%3A/%2Fazureopendatastorage.azurefd.net/ojsales-simulatedcontainer/oj_sales_data/Store1000_minute.maid.csv',
 '/mnt/batch/tasks/shared/LS_root/mounts/clusters/compute-instance-oj/code/OJ_Sales/https%3A/%2Fazureopendatastorage.azurefd.net/ojsales-simulatedcontainer/oj_sales_data/Store1001_tropicana.csv',
 '/mnt/batch/tas

In [10]:
# Read the downloaded CSV files into a single pandas DataFrame.
#
# The search is recursive because the download step nests the CSVs several
# directories below the target folder (see the paths printed by the download cell).

OJ_file_path = Path(folder_name).rglob('*.csv')

# Sort the paths so the row order of the combined frame is the same on every run;
# rglob yields files in filesystem order, which is not guaranteed to be stable.
OJ_files = sorted(OJ_file_path)

# Fail with an actionable message instead of pandas' "No objects to concatenate".
if not OJ_files:
    raise FileNotFoundError(
        f"No CSV files found under '{folder_name}'. Run the download cell first."
    )

# ignore_index=True gives the combined frame one continuous 0..N-1 index instead
# of repeating each file's own 0..n index.
df = pd.concat((pd.read_csv(csv_file) for csv_file in OJ_files), ignore_index=True)

In [11]:
# Preview the first 10 rows of the combined data

df.head(n=10)

Unnamed: 0,WeekStarting,Store,Brand,Quantity,Advert,Price,Revenue
0,1990-06-14,1000,dominicks,12003,1,2.59,31087.77
1,1990-06-21,1000,dominicks,10239,1,2.39,24471.21
2,1990-06-28,1000,dominicks,17917,1,2.48,44434.16
3,1990-07-05,1000,dominicks,14218,1,2.33,33127.94
4,1990-07-12,1000,dominicks,15925,1,2.01,32009.25
5,1990-07-19,1000,dominicks,17850,1,2.17,38734.5
6,1990-07-26,1000,dominicks,10576,1,1.97,20834.72
7,1990-08-02,1000,dominicks,9912,1,2.26,22401.12
8,1990-08-09,1000,dominicks,9571,1,2.11,20194.81
9,1990-08-16,1000,dominicks,15748,1,2.42,38110.16


In [12]:
# Upload the DataFrame to the datastore and register it as a tabular dataset.
# Left as a bare expression so the cell displays the registered dataset.

Dataset.Tabular.register_pandas_dataframe(
    dataframe=df,
    target=datastore,
    name="OJ Sales Sample",
)

Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/eb5b0356-4f55-47e9-9413-909ef26663e5/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


{
  "source": [
    "('workspaceblobstore', 'managed-dataset/eb5b0356-4f55-47e9-9413-909ef26663e5/')"
  ],
  "definition": [
    "GetDatastoreFiles",
    "ReadParquetFile",
    "DropColumns"
  ],
  "registration": {
    "id": "f585bebf-7886-46fe-9e9e-9866caf6549d",
    "name": "OJ Sales Sample",
    "version": 1,
    "workspace": "Workspace.create(name='ws-ojforecasting', subscription_id='be234501-d43f-4775-b64e-61ebec9a9e0b', resource_group='rg-ojsalesforecasting')"
  }
}

### Training Forecasting Model with Standard Algorithms

In [13]:
# Create (or attach to) the experiment that will hold the AutoML runs

experiment_name = 'OJ-Sales-Forecasting'
exp = Experiment(ws, experiment_name)

In [14]:
# Fetch the latest version of the registered dataset from the workspace

dataset_name = "OJ Sales Sample"
dataset = Dataset.get_by_name(
    workspace=ws,
    name=dataset_name,
    version='latest',
)

In [15]:
# Column the model is trained to predict (passed as label_column_name below)

target_column = "Quantity"

In [16]:
# AutoML task type passed to AutoMLConfig
task = "forecasting"

In [17]:
# Metric passed to AutoMLConfig as primary_metric
primary_metric = "normalized_root_mean_squared_error"

In [18]:
# Featurization mode passed to AutoMLConfig
featurization = "auto"

In [32]:
# Forecasting-specific settings for the AutoML run.
#
# Fixes relative to the previous version of this cell:
#   * 'drop_columns_names' was a misspelling of the documented 'drop_column_names'
#     key, so the request to drop Revenue was not being expressed correctly.
#     In the sample rows Revenue equals Quantity * Price (e.g. 12003 * 2.59 =
#     31087.77), so leaving it in as a feature leaks the target.
#   * 'short_series_handling' is documented as deprecated in favour of
#     'short_series_handling_configuration', which is already set here, so the
#     redundant flag has been removed (its default is True).
forecasting_settings = {
    'time_column_name': 'WeekStarting',
    'time_series_id_column_names': ['Store', 'Brand'],
    'forecast_horizon': 6,
    'drop_column_names': 'Revenue',
    'country_or_region_for_holidays': 'US',
    'target_lags': 'auto',
    'feature_lags': 'auto',
    'target_rolling_window_size': 'auto',
    'seasonality': 'auto',
    'use_stl': 'season_trend',
    'short_series_handling_configuration': 'auto',
}

params = ForecastingParameters.from_parameters_dict(
    forecasting_settings,
    validate_params=True,
)

In [33]:
# Configure the AutoML run

# Run-control options, grouped separately from the data/task arguments.
automl_settings = {
    'experiment_timeout_minutes': 15,
    'enable_early_stopping': True,
    'n_cross_validations': 3,
    'model_explainability': True,
    'enable_stack_ensemble': False,
    'enable_voting_ensemble': True,
}

config = AutoMLConfig(
    task=task,
    primary_metric=primary_metric,
    featurization=featurization,
    compute_target=compute_target,
    training_data=dataset,
    label_column_name=target_column,
    forecasting_parameters=params,
    **automl_settings,
)

In [34]:
# Submit the configured run to the experiment, streaming its output into the cell,
# then display the run-details widget.

AutoML_run = exp.submit(config, show_output=True)
run_widget = RunDetails(AutoML_run)
run_widget.show()

Submitting remote run.
No run_configuration provided, running on compute-cluster with default configuration
Running on remote compute: compute-cluster


Experiment,Id,Type,Status,Details Page,Docs Page
OJ-Sales-Forecasting,AutoML_b18c7329-394a-4d6c-9cb2-7dee94396261,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation



Current status: ModelSelection. Beginning model selection.
Heuristic parameters: Target_Lag = '[0]', Target_Rolling_Window = '0'.


********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Time Series ID detection
STATUS:       PASSED
DESCRIPTION:  The data set was analyzed, and no duplicate time index were detected.
              Learn more about time-series forecasting configurations: https://aka.ms/AutomatedMLForecastingConfiguration

********************************************************************************************

TYPE:         Frequency detection
STATUS:       PASSED
DESCRIPTION:  The time series was analyzed, all data points are aligned with detected frequency.
              Learn more about data preparation for time-series forecasting: https://aka.ms/AutomatedMLDataPreparation

********************************************************************************************

TYPE:         Missing featur

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

### Registering the forecasting model

In [35]:
# Name, description and tags stored with the registered model.
# The description previously read "Forecastin"; the typo is fixed here because
# this text is passed to register_model and stored with the model.

model_name = 'OJ-Sales-Sample-Forecasting-AutoML'
description = 'Best AutoML Forecasting Run Using OJ Sales Sample Data'
tags = {'project': "OJ Sales", "Creator": "Femi Obembe"}

In [36]:
# Register the model from the AutoML run in the AMLS workspace.
# Left as a bare expression so the cell displays the registered Model.

AutoML_run.register_model(
    model_name=model_name,
    description=description,
    tags=tags,
)

Model(workspace=Workspace.create(name='ws-ojforecasting', subscription_id='be234501-d43f-4775-b64e-61ebec9a9e0b', resource_group='rg-ojsalesforecasting'), name=OJ-Sales-Sample-Forecasting-AutoML, id=OJ-Sales-Sample-Forecasting-AutoML:1, version=1, tags={'project': 'OJ Sales', 'Creator': 'Femi Obembe'}, properties={})