Copyright 2021 Google LLC.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

# About:

This notebook uses an existing AutoML model to make a prediction about a key business objective. To run the notebook, set the parameters in the ***Input Parameters*** section below.


## Load Libraries

In [None]:
import datetime
import pandas as pd
from utils import forecaster_util
import template_util
from google.cloud import bigquery

## Input Parameters



In [None]:
# Date of this prediction run (usually today). All output BigQuery tables,
# including the features table and the model, will have this date suffix.
# NB: run_date can be overridden with any date in the format: 'YYYYMMDD'.
run_date = datetime.datetime.today().strftime('%Y%m%d')

# User-editable configuration for the prediction run. Empty values must be
# filled in before the notebook is executed.
parameters = {
    # GCP project.
    'project_id': '',
    # BigQuery dataset to store the output features and model. Must be located
    # in the US or EU, as required by AutoML.
    'dataset_id': '',

    # Name of the output model table (default: model_YYYYMMDD).
    'model_table': '',

    # BigQuery SQL query to extract the input prediction data. Note there must
    # be one column called 'ts' of type TIMESTAMP, and one column for the label
    # (i.e. the KPI to forecast). The label column can have any name. The query
    # must also extract any numeric (INT64, FLOAT64) columns used in the model.
    'data_query': """""",

    # Timestamp for the first prediction. Note that this exact timestamp must
    # exist in the output of the data_query above. Format: '%Y-%m-%d:%H'
    'prediction_ts': '',

    # Name of the BigQuery column containing the numeric key business objective
    # that the model will predict.
    'label': '',
    # BigQuery column names of numeric features in the data_query that will be
    # used to help predict the label.
    'numeric_features': [],

    # Window size.
    'window_size': 'HOUR',  # One of HOUR, DAY, WEEK.

    # Make predictions for this many prediction windows in the future. These
    # are default values for each window_size and can be overridden.
    'num_hour_prediction_windows': 72,
    'num_day_prediction_windows': 28,
    'num_week_prediction_windows': 4,

    # List of windows. Numeric features are constructed over historical window
    # periods. Each window is specified with a pair (window_start, window_end),
    # which corresponds to the range (today - window_start days) to
    # (today + window_end days) inclusive.
    # NOTE(review): pairs such as (7, 1) suggest window_end is also a lookback
    # offset (i.e. today - window_end), and the unit presumably follows
    # window_size rather than always being days; confirm against the features
    # SQL template.
    'hour_lookback_windows': [
        (1, 1), (2, 2), (3, 3), (4, 4), (24, 24), (2 * 24, 2 * 24),
        (7 * 24, 7 * 24), (14 * 24, 14 * 24), (21 * 24, 21 * 24),
        (28 * 24, 28 * 24), (7*24, 1), (21*24, 7*24), (35*24, 21*24)],
    'day_lookback_windows': [
        (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (14, 14),
        (21, 21), (28, 28), (7, 1), (21, 8), (49, 22)],
    'week_lookback_windows': [
        (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (14, 14),
        (21, 21), (28, 28), (7, 1), (21, 8), (49, 22)],

    # List of BigQuery aggregation functions to apply to the historical windows.
    'aggregate_functions': ['SUM', 'AVG'],

    # Name of the output predictions table (default: predictions_YYYYMMDD).
    # Uses run_date so the table carries the same date suffix as this run.
    'prediction_table': f'predictions_{run_date}',

    # SQL template locations.
    'create_prediction_output_table_template':
        'templates/create_prediction_output_table.sql',
    'features_template': 'templates/features.sql',
    'prediction_template': 'templates/prediction.sql'
}

# Derive the window-dependent settings from the selected window_size:
#   micros_per_window      - length of one window in microseconds,
#   num_prediction_periods - number of future windows to predict,
#   lookback_windows       - lookback windows matching the window size.
if parameters['window_size'] == 'HOUR':
  parameters['micros_per_window'] = 60 * 60 * 1000000
  parameters['num_prediction_periods'] = parameters[
      'num_hour_prediction_windows']
  parameters['lookback_windows'] = parameters['hour_lookback_windows']
elif parameters['window_size'] == 'DAY':
  parameters['micros_per_window'] = 24 * 60 * 60 * 1000000
  parameters['num_prediction_periods'] = parameters[
      'num_day_prediction_windows']
  parameters['lookback_windows'] = parameters['day_lookback_windows']
elif parameters['window_size'] == 'WEEK':
  parameters['micros_per_window'] = 7 * 24 * 60 * 60 * 1000000
  parameters['num_prediction_periods'] = parameters[
      'num_week_prediction_windows']
  parameters['lookback_windows'] = parameters['week_lookback_windows']
else:
  # Fail fast with a clear message. Asserting a parenthesized tuple is always
  # true, so it would let an invalid window_size slip through unnoticed.
  raise ValueError(
      f"Error: Unknown window_size {parameters['window_size']!r}; "
      "expected one of 'HOUR', 'DAY', 'WEEK'.")

# Largest window_start across the selected lookback windows, i.e. the furthest
# back any configured window reaches.
parameters['max_lookback'] = max(
    start for start, _ in parameters['lookback_windows'])

## Make Prediction

In [None]:
# BigQuery client bound to the configured GCP project.
client = bigquery.Client(project=parameters['project_id'])

# Render the SQL from the create_prediction_output_table template using the
# run parameters, then execute it and block until the query job completes.
output_table_template = parameters['create_prediction_output_table_template']
prediction_query = template_util.render_template(
    output_table_template, parameters)
query_job = client.query(prediction_query)
# The trailing semicolon keeps the notebook from echoing the result object.
query_job.result();


## Extract Prediction

In [None]:
# Fully-qualified ID of the predictions table: project.dataset.table.
prediction_table = '{}.{}.{}'.format(
    parameters['project_id'],
    parameters['dataset_id'],
    parameters['prediction_table'])

# Load every row of the predictions table into a DataFrame.
results = client.list_rows(prediction_table).to_dataframe()

# Sanity check: one row is expected per prediction period, plus one.
expected_row_count = parameters['num_prediction_periods'] + 1
assert len(results.index) == expected_row_count, (
    'Forecast returned the wrong number of predictions.')

# Offset each base timestamp by prediction_period windows. The offset is
# converted from microseconds to nanoseconds (x1000), the default unit that
# pd.to_timedelta applies to plain numbers.
offset_nanos = (
    results['prediction_period'] * parameters['micros_per_window'] * 1000)
results['prediction timestamp'] = (
    pd.to_datetime(results['ts']) + pd.to_timedelta(offset_nanos))

# Keep only the timestamp and the predicted label, ordered chronologically.
predicted_column = f"predicted_label_{parameters['label']}"
results = results[['prediction timestamp', predicted_column]]
results = results.sort_values(by=['prediction timestamp'])
print(results.to_string())