In [None]:
!pip install google-cloud-bigquery-storage google-cloud-bigquery db-dtypes pandas matplotlib seaborn

## Necessary Imports

This cell imports the modules from the `google.cloud` library that are needed to interact
with [BigQuery](https://cloud.google.com/bigquery/docs/introduction).
It imports the <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">bigquery</code> module and the <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">Client</code> class from <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">google.cloud.bigquery</code>.
These imports are essential for establishing a connection with [BigQuery](https://cloud.google.com/bigquery/docs/introduction),
executing queries, and managing datasets within [Google Cloud Platform](https://cloud.google.com/).

<b style="color:#e83e8c">Note:</b> to install the necessary modules from the <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">google.cloud</code> library, execute the command <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">pip install google-cloud-bigquery</code>

In [1]:
from datetime import datetime, timedelta

import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from google.cloud import bigquery
from google.cloud.bigquery import Client

In [2]:
# Load the BigQuery IPython extension; the %%bigquery cell magic used later in this notebook needs it.
%load_ext google.cloud.bigquery

In [3]:
# Render every float in DataFrame output with exactly three decimals.
pd.set_option("display.float_format", lambda value: f"{value:.3f}")
# Apply seaborn's default theme to all figures drawn afterwards.
sns.set_theme()

In [4]:
def plot_historical_and_forecast(
    input_timeseries,
    timestamp_col_name,
    data_col_name,
    forecast_output=None,
    actual=None,
    title=None,
    plotstartdate=None,
    **plot_kwargs,
):
    """Plot a historical series, optionally with a forecast band and actual values.

    Args:
        input_timeseries: DataFrame holding the historical data. It is never
            modified; all filtering and conversion happens on a copy.
        timestamp_col_name: Name of the time column in ``input_timeseries``
            (and in ``actual`` when given).
        data_col_name: Name of the value column in ``input_timeseries``
            (and in ``actual`` when given).
        forecast_output: Optional DataFrame with the columns
            ``forecast_timestamp``, ``forecast_value``, ``confidence_level``,
            ``confidence_interval_lower_bound`` and
            ``confidence_interval_upper_bound``. ``None`` or an empty frame
            skips the forecast line and the confidence band.
        actual: Optional DataFrame with the same two columns as
            ``input_timeseries``; drawn as a dashed "Actual" line.
        title: Figure title. No title is drawn when this is ``None``.
        plotstartdate: Optional lower bound; when set, only historical rows
            whose timestamp is at or after it are drawn.
        **plot_kwargs: Optional styling overrides: ``figsize``, ``plot_title``,
            ``title_fontsize``, ``plot_legend``, ``loc`` and ``prop``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    figsize = plot_kwargs.get("figsize", (20, 6))
    plot_title = plot_kwargs.get("plot_title", True)
    title_fontsize = plot_kwargs.get("title_fontsize", 16)
    plot_legend = plot_kwargs.get("plot_legend", True)
    loc = plot_kwargs.get("loc", "upper left")
    prop = plot_kwargs.get("prop", {"size": 14})

    # Copy first: callers usually pass a slice of a larger frame, and the
    # datetime conversion below must not be written back into it.
    history = input_timeseries.copy()
    if plotstartdate:
        history[timestamp_col_name] = pd.to_datetime(history[timestamp_col_name])
        history = history[history[timestamp_col_name] >= pd.to_datetime(plotstartdate)]
    history = history.sort_values(timestamp_col_name)

    # Plot the input historical data
    plt.figure(figsize=figsize)
    plt.plot(history[timestamp_col_name], history[data_col_name], label="Historical")
    plt.xlabel(timestamp_col_name)
    plt.ylabel(data_col_name)

    # An empty forecast frame has no confidence level to read, so it is
    # treated the same as "no forecast".
    if forecast_output is not None and not forecast_output.empty:
        forecast = forecast_output.assign(
            forecast_timestamp=pd.to_datetime(forecast_output["forecast_timestamp"])
        ).sort_values("forecast_timestamp")
        x_data = forecast["forecast_timestamp"]
        y_data = forecast["forecast_value"]
        confidence_pct = forecast["confidence_level"].iloc[0] * 100
        low_CI = forecast["confidence_interval_lower_bound"]
        upper_CI = forecast["confidence_interval_upper_bound"]

        # Plot the forecast data
        plt.plot(x_data, y_data, alpha=1, label="Forecast", linestyle="--")

        # Shade the confidence interval. The ":g" format hides binary
        # floating-point noise (0.9 * 100 == 90.00000000000001).
        plt.fill_between(
            x_data,
            low_CI,
            upper_CI,
            color="#539caf",
            alpha=0.4,
            label=f"{confidence_pct:g}% confidence interval",
        )

    # Plot actual data
    if actual is not None:
        actual_sorted = actual.sort_values(timestamp_col_name)
        plt.plot(
            actual_sorted[timestamp_col_name],
            actual_sorted[data_col_name],
            label="Actual",
            linestyle="--",
        )

    # Display title, legend. A missing title is skipped instead of being
    # rendered as the literal text "None".
    if plot_title and title is not None:
        plt.title(str(title), fontsize=title_fontsize)

    if plot_legend:
        plt.legend(loc=loc, prop=prop)
    plt.show()

## Initializing BigQuery Client

This cell creates an instance of the <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">bigquery.Client</code> class with the project ID <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">"bunge-demo"</code>. This client will be used to interact with the BigQuery service within the specified Google Cloud project.

The `Client` class from the <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">google.cloud.bigquery</code> module is essential for managing and executing queries, as well as handling other BigQuery-related operations.

<b style="color:#e83e8c">Note:</b> Ensure that your Google Cloud credentials are properly set up and the project ID <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">"bunge-demo"</code> is accessible.


In [5]:
# Google Cloud project that the client is created for.
PROJECT_ID = "bunge-demo"
# Name of the BigQuery dataset used by this notebook.
DATASET = "bqmlforecast"

# Single client instance shared by every client.query(...) call in the notebook.
client = bigquery.Client(project=PROJECT_ID)

In [6]:
# Date bounds (YYYY-MM-DD) interpolated into training_data_query, where they
# feed a BETWEEN filter, so both ends are inclusive.
START_DATE = "2023-01-01"
END_DATE = "2024-01-01"

## Creating BigQuery Dataset

This cell constructs a SQL statement that creates a new schema (dataset) in BigQuery, using the dataset name stored in <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">DATASET</code>. The dataset will be named <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">"bqmlforecast"</code> and will be created in the <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">"us"</code> location.

- <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">DATASET = "bqmlforecast"</code>: Sets the name of the dataset.
- <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">dataset_create_query</code>: A formatted string containing the SQL <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">CREATE SCHEMA</code> statement to create the dataset with the specified options.
- <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">dataset_create_job = client.query(dataset_create_query)</code>: Submits the query to create the dataset using the <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,Menlo, Monaco,Consolas,monospace;">client.query</code> method.
- <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">print(dataset_create_job.result())</code>: Executes the query and prints the result to confirm the dataset creation.

<b style="color:#e83e8c">Note:</b> Ensure that the specified dataset name does not already exist and that you have the necessary permissions to create datasets in the project.

In [13]:
# IF NOT EXISTS keeps this cell re-runnable: a plain CREATE SCHEMA fails
# as soon as the dataset exists, which breaks "Restart & Run All".
dataset_create_query = f"""
CREATE SCHEMA IF NOT EXISTS {DATASET}
OPTIONS(
    location="us"
)
"""
dataset_create_job = client.query(dataset_create_query)
# result() blocks until the DDL job has finished and raises if it failed.
print(dataset_create_job.result())

<google.cloud.bigquery.table._EmptyRowIterator object at 0x000002158D7B3E00>


In [None]:
# Builds the exploratory training frame: one row per (city, date, item) with
# daily aggregates plus trailing 7/30/60-day moving averages.
#
# Changes compared with the earlier version of this query:
#   * The date filter lives in WHERE, so rows outside the range are dropped
#     before aggregation instead of being grouped and then discarded.
#   * The moving averages use RANGE frames over UNIX_DATE(date). A ROWS frame
#     counts rows, not days, so on series with missing days "6 PRECEDING"
#     could reach back far more than a week.
#   * The three window definitions are named once in a WINDOW clause instead
#     of being repeated for every column.
training_data_query = f"""
WITH sales_data AS (
    SELECT 
        city,
        date,
        item_description AS item_name,
        SUM(bottles_sold) AS total_amount_sold,
        AVG(state_bottle_retail) AS avg_bottle_price,
        SUM(volume_sold_liters) AS total_volume_sold_liters,
        SUM(sale_dollars) AS total_sale_dollars,
        AVG(state_bottle_retail - state_bottle_cost) AS avg_bottle_profit
    FROM
        `bigquery-public-data.iowa_liquor_sales.sales`
    WHERE
        bottles_sold > 0
        AND sale_dollars > 0
        AND city IS NOT NULL
        AND date BETWEEN DATE('{START_DATE}') AND DATE('{END_DATE}')
    GROUP BY
        city, date, item_name
),
filtered_sales AS (
    SELECT
        city,
        date,
        item_name,
        total_amount_sold,
        avg_bottle_price,
        total_volume_sold_liters,
        total_sale_dollars,
        avg_bottle_profit
    FROM
        sales_data
    WHERE
        item_name IN (
            SELECT 
                item_name
            FROM
                sales_data
            GROUP BY
                item_name
            HAVING
                SUM(total_amount_sold) > 100  -- Adjust the threshold as needed
        )
)
SELECT
    city,
    date,
    item_name,
    total_amount_sold,
    avg_bottle_price,
    total_volume_sold_liters,
    total_sale_dollars,
    avg_bottle_profit,
    -- 7-day moving averages
    AVG(total_amount_sold) OVER last_7_days AS ma7_total_amount_sold,
    AVG(avg_bottle_price) OVER last_7_days AS ma7_avg_bottle_price,
    AVG(total_sale_dollars) OVER last_7_days AS ma7_total_sale_dollars,
    -- 30-day moving averages
    AVG(total_amount_sold) OVER last_30_days AS ma30_total_amount_sold,
    AVG(avg_bottle_price) OVER last_30_days AS ma30_avg_bottle_price,
    AVG(total_sale_dollars) OVER last_30_days AS ma30_total_sale_dollars,
    -- 60-day moving averages
    AVG(total_amount_sold) OVER last_60_days AS ma60_total_amount_sold,
    AVG(avg_bottle_price) OVER last_60_days AS ma60_avg_bottle_price,
    AVG(total_sale_dollars) OVER last_60_days AS ma60_total_sale_dollars
FROM
    filtered_sales
WINDOW
    last_7_days AS (PARTITION BY city, item_name ORDER BY UNIX_DATE(date) RANGE BETWEEN 6 PRECEDING AND CURRENT ROW),
    last_30_days AS (PARTITION BY city, item_name ORDER BY UNIX_DATE(date) RANGE BETWEEN 29 PRECEDING AND CURRENT ROW),
    last_60_days AS (PARTITION BY city, item_name ORDER BY UNIX_DATE(date) RANGE BETWEEN 59 PRECEDING AND CURRENT ROW)
ORDER BY
    date, city, item_name;
"""
training_data_query_job = client.query(training_data_query)
df_train = training_data_query_job.to_dataframe()

# df_train holds one row per (city, date, item). Sum over cities so each item
# is drawn as a single series with exactly one value per date.
item_daily_sales = (
    df_train
    .groupby(["item_name", "date"], as_index=False)["total_amount_sold"]
    .sum()
)

itemslist = list(df_train.item_name.unique())

for item in itemslist:
    datah = item_daily_sales[item_daily_sales.item_name == item]
    plot_historical_and_forecast(input_timeseries=datah,
                                 timestamp_col_name="date",
                                 data_col_name="total_amount_sold",
                                 forecast_output=None,
                                 actual=None,
                                 title=item)

## Querying Training Data

Constructing and executing a SQL query to retrieve training data from the
<code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">bigquery-public-data.iowa_liquor_sales.sales</code> table.
This query aggregates sales data for each item by date within the specified date range and loads the result into a DataFrame.

- <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">training_data_query</code>: A multi-line string containing the SQL <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">SELECT</code> statement.
  - <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">date</code>: Selects the date of the sale.
  - <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">item_description AS item_name</code>: Renames the <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">item_description</code> column to <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">item_name</code>.
  - <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">SUM(bottles_sold) AS total_amount_sold</code>: Aggregates the total number of bottles sold for each item by date.
- <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">FROM \`bigquery-public-data.iowa_liquor_sales.sales\`</code>: Specifies the table from which to query the data.
- <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">GROUP BY date, item_name</code>: Groups the results by date and item name.
- <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">HAVING date BETWEEN DATE('2023-01-01') AND DATE('2024-01-01')</code>: Filters the results to include only sales within the specified date range.
- <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">training_data_query_job = client.query(training_data_query)</code>: Submits the query using the <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">client.query</code> method.
- <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">df_train = training_data_query_job.to_dataframe()</code>: Converts the query result to a Pandas DataFrame.
- <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">df_train</code>: Displays the DataFrame containing the training data.

<b style="color:#e83e8c">Note:</b> Ensure that the <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">bigquery-public-data</code> dataset is accessible and that the date range is accurate for your analysis.

In [14]:
# Creates (or replaces) the view the model is trained on: total bottles sold
# per item and date. The dataset name and date range come from the notebook
# constants so that changing DATASET / START_DATE / END_DATE takes effect here.
training_data_table_creation_query = f"""
CREATE OR REPLACE VIEW {DATASET}.training_data AS
(
    SELECT 
        date,
        item_description AS item_name,
        SUM(bottles_sold) AS total_amount_sold
    FROM
        `bigquery-public-data.iowa_liquor_sales.sales` 
    GROUP BY
        date, item_name
    HAVING 
        date BETWEEN DATE('{START_DATE}') AND DATE('{END_DATE}')
)
"""
training_data_table_creation_job = client.query(training_data_table_creation_query)
# result() blocks until the DDL job has finished and raises if it failed.
print(training_data_table_creation_job.result())

<google.cloud.bigquery.table._EmptyRowIterator object at 0x000002158DB57770>


In [30]:
# Aliases of the notebook-wide date range, so the training window is defined
# in exactly one place (START_DATE / END_DATE).
TRAININGDATA_STARTDATE = START_DATE
TRAININGDATA_ENDDATE = END_DATE

In [15]:
# Trains one time-series model per item_name on the training_data view.
# The dataset name comes from DATASET instead of being hard-coded.
# NOTE(review): BigQuery ML documents ARIMA_PLUS as its current time-series
# model type; confirm whether MODEL_TYPE='ARIMA' should be migrated.
create_model_query = f"""
CREATE OR REPLACE MODEL {DATASET}.arima_model

OPTIONS(
  MODEL_TYPE='ARIMA',
  TIME_SERIES_TIMESTAMP_COL='date', 
  TIME_SERIES_DATA_COL='total_amount_sold',
  TIME_SERIES_ID_COL='item_name',
  HOLIDAY_REGION='US'
) AS

SELECT 
    date,
    item_name,
    total_amount_sold
FROM
  {DATASET}.training_data
"""
create_model_job = client.query(create_model_query)
# result() blocks until training has finished and raises if the job failed.
print(create_model_job.result())

<google.cloud.bigquery.table._EmptyRowIterator object at 0x000002158D7B3E00>


In [16]:
# Fetch the evaluation metrics of the fitted model, one row per item_name.
# The dataset name comes from DATASET instead of being hard-coded.
evaluate_query = f"""
SELECT
  *
FROM
  ML.EVALUATE(MODEL {DATASET}.arima_model)
"""

evaluate_job = client.query(evaluate_query)
df_eval = evaluate_job.to_dataframe()
df_eval



Unnamed: 0,item_name,non_seasonal_p,non_seasonal_d,non_seasonal_q,has_drift,log_likelihood,AIC,variance,seasonal_periods
0,10TH MOUNTAIN BOURBON,0,1,0,False,-50.085345,102.170690,4.653333e+01,[NO_SEASONALITY]
1,10TH MOUNTAIN CORDIAL,0,2,0,False,-22.083126,46.166252,5.328238e-01,[NO_SEASONALITY]
2,10TH MOUNTAIN RYE,4,1,1,True,-6.296322,26.592645,5.210867e-02,[NO_SEASONALITY]
3,135 EAST HYOGO JAPANESE DRY GIN,0,0,1,False,-81.775715,169.551430,3.138532e+00,[NO_SEASONALITY]
4,173 CRAFT DISTILLERY BARREL & KANE,0,1,3,True,-98.614537,207.229074,4.713125e+01,[NO_SEASONALITY]
...,...,...,...,...,...,...,...,...,...
3618,ZING ZANG BLAZING BLOODY MARY RTS,2,1,2,True,-893.707815,1799.415631,9.707195e+01,[WEEKLY]
3619,ZING ZANG BLOODY MARY RTS,1,1,1,True,-1012.305093,2032.610186,2.568075e+02,[NO_SEASONALITY]
3620,ZIYAD BERRY BUSSIN TEA,5,1,0,False,41.365535,-70.731070,1.232114e-07,[NO_SEASONALITY]
3621,ZIYAD LEMON RUSH,4,0,0,False,9.426052,-6.852104,2.698253e-04,[NO_SEASONALITY]


## Make predictions using the model

In [23]:
# Forecast settings. The earlier version declared HORIZON / CONFIDENCE_LEVEL
# as SQL script variables but never used them, hard-coding 30 and 0.90 in the
# STRUCT instead; they are now real constants that drive the query.
FORECAST_HORIZON = 30              # number of time points to forecast per item
FORECAST_CONFIDENCE_LEVEL = 0.90   # confidence level passed to ML.FORECAST

predictions_query = f"""
SELECT
  *
FROM 
  ML.FORECAST(MODEL {DATASET}.arima_model, 
              STRUCT({FORECAST_HORIZON} AS horizon, 
                     {FORECAST_CONFIDENCE_LEVEL} AS confidence_level));
"""
predictions_job = client.query(predictions_query)
predictions_df = predictions_job.to_dataframe()
predictions_df



Unnamed: 0,item_name,forecast_timestamp,forecast_value,standard_error,confidence_level,prediction_interval_lower_bound,prediction_interval_upper_bound,confidence_interval_lower_bound,confidence_interval_upper_bound
0,10TH MOUNTAIN BOURBON,2023-12-22 00:00:00+00:00,1.000,6.822,0.900,-10.208,12.208,-10.208,12.208
1,10TH MOUNTAIN BOURBON,2023-12-23 00:00:00+00:00,1.000,9.647,0.900,-14.851,16.851,-14.851,16.851
2,10TH MOUNTAIN BOURBON,2023-12-24 00:00:00+00:00,1.000,11.815,0.900,-18.413,20.413,-18.413,20.413
3,10TH MOUNTAIN BOURBON,2023-12-25 00:00:00+00:00,1.000,13.643,0.900,-21.417,23.417,-21.417,23.417
4,10TH MOUNTAIN BOURBON,2023-12-26 00:00:00+00:00,1.000,15.253,0.900,-24.062,26.062,-24.062,26.062
...,...,...,...,...,...,...,...,...,...
108685,ZIYAD WATERMELON DRIP,2025-10-26 00:00:00+00:00,6.575,20.345,0.900,-26.853,40.004,-26.853,40.004
108686,ZIYAD WATERMELON DRIP,2025-11-21 00:00:00+00:00,6.597,21.444,0.900,-28.636,41.831,-28.636,41.831
108687,ZIYAD WATERMELON DRIP,2025-12-17 00:00:00+00:00,6.620,22.561,0.900,-30.450,43.689,-30.450,43.689
108688,ZIYAD WATERMELON DRIP,2026-01-12 00:00:00+00:00,6.642,23.697,0.900,-32.294,45.578,-32.294,45.578


Since <code style="font-size:87.5%;color:#e83e8c;word-break:break-word;font-family:SFMono-Regular,monospace;">horizon</code> is set to 30, the result is 30 x (number of items), with one row per forecasted value:

In [24]:
# Row count of the forecast frame, with thousands separators.
print(f"Number of rows: {len(predictions_df):,}")

Number of rows: 108,690


#### Inspect the model coefficients

In [25]:
# Fetch the fitted AR / MA coefficients of the model, one row per item_name.
# The dataset name comes from DATASET instead of being hard-coded.
inspect_coefficients_query = f"""
SELECT
  *
FROM 
  ML.ARIMA_COEFFICIENTS(MODEL {DATASET}.arima_model)
"""
inspect_coefficients_job = client.query(inspect_coefficients_query)
coefficients_df = inspect_coefficients_job.to_dataframe()
coefficients_df



Unnamed: 0,item_name,ar_coefficients,ma_coefficients,intercept_or_drift
0,10TH MOUNTAIN BOURBON,[],[],0.000
1,10TH MOUNTAIN CORDIAL,[],[],0.000
2,10TH MOUNTAIN RYE,"[-0.16196281591791994, -0.12147184140554565, -...",[-0.9913756369976714],-0.534
3,135 EAST HYOGO JAPANESE DRY GIN,[],[0.5123595332220228],3.627
4,173 CRAFT DISTILLERY BARREL & KANE,[],"[-0.3092840782706587, -0.001139406838070179, -...",-1.004
...,...,...,...,...
3618,ZING ZANG BLAZING BLOODY MARY RTS,"[0.8034238808184487, -0.7334290338863436]","[-1.0493287456316371, 0.8809921565326091]",-0.405
3619,ZING ZANG BLOODY MARY RTS,[0.7572070593229627],[-0.9999996053427814],-0.193
3620,ZIYAD BERRY BUSSIN TEA,"[-0.10005381410694879, -0.1098189173636172, -0...",[],0.000
3621,ZIYAD LEMON RUSH,"[-1.1780228448442096, -1.8278283975672043, -1....",[],4.416


#### Plot the forecasted predictions against the historical data

Plot the forecasted predictions, using the `df_train` dataframe that contains the historical data that you used for training and the `predictions_df` dataframe that contains the prediction data:

In [None]:
# df_train holds one row per (city, date, item), while the model forecasts
# item totals across all cities. Sum over cities so the historical line has
# one value per date and is comparable with the forecast.
item_daily_sales = (
    df_train
    .groupby(["item_name", "date"], as_index=False)["total_amount_sold"]
    .sum()
)

itemslist = list(df_train.item_name.unique())

for item in itemslist:
    datah = item_daily_sales[item_daily_sales.item_name == item].copy()
    dataf = predictions_df[predictions_df.item_name == item].copy()

    plot_historical_and_forecast(input_timeseries=datah,
                                 timestamp_col_name="date",
                                 data_col_name="total_amount_sold",
                                 # Items without any forecast rows are drawn as history only.
                                 forecast_output=dataf if not dataf.empty else None,
                                 actual=None,
                                 title=item,
                                 plotstartdate="2023-01-01")

#### Plot the forecasted predictions against the actual data

In [32]:
# Number of days after the first post-training day to fetch actual sales for.
ACTUAL_HORIZON_DAYS = 30

# Actual sales for the period right after the training window. The window is
# inclusive on both ends: (end + 1 day) .. (end + 1 + ACTUAL_HORIZON_DAYS days).
# The date filter is a WHERE clause, so rows are dropped before aggregation,
# and the end date is converted with DATE() explicitly.
actual_query = f"""
SELECT 
    date,
    item_description AS item_name,
    SUM(bottles_sold) AS total_amount_sold
FROM
    `bigquery-public-data.iowa_liquor_sales.sales` 
WHERE
    date BETWEEN DATE_ADD(DATE('{TRAININGDATA_ENDDATE}'),
                              INTERVAL 1 DAY)
            AND DATE_ADD(DATE('{TRAININGDATA_ENDDATE}'),
                             INTERVAL {1 + ACTUAL_HORIZON_DAYS} DAY)
GROUP BY
    date, item_name
ORDER BY
    date;
"""

actual_job = client.query(actual_query)
actual_df = actual_job.to_dataframe()
actual_df



Unnamed: 0,date,item_name,total_amount_sold
0,2024-01-02,DON JULIO REPOSADO,100
1,2024-01-02,JOHNNIE WALKER BLACK,101
2,2024-01-02,TOWN BRANCH TRUE CASK BOURBON,7
3,2024-01-02,SAILOR JERRY SPICED NAVY RUM,75
4,2024-01-02,NEW AMSTERDAM 80PRF,554
...,...,...,...
33884,2024-02-01,COURVOISIER VSOP COGNAC,2
33885,2024-02-01,TOAST AND TAVERN SPICY MARGARITA RTD,12
33886,2024-02-01,OLD TUB,4
33887,2024-02-01,HORSE SOLDIER BARREL STRENGTH BOURBON WHISKEY,2


In [40]:
# seaborn is already imported as `sns` in the import cell at the top of the
# notebook, so only the theme is (re)applied here.
sns.set_theme()

In [None]:
# df_train holds one row per (city, date, item), while both the forecast and
# actual_df are item totals across all cities. Sum over cities so the three
# lines are on the same scale.
item_daily_sales = (
    df_train
    .groupby(["item_name", "date"], as_index=False)["total_amount_sold"]
    .sum()
)

itemslist = list(df_train.item_name.unique())

for item in itemslist:
    datah = item_daily_sales[item_daily_sales.item_name == item].sort_values('date')
    dataf = predictions_df[predictions_df.item_name == item].sort_values(['forecast_timestamp'])
    dataa = actual_df[actual_df.item_name == item].sort_values('date')

    plot_historical_and_forecast(
        input_timeseries=datah,
        timestamp_col_name="date",
        data_col_name="total_amount_sold",
        # Items without any forecast rows are drawn without a forecast band.
        forecast_output=dataf if not dataf.empty else None,
        actual=dataa,
        title=item,
        plotstartdate="2023-11-01",
        prop={'size': 12},
    )

In [52]:
%%bigquery --project "bunge-demo"

-- Reporting view that stacks the training history and the model forecast into
-- one table: history rows carry history_value only, forecast rows carry
-- forecast_value and the prediction interval only.
-- The former --params flag was removed because the statement references no
-- query parameters.
CREATE OR REPLACE VIEW bqmlforecast.outputdata_datastudio AS (
  SELECT
    date AS timestamp,
    item_name,
    total_amount_sold AS history_value,
    NULL AS forecast_value,
    NULL AS prediction_interval_lower_bound,
    NULL AS prediction_interval_upper_bound
  FROM
    bqmlforecast.training_data
  UNION ALL
  SELECT
    EXTRACT(DATE
    FROM
      forecast_timestamp) AS timestamp,
    item_name,
    NULL AS history_value,
    forecast_value,
    prediction_interval_lower_bound,
    prediction_interval_upper_bound
  FROM
    ML.FORECAST(MODEL bqmlforecast.arima_model,
      STRUCT(30 AS horizon, 0.9 AS confidence_level)) 
  ORDER BY timestamp
  )

Query is running:   0%|          |

In [45]:
# Scratch cell: converts the result of actual_job to a DataFrame again; the
# same call already populated actual_df in the cell that ran actual_query.
actual_job.to_dataframe()

In [54]:
# Scratch cell: converts the result of the CREATE OR REPLACE VIEW job to a
# DataFrame, to see what a DDL job yields.
x = training_data_table_creation_job.to_dataframe()

In [56]:
# Check whether the frame built from the view-creation (DDL) job holds any data.
x.empty

True

In [58]:
# Check whether the frame built from the view-creation (DDL) job has any columns.
x.columns.empty

True