## Async Forecast Infer

### Import Libraries

The first step is to import the libraries that we will need.

In [1]:
import json
import os
import datetime

import wallaroo
from wallaroo.object import EntityNotFoundError

# used to display dataframe information without truncating
from IPython.display import display
import pandas as pd
import numpy as np

from resources import simdb
from resources import util

# Do not truncate column contents when DataFrames are displayed.
pd.set_option('display.max_colwidth', None)


### Initialize connection

Start a connection to the Wallaroo instance and save the connection into the variable `wl`.

In [2]:
# Log in through the local Wallaroo instance and keep the client in `wl`;
# the output below shows the browser-based login prompted by this call.

wl = wallaroo.Client()

Please log into the following URL in a web browser:

	https://doc-test.keycloak.wallaroocommunity.ninja/auth/realms/master/device?user_code=KGMR-QKNC

Login successful!


### Set Configurations

The following will set the workspace, model name, and pipeline that will be used for this example.  If the workspace or pipeline already exists, it will be assigned for use in this example.  If it does not exist, it will be created based on the names listed below.

In [3]:
# Names used by the later cells for the workspace, the pipeline and the model.
workspace_name = 'moto2'
pipeline_name = 'bikedaypipe'
model_name = 'bikedaymodel'

### Set the Workspace and Pipeline


In [4]:
def get_workspace(name):
    """Return the workspace called ``name``, creating it if none exists.

    Scans ``wl.list_workspaces()`` for a workspace whose ``name()`` equals
    ``name`` and returns the first match. When nothing matches, a new
    workspace is created with ``wl.create_workspace(name)``.

    Args:
        name: Name of the workspace to look up or create.

    Returns:
        The matching workspace, or the newly created one.
    """
    for ws in wl.list_workspaces():
        if ws.name() == name:
            # Stop at the first hit instead of walking the rest of the list.
            return ws
    return wl.create_workspace(name)

def get_pipeline(name):
    """Return the pipeline called ``name``, building a new one if none exists.

    Args:
        name: Name of the pipeline to look up or build.

    Returns:
        The first entry of ``wl.pipelines_by_name(name)`` when there is one,
        otherwise the result of ``wl.build_pipeline(name)``.
    """
    try:
        matches = wl.pipelines_by_name(name)
    except EntityNotFoundError:
        matches = []
    # An empty result is handled the same way as "not found", so indexing an
    # empty list can no longer escape as an uncaught IndexError.
    if matches:
        return matches[0]
    return wl.build_pipeline(name)

# Look up the workspace by name, creating it if it does not exist yet.
workspace = get_workspace(workspace_name)

# Make it the client's current workspace before the pipeline lookup below.
wl.set_current_workspace(workspace)

# Look up the pipeline by name, building it if it does not exist yet.
pipeline = get_pipeline(pipeline_name)



In [5]:
# File that is uploaded as the model.
model_file_name = 'forecast.py'

# Upload the file under `model_name` and configure it with runtime="python".
bike_day_model = wl.upload_model(model_name, model_file_name).configure(runtime="python")

In [6]:
# Add the uploaded model as a step of the pipeline.
pipeline.add_model_step(bike_day_model)

0,1
name,bikedaypipe
created,2023-05-23 14:17:19.742581+00:00
last_updated,2023-05-23 14:17:19.742581+00:00
deployed,(none)
tags,
versions,2295af89-00c2-4c4b-ac88-1617647268da
steps,


In [7]:
# Deployment configuration that allows additional engines to run:
# replica count 4, 1 CPU, "1Gi" of memory.
deploy_config = (
    wallaroo.DeploymentConfigBuilder()
    .replica_count(4)
    .cpus(1)
    .memory("1Gi")
    .build()
)

# Deploy the pipeline with that configuration.
pipeline.deploy(deployment_config=deploy_config)

0,1
name,bikedaypipe
created,2023-05-23 14:17:19.742581+00:00
last_updated,2023-05-23 14:17:22.308066+00:00
deployed,True
tags,
versions,"ceae5746-29d5-4ef3-b92c-e88f3b36f393, 2295af89-00c2-4c4b-ac88-1617647268da"
steps,bikedaymodel


### Run Inference
For this example, we will forecast bike rentals for the following seven days, based on the previous month's rentals (inclusive of "today").

In [8]:
# retrieve forecast schedule: the first forecast day, plus the days that the
# month-long run later in the notebook iterates over
first_day, analysis_days = util.get_forecast_days()

print(f'Running analysis on {first_day}')

Running analysis on 2011-02-22


In [9]:
# Open the database connection provided by simdb
conn = simdb.get_db_connection()
print(f'Bike rentals table: {simdb.tablename}')

# Build the date-range query for the first forecast day, show it, then run it
query = util.mk_dt_range_query(forecast_day=first_day, tablename=simdb.tablename)
print(query)
data = pd.read_sql_query(sql=query, con=conn)
data.head()

Bike rentals table: bikerentals
select cnt from bikerentals where date > DATE(DATE('2011-02-22'), '-1 month') AND date <= DATE('2011-02-22')


Unnamed: 0,cnt
0,986
1,1416
2,1985
3,506
4,431


In [10]:
# Preview the first five rows of the window queried above, this time with the
# `date` column. The table name and forecast day are taken from
# `simdb.tablename` and `first_day` instead of being hard-coded, so the preview
# always matches the configured run.
pd.read_sql_query(
    f"select date, cnt from {simdb.tablename} "
    f"where date > DATE(DATE('{first_day}'), '-1 month') "
    f"AND date <= DATE('{first_day}') LIMIT 5",
    conn,
)

Unnamed: 0,date,cnt
0,2011-01-23,986
1,2011-01-24,1416
2,2011-01-25,1985
3,2011-01-26,506
4,2011-01-27,431


In [11]:
# send data to model for forecast: the query result is passed as a
# {column: list of values} dict and the first element of the response is kept

results = pipeline.infer(data.to_dict(orient='list'))[0]
results


{'forecast': [1462, 1483, 1497, 1507, 1513, 1518, 1521]}

In [12]:
# Pair the forecast values with their dates (the next seven days)
resultframe = pd.DataFrame(dict(
    date=util.get_forecast_dates(first_day),
    forecast=results['forecast'],
))

# Append the rows to the db table "bikeforecast"
# (because of if_exists='append', re-running this cell adds the rows again)
resultframe.to_sql(name='bikeforecast', con=conn, if_exists='append', index=False)

# Read the table back and display it
query = "select date, forecast from bikeforecast"
pd.read_sql_query(sql=query, con=conn)

Unnamed: 0,date,forecast
0,2011-02-23,1462
1,2011-02-24,1483
2,2011-02-25,1497
3,2011-02-26,1507
4,2011-02-27,1513
5,2011-02-28,1518
6,2011-03-01,1521


In [13]:
# Display the pipeline's inference URL.
pipeline.url()

'https://doc-test.api.wallaroocommunity.ninja/v1/api/pipelines/infer/bikedaypipe-10/bikedaypipe'

Normally here we would close the database connection and undeploy the pipeline until the next forecast run.

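### Four Weeks of Inferences
We'll collect the input data for each forecast day in a loop, then submit all of the inference requests together with `parallel_infer`, to get inferences for the entire month of March.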

In [None]:
# get our list of items to run through: one input dict per day in analysis_days

inference_data = []

# NOTE(review): content_type is not used by any cell visible in this notebook.
content_type = "application/json"

# After the loop, `day`, `query` and `data` keep the values from the last iteration.
for day in analysis_days:
    print(f"Current date: {day}")
    # same date-range query as the single-day run earlier, built for this day
    query = util.mk_dt_range_query(tablename=simdb.tablename, forecast_day=day)
    print(query)
    data = pd.read_sql_query(query, conn)
    # same {column: list of values} payload form passed to pipeline.infer earlier
    inference_data.append(data.to_dict(orient='list'))

In [None]:
# Submit every prepared input in one call.
# NOTE(review): the notebook title says "Async" and `parallel_infer` may be a
# coroutine that has to be awaited (`await pipeline.parallel_infer(...)`) --
# confirm against the Wallaroo SDK version in use.
parallel_results = pipeline.parallel_infer(tensor_list=inference_data, timeout=20, num_parallel=16, retries=2)

# Pair each result with the forecast day its input was built for. Reusing the
# leftover `day` variable from the data-collection loop would stamp every
# result with the dates of the last analysis day only.
# assumes results come back in the same order as `inference_data`, and that
# `analysis_days` can be iterated again -- TODO confirm
for forecast_day, result in zip(analysis_days, parallel_results):
    display(result)
    # annotate with the appropriate dates (the next seven days)
    resultframe = pd.DataFrame({
        'date' : util.get_forecast_dates(forecast_day),
        'forecast' : result['forecast']
    })

    # write the new data to the db table "bikeforecast"
    resultframe.to_sql('bikeforecast', conn, index=False, if_exists='append')

On April 1st, we can compare the March forecasts to the actual rentals.

In [18]:
# Join each March forecast to the rentals recorded for the same date.
# LEFT JOIN keeps a forecast row even when bikerentals has no row for that date.
# Plain string literal: the query has no placeholders, so no f-prefix is needed.
query = '''SELECT bikeforecast.date AS date, forecast, cnt AS actual
            FROM bikeforecast LEFT JOIN bikerentals
            ON bikeforecast.date = bikerentals.date
            WHERE bikeforecast.date >= DATE('2011-03-01')
            AND bikeforecast.date <  DATE('2011-04-01')
            ORDER BY 1'''

print(query)


comparison = pd.read_sql_query(query, conn)
comparison

SELECT bikeforecast.date AS date, forecast, cnt AS actual
            FROM bikeforecast LEFT JOIN bikerentals
            ON bikeforecast.date = bikerentals.date
            WHERE bikeforecast.date >= DATE('2011-03-01')
            AND bikeforecast.date <  DATE('2011-04-01')
            ORDER BY 1


Unnamed: 0,date,forecast,actual
0,2011-03-01,1521,1851
1,2011-03-02,1764,2134
2,2011-03-03,1749,1685
3,2011-03-04,1743,1944
4,2011-03-05,1741,2077
5,2011-03-06,1740,605
6,2011-03-07,1740,1872
7,2011-03-08,1740,2133
8,2011-03-09,1735,1891
9,2011-03-10,1858,623


### Undeploy the Pipeline

Undeploy the pipeline and return the resources back to the Wallaroo instance.

In [None]:
# Close the database connection, then undeploy the pipeline.
conn.close()
pipeline.undeploy()