As you collect new data, you may want to use it to generate new forecasts. Forecast does not automatically retrain a predictor when you import an updated dataset, but you can use an existing predictor to generate forecasts with the updated data. For instance, if you collect daily sales data and want to include new datapoints in your forecast, you could import the updated data and use it to generate a forecast without training a new predictor. If, however, you want your predictor to be trained off of the new data, you must create a new predictor.
* To generate a forecast off of new data:
1. Upload the updated CSV file to an Amazon S3 bucket. The updated CSV should still contain all of your existing data.
2. Create a dataset import job with the new data. The most recent import job is the one that forecasts are generated off of.
3. Create a new forecast using the existing predictor.
4. Retrieve the forecast as usual.

# Setup

In [None]:
%%capture
# Install awswrangler (imported below as `wr`); %%capture hides the installer output.
%pip install awswrangler

In [None]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
import util
import time
import boto3
import awswrangler as wr
import pandas as pd
import numpy as np
import seaborn as sns
from datetime import timedelta
from datetime import datetime
# Model/category key: interpolated into the project name, S3 paths and resource ARNs below,
# and stored as 'mt_category' in DynamoDB.
model = 'schedule'

In [None]:
# Wall-clock start time; the last cell uses it to report the total run time.
tStart = time.time()

In [None]:
# Amazon Forecast client bound to the us-east-1 region.
session = boto3.Session(region_name='us-east-1')
forecast = session.client('forecast')

In [None]:
# --- Parameters ---
# Timestamp pattern handed to the dataset import job.
# NOTE(review): the Amazon Forecast docs spell this pattern "yyyy-MM-dd HH:mm:ss" — confirm "hh" is intended.
TIMESTAMP_FORMAT = "yyyy-MM-dd hh:mm:ss"

# --- Naming ---
project = f'bill_scs_rate_{model}'
bucket_name = 'sagemaker-us-east-1-123456'
key = f'puretech_data/{model}.csv'
s3DataPath = f's3://{bucket_name}/{key}'

# --- ARNs of resources that already exist ---
role_arn = 'arn:aws:iam::123456:role/PuretechBillSuccessRateForecast'
datasetArn = f'arn:aws:forecast:us-east-1:123456:dataset/bill_scs_rate_{model}_ds'
predictorArn = f'arn:aws:forecast:us-east-1:123456:predictor/bill_scs_rate_{model}_predictor'
forecastArn = f'arn:aws:forecast:us-east-1:123456:forecast/bill_scs_rate_{model}_forecast'
forecastExportJobArn = f'arn:aws:forecast:us-east-1:123456:forecast-export-job/bill_scs_rate_{model}_forecast/export'
# Import jobs the predictor was trained from (requires a live call to the Forecast service).
datasetImportJobDeleteArn = forecast.describe_predictor(PredictorArn=predictorArn)['DatasetImportJobArns']

# --- Names for the resources created by this run ---
# Date stamp with underscores, e.g. 2021_01_31, used in the import job name.
timetoday = datetime.today().strftime('%Y_%m_%d')
datasetImportJobName = f'EP_DSIMPORT_JOB_TARGET_{timetoday}'
forecastName = f'{project}_forecast'
forecastExportName = 'export'
outputPath = f's3://{bucket_name}/forecast_output/{model}'

## Create Data Import Job



In [None]:
# Start a dataset import job (named with today's date stamp) that loads the
# CSV at s3DataPath into the existing dataset.
ds_import_job_response = forecast.create_dataset_import_job(
    DatasetImportJobName=datasetImportJobName,
    DatasetArn=datasetArn,
    DataSource={"S3Config": {"Path": s3DataPath, "RoleArn": role_arn}},
    TimestampFormat=TIMESTAMP_FORMAT,
)

In [None]:
# Keep the ARN of the new import job; the next cell polls its status with it.
ds_import_job_arn=ds_import_job_response['DatasetImportJobArn']
print(ds_import_job_arn)

In [None]:
# Poll the dataset import job every 10 seconds until it reaches a terminal state.
status_indicator = util.StatusIndicator()

while True:
    import_job_description = forecast.describe_dataset_import_job(DatasetImportJobArn=ds_import_job_arn)
    status = import_job_description['Status']
    status_indicator.update(status)
    # CREATE_STOPPED is terminal too; without it a stopped job would be polled forever.
    if status in ('ACTIVE', 'CREATE_FAILED', 'CREATE_STOPPED'):
        break
    time.sleep(10)

status_indicator.end()

# Fail loudly: continuing after a failed import would generate the forecast
# from the previous data without any warning.
if status != 'ACTIVE':
    raise RuntimeError(
        "Dataset import job %s ended with status %s: %s"
        % (ds_import_job_arn, status, import_job_description.get('Message', 'no message returned'))
    )

## Create Forecast

Delete the existing forecast export job and forecast first, so that their names can be reused.

In [None]:
# Remove the previous export job, then the forecast it was exported from.
# The forecast and export created below reuse the same names.
# NOTE(review): util.wait_till_delete presumably blocks until the resource is gone — confirm in util.
util.wait_till_delete(lambda: forecast.delete_forecast_export_job(ForecastExportJobArn = forecastExportJobArn))
util.wait_till_delete(lambda: forecast.delete_forecast(ForecastArn = forecastArn))

In [None]:
# Generate a new forecast from the existing predictor and remember its ARN.
create_forecast_response = forecast.create_forecast(
    ForecastName=forecastName,
    PredictorArn=predictorArn,
)
forecastArn = create_forecast_response['ForecastArn']
print(forecastArn)

In [None]:
# Poll the forecast every 10 seconds until it reaches a terminal state.
status_indicator = util.StatusIndicator()

while True:
    forecast_description = forecast.describe_forecast(ForecastArn=forecastArn)
    status = forecast_description['Status']
    status_indicator.update(status)
    # CREATE_STOPPED is terminal too; without it a stopped forecast would be polled forever.
    if status in ('ACTIVE', 'CREATE_FAILED', 'CREATE_STOPPED'):
        break
    time.sleep(10)

status_indicator.end()

# Fail loudly: an export cannot be produced from a forecast that did not become ACTIVE.
if status != 'ACTIVE':
    raise RuntimeError(
        "Forecast %s ended with status %s: %s"
        % (forecastArn, status, forecast_description.get('Message', 'no message returned'))
    )

## Create Forecast Export

In [None]:
# Export the forecast as CSV files under outputPath and remember the export job ARN.
forecast_export_response = forecast.create_forecast_export_job(
    ForecastExportJobName=forecastExportName,
    ForecastArn=forecastArn,
    Destination={"S3Config": {"Path": outputPath, "RoleArn": role_arn}},
)
forecastExportJobArn = forecast_export_response['ForecastExportJobArn']
print(forecastExportJobArn)

In [None]:
# Poll the forecast export job every 10 seconds until it reaches a terminal state.
status_indicator = util.StatusIndicator()

while True:
    export_job_description = forecast.describe_forecast_export_job(ForecastExportJobArn=forecastExportJobArn)
    status = export_job_description['Status']
    status_indicator.update(status)
    # CREATE_STOPPED is terminal too; without it a stopped job would be polled forever.
    if status in ('ACTIVE', 'CREATE_FAILED', 'CREATE_STOPPED'):
        break
    time.sleep(10)

status_indicator.end()

# Fail loudly: the following cells read the export from S3 and would otherwise
# fail later or pick up an older export.
if status != 'ACTIVE':
    raise RuntimeError(
        "Forecast export job %s ended with status %s: %s"
        % (forecastExportJobArn, status, export_job_description.get('Message', 'no message returned'))
    )

## S3 to DynamoDB

In [None]:
def load_pred(bucket=None):
    """Load the latest forecast export from S3 into a DataFrame.

    Reads the objects under ``s3://<bucket>/forecast_output/<model>`` whose
    key ends in ``part0.csv`` and that were modified within the last 24 hours.

    Args:
        bucket: Name of the S3 bucket to read from. Defaults to the
            notebook-level ``bucket_name``, which is the bucket the forecast
            export job writes to (``outputPath`` is built from it).

    Returns:
        The DataFrame produced by ``wr.s3.read_csv``.
    """
    # Imported locally because the notebook's import cell only brings in
    # ``datetime`` and ``timedelta``.
    from datetime import timezone

    if bucket is None:
        # Read from the same bucket the export was written to; the sagemaker
        # SDK is not imported in this notebook, so it cannot be used to look
        # up a default bucket.
        bucket = bucket_name
    prefix = 'forecast_output/' + model
    path = f's3://{bucket}/{prefix}'
    suffix = 'part0.csv'
    # Timezone-aware cutoff so that only files written in the last day are read.
    modified_since = datetime.now(timezone.utc) - timedelta(hours=24)
    return wr.s3.read_csv(path=path, path_suffix=suffix, last_modified_begin=modified_since)

In [None]:
def transform(df):
    """Return a copy of *df* without ``item_id`` and with ``date`` parsed and shifted.

    The ``date`` column is parsed from ``YYYY-MM-DDTHH:MM:SSZ`` strings and
    moved forward by 8 hours (presumably UTC to a UTC+8 local time — confirm).
    The input frame is left unmodified.
    """
    trimmed = df.drop(columns='item_id')
    parsed_dates = pd.to_datetime(trimmed['date'], format="%Y-%m-%dT%H:%M:%SZ")
    trimmed['date'] = parsed_dates + timedelta(hours=8)
    return trimmed

In [None]:
def put_time(model, timetoday, best_time, dynamodb=None):
    """Write one best-time record to the ``BestBillTime`` DynamoDB table.

    Args:
        model: Stored under ``mt_category``.
        timetoday: Stored under ``forecast_date``.
        best_time: Stored under ``best_time`` after conversion with ``str``.
        dynamodb: Optional DynamoDB resource; a default boto3 resource is
            created when none is supplied.

    Returns:
        The response returned by ``put_item``.
    """
    resource = dynamodb or boto3.resource('dynamodb')
    best_bill_time = resource.Table('BestBillTime')
    record = {
        'mt_category': model,
        'forecast_date': timetoday,
        'best_time': str(best_time),
    }
    return best_bill_time.put_item(Item=record)

In [None]:
# Load the exported forecast, apply transform(), and plot the p50 column against date.
df = load_pred()
df = transform(df)
sns.lineplot(x='date', y='p50', data=df)

In [None]:
# Pick the timestamp with the highest p50 value.
# idxmax() returns an index *label*, so the row is looked up with .loc
# (label-based) rather than .iloc (position-based); the two only coincide
# when the frame has a default 0..n-1 index.
best_idx = df['p50'].idxmax()
best_time, best_value = df.loc[best_idx, 'date'], df.loc[best_idx, 'p50']
print(best_time, best_value)

In [None]:
# Store the best time for today in DynamoDB.
# timetoday is re-assigned here in 'YYYY-MM-DD' form; the earlier value used
# underscores and was only needed for the import job name.
timetoday = datetime.today().strftime('%Y-%m-%d')
put_time(model, timetoday, best_time)

In [None]:
# Report the total wall-clock time of the run, in minutes.
tEnd = time.time()
print(f"Spent {(tEnd - tStart) / 60:f} minutes")