## Predict with Amazon Forecast

Now we are going to use the previous data set to train a model with Amazon Forecast.



In [1]:
import subprocess
from time import sleep

import boto3
import pandas as pd

# A single session pins every client created from it to the same region.
# Check that Amazon Forecast is supported in the region you choose.
session = boto3.Session(region_name='us-east-1')

# `forecast` is used below to manage datasets and predictors;
# `forecastquery` is used below to fetch accuracy metrics.
forecast = session.client('forecast')
forecastquery = session.client('forecastquery')

In [6]:
# Check available algorithms ("recipes"). One of the returned names is passed
# as RecipeName when the predictor is created further down.
forecast.list_recipes()

{'RecipeNames': ['forecast_ARIMA',
  'forecast_DEEP_AR',
  'forecast_DEEP_AR_PLUS',
  'forecast_ETS',
  'forecast_MDN',
  'forecast_MQRNN',
  'forecast_NPTS',
  'forecast_PROPHET',
  'forecast_SQF'],
 'ResponseMetadata': {'RequestId': '3fbdf62a-6048-4c1f-be9f-6cfb0618236b',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',
   'date': 'Wed, 30 Jan 2019 15:07:31 GMT',
   'x-amzn-requestid': '3fbdf62a-6048-4c1f-be9f-6cfb0618236b',
   'content-length': '174',
   'connection': 'keep-alive'},
  'RetryAttempts': 0}}

### 1. Train/Test split

To be able to evaluate the forecast quality later, we hold out the last 15 days of data: only observations dated before 2017-08-01 are used for training.

In [7]:
# Load the target time series. The file has no header row, so the columns are
# addressed by position (0, 1, 2); dtype=object keeps every value exactly as it
# appears in the file instead of letting pandas infer numeric types.
# pandas is imported in the notebook's top import cell.
df = pd.read_csv("target_time_series.csv", dtype=object, header=None)
df.head(3)

Unnamed: 0,0,1,2
0,96995,2013-02-04,1.0
1,96995,2013-03-31,1.0
2,96995,2013-04-06,1.0


In [8]:
# Column 1 was read as text (dtype=object); parse it into datetimes so it can
# be compared against the train/test cutoff date.
df[1] = pd.to_datetime(df[1])

In [9]:
# Training split: keep only the rows dated strictly before 1 August 2017.
df = df.loc[df[1] < '2017-08-01']

In [10]:
# Write the training split with neither index nor header row, so the file
# contains only the three data columns. `index`/`header` take the documented
# boolean False (None merely happened to be falsy), and the date format is
# pinned to year-month-day so it always matches TIMESTAMP_FORMAT ("yyyy-MM-dd")
# declared for the dataset import.
df.to_csv(
    'target_time_series_train.csv',
    index=False,
    header=False,
    date_format='%Y-%m-%d',
)

### 2. Upload data to S3

In [15]:
s3 = session.client('s3')

# Resolve the account id through the same session as every other client so the
# call uses the same configuration. Index with ['Account'] so that a missing
# value fails here rather than producing a role ARN containing "None".
accountId = session.client('sts').get_caller_identity()['Account']

bucketName = 'amazon-forecast-chrisking-data-mg'  # Update to your bucket name
key = "favorita/target_time_series_train.csv"

# Upload the training split so that a fresh run of the notebook does not depend
# on a file uploaded in an earlier session; re-running overwrites the same key.
s3.upload_file(Filename="target_time_series_train.csv", Bucket=bucketName, Key=key)

# Role handed to Amazon Forecast when the dataset group is created
# (presumably it must grant read access to the bucket above - confirm in IAM).
roleArn = 'arn:aws:iam::%s:role/amazonforecast' % accountId

### 3. Create Dataset

Now we are going to create the data set schema in Amazon Forecast.

In [None]:
# Settings passed to create_dataset: the frequency code of the series and the
# pattern describing how timestamps are written in the CSV file.
DATASET_FREQUENCY = 'D'
TIMESTAMP_FORMAT = 'yyyy-MM-dd'

In [16]:
# Replace this with a unique name here, make sure the entire name is < 30 characters.
project = 'favorita_forecast2'

# Names of the Amazon Forecast resources, all derived from the project name.
datasetName = project + '_ds2'
datasetGroupName = project + '_gp2'

# Full S3 URI of the training file referenced by the import job.
s3DataPath = "s3://" + bucketName + "/" + key

In [None]:
# Dataset schema. The attributes must be listed in the same order as the
# columns of the raw data file.
schema = {
    "Attributes": [
        {"AttributeName": "item_id", "AttributeType": "string"},
        {"AttributeName": "timestamp", "AttributeType": "timestamp"},
        {"AttributeName": "demand", "AttributeType": "float"},
    ]
}

# Register the target time series dataset with Amazon Forecast.
response = forecast.create_dataset(
    Domain="RETAIL",
    DatasetType='TARGET_TIME_SERIES',
    DataFormat='CSV',
    DatasetName=datasetName,
    DataFrequency=DATASET_FREQUENCY,
    TimeStampFormat=TIMESTAMP_FORMAT,
    Schema=schema,
)

In [None]:
# Create the dataset group containing the dataset created above and pass the
# role ARN built earlier.
forecast.create_dataset_group(
    DatasetGroupName=datasetGroupName,
    DatasetNames=[datasetName],
    RoleArn=roleArn,
)

### 4. Create Data Import Job
Import the raw data from S3 into Amazon Forecast so that it is ready to be used for forecasting.

In [None]:
# Start the job that imports the comma-delimited CSV at s3DataPath into the
# dataset; the response is kept because it carries the job's version id.
ds_import_job_response = forecast.create_dataset_import_job(
    DatasetGroupName=datasetGroupName,
    DatasetName=datasetName,
    S3Uri=s3DataPath,
    Delimiter=',',
)

In [None]:
# Version id of the import job; it is needed to poll the job's status.
ds_versionId = ds_import_job_response['VersionId']
print(ds_versionId)

In [None]:
# Poll the import job every 30 seconds until it reaches a terminal state
# (ACTIVE or FAILED), printing each status that is observed.
while True:
    dataImportStatus = forecast.describe_dataset_import_job(
        DatasetName=datasetName, VersionId=ds_versionId)['Status']
    print(dataImportStatus)
    if dataImportStatus in ('ACTIVE', 'FAILED'):
        break
    sleep(30)

# Fail loudly instead of silently falling through: the following cells train a
# predictor on this data and cannot succeed after a failed import.
if dataImportStatus == 'FAILED':
    raise RuntimeError(
        'Dataset import job %s for dataset %s FAILED' % (ds_versionId, datasetName))

### 5. Create Solution with your own forecast horizon

We are going to use a forecast horizon of 30 days, even though we will only evaluate the predictions on the 15 days we left out of training.

In [11]:
# Name of the predictor, derived from the project name.
predictorName = project + '_mqrnn2'

In [None]:
# Forecast horizon passed to create_predictor (30 days, per the text above).
forecastHorizon = 30

In [None]:
# Same listing as at the top of the notebook, repeated here as a reminder of
# the valid RecipeName values before the predictor is created.
forecast.list_recipes()

In [None]:
# Start training a predictor with the MQRNN recipe on the dataset group; the
# response is kept because it carries the predictor's version id.
createPredictorResponse = forecast.create_predictor(
    PredictorName=predictorName,
    DatasetGroupName=datasetGroupName,
    RecipeName='forecast_MQRNN',
    ForecastHorizon=forecastHorizon,
)

In [None]:
# Version id of the predictor being trained. NOTE: the variable name is
# misspelled ("Verion"); it is kept as-is because later cells reference it.
predictorVerionId = createPredictorResponse['VersionId']

In [None]:
# Poll the predictor every 30 seconds until it reaches a terminal state
# (ACTIVE or FAILED), printing each status that is observed.
while True:
    predictorStatus = forecast.describe_predictor(
        PredictorName=predictorName, VersionId=predictorVerionId)['Status']
    print(predictorStatus)
    if predictorStatus in ('ACTIVE', 'FAILED'):
        break
    sleep(30)

# Fail loudly instead of silently falling through: metrics and deployment in
# the following cells need a successfully trained predictor.
if predictorStatus == 'FAILED':
    raise RuntimeError(
        'Predictor %s (version %s) FAILED' % (predictorName, predictorVerionId))

In [None]:
# Display the full description of the predictor version polled above.
forecast.describe_predictor(
    PredictorName=predictorName,
    VersionId=predictorVerionId,
)

### 6. Get Accuracy Metrics

In [13]:
# Same values as assigned earlier in the notebook; presumably re-declared so
# this section can be run on its own once training has finished.
# Replace this with a unique name here, make sure the entire name is < 30 characters.
project = 'favorita_forecast2'
predictorName = project + '_mqrnn2'

In [14]:
# Fetch the accuracy metrics of the trained predictor; the response is shown
# as the cell output.
forecastquery.get_accuracy_metrics(PredictorName=predictorName)

{'ModelMetrics': {'MQRNN': {'Metrics': {'p10': '0.19168282136436146',
    'p50': '0.6720288740197131',
    'p90': '0.4864501466871068',
    'rmse': '5.0690687430412815'},
   'MetricsByBucket': []}},
 'ResponseMetadata': {'RequestId': '4c91609e-a68a-4a27-96c5-69e13d6e7c24',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',
   'date': 'Wed, 30 Jan 2019 15:47:10 GMT',
   'x-amzn-requestid': '4c91609e-a68a-4a27-96c5-69e13d6e7c24',
   'content-length': '171',
   'connection': 'keep-alive'},
  'RetryAttempts': 0}}

### 7. Deploy Predictor

In [None]:
# Request deployment of the trained predictor; its status is polled in a later cell.
forecast.deploy_predictor(PredictorName=predictorName)

In [None]:
# List the deployed predictors and print the raw response.
deployedPredictorsResponse = forecast.list_deployed_predictors()
print(deployedPredictorsResponse)

In [None]:
# Poll the deployment every 30 seconds until it reaches a terminal state
# (ACTIVE or FAILED). Every observed status, including the final one, is
# printed inside the loop, so no extra print is needed afterwards.
while True:
    deployedPredictorStatus = forecast.describe_deployed_predictor(
        PredictorName=predictorName)['Status']
    print(deployedPredictorStatus)
    if deployedPredictorStatus in ('ACTIVE', 'FAILED'):
        break
    sleep(30)

# Fail loudly instead of silently finishing with an unusable deployment.
if deployedPredictorStatus == 'FAILED':
    raise RuntimeError('Deployment of predictor %s FAILED' % predictorName)