## Predictor

What you need:
Specify the name of the endpoint of the deployed model and the test data channel (the S3 path of a JSON Lines file).

What it does:
Queries the endpoint with each test time series and collects the predictions. Further analysis can be added afterwards.


In [None]:
import json
import sagemaker
import pandas as pd
import numpy as np
import time

In [None]:
# Notebook interface
#   Input:  S3 path of a JSON Lines file (one time series per line)
#   Output: list of the original time series and list of the predicted time series

# Deployed endpoint to query, and the session used to reach it.
ep = "DEMO-deepar-2019-02-07-03-16-16-206"
sagemaker_session = sagemaker.Session()

# Test data location.
input_path = "s3://sagemaker-deepar20190120/sagemaker/wiki-test-deepar/data/test_subset/test.json"

# Time-series settings.
freq = "H"               # pandas frequency alias of the series (hourly)
prediction_length = 48   # number of time points to forecast per series
context_length = 118     # NOTE(review): not referenced by the cells shown here

In [None]:
class DeepARPredictor(sagemaker.predictor.RealTimePredictor):
    """Predictor that exchanges time series with an endpoint as JSON.

    Subclass of `sagemaker.predictor.RealTimePredictor` that turns
    `pandas.Series` objects into a JSON request and turns the JSON response
    back into `pandas.DataFrame` objects indexed by the forecast timestamps.

    `set_prediction_parameters` must be called before `predict`.

    NOTE(review): `RealTimePredictor` is the SageMaker Python SDK v1 name;
    confirm the installed SDK version still provides it.
    """

    def set_prediction_parameters(self, freq, prediction_length):
        """Set the time frequency and prediction length parameters. This method **must** be called
        before being able to use `predict`.

        Parameters:
        freq -- string indicating the time frequency
        prediction_length -- integer, number of predicted time points

        Return value: none.
        """
        self.freq = freq
        self.prediction_length = prediction_length

    def predict(self, ts, cat=None, encoding="utf-8", num_samples=100, quantiles=("0.1", "0.5", "0.9")):
        """Requests the prediction for the time series listed in `ts`, each with the (optional)
        corresponding category listed in `cat`.

        Parameters:
        ts -- list of `pandas.Series` objects, the time series to predict
        cat -- list of integers (default: None)
        encoding -- string, encoding to use for the request (default: "utf-8")
        num_samples -- integer, number of samples to compute at prediction time (default: 100)
        quantiles -- sequence of strings specifying the quantiles to compute
                     (default: "0.1", "0.5", "0.9")

        Return value: list of `pandas.DataFrame` objects, each containing the predictions
        """
        prediction_times = [self.__first_prediction_time(series) for series in ts]
        req = self.__encode_request(ts, cat, encoding, num_samples, quantiles)
        res = super(DeepARPredictor, self).predict(req)
        return self.__decode_response(res, prediction_times, encoding)

    def __first_prediction_time(self, series):
        """Return the timestamp one step after the last point of `series`."""
        # Adding a bare integer to a Timestamp only works on old pandas
        # releases; add an explicit offset instead. Prefer the frequency
        # carried by the index and fall back to the configured one.
        step = getattr(series.index, "freq", None)
        if step is None:
            step = pd.tseries.frequencies.to_offset(self.freq)
        return series.index[-1] + step

    def __encode_request(self, ts, cat, encoding, num_samples, quantiles):
        """Build the encoded JSON request body for the series in `ts`."""
        instances = [
            series_to_obj(series, cat[k] if cat else None)
            for k, series in enumerate(ts)
        ]
        configuration = {
            "num_samples": num_samples,
            "output_types": ["quantiles"],
            # list() so that any sequence serializes as a JSON array.
            "quantiles": list(quantiles),
        }
        http_request_data = {"instances": instances, "configuration": configuration}
        return json.dumps(http_request_data).encode(encoding)

    def __decode_response(self, response, prediction_times, encoding):
        """Turn the raw response into one quantile DataFrame per requested series."""
        predictions = json.loads(response.decode(encoding))['predictions']
        list_of_df = []
        for k, start in enumerate(prediction_times):
            # pd.date_range replaces the DatetimeIndex(start=..., periods=...)
            # constructor form, which newer pandas releases no longer accept.
            prediction_index = pd.date_range(start=start, freq=self.freq,
                                             periods=self.prediction_length)
            list_of_df.append(pd.DataFrame(data=predictions[k]['quantiles'], index=prediction_index))
        return list_of_df


# Helper: convert one time series into a JSON-serializable record.
def series_to_obj(ts, cat=None):
    """Return a dict describing the series `ts`.

    Parameters:
    ts -- `pandas.Series`; its first index label becomes the "start" string
          and its values become the "target" list
    cat -- optional category value, stored under "cat" when not None

    Return value: dict with keys "start", "target" and, if given, "cat".
    """
    # Missing values must become None so that json.dumps writes `null`;
    # a float NaN would be written as the bare token NaN, which is not valid JSON.
    target = [None if pd.isna(value) else value for value in ts]
    obj = {"start": str(ts.index[0]), "target": target}
    if cat is not None:
        obj["cat"] = cat
    return obj


# Helper: render one time series as a single JSON Lines row.
def series_to_jsonline(ts, cat=None):
    """Return the JSON text of the record `series_to_obj(ts, cat)` builds."""
    record = series_to_obj(ts, cat)
    return json.dumps(record)

In [None]:
# Attach to the deployed endpoint; requests are sent as JSON.
predictor = DeepARPredictor(endpoint=ep, sagemaker_session=sagemaker_session, content_type="application/json")

# Required before the first predict() call: the response decoder reads both values.
predictor.set_prediction_parameters(freq, prediction_length)

In [None]:
# Input: JSON Lines data, given as a string, a file path or an S3 URL
# Output: list of original time series, list of prediction results (one per series)
def get_predictions(input_path):
    """Load the test series and query the predictor once per series.

    Uses the module-level `freq`, `prediction_length` and `predictor`.

    Parameters:
    input_path -- JSON Lines source readable by `pandas.read_json`; each record
                  must provide a "start" timestamp and a "target" list

    Return value: tuple `(ts_orig, ts_predicted)`.
    ts_orig -- list of `pandas.Series`, the full series with missing values set to 0
    ts_predicted -- list with one entry per series, each entry being the value
                    returned by `predictor.predict([series])`
    """
    df_ts = pd.read_json(input_path, lines=True)
    ts_orig = []
    ts_to_pred = []
    # Read the fields by name rather than by column position, and size every
    # index from its own target so series of different lengths are accepted.
    for start, target in zip(df_ts["start"], df_ts["target"]):
        index = pd.date_range(start=start, freq=freq, periods=len(target))
        ts = pd.Series(data=target, index=index)
        # Only replace missing values with 0 for plotting, NOT for prediction!
        ts_orig.append(ts.replace({np.nan: 0}))
        # Predictor only recognizes the null literal as a missing value, not np.nan.
        ts_with_nulls = ts.replace({np.nan: None})
        # Hold out the last `prediction_length` points: they are what gets forecast.
        ts_to_pred.append(ts_with_nulls.iloc[:-prediction_length])

    ts_predicted = []
    for series in ts_to_pred:
        ts_predicted.append(predictor.predict([series]))
        # Short pause between requests (presumably to avoid overloading the endpoint).
        time.sleep(0.01)
    return ts_orig, ts_predicted


In [None]:
# Main (slow) step: load the test series from `input_path` and query the
# endpoint once per series.
time_series_wiki, list_of_wiki_pred = get_predictions(input_path)


In [None]:
"""
Amazing Analysis and Plot Below (not really)
"""
