In [112]:
import json
import joblib
import pickle
import os
import pandas as pd
import numpy as np
# from google.cloud import storage
from skopt import BayesSearchCV
from tsextract.feature_extraction.extract import build_features_forecast
from sklearn.preprocessing import StandardScaler
# from scripts.modeltrain.regressionalgorithm import RegressionAlgorithm

In [113]:
def scale_lagged_df(df, label_count):
    """Fit standard scalers for the feature and label columns of ``df``.

    The last ``label_count`` columns of ``df`` are treated as labels and all
    columns before them as features.  When ``label_count`` is 0, one scaler
    is fitted on every column and returned for both roles.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are scaled.
    label_count : int
        Number of trailing columns that hold labels.

    Returns
    -------
    tuple
        ``(scaler_features, scaler_label)``, both fitted ``StandardScaler``
        instances.

    Raises
    ------
    Exception
        Any error raised while selecting columns or fitting propagates to the
        caller.  Previously it was printed and ``None`` was returned, which
        made callers that unpack the result fail with an unrelated
        "cannot unpack NoneType" error.
    """
    columns = df.columns.values

    if label_count != 0:
        scaler_features = StandardScaler().fit(df[columns[:-label_count]])
        # Force a 2-D (rows, label_count) array for the label scaler.
        label_values = np.array(df[columns[-label_count:]]).reshape(-1, label_count)
        scaler_label = StandardScaler().fit(label_values)
    else:
        # No label columns: the same fitted scaler serves both roles.
        scaler_features = StandardScaler().fit(df[columns])
        scaler_label = scaler_features

    return scaler_features, scaler_label

In [114]:
def inverse_transform(pred_series, last_actual_observation):
    """Undo first-order differencing on a series of predicted differences.

    The first element is offset by ``last_actual_observation`` and the result
    is accumulated with ``cumsum``.  The input series is not modified.

    Parameters
    ----------
    pred_series : pandas.Series
        Predicted differences, in chronological order.
    last_actual_observation : scalar
        Last observed (undifferenced) value preceding the predictions.

    Returns
    -------
    pandas.Series
        The undifferenced series, with the same index as ``pred_series``.
        An empty input yields an empty copy.

    Raises
    ------
    Exception
        Errors propagate to the caller.  Previously they were printed and
        ``None`` was returned, which callers would silently store as the
        prediction.
    """
    series_undifferenced = pred_series.copy()

    # Nothing to anchor on an empty series; `.iat[0]` would raise IndexError.
    if len(series_undifferenced) == 0:
        return series_undifferenced

    series_undifferenced.iat[0] = series_undifferenced.iat[0] + last_actual_observation
    return series_undifferenced.cumsum()

In [115]:
def get_forecast(model, range_start, range_end):
    """Produce a forecast from a fitted model and return it as an array.

    For each label in ``model.labels`` the entry of
    ``model.features_requests`` whose ``'name'`` equals the label is passed
    (without its ``'name'`` key) to ``build_features_forecast`` together with
    that label's column of ``model.train_df``.  Each resulting frame is
    standard-scaled with a scaler fitted on that same frame, the scaled
    frames are concatenated column-wise and the last ``len(labels)`` columns
    are dropped before calling ``model.predict``.  Predictions are mapped
    back through ``model.scaler_label``, multiplied by the per-day-of-month
    standard deviation of the predictions, and handed to
    ``inverse_transform`` with the last training value of the label.

    Parameters
    ----------
    model : object
        Must expose ``features_requests``, ``train_df``, ``scaler_label``,
        ``labels``, ``date_column``, ``freq`` and ``predict``.
    range_start, range_end : str or None
        Inclusive window bounds formatted as ``'%Y-%m-%d'``.  The window is
        applied only when both bounds are given; otherwise no rows are
        filtered out.

    Returns
    -------
    numpy.ndarray
        Values of the frame built for the *last* label only, with columns
        ``pred_<label>`` and ``forecast_vol``.  Frames for earlier labels are
        computed but not returned.

    Raises
    ------
    ValueError
        If ``model.labels`` is empty or a bound does not match ``'%Y-%m-%d'``.
    IndexError
        If no features request is named after one of the labels.
    """
    import datetime

    features_requests = model.features_requests
    train_df = model.train_df
    scaler_label = model.scaler_label
    labels = model.labels
    date_column = model.date_column
    freq = model.freq

    if len(labels) == 0:
        raise ValueError("model.labels is empty; nothing to forecast")

    # Parse the window once, up front: it does not depend on the label, and a
    # malformed date should fail before any prediction work is done.
    window_start = datetime.datetime.strptime(range_start, '%Y-%m-%d') if range_start else None
    window_end = datetime.datetime.strptime(range_end, '%Y-%m-%d') if range_end else None

    forecast_dfs = []
    for label in labels:
        df = pd.DataFrame({date_column: train_df.index, label: train_df[label]})
        features_request = [fr for fr in features_requests if fr.get('name') == label][0]
        # The request is copied so that dropping "name" does not alter the model's own dict.
        features_request_copy = features_request.copy()
        features_request_copy.pop("name")
        target_series = df[label]
        forecast_dfs.append(
            build_features_forecast(target_series, features_request_copy, include_tzero=True))

    # Use the shortest frame so that every scaled block has the same number
    # of rows and can be concatenated column-wise.
    tail = min(frame.shape[0] for frame in forecast_dfs)

    feature_arrays = []
    for build_forecast_df in forecast_dfs:
        # Label count 0: one scaler fitted on all columns of this frame.
        sub_scaler_features, _ = scale_lagged_df(build_forecast_df, 0)
        feature_arrays.append(sub_scaler_features.transform(build_forecast_df[-tail:]))

    merged_array = np.concatenate(tuple(feature_arrays), axis=1)
    pred = model.predict(merged_array[:, :-(len(labels))])
    # NOTE(review): reshape(-1, 1) leaves a single column, so the column
    # lookup `pred[:tail, [i]]` below can only succeed for i == 0 -- confirm
    # whether more than one label is meant to be supported.
    pred = scaler_label.inverse_transform(np.array(pred).reshape(-1, 1))

    # NOTE(review): the bounds are expressed in days, so the range has `tail`
    # entries only for a daily `freq` -- confirm for other frequencies.
    forecast_range = pd.date_range(start=train_df.index[-1] + datetime.timedelta(days=1),
                                   end=train_df.index[-1] + datetime.timedelta(days=tail),
                                   freq=freq)
    print(forecast_range)
    forecast_range = forecast_range.to_list()

    for i, label in enumerate(labels):
        pred_column = 'pred_' + label

        df_pred = pd.DataFrame({date_column: forecast_range})
        df_pred[pred_column] = np.ravel(pred[:tail, [i]])
        df_pred = df_pred.set_index(date_column)

        target_series = df_pred[pred_column]
        print(target_series)

        # Standard deviation of the predictions grouped by day of month,
        # looked up again for every forecast date.
        volatility = target_series.groupby(target_series.index.day).std()
        forecast_vol = target_series.index.map(lambda d: volatility.loc[d.day])
        df_pred['forecast_vol'] = forecast_vol
        target_series = target_series * forecast_vol

        # `.iloc[-1]` selects by position explicitly; `series[-1]` relied on
        # the deprecated integer-key fallback of Series.__getitem__.
        target_series = inverse_transform(target_series, train_df[label].iloc[-1])
        df_pred[pred_column] = target_series

        if window_start and window_end:
            in_window = (df_pred.index >= window_start) & (df_pred.index <= window_end)
            df_pred = df_pred.loc[in_window]

    # Only the frame of the last label survives the loop.
    return np.array(df_pred)

In [116]:
# Read the request body from disk; the `with` block closes the file handle
# as soon as the JSON has been parsed.
with open('input.json') as input_file:
    body = json.load(input_file)
instances = body['instances']
# Only the first instance of the request is used.
input_payload = instances[0]

In [117]:
def download_model_from_gcp():
    """Download the model pickle from the storage bucket to local disk.

    Blob ``mlp/model.pkl`` of bucket ``regression-model`` is written to
    ``model_storage/model.pkl``.

    NOTE(review): ``storage`` is presumably ``google.cloud.storage``; its
    import is commented out at the top of the notebook, so as written the
    call fails and the failure text is returned -- confirm before use.

    Returns
    -------
    str
        Empty string on success; otherwise the text of the exception that
        was raised (which is also printed).
    """
    try:
        client = storage.Client(project="hclsw-gcp-xai")
        model_blob = client.get_bucket("regression-model").blob("%s/model.pkl" % ("mlp"))
        # Write the blob contents to the local destination path.
        model_blob.download_to_filename("model_storage/model.pkl")
    except Exception as ex:
        failure = str(ex)
        print (failure)
        return failure
    return ""

In [118]:
# download_model_from_gcp()

In [119]:
import sys

# Make the local `cuburt` directory importable -- presumably so classes
# referenced by the saved model resolve during loading (TODO confirm).
# Guarded so that re-running this cell does not append duplicate entries.
if 'cuburt' not in sys.path:
    sys.path.append('cuburt')

# NOTE(security): joblib.load unpickles the file and can therefore execute
# arbitrary code; only load model artifacts from a trusted source.
model = joblib.load("model_storage/model-new.sav")

In [120]:
# Forecast with the loaded model, keeping only rows dated from 2017-05-31
# through 2017-06-30 (both bounds inclusive).
output = get_forecast(model, '2017-05-31', '2017-06-30')

DatetimeIndex(['2016-05-01', '2016-05-02', '2016-05-03', '2016-05-04',
               '2016-05-05', '2016-05-06', '2016-05-07', '2016-05-08',
               '2016-05-09', '2016-05-10',
               ...
               '2017-12-21', '2017-12-22', '2017-12-23', '2017-12-24',
               '2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28',
               '2017-12-29', '2017-12-30'],
              dtype='datetime64[ns]', length=609, freq='D')
date
2016-05-01    1.510651
2016-05-02    1.206081
2016-05-03    1.236827
2016-05-04    1.501203
2016-05-05    1.216756
                ...   
2017-12-26   -0.154669
2017-12-27   -0.414170
2017-12-28   -0.274680
2017-12-29   -0.477388
2017-12-30   -0.242026
Name: pred_dcoilwtico, Length: 609, dtype: float64


  target_series = inverse_transform(target_series, train_df[l][-1])


In [122]:
# Display the forecast array wrapped in a dict under the "predictions" key.
{"predictions": output}

{'predictions': array([[82.71756488,  0.45656119],
        [82.56858234,  0.57937611],
        [82.6519082 ,  0.49779481],
        [82.12199045,  0.67051688],
        [81.93520312,  0.58459708],
        [81.9574459 ,  0.53025176],
        [81.81314965,  0.62611023],
        [81.8981058 ,  0.46717942],
        [81.44105496,  0.59462174],
        [81.39367086,  0.59611465],
        [81.21763002,  0.50186202],
        [81.06715696,  0.39594249],
        [80.92993111,  0.38818472],
        [80.6750434 ,  0.45855794],
        [80.49492185,  0.68028012],
        [80.28437593,  0.55959632],
        [80.13539161,  0.4246276 ],
        [79.77260254,  0.63647824],
        [79.55154511,  0.56161382],
        [79.34917629,  0.43474136],
        [79.08497812,  0.39127955],
        [78.75227337,  0.46630509],
        [78.52759952,  0.61800388],
        [78.0720009 ,  0.50246888],
        [77.96370443,  0.57400911],
        [77.81179811,  0.60445762],
        [77.52981281,  0.62410934],
        [76.9