# Notebook for Final Model Training and Registry - Training on the entire dataset using the best hyperparameters

In [None]:
# Install packages on Databricks.
# The notebook treats a pre-existing `spark` variable as the sign that it is
# running on Databricks; without it we assume a remote (local) session.
working_remotely = 'spark' not in locals()
print(f'working_remotely: {working_remotely}')

if not working_remotely:
    import subprocess
    import sys

    # Run pip through the kernel's own interpreter so the packages land in the
    # environment this notebook actually uses, and raise if the install fails
    # instead of continuing with missing dependencies.
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt'],
        check=True,
    )

In [None]:
import os
import shutil
import sys
import torch
import numpy as np
import pandas as pd
import random
from scipy import stats
from tqdm import tqdm_notebook as tqdm

import mlflow

import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px

import optuna
from optuna.integration import PyTorchLightningPruningCallback
from optuna.visualization import (
    plot_optimization_history,
    plot_contour,
    plot_param_importances,
)

from darts import TimeSeries, concatenate
from darts.dataprocessing.transformers import Scaler
from darts.models import TFTModel
from darts.metrics import mape, smape, mae, ope, rmse
from darts.utils.statistics import check_seasonality, plot_acf
from darts.datasets import AirPassengersDataset, IceCreamHeaterDataset
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.utils.likelihood_models import QuantileRegression, GumbelLikelihood, GaussianLikelihood

from darts import TimeSeries
from darts.utils.timeseries_generation import (
    gaussian_timeseries,
    linear_timeseries,
    sine_timeseries,
)
from darts.models import (
    TFTModel,
    LinearRegressionModel,
    LightGBMModel,
    RNNModel,
    TCNModel,
    TransformerModel,
    NBEATSModel,
    BlockRNNModel,
    VARIMA,
)

from torchmetrics import MeanAbsolutePercentageError, MeanAbsoluteError
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

import warnings
warnings.filterwarnings("ignore")

import logging

# define log
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import torch
# Display whether PyTorch can see a CUDA device in this kernel.
torch.cuda.is_available()

In [None]:
# Move two levels up so that the `src` package and the `notebooks/...` paths
# used later resolve from the working directory.
# Guarded so that re-running this cell does not climb two more directories
# each time: once `src` and `notebooks` are visible we are already there.
if not (os.path.isdir('src') and os.path.isdir('notebooks')):
    os.chdir('../..')

In [None]:
# custom modules
import src.data_engineering.data_engineering as de
from src.utils import plotting

# Spark connection and tracking server

In [None]:
if not working_remotely:
    # Running on Databricks: `spark` already exists, only the tracking
    # server needs to be selected.
    mlflow.set_tracking_uri("databricks")
else:
    # Running off-cluster: read the connection settings from the environment
    # (populated from a .env file) and open a Databricks Connect session.
    from databricks.connect import DatabricksSession
    from dotenv import load_dotenv

    load_dotenv()

    session_builder = DatabricksSession.builder.remote(
        host=os.environ['DATABRICKS_HOST'],
        token=os.environ['DATABRICKS_TOKEN'],
        cluster_id=os.environ['CLUSTER_ID'],
    )
    spark = session_builder.getOrCreate()

In [None]:
# Show which session class `spark` is bound to (pre-existing or created above).
type(spark)

# get data from spark

In [None]:
# The three cached extracts must all be present in the working directory;
# if any one is missing, the Spark queries below are re-run.
cached_extracts = ('lmp_df.parquet', 'mtlf_df.parquet', 'mtrf_df.parquet')
files_here = os.listdir()
refresh_data = any(name not in files_here for name in cached_extracts)
refresh_data

In [None]:
## lmp
# Pull the hourly LMP table and cache it locally as parquet.
if refresh_data:
    query = 'SELECT * FROM sandbox_data_science.spp_weis.lmp_hourly'
    # Convert the query result to pandas directly. Collecting the rows and
    # rebuilding a Spark DataFrame from them first is a redundant round trip,
    # and it fails on an empty result because no schema can be inferred.
    df = spark.sql(query).toPandas()
    df.to_parquet('lmp_df.parquet')
    display(df.head())


In [None]:
## mtrf
# Pull the mtrf table and cache it locally as parquet.
if refresh_data:
    query = 'SELECT * FROM sandbox_data_science.spp_weis.mtrf'
    # Convert the query result to pandas directly. Collecting the rows and
    # rebuilding a Spark DataFrame from them first is a redundant round trip,
    # and it fails on an empty result because no schema can be inferred.
    df = spark.sql(query).toPandas()
    df.to_parquet('mtrf_df.parquet')
    display(df.head())

In [None]:
## mtlf
# Pull the mtlf table and cache it locally as parquet.
if refresh_data:
    query = 'SELECT * FROM sandbox_data_science.spp_weis.mtlf'
    # Convert the query result to pandas directly. Collecting the rows and
    # rebuilding a Spark DataFrame from them first is a redundant round trip,
    # and it fails on an empty result because no schema can be inferred.
    df = spark.sql(query).toPandas()
    df.to_parquet('mtlf_df.parquet')
    display(df.head())

# Load dataframes

In [None]:
# Read the cached extracts. The two forecast tables are ordered by
# `GMTIntervalEnd`; the price table is loaded as stored.
price_df = pd.read_parquet('lmp_df.parquet')
mtlf_df, mtrf_df = (
    pd.read_parquet(parquet_file).sort_values('GMTIntervalEnd')
    for parquet_file in ('mtlf_df.parquet', 'mtrf_df.parquet')
)

# Feature Engineering

In [None]:
# --- LMP: price frame, node names, and the price series built from them ---
psco_lmp_df, list_nodes_name, psco_price_df_long = de.get_psco_price_df(price_df)
lmp_series = de.create_psco_price_series(psco_lmp_df, list_nodes_name)

# --- MTLF: forecast series plus the averaged-actuals series ---
# alternative: de.create_mtlf_lmp_series(mtlf_df, psco_lmp_df, list_nodes_name)
mtlf_series, avg_act_series = de.create_mtlf_series(mtlf_df)

# --- MTRF: add the energy ratio, then its diff, then build the series ---
mtrf_ratio_df = de.add_enrgy_ratio_diff_to_mtrf(
    de.add_enrgy_ratio_to_mtrf(mtlf_df, mtrf_df)
)
mtrf_series = de.create_mtrf_series(mtrf_ratio_df)

In [None]:
# Inspect the node names returned by de.get_psco_price_df.
list_nodes_name

# Preprocess series

In [None]:
# Split every series into train / validation / full variants and scale the
# target (LMP) and past-covariate (average actuals) series. The scaler objects
# returned by de.scale_series are collected in `scalers` so they can be
# saved next to the model later.
# NOTE(review): `mtlf_series`, `avg_act_series` and `mtrf_series` are rebound
# below to the third value returned by the split helpers, so re-running this
# cell feeds the already-processed series back in — confirm this is harmless.
scalers = {}

start_time = pd.Timestamp('2023-04-02 00:00:00')
### TIME CHANGE ########################################################
# model window sizes in hours: two weeks of history in, one week out
input_chunk_length = 24*14
forecast_horizon = 24*7
# training_cutoff = pd.Timestamp("2023-06-01 06:00:00")
## first we need to modify the starting time in the lmp series and then calculate the training cutoff
lmp_series_starts = de.lmp_series_start_time(lmp_series, start_time)
training_cutoff = de.get_train_cutoff(lmp_series_starts, forecast_horizon)
print(f'training_cutoff: {training_cutoff}')
########################################################################
# lmp_series_horizon_dropped = lmp_series_drop_horizon(lmp_series, start_time, forecast_horizon)
lmp_series_train, lmp_series_val, lmp_series_all = de.get_lmp_train_test_series(lmp_series_starts, training_cutoff, forecast_horizon, input_chunk_length)
# target series: the only scale_series call that passes global_fit=True
(lmp_series_train_transformed, 
 lmp_series_val_transformed, 
 lmp_series_transformed,
 lmp_scaler) = de.scale_series(lmp_series_train, lmp_series_val, lmp_series_all, global_fit=True)
scalers['series'] = lmp_scaler

print(f'train start: {lmp_series_train.start_time()}')
print(f'train end: {lmp_series_train.end_time()}')
print(f'val start: {lmp_series_val.start_time()}')
print(f'val end: {lmp_series_val.end_time()}')


# load forecast (mtlf): split only; it is scaled later together with mtrf
# as part of the concatenated future covariates
mtlf_series_train, mtlf_series_val, mtlf_series = de.get_mtlf_train_test_series(
    mtlf_series, start_time, training_cutoff, forecast_horizon, input_chunk_length
    )
# (mtlf_series_train_transformed, 
#  mtlf_series_val_transformed, 
#  mtlf_series_transformed, 
#  mtlf_scaler) = de.scale_series(mtlf_series_train, mtlf_series_val, mtlf_series)
# scalers['mtlf'] = mtlf_scaler

# print(f'train start: {mtlf_series_train.start_time()}')
# print(f'train end: {mtlf_series_train.end_time()}')
# print(f'val start: {mtlf_series_val.start_time()}')
# print(f'val end: {mtlf_series_val.end_time()}')


# average actuals: split and scale; these become the past covariates ('pc')
avg_act_series_train, avg_act_series_val, avg_act_series = de.get_avg_act_train_test_series(avg_act_series, start_time, training_cutoff)
(avg_act_series_train_transformed, 
 avg_act_series_val_transformed, 
 avg_act_series_transformed,
 past_scaler) = de.scale_series(avg_act_series_train, avg_act_series_val, avg_act_series)
scalers['pc'] = past_scaler

print(f'train start: {avg_act_series_train.start_time()}')
print(f'train end: {avg_act_series_train.end_time()}')
print(f'val start: {avg_act_series_val.start_time()}')
print(f'val end: {avg_act_series_val.end_time()}')


# resource forecast (mtrf): split only; scaled later with the future covariates
mtrf_series_train, mtrf_series_val, mtrf_series = de.get_mtrf_train_test_series(
    mtrf_series, start_time, training_cutoff, forecast_horizon, input_chunk_length
    )
# mtrf_series_train_transformed, mtrf_series_val_transformed, mtrf_series_transformed = de.scale_mtrf_series(mtrf_series_train, mtrf_series_val, mtrf_series)

# print(f'train start: {mtrf_series_train.start_time()}')
# print(f'train end: {mtrf_series_train.end_time()}')
# print(f'val start: {mtrf_series_val.start_time()}')
# print(f'val end: {mtrf_series_val.end_time()}')

In [None]:
# The scaled average-actuals series serve as the past covariates.
past_cov_train, past_cov_val, past_cov = (
    avg_act_series_train_transformed,
    avg_act_series_val_transformed,
    avg_act_series_transformed,
)

In [None]:
# Future training covariates: the mtlf and mtrf training series joined along axis 1.
train_covariate_parts = [mtlf_series_train, mtrf_series_train]
future_covariates_train = concatenate(train_covariate_parts, axis=1)
future_covariates_train.values().shape

In [None]:
# Concatenate future validation covariates.
# The mtrf validation series is cut at one hour past the last mtlf validation
# timestamp before joining — presumably so both series end together; confirm.
# `hours=1` replaces the '1H' string, whose upper-case unit alias is
# deprecated in recent pandas releases.
end_time = mtlf_series_val.end_time() + pd.Timedelta(hours=1)
mtrf_series_val_end_droped = mtrf_series_val.drop_after(end_time)

future_covariates_val = concatenate([mtlf_series_val, mtrf_series_val_end_droped], axis=1)
future_covariates_val.values().shape

In [None]:
# Full-length future covariates: cut mtrf at `end_time`, then join it with
# the full mtlf series along axis 1.
mtrf_series_end_droped = mtrf_series.drop_after(end_time)
full_covariate_parts = [mtlf_series, mtrf_series_end_droped]

future_covariates = concatenate(full_covariate_parts, axis=1)
future_covariates.values().shape

In [None]:
# Scale the train / validation / full future covariates and keep the scaler
# under the 'fc' key for later serialisation.
scaled_future = de.scale_series(future_covariates_train, future_covariates_val, future_covariates)
(future_covariates_train_transformed,
 future_covariates_val_transformed,
 future_covariates_transformed,
 future_scaler) = scaled_future
scalers['fc'] = future_scaler

# Report the time span covered by each split.
for span_label, span_time in (
    ('train start', future_covariates_train.start_time()),
    ('train end', future_covariates_train.end_time()),
    ('val start', future_covariates_val.start_time()),
    ('val end', future_covariates_val.end_time()),
):
    print(f'{span_label}: {span_time}')

In [None]:
# One scaled series per node, in `list_nodes_name` order, for each split.
lmp_train_all = [lmp_series_train_transformed[node] for node in list_nodes_name]

lmp_val_all = [lmp_series_val_transformed[node] for node in list_nodes_name]

lmp_all = [lmp_series_transformed[node] for node in list_nodes_name]
    

In [None]:
# Inspect the collected scalers (keys 'series', 'pc' and 'fc').
scalers

# Final Model Training with MLFlow Experiment

## Set up MLFlow experiments 

In [None]:
from mlflow import MlflowClient

# Select the Databricks tracking server BEFORE building the client:
# MlflowClient resolves its tracking URI when it is constructed, so a client
# created earlier could look experiments up on a different server than the
# one `mlflow.set_experiment` talks to.
mlflow.set_tracking_uri("databricks")
client = MlflowClient()

user_name = 'Faezeh.Ebrahimi@xcelenergy.com'

# Initialize hyperparameter tuning experiment
experiment_name_tune = 'spp_weis_hyperparameter_tuning'
experiment_path_tune = f"/Users/{user_name}/{experiment_name_tune}"
mlflow.set_experiment(experiment_path_tune)
experiment_id_tune = client.get_experiment_by_name(experiment_path_tune).experiment_id

# Initialize final model training and logging experiment.
# This is set last so it remains the active experiment for the rest of the notebook.
experiment_name_final = 'spp_weis_final_model'
experiment_path_final = f"/Users/{user_name}/{experiment_name_final}"
mlflow.set_experiment(experiment_path_final)
experiment_id_final = client.get_experiment_by_name(experiment_path_final).experiment_id

## Select training series

In [None]:
# Train on at most the first ten nodes.
num_training_series = min(10, len(lmp_train_all))

# Matching slices of the train / validation / full-history series lists.
training_series = lmp_train_all[:num_training_series]
val_series = lmp_val_all[:num_training_series]
# used for the final fit on all data
all_series = lmp_all[:num_training_series]

# names of the nodes a prediction is made for
list_nodes_to_predict = list(list_nodes_name[:num_training_series])

In [None]:
# Sampling and plotting settings.
num_samples = 200
figsize = (16, 5)

# Bounds of the outer and inner percentile bands, and their labels.
lowest_q = 0.01
low_q = 0.1
high_q = 0.9
highest_q = 0.99
label_q_outer = f"{int(lowest_q * 100)}-{int(highest_q * 100)}th percentiles"
label_q_inner = f"{int(low_q * 100)}-{int(high_q * 100)}th percentiles"

# Quantile levels passed to QuantileRegression.
quantiles = [
    0.01, 0.05, 0.1, 0.15, 0.2, 0.25,
    0.3, 0.4, 0.5, 0.6, 0.7,
    0.75, 0.8, 0.85, 0.9, 0.95, 0.99,
]

## Retrieve the hyperparameter tuning experiment_id, the best run associated with it, and the best parameters

In [None]:

# Fetch the tuning runs ordered by ascending trial MAE (best first).
# `experiment_ids` is documented as a list of ids, so wrap the single id.
tuning_runs = mlflow.search_runs(
    experiment_ids=[experiment_id_tune],
    order_by=["metrics.trial_mae ASC"],
)

# Fail with a clear message instead of an IndexError on `.iloc[0]` when the
# tuning experiment has no runs yet.
if tuning_runs.empty:
    raise ValueError(
        f"no runs found in tuning experiment {experiment_id_tune}; "
        "run the hyperparameter tuning notebook first"
    )

best_run_tune = client.get_run(tuning_runs['run_id'].iloc[0])
best_params_tune = best_run_tune.data.params
best_params_tune

In [None]:
# MLflow stores parameter keys and values as strings, so the values have to be
# turned back into Python objects before they can be passed to the model.
def convert_params(key,value):
    """Convert one logged MLflow parameter back to a Python object.

    Args:
        key: parameter name as logged.
        value: parameter value as a string.
    Returns:
        A fresh QuantileRegression (built from the notebook-level `quantiles`)
        for the 'likelihood' key; otherwise the evaluated `value`.
    """
    if key != 'likelihood':
        # NOTE(review): eval() executes whatever string was logged; this is
        # only acceptable while the tuning experiment is a trusted source.
        return eval(value)
    return QuantileRegression(quantiles=quantiles)

# Rebuild typed hyperparameters from the string-valued params of the best tuning run.
best_params_converted = {k: convert_params(k, v) for k, v in best_params_tune.items()}

In [None]:
# Inspect the converted hyperparameters.
best_params_converted

## Define Custom Class for Darts TFT Global Model for model loading and prediction

In [None]:
import os
import pandas as pd
import mlflow
import mlflow.pyfunc
import time


class DartsTFTGlobalModel(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc wrapper that serves a saved Darts ``TFTModel``.

    The model and the scalers are not passed to the constructor; they are read
    from the logged artifacts ('model' and 'scalers') in ``load_context``.
    """

    def load_context(self, context):
        """Load the TFT model and the pickled scalers from ``context.artifacts``."""
        import pickle

        # Imported here so the wrapper does not depend on notebook globals
        # when it is loaded in a serving environment.
        import torch
        from darts.models import TFTModel

        # Map the weights to CPU so loading does not require a GPU.
        self.model = TFTModel.load(context.artifacts["model"], map_location=torch.device('cpu'))
        # NOTE(review): pickle.load runs arbitrary code from the file; only
        # load scaler artifacts that were produced by a trusted training run.
        with open(context.artifacts["scalers"], 'rb') as handle:
            self.scalers = pickle.load(handle)

    def __repr__(self):
        # The instance is created without a model (it is attached in
        # load_context), so fall back to a placeholder until then.
        model = getattr(self, 'model', None)
        if model is None:
            return f"{type(self).__name__}(model=<not loaded>)"
        return model.__repr__()

    def __str__(self):
        model = getattr(self, 'model', None)
        if model is None:
            return f"{type(self).__name__}(model=<not loaded>)"
        return model.__str__()

    def predict(self, context, model_input):
        """
        Custom predict function for TFTModel.
        Args:
            context: MLflow model context (not used here).
            model_input: pd.DataFrame with a single row. Columns by position:
                         0 - unscaled series to predict for (JSON),
                         1 - past covariate series (JSON),
                         2 - future covariate series (JSON),
                         3 - forecast horizon,
                         4 - number of samples.
        Returns:
            prediction: json-formatted time series in original scale.
        """
        import numpy as np
        from darts import TimeSeries

        # Read the first row by position (`.iloc[0, k]`) rather than by the
        # label 0, so inputs whose index does not start at 0 also work.
        # ".from_json() returns a float64 dtype"
        series = TimeSeries.from_json(model_input.iloc[0, 0]).astype(np.float32)
        past_covariates = TimeSeries.from_json(model_input.iloc[0, 1]).astype(np.float32)
        future_covariates = TimeSeries.from_json(model_input.iloc[0, 2]).astype(np.float32)
        # `.item()` requires exactly one row and yields plain Python scalars.
        forecast_horizon = model_input.iloc[:, 3].item()
        num_samples = model_input.iloc[:, 4].item()

        # Scale the inputs by dividing by the stored scalers.
        series_scaled = TimeSeries.from_dataframe(
            series.pd_dataframe() / self.scalers['series']
        )
        past_covariates_scaled = TimeSeries.from_dataframe(
            past_covariates.pd_dataframe() / self.scalers['pc']
        )
        future_covariates_scaled = TimeSeries.from_dataframe(
            future_covariates.pd_dataframe() / self.scalers['fc']
        )

        pred_series = self.model.predict(
            series=series_scaled,
            past_covariates=past_covariates_scaled,
            future_covariates=future_covariates_scaled,
            n=forecast_horizon,
            num_samples=num_samples,
        )

        # Undo the target scaling so the forecast is in the original units.
        pred_series = TimeSeries.from_dataframe(
            pred_series.pd_dataframe() * self.scalers['series']
        )

        # Return JSON text rather than a TimeSeries object.
        return TimeSeries.to_json(pred_series)
    
# Changed the output format of the prediction function due to the following exception error:
# Exception: Request failed with status 400, {"error_code": "BAD_REQUEST", "message": "Encountered an unexpected error while converting model response to JSON.Error 'Object of type TimeSeries is not JSON serializable'"


In [None]:
# Build a one-row example request used to infer the model signature.
# The second node in `list_nodes_name` supplies the example target series.
example_node = list_nodes_name[1]
node_series = lmp_series[example_node]
past_cov_series = avg_act_series
future_cov_series = future_covariates

# Each series travels as a JSON string; the horizon and sample count are scalars.
data = dict(
    series=[node_series.to_json()],
    past_covariates=[past_cov_series.to_json()],
    future_covariates=[future_cov_series.to_json()],
    n=forecast_horizon,
    num_samples=200,
)
df = pd.DataFrame(data)

# Placeholder output: a plain string stands in for the returned value.
ouput_example = 'the endpoint return json as a string'

In [None]:
# Registered models must carry a signature; derive it from the example
# request frame and the example output, and log it together with the model.
from mlflow.models import infer_signature

darts_tft_signature = infer_signature(model_input=df, model_output=ouput_example)
darts_tft_signature

## Train and log model

In [None]:
import pickle

run_backtest = True
artifact_path = "/dbfs/FileStore/models/global_tft_model/final_model"

# `encoders` and `torch_metrics` are consumed below but do not appear to be
# defined in this notebook. Check for them up front so a missing definition
# fails here rather than inside the MLflow run, where it would leave a failed
# run behind in the final-model experiment.
_missing_names = [name for name in ('encoders', 'torch_metrics') if name not in globals()]
if _missing_names:
    raise NameError(f"define {_missing_names} before running the final training cell")


def _backtest_mae_rmse(model, series, past_covs, future_covs):
    """Backtest `model` without retraining and return (mae, rmse) averaged over `series`."""
    acc = model.backtest(
        series=series,
        past_covariates=past_covs,
        future_covariates=future_covs,
        retrain=False,
        forecast_horizon=forecast_horizon,
        stride=25,
        metric=[mae, rmse],
        verbose=False,
    )
    # average the per-series [mae, rmse] scores
    acc_df = pd.DataFrame(
        np.mean(acc, axis=0).reshape(1, -1),
        columns=['mae', 'rmse']
    )
    return acc_df.mae[0], acc_df.rmse[0]


with mlflow.start_run(run_name='DartsGlobalModel_TFT_final_model', experiment_id=experiment_id_final) as run:
    print(f'run: {run}')

    # Hyperparameters of the best tuning run plus the fixed settings.
    tft_best_params = {
        "input_chunk_length": best_params_converted["input_chunk_length"],
        "output_chunk_length": best_params_converted["output_chunk_length"],
        "hidden_size": best_params_converted["hidden_size"],
        "lstm_layers": best_params_converted["lstm_layers"],
        "num_attention_heads": best_params_converted["num_attention_heads"],
        "dropout": best_params_converted["dropout"],
        "batch_size": best_params_converted["batch_size"],
        "n_epochs": best_params_converted["n_epochs"],
        "add_encoders": encoders,
        "likelihood": QuantileRegression(quantiles=quantiles),
        "optimizer_kwargs": best_params_converted["optimizer_kwargs"],
        "random_state": 42,
        "torch_metrics": torch_metrics,
    }

    # One covariate series per target series. These lists get their own names
    # so the notebook-level `future_covariates` TimeSeries is not overwritten.
    past_covariates_list = [past_cov for _ in range(num_training_series)]
    future_covariates_list = [future_covariates_transformed for _ in range(num_training_series)]

    tft_model = TFTModel(**tft_best_params)

    tft_model.fit(
        series=training_series,
        val_series=val_series,
        past_covariates=past_covariates_list,
        val_past_covariates=past_covariates_list,
        future_covariates=future_covariates_list,
        val_future_covariates=future_covariates_list,
        verbose=True)

    # log parameters for the run
    final_train_val_only = False
    refit = True
    epochs_trained = tft_model.epochs_trained
    n_epochs_final = 3

    # Copy so the run bookkeeping does not modify the model's parameter dict.
    params = dict(tft_best_params)
    params["refit"] = refit
    params["final_train_val_only"] = final_train_val_only
    params["epochs_trained"] = epochs_trained
    params["num_training_series"] = num_training_series
    params["forecast_horizon"] = tft_model.output_chunk_length
    params["n_epochs_final"] = n_epochs_final

    metrics = {}

    # backtesting takes a moment and generates a lot of output
    # we can turn it off for testing
    if run_backtest:
        # back test on validation data
        metrics['mae'], metrics['rmse'] = _backtest_mae_rmse(
            tft_model, val_series, past_covariates_list, future_covariates_list
        )

    # finish training on entire data set before logging model
    if final_train_val_only:
        final_train_series = val_series
    else:
        # for final training on all data
        final_train_series = all_series

    if refit:
        log.info('final training')
        # NOTE(review): reset_model() is called right before a fit limited to
        # `n_epochs_final` epochs, while the inline comment says "continue
        # training". Confirm whether the final model is meant to be
        # re-initialised here or to continue from the weights fitted above.
        tft_model.reset_model()
        tft_model.fit(
                series=final_train_series,
                past_covariates=past_covariates_list,
                future_covariates=future_covariates_list,
                verbose=True,
                epochs=n_epochs_final, # continue training
                )

    # final model back test on validation data
    # NOTE(review): with final_train_val_only False the final fit uses
    # `all_series`, which presumably contains the validation window, so these
    # scores may not be out-of-sample — confirm.
    metrics['mae_final'], metrics['rmse_final'] = _backtest_mae_rmse(
        tft_model, val_series, past_covariates_list, future_covariates_list
    )
    mlflow.log_metrics(metrics)

    # set up path to save model
    model_name = "tft_model"
    model_path = '/'.join([artifact_path, model_name])

    # Start from a clean directory. The removal is best-effort, so tolerate a
    # directory that is still there instead of failing in makedirs.
    shutil.rmtree(artifact_path, ignore_errors=True)
    os.makedirs(artifact_path, exist_ok=True)

    # log params
    params['model_name'] = model_name
    mlflow.log_params(params)

    # save tft model files (tft_model, tft_model.ckpt)
    # and load them to artifacts when logging the model
    tft_model.save(model_path)

    scaler_name = 'scalers.pkl'
    scaler_path = '/'.join([artifact_path, scaler_name])
    with open(scaler_path, 'wb') as handle:
        pickle.dump(scalers, handle)

    # map model artifacts in dictionary
    artifacts = {
        'model': model_path,
        'model.ckpt': model_path+'.ckpt',
        'scalers': scaler_path,
    }

    # log model
    mlflow.pyfunc.log_model(
        artifact_path='GlobalForecasting',
        code_path=[ 'notebooks/model_training/darts_tft_wrapper.py'],
        signature=darts_tft_signature,
        artifacts=artifacts,
        # model will get loaded from artifacts, we don't need instantiate with one
        python_model=DartsTFTGlobalModel(),
        pip_requirements=["-r notebooks/model_training/requirements.txt"],
    )

# Model Registry

# Load the model and make prediction

In [None]:
# Rebuild the full future-covariate series so that `future_covariates` is a
# single concatenated TimeSeries here, whatever earlier cells bound it to.
mtrf_series_end_droped = mtrf_series.drop_after(end_time)
future_covariates = concatenate([mtlf_series, mtrf_series_end_droped], axis=1)
future_covariates.values().shape

In [None]:
# One-row request frame for the locally loaded model: target series, past
# and future covariates as JSON strings, plus horizon and sample count.
node_series = lmp_series[list_nodes_name[1]]
past_cov_series = avg_act_series
future_cov_series = future_covariates

series_json = node_series.to_json()
past_cov_json = past_cov_series.to_json()
future_cov_json = future_cov_series.to_json()

data = {
    'series': [series_json],
    'past_covariates': [past_cov_json],
    'future_covariates': [future_cov_json],
    'n': forecast_horizon,
    'num_samples': 200,
}
df = pd.DataFrame(data)

In [None]:
from mlflow import MlflowClient

# Look up the final-model experiment and list its runs, lowest backtest MAE first.
client = MlflowClient()

# `experiment_path` is not defined in this notebook; the runs that logged the
# 'GlobalForecasting' model live in the final-model experiment.
exp = client.get_experiment_by_name(experiment_path_final)
if exp is None:
    raise ValueError(f"MLflow experiment not found: {experiment_path_final}")

# `mae` is logged as a metric, so order by `metrics.mae` (a `param.` key would
# not match anything), and pass the experiment id as a list.
runs = client.search_runs(
    experiment_ids=[exp.experiment_id],
    order_by=['metrics.mae ASC'],
    )
runs

In [None]:
# Inspect the run object bound by `with mlflow.start_run(...) as run` in the training cell.
run.to_dictionary()

In [None]:
# Flatten each run into one table row: run info, metrics and params side by
# side, ordered by ascending 'mae'.
run_dicts = [r.to_dictionary() for r in runs]
info_df = pd.DataFrame([d['info'] for d in run_dicts])
metrics_df = pd.DataFrame([d['data']['metrics'] for d in run_dicts])
params_df = pd.DataFrame([d['data']['params'] for d in run_dicts])
model_log_df = (
    pd.concat([info_df, metrics_df, params_df], axis=1)
    .sort_values('mae', ascending=True)
)
model_log_df

In [None]:
# TODO: need to update so only logs model if it's not already logged
# The table is sorted by 'mae', so its first row is the best run.
best_run_id = model_log_df['run_id'].iloc[0]
best_run_id

In [None]:
import mlflow
import pandas as pd

# URI of the pyfunc model logged under the best run, e.g.
# 'runs:/2eeff386c66f45d98c312a63cbd91557/GlobalForecasting'
logged_model = f'runs:/{best_run_id}/GlobalForecasting'

# Load it as a PyFuncModel and predict on the request DataFrame.
loaded_model = mlflow.pyfunc.load_model(model_uri=logged_model)
pred = loaded_model.predict(df)

In [None]:
# Parse the JSON prediction back into a TimeSeries and plot its mean over axis 1.
TimeSeries.from_json(pred).mean(axis=1).plot()

In [None]:
# Show the model URI that is registered below.
logged_model

In [None]:
#TODO: register model only if it hasn't been logged yet
# Register the logged model under its three-level <catalog>.<schema>.<model> name.
catalog, schema, model_name = "sandbox_data_science", "spp_weis", "GlobalForecasting"
registered_model_name = f"{catalog}.{schema}.{model_name}"

mlflow.set_registry_uri("databricks-uc")
mlflow.register_model(model_uri=logged_model, name=registered_model_name)

In [None]:
# for programmatic deployments see
# https://docs.databricks.com/_extras/notebooks/source/machine-learning/model-serving-endpoint-python.html


# Prediction using API endpoints

In [None]:
# One-row request frame sent to the serving endpoint.
node_series = lmp_series[list_nodes_name[1]]
past_cov_series = avg_act_series
future_cov_series = future_covariates

# Series columns hold JSON strings wrapped in single-item lists, which
# makes the frame exactly one row long.
data = {'series': [node_series.to_json()]}
data['past_covariates'] = [past_cov_series.to_json()]
data['future_covariates'] = [future_cov_series.to_json()]
data['n'] = forecast_horizon
data['num_samples'] = 200
df = pd.DataFrame(data)

In [None]:
import os
import requests
import numpy as np
import pandas as pd
import json

def create_tf_serving_json(data):
    """Wrap `data` in an ``{'inputs': ...}`` payload.

    A dict is converted value by value with ``.tolist()``; anything else is
    converted as a whole with ``.tolist()``.
    """
    if isinstance(data, dict):
        payload = {name: data[name].tolist() for name in data.keys()}
    else:
        payload = data.tolist()
    return {'inputs': payload}

def score_model(
    dataset,
    url='https://dbc-beada314-1494.cloud.databricks.com/serving-endpoints/spp_weis/invocations',
    timeout=300,
):
    """Send `dataset` to the model serving endpoint and return the parsed JSON reply.

    Args:
        dataset: pd.DataFrame (sent in 'dataframe_split' form) or an object
            accepted by create_tf_serving_json.
        url: invocation URL of the endpoint; defaults to the spp_weis endpoint.
        timeout: seconds to wait for the endpoint before giving up. Without a
            timeout the request could block the notebook indefinitely.
    Returns:
        The decoded JSON body of the response.
    Raises:
        KeyError: if the DATABRICKS_TOKEN environment variable is not set.
        Exception: if the endpoint answers with a status other than 200.
    """
    api_token = os.environ['DATABRICKS_TOKEN']
    headers = {'Authorization': f'Bearer {api_token}', 'Content-Type': 'application/json'}

    if isinstance(dataset, pd.DataFrame):
        ds_dict = {'dataframe_split': dataset.to_dict(orient='split')}
    else:
        ds_dict = create_tf_serving_json(dataset)
    data_json = json.dumps(ds_dict, allow_nan=True)

    response = requests.post(url, headers=headers, data=data_json, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f'Request failed with status {response.status_code}, {response.text}')
    return response.json()

In [None]:
# Score the example request against the serving endpoint.
endpoint_pred = score_model(df)
# endpoint_pred

In [None]:
# The reply carries the forecast as JSON under 'predictions'; parse it into a TimeSeries.
preds = TimeSeries.from_json(endpoint_pred['predictions'])

In [None]:
# Plot the mean of the endpoint forecast over axis 1.
preds.mean(axis=1).plot()

In [None]:
def get_quantile_df(preds, quantiles=(0.1, 0.5, 0.9)):
    """Compute quantiles of the forecast for every (time, node) pair.

    Args:
        preds: object exposing ``pd_dataframe()``, returning a frame indexed
            by 'time' whose columns (named 'component') end in a sample
            suffix such as ``_s0``.
        quantiles: iterable of quantile levels; defaults to 0.1, 0.5 and 0.9.
    Returns:
        pd.DataFrame indexed by ('time', 'node') with one column per level.
    """
    # long format: one row per (time, component) value
    plot_df = (
        preds.pd_dataframe()
        .reset_index()
        .melt(id_vars='time')
        .rename(columns={'component':'node'})
    )

    # drop the trailing sample suffix so all samples of a node share one name
    plot_df['node'] = ['_'.join(n.split('_')[:-1]) for n in plot_df['node']]

    # quantiles across the samples of each (time, node) group
    q_df = plot_df.groupby(['time', 'node']).quantile(list(quantiles))

    # the unnamed quantile level becomes 'level_2' after reset_index;
    # spread it into columns
    q_pivot = q_df.reset_index().pivot(columns='level_2', index=['time', 'node'])

    # drop the outer 'value' column level left by the pivot
    q_pivot.columns = q_pivot.columns.droplevel()

    # remove the column index name
    q_pivot.columns.name = None

    return q_pivot


In [None]:
# Quantile table of the endpoint forecast, indexed by (time, node).
q_df = get_quantile_df(preds)
q_df

In [None]:
def get_mean_df(preds):
    """Average the forecast samples for every (time, node) pair.

    Args:
        preds: object exposing ``pd_dataframe()``, returning a frame indexed
            by 'time' whose columns (named 'component') end in a sample suffix.
    Returns:
        pd.DataFrame indexed by ('time', 'node') with a single 'mean' column.
    """
    long_df = preds.pd_dataframe().reset_index().melt(id_vars='time')
    long_df = long_df.rename(columns={'component': 'node'})

    # strip the trailing sample suffix from every component name
    long_df['node'] = long_df['node'].map(lambda name: '_'.join(name.split('_')[:-1]))

    # mean over the samples of each (time, node) group
    grouped_mean = long_df.groupby(['time', 'node']).mean()
    return grouped_mean.rename(columns={'value': 'mean'})


In [None]:
# Put the mean and the quantile columns side by side, matching rows on the
# shared (time, node) index.
mean_part = get_mean_df(preds)
quantile_part = get_quantile_df(preds)
plot_df = mean_part.merge(quantile_part, left_index=True, right_index=True)