In [None]:
import os

# NumExpr sizes its thread pool from NUMEXPR_MAX_THREADS at the moment it is
# first imported, and pandas imports NumExpr (when it is installed) as part of
# `import pandas`. The variable therefore has to be set before any of the
# imports below, otherwise the limit is silently ignored.
os.environ['NUMEXPR_MAX_THREADS'] = '12'

from pathlib import Path
import pandas as pd
import time
import experiment_config_gpflow as config
import experiment_funcs_gpflow as ef
from fbprophet import Prophet
from analysis_gpflow import Analyst
import scipy.stats as sps

In [None]:
# `periodicity` is a pandas frequency string ('1D', '1W', '1M', '1Q', '1Y', ...) used to build the synthetic date axis
def _as_float_column(values, label):
    """Return `values` as a 1-D float Series with a fresh 0..n-1 index.

    Accepts anything `pd.DataFrame` can wrap as a single column (Series,
    one-column DataFrame, 1-D or (n, 1) array, list). The original index is
    discarded so that values are later paired with dates by position rather
    than by index label.

    Raises:
        ValueError: if `values` does not have exactly one column.
    """
    frame = pd.DataFrame(values)
    if frame.shape[1] != 1:
        raise ValueError(
            "{} must be a single series, got {} columns".format(label, frame.shape[1]))
    return frame.iloc[:, 0].astype(float).reset_index(drop=True)


def prophet_fcast(train, test, periodicity, interval_width=0.95):
    """Fit Prophet on `train` and forecast the horizon covered by `test`.

    The series carries no timestamps of its own, so a synthetic, regularly
    spaced date axis starting at 2000-01-01 is generated with `periodicity`.
    The training values are standardised (mean / population std of `train`)
    before fitting, and the predictive columns are mapped back afterwards so
    the forecast is directly comparable with `test`.

    Args:
        train: training observations; a single series (Series, 1-D or (n, 1)
            array, one-column DataFrame, list).
        test: hold-out observations in the same format. Only its length is
            used: it fixes the number of steps to forecast.
        periodicity: pandas frequency string ('1D', '1W', '1M', ...).
        interval_width: width of Prophet's uncertainty interval; the default
            0.95 makes `yhat_lower`/`yhat_upper` the 2.5% / 97.5% bounds.

    Returns:
        The DataFrame produced by `Prophet.predict`, one row per test step.
        `yhat`, `yhat_lower` and `yhat_upper` are in the units of the inputs;
        the remaining component columns (trend, seasonalities, ...) are left
        in the standardised units the model was fitted in.

    Raises:
        ValueError: if `train` or `test` has more than one column.
    """
    train_y = _as_float_column(train, 'train')
    horizon = len(_as_float_column(test, 'test'))
    train_size = len(train_y)

    centre = train_y.mean()
    # ddof=0 matches NumPy's default, i.e. what `ndarray.std()` returns.
    scale = train_y.std(ddof=0)
    if not scale > 0:
        # Constant series: dividing by zero would hand Prophet NaN/inf
        # targets, so only centre the data.
        scale = 1.0

    dates = pd.date_range(start='2000-01-01', periods=train_size + horizon,
                          freq=periodicity)

    # `.values` strips the index so rows are matched to dates by position.
    train_df = pd.DataFrame({
        'ds': dates[:train_size],
        'y': ((train_y - centre) / scale).values,
    })
    # Prophet only needs the `ds` column to predict.
    future_df = pd.DataFrame({'ds': dates[train_size:]})

    model = Prophet(interval_width=interval_width)
    model = model.fit(train_df)
    forecast = model.predict(future_df)

    # Undo the standardisation on the predictive columns so callers can score
    # the forecast against `test` without rescaling anything themselves.
    for column in ('yhat', 'yhat_lower', 'yhat_upper'):
        forecast[column] = forecast[column] * scale + centre

    return forecast

In [None]:

# SEE experiment_config_gpflow.py for the experiment configurations
# I have added [:1] to the second and third loop so it runs only once (one file, one model)
# Collects one results frame per trained model and one per Prophet baseline run.
df_results = []

# 97.5% standard-normal quantile (~1.96). Prophet is fitted with a 95% interval,
# so (yhat_upper - yhat) / z_95 is the standard deviation of a Gaussian with the
# same upper bound. Computed once because it does not depend on the loop.
z_95 = sps.norm.ppf(0.975)

for folder in config.DATA_FOLDERS:

    location = Path(config.DATA_ROOT).joinpath(folder)
    # os.listdir returns entries in arbitrary order; sorting makes the `[:1]`
    # slice below select the same file on every machine and every run.
    trainFiles = sorted(name for name in os.listdir(location) if 'train' in name)

    for file in trainFiles[:1]:

        # read_experiment_data returns a lazy loader (a function that the
        # training routine calls itself); call it here to get the data now.
        data = ef.read_experiment_data(folder, location, file)()
        idx = ef.get_idx(file)

        for kName in list(config.EXPERIMENTS.keys())[:1]:

            print(kName)

            # Experiment input: per-experiment settings, each falling back to
            # the configured default when `kName` has no specific entry.
            inputs = ef.ExperimentInput(kName, data, config.EXPERIMENTS[kName],
                                        config.LIKELIHOOD_EXPERIMENT.get(kName, config.LIKELIHOOD_DEFAULT),
                                        config.MODEL_EXPERIMENT.get(kName, config.MODEL_DEFAULT),
                                        config.PRIORS_EXPERIMENT.get(kName, config.PRIOR_DEFAULT),
                                        config.SCORE.get(kName, config.SCORE_DEFAULT),
                                        config.INDUCING_EXPERIMENT.get(kName, config.INDUCING_DEFAULT),
                                        config.N_RESTARTS.get(kName, config.N_RESTARTS_DEF))

            # train_model returns a list with one entry per checkpoint (number
            # of restarts); the config file is set to keep only the last one.
            # Each entry is (reg, goodness, in_, failed):
            #   gpflow model, score, input, failed-training flag.
            # Training parameters live in model_gpflow.py.
            models = ef.train_model(inputs)
            for m in models:
                # Serialises the results to the results folder and returns
                # them as a dataframe.
                out_results = ef.save_model_results(m)
                df_results.append(out_results)

        # Prophet baseline on the same train/test split.
        # Electricity dataset has daily observations.
        # data['ytrain'], data['ytest'] are already normalized.
        forecast = prophet_fcast(data['ytrain'], data['ytest'], '1D')
        analyst = Analyst(None, data, 'prophet', 'waic', period=1)
        # Only the upper half of the interval is used, i.e. the band is
        # treated as symmetric around yhat.
        stdFcast = (forecast[['yhat_upper']].values - forecast[['yhat']].values) / z_95
        out_prophet = analyst.measures_dict(forecast[['yhat']].values, stdFcast, data['ytest'], 'prophet')
        out_prophet['folder'] = folder
        out_prophet['idx'] = idx
        df_results.append(pd.DataFrame([out_prophet]))

In [None]:
# Stack the per-run result frames into one table. Each piece carries its own
# index (every Prophet frame is a single row labelled 0), so renumber the rows
# to keep the labels of the combined table unique.
results = pd.concat(df_results, ignore_index=True)

In [None]:
# Mean of each error metric per model name. The metric columns are selected
# before aggregating so that non-numeric columns (e.g. 'folder') never reach
# `mean()`, which raises TypeError for them on pandas >= 2.0.
results.groupby('name')[['crps', 'mae', 'rmse', 'smape']].mean()