In [None]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
import fbprophet
import pickle
import matplotlib.pyplot as plt
import time
import warnings
import itertools
from datetime import datetime, timedelta
# Silence every warning for the remainder of the session.
warnings.simplefilter("ignore")

# Let pandas print long/wide frames without truncating them.
pd.set_option("display.max_rows", 9999)
pd.set_option("display.max_columns", 100)

def show_ts(ts, forecast=None, forecast2=None, title="Forecast Plot"):
    """Plot an observed series, optionally overlaid with up to two forecasts.

    Args:
        ts: Object whose ``plot(label=..., figsize=...)`` returns the axes to
            draw on (called with a DataFrame column in this file).
        forecast: Optional first overlay; plotted on the same axes when given.
        forecast2: Optional second overlay; plotted on the same axes when given.
        title: Title of the figure.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    ax = ts.plot(label="Observed", figsize=(10, 3))

    # Each overlay gets its own legend label so two forecasts stay
    # distinguishable (previously both were labelled 'Forecast').
    overlays = (("Forecast", forecast), ("Forecast 2", forecast2))
    any_overlay = False
    for label, series in overlays:
        if series is not None:
            series.plot(ax=ax, label=label)
            any_overlay = True

    # As before, the legend is only drawn when at least one overlay exists.
    if any_overlay:
        plt.legend()

    ax.set_xlabel('Date')
    ax.set_ylabel('Messages/Second')
    plt.title(title)
    plt.show()

def stan_init(m):
    """Collect parameters of an already fitted model ``m`` into a plain dict.

    Only the first row of each entry of ``m.params`` is read: 'k', 'm' and
    'sigma_obs' contribute the first value of that row, while 'delta' and
    'beta' contribute the whole row. In this file the result is passed as
    ``init=`` to the next ``fit`` call.
    """
    fitted = m.params
    first_values = {name: fitted[name][0][0] for name in ('k', 'm', 'sigma_obs')}
    first_rows = {name: fitted[name][0] for name in ('delta', 'beta')}

    init = dict(first_values)
    init.update(first_rows)
    return init
    
# Timing table shared by the experiment loop: rows are selected through its
# `dataset` and `sampling_rate` columns, the "Prophet" / "Prophet_tune"
# columns are filled in, and the table is written back to the same CSV path.
durations_df = pd.read_csv("results/durations.csv")

In [None]:
# Datasets to evaluate; each is read from ../data/<name>_<sampling_rate>.csv.
data_names = ["avazu","IoT","wiki_de","wiki_en","horton","retailrocket","taxi", "alibaba", "google"]

# Parallel lists: entry k of each describes one sampling-rate configuration.
sampling_rates = ["1h"]
multipliers = [1]
forecast_horizons = [12]

# Fraction of every series used for the initial fit; the rest is the test set.
train_test_split = 0.8


def _new_prophet():
    # One place for the model settings shared by the initial fit and all refits.
    return Prophet(yearly_seasonality=False, weekly_seasonality=False, uncertainty_samples=0)


for data_name in data_names:
    # `multiplier` is not used in this cell; it is still bound so the name
    # keeps the value the original loop gave it.
    for sampling_rate, multiplier, fh in zip(sampling_rates, multipliers, forecast_horizons):
        print()
        print()
        print(data_name, sampling_rate)

        df = pd.read_csv("../data/" + data_name + "_" + sampling_rate + ".csv", parse_dates=True)
        df.columns = ["ds", "y"]

        # Chronological split: the first part trains, the remainder is tested.
        split_at = int(len(df) * train_test_split)
        train = df.iloc[:split_at]
        test = df.iloc[split_at:]
        n_test = len(test)

        print("Train shape:", train.shape)
        print("Test shape:", test.shape)

        # Time the initial fit on the training part only.
        start_time = time.time()
        model = _new_prophet().fit(train)
        training_duration = time.time() - start_time

        row_mask = (durations_df.dataset == data_name) & (durations_df.sampling_rate == sampling_rate)
        durations_df.loc[row_mask, "Prophet"] = training_duration

        results_path = "results/" + data_name + "_" + sampling_rate + "_results.csv"
        try:
            results_df = pd.read_csv(results_path, index_col=0, parse_dates=True)
        except FileNotFoundError:
            # No results file yet: start one from the test observations.
            # NOTE(review): assumes column "t" holds the observed test values
            # indexed by timestamp (it is plotted as "Observed" below) -
            # confirm against the notebooks that create these files.
            results_df = test.set_index("ds")[["y"]].rename(columns={"y": "t"})
            results_df.index = pd.to_datetime(results_df.index)

        if len(results_df) != n_test:
            raise ValueError(
                "results file {!r} has {} rows but the test set has {}".format(
                    results_path, len(results_df), n_test))

        future = test[["ds"]]

        # Sum of all forecasts that cover each test row. A float array is used
        # instead of chained `results_df["Prophet"].iloc[...]` assignment,
        # which may silently write to a temporary copy.
        pred_sum = np.zeros(n_test)
        pred_sum[:fh] = model.predict(future.iloc[:fh]).yhat.values
        print((datetime.now()).strftime("%d.%m.%Y %H:%M:%S"))

        # predicting and tuning: reveal one more test row per step, refit
        # warm-started from the previous model and forecast the next fh rows.
        start_time = time.time()
        for step in range(1, n_test):
            # train and test are contiguous slices of df, so this equals
            # train followed by the first `step` test rows.
            history = df.iloc[:split_at + step]
            model = _new_prophet().fit(history, init=stan_init(model))

            # Both slices are truncated identically at the end of the test set.
            window = slice(step, step + fh)
            pred_sum[window] += model.predict(future.iloc[window]).yhat.values

            done = step + 1
            if done % 50 == 0:
                print(done, "/", n_test)
                print((datetime.now()).strftime("%d.%m.%Y %H:%M:%S"))

        end_time = time.time()

        # Averaged over the number of test rows (the loop above performs one
        # refit fewer than that).
        tuning_duration = (end_time - start_time) / len(results_df)
        durations_df.loc[row_mask, "Prophet_tune"] = tuning_duration

        # Test row j (1-based) is covered by min(j, fh) forecasts; dividing
        # the sum by that count yields the mean forecast per row.
        coverage = np.minimum(np.arange(1, n_test + 1), fh)
        results_df["Prophet"] = pred_sum / coverage

        show_ts(results_df["t"], results_df["Prophet"])
        results_df.to_csv(results_path)
        durations_df.to_csv("results/durations.csv", index=False)