In [None]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import sys
sys.path.append("../")
import warnings
warnings.filterwarnings('ignore')

from kats.consts import TimeSeriesData

# Lectura de datos

In [None]:
# Data source: https://www.kaggle.com/datasets/bobnau/daily-website-visitors/code
# Read the raw CSV into a DataFrame and take a look at its first rows.
daily_website_df = pd.read_csv(
    filepath_or_buffer="../kats/data/daily-website-visitors.csv"
)
daily_website_df.head()
# Function to remove commas
def remove_commas(x):
    """Convert a number written with thousands separators to ``float``.

    Args:
        x: Value to convert, e.g. ``"1,234"``. Non-string input (an ``int`` or
            ``float`` cell, for instance) is passed through ``str`` first, so a
            value that is already numeric no longer raises ``AttributeError``.

    Returns:
        The numeric value as a ``float``.

    Raises:
        ValueError: If the text left after removing the commas is not a number.
    """
    return float(str(x).replace(',', ''))
# Apply the preprocessing functions

# Parse the date column and turn the comma-formatted count columns into floats.
daily_website_df['Date'] = pd.to_datetime(daily_website_df['Date'])
for count_column in ['Page.Loads', 'Unique.Visits', 'First.Time.Visits', 'Returning.Visits']:
    daily_website_df[count_column] = daily_website_df[count_column].apply(remove_commas)

# Build the weekday mask while 'Day.Of.Week' still exists (it is dropped below).
# Rows with Day.Of.Week == 2 form the subset this notebook names "monday".
df_mask = daily_website_df['Day.Of.Week'] == 2
# .copy() gives the subset its own data: it is edited below, and editing a
# slice of daily_website_df directly is what triggers chained-assignment issues.
daily_website_df_monday = daily_website_df[df_mask].copy()

# Both frames keep only the date plus three count columns, renamed to the
# "time" + value-column layout used when building the time series objects.
unused_columns = ['Row', 'Day', 'Day.Of.Week', 'Returning.Visits']
ts_column_names = ["time", "PageLoads", "UniqueVisits", "FirstTimeVisits"]

daily_website_df = daily_website_df.drop(columns=unused_columns)
daily_website_df.columns = ts_column_names

daily_website_df_monday = daily_website_df_monday.drop(columns=unused_columns)
daily_website_df_monday.columns = ts_column_names


## First study

In [None]:

# Summary statistics, transposed so that each data column becomes one row.
daily_website_df.describe().transpose()

## First visualization

In [None]:
# Wrap both DataFrames as Kats time series: the full data and the "monday" subset.
daily_website_ts = TimeSeriesData(df=daily_website_df)
daily_website_ts_monday = TimeSeriesData(df=daily_website_df_monday)

In [None]:
# Plot the three value columns for the first 365 observations and save the figure.
first_year_columns = ["PageLoads", "UniqueVisits", "FirstTimeVisits"]
first_year_ts = daily_website_ts[0:365]
first_year_ts.plot(cols=first_year_columns)
plt.xlabel('time(days)')
plt.ylabel('number')
plt.title('First year of Daily Website Data', fontsize=30)
plt.savefig('../images/DWD_first_year', bbox_inches='tight')

In [None]:
# Plot the three value columns over the whole series and save the figure.
full_series_columns = ["PageLoads", "UniqueVisits", "FirstTimeVisits"]
daily_website_ts.plot(cols=full_series_columns)
plt.xlabel('time(days)')
plt.ylabel('number')
plt.title('Daily Website Data', fontsize=30)
plt.savefig('../images/DWD', bbox_inches='tight')

In [None]:
# Plot the three value columns of the "monday" subset and save the figure.
monday_columns = ["PageLoads", "UniqueVisits", "FirstTimeVisits"]
daily_website_ts_monday.plot(cols=monday_columns)
plt.xlabel('time(weeks)')
plt.ylabel('number')
plt.title('Mondays of Daily Website Data', fontsize=30)
plt.savefig('../images/DWD_mondays', bbox_inches='tight')

In [None]:
# One time series per metric: each keeps "time" plus a single value column.
daily_website_PL_ts = TimeSeriesData(
    daily_website_df.drop(columns=['UniqueVisits', 'FirstTimeVisits'])
)
daily_website_UV_ts = TimeSeriesData(
    daily_website_df.drop(columns=['PageLoads', 'FirstTimeVisits'])
)
daily_website_FTV_ts = TimeSeriesData(
    daily_website_df.drop(columns=['PageLoads', 'UniqueVisits'])
)

# Last expression of the cell: displays the FirstTimeVisits series.
daily_website_FTV_ts

# First comparison: PageLoads

In [None]:
## HYPERPARAMETERS
import kats.utils.time_series_parameter_tuning as tpt
from kats.consts import ModelEnum, SearchMethodEnum, TimeSeriesData


from ax.core.parameter import ChoiceParameter, FixedParameter, ParameterType
from ax.models.random.sobol import SobolGenerator
from ax.models.random.uniform import UniformGenerator
# Ignore every warning category from this point on.
warnings.simplefilter('ignore')

# Series analysed in this section: the PageLoads-only time series.
ts = daily_website_PL_ts

## SARIMA

In [None]:
from kats.models.sarima import SARIMAModel, SARIMAParams
# Search space for the grid search: each of the orders p, d and q is tried
# with the values 1 and 2.
parameters_grid_search = [
    {
        "name": order_name,
        "type": "choice",
        "values": [1, 2],
        "value_type": "int",
        "is_ordered": True,
    }
    for order_name in ("p", "d", "q")
]

# Grid-search tuner over the candidate values in parameters_grid_search.
parameter_tuner_grid = tpt.SearchMethodFactory.create_search_method(
    parameters=parameters_grid_search,
    selected_search_method=SearchMethodEnum.GRID_SEARCH,
    objective_name="evaluation_metric",
)


# 80/20 split by position: the first 80% of the observations are used for
# fitting, the remaining 20% for scoring.
split = int(len(ts) * 0.8)
train_ts, test_ts = ts[0:split], ts[split:]

# Objective for the grid search: fit a SARIMA model on the training data and
# return its MAE on the test data
def evaluation_function(params):
    """Score one (p, d, q) candidate by its mean absolute error on ``test_ts``.

    Args:
        params: Mapping with the keys ``'p'``, ``'d'`` and ``'q'``.

    Returns:
        Mean absolute difference between the forecast (``'fcst'`` column) for
        ``len(test_ts)`` steps and the values of ``test_ts``.
    """
    candidate = SARIMAParams(p=params['p'], d=params['d'], q=params['q'])
    fitted = SARIMAModel(train_ts, candidate)
    fitted.fit()
    forecast = fitted.predict(steps=len(test_ts))
    absolute_errors = np.abs(forecast['fcst'].values - test_ts.value.values)
    return np.mean(absolute_errors)


# Evaluate the grid with the objective function defined in this cell.
parameter_tuner_grid.generate_evaluate_new_parameter_values(
    evaluation_function=evaluation_function
)

# Retrieve parameter tuning results
parameter_tuning_results_grid = (
    parameter_tuner_grid.list_parameter_value_scores()
)

# Lowest mean error in the results table. Stored as `best_mean_error` rather
# than `min`, so the builtin min() stays usable in every later cell.
best_mean_error = parameter_tuning_results_grid['mean'].min()

# Parameters of the first row that attains the lowest mean error.
is_best = parameter_tuning_results_grid['mean'] == best_mean_error
parameter_tuning_results_grid[is_best].parameters.values[0]

In [None]:
# SARIMA configuration used in the model comparison.
# The seasonal period is 7: the rationale recorded for this setting is that
# the data has a weekly behaviour, whereas the previous period of 365
# describes a yearly cycle and contradicted that rationale.
sarima_params = SARIMAParams(
    p=2,
    d=1,
    q=2,
    trend='ct',  # constant plus linear time trend
    seasonal_order=(1, 0, 1, 7),  # (P, D, Q, s) with s = 7 for the weekly behaviour
)

## Linear y Cuadrático : sin parámetros

In [None]:
from kats.models.linear_model import LinearModelParams, LinearModel
from kats.models.quadratic_model import QuadraticModelParams, QuadraticModel
# No values are tuned for these two models: both parameter objects are
# created without arguments.
lin_params = LinearModelParams()
qua_params = QuadraticModelParams()

## STLF

In [None]:
from kats.models.stlf import STLFModel, STLFParams
# Search space for the STLF grid search: the forecasting method and the
# period length m.
parameters_grid_search = [
    dict(
        name="method",
        type="choice",
        values=['theta', 'linear', 'quadratic'],
        value_type="str",
        is_ordered=True,
    ),
    dict(
        name="m",
        type="choice",
        values=[7, 30, 365],
        value_type="int",
        is_ordered=True,
    ),
]

# Grid-search tuner over the STLF candidates in parameters_grid_search.
parameter_tuner_grid = tpt.SearchMethodFactory.create_search_method(
    parameters=parameters_grid_search,
    selected_search_method=SearchMethodEnum.GRID_SEARCH,
    objective_name="evaluation_metric",
)

# Objective for the grid search: fit an STLF model on the training data and
# return its MAE on the test data
def evaluation_function(params):
    """Score one STLF candidate by its mean absolute error on ``test_ts``.

    Args:
        params: Mapping with the keys ``'method'`` and ``'m'``.

    Returns:
        Mean absolute difference between the forecast (``'fcst'`` column) for
        ``len(test_ts)`` steps and the values of ``test_ts``.
    """
    candidate = STLFParams(method=params['method'], m=params['m'])
    fitted = STLFModel(train_ts, candidate)
    fitted.fit()
    forecast = fitted.predict(steps=len(test_ts))
    absolute_errors = np.abs(forecast['fcst'].values - test_ts.value.values)
    return np.mean(absolute_errors)


# Evaluate the grid with the objective function defined in this cell.
parameter_tuner_grid.generate_evaluate_new_parameter_values(
    evaluation_function=evaluation_function
)

# Retrieve parameter tuning results
parameter_tuning_results_grid = (
    parameter_tuner_grid.list_parameter_value_scores()
)

# Lowest mean error in the results table. Stored as `best_mean_error` rather
# than `min`, so the builtin min() stays usable in every later cell.
best_mean_error = parameter_tuning_results_grid['mean'].min()

# Parameters of the first row that attains the lowest mean error.
is_best = parameter_tuning_results_grid['mean'] == best_mean_error
parameter_tuning_results_grid[is_best].parameters.values[0]

In [None]:
# STLF configuration used in the model comparison: theta method, period m = 7.
stlf_params = STLFParams(method='theta', m=7)

## Harmonic Regression

In [None]:
from kats.models.harmonic_regression import HarmonicRegressionModel, HarmonicRegressionParams
# Search space for the harmonic regression grid search: the period and the
# Fourier order.
# NOTE(review): the largest period here is 360, while the STLF grid in this
# notebook uses 365 - confirm which value is intended.
parameters_grid_search = [
    dict(
        name="period",
        type="choice",
        values=[7, 30, 360],
        value_type="float",
        is_ordered=True,
    ),
    dict(
        name="fourier_order",
        type="choice",
        values=[4, 5, 6, 7, 8],
        value_type="int",
        is_ordered=True,
    ),
]

# Grid-search tuner over the harmonic regression candidates in
# parameters_grid_search.
parameter_tuner_grid = tpt.SearchMethodFactory.create_search_method(
    parameters=parameters_grid_search,
    selected_search_method=SearchMethodEnum.GRID_SEARCH,
    objective_name="evaluation_metric",
)

# Objective for the grid search: fit a harmonic regression on the training
# data and return its MAE on the test data
def evaluation_function(params):
    """Score one harmonic regression candidate by its MAE on ``test_ts``.

    Args:
        params: Mapping with the keys ``'period'`` and ``'fourier_order'``.

    Returns:
        Mean absolute difference between the forecast (``'fcst'`` column) at
        the timestamps of ``test_ts`` and the values of ``test_ts``.
    """
    candidate = HarmonicRegressionParams(
        period=params['period'],
        fourier_order=params['fourier_order'],
    )
    fitted = HarmonicRegressionModel(train_ts, candidate)
    fitted.fit()
    # Unlike the step-based forecasts used elsewhere in this notebook, this
    # model is asked for predictions at explicit dates.
    forecast = fitted.predict(dates=test_ts.time)
    absolute_errors = np.abs(forecast['fcst'].values - test_ts.value.values)
    return np.mean(absolute_errors)


# Evaluate the grid with the objective function defined in this cell.
parameter_tuner_grid.generate_evaluate_new_parameter_values(
    evaluation_function=evaluation_function
)

# Retrieve parameter tuning results
parameter_tuning_results_grid = (
    parameter_tuner_grid.list_parameter_value_scores()
)

# Lowest mean error in the results table. Stored as `best_mean_error` rather
# than `min`, so the builtin min() stays usable in every later cell.
best_mean_error = parameter_tuning_results_grid['mean'].min()

# Parameters of the first row that attains the lowest mean error.
is_best = parameter_tuning_results_grid['mean'] == best_mean_error
parameter_tuning_results_grid[is_best].parameters.values[0]

In [None]:
# Harmonic regression configuration: period 30 with Fourier order 4.
hr_params = HarmonicRegressionParams(period=30, fourier_order=4)

## Evaluación de modelos

In [None]:
from kats.utils.backtesters import BackTesterSimple


# Error metrics of every single-model backtest, keyed by model name.
backtester_simple_errors = {}
ALL_ERRORS = ['mae', 'mape', 'mase', 'mse', 'rmse', 'smape']


def _run_simple_backtest(params, model_class):
    """Run one 80/20 ``BackTesterSimple`` backtest on ``ts``.

    Args:
        params: Parameter object handed to ``BackTesterSimple`` as ``params``.
        model_class: Model class handed to ``BackTesterSimple`` as
            ``model_class``.

    Returns:
        The ``BackTesterSimple`` instance after ``run_backtest()`` was called;
        its ``errors`` attribute holds the metrics listed in ``ALL_ERRORS``.
    """
    backtester = BackTesterSimple(
        error_methods=ALL_ERRORS,
        data=ts,
        params=params,
        train_percentage=80,
        test_percentage=20,
        model_class=model_class,
    )
    backtester.run_backtest()
    return backtester


###############################################################################
backtester_simple_sarima = _run_simple_backtest(sarima_params, SARIMAModel)
backtester_simple_errors['sarima'] = dict(backtester_simple_sarima.errors)

###############################################################################
# The harmonic regression backtest was commented out in this notebook; it is
# kept disabled here.
# backtester_simple_hr = _run_simple_backtest(hr_params, HarmonicRegressionModel)
# backtester_simple_errors['harmonic-regression'] = dict(backtester_simple_hr.errors)

###############################################################################
backtester_simple_lin = _run_simple_backtest(lin_params, LinearModel)
backtester_simple_errors['linear'] = dict(backtester_simple_lin.errors)

###############################################################################
backtester_simple_qua = _run_simple_backtest(qua_params, QuadraticModel)
backtester_simple_errors['quadratic'] = dict(backtester_simple_qua.errors)

###############################################################################
backtester_simple_stlf = _run_simple_backtest(stlf_params, STLFModel)
backtester_simple_errors['stlf'] = dict(backtester_simple_stlf.errors)


# One column per model, one row per error metric.
pd.DataFrame.from_dict(backtester_simple_errors)

## Ensemble

In [None]:
from kats.models.ensemble.ensemble import EnsembleParams, BaseModelParams
from kats.models.ensemble.kats_ensemble import KatsEnsemble
# Base models combined by the ensembles below: SARIMA, linear and quadratic,
# each with the parameter object configured earlier in the notebook.
ensemble_base_models = [
    BaseModelParams("sarima", sarima_params),
    BaseModelParams("linear", lin_params),
    BaseModelParams("quadratic", qua_params),
    # BaseModelParams("stlf", stlf_params)
]
model_ensemble_params = EnsembleParams(ensemble_base_models)

from kats.models.ensemble.bates_granger_ensemble import BatesGrangerEnsemble
from kats.models.ensemble.weighted_avg_ensemble import WeightedAvgEnsemble
from kats.models.ensemble.mean_ensemble import MeanEnsembleModel
from kats.models.ensemble.median_ensemble import MedianEnsembleModel


In [None]:
# Error metrics of every ensemble backtest, keyed by ensemble name.
backtester_ensemble_errors = {}
ALL_ERRORS = ['mae', 'mape', 'mase', 'mse', 'rmse', 'smape']


def _run_ensemble_backtest(model_class):
    """Run one 80/20 ``BackTesterSimple`` backtest of an ensemble on ``ts``.

    Every ensemble is given the same ``model_ensemble_params``; only the
    ensemble class differs between runs.

    Args:
        model_class: Ensemble class handed to ``BackTesterSimple`` as
            ``model_class``.

    Returns:
        The ``BackTesterSimple`` instance after ``run_backtest()`` was called;
        its ``errors`` attribute holds the metrics listed in ``ALL_ERRORS``.
    """
    backtester = BackTesterSimple(
        error_methods=ALL_ERRORS,
        data=ts,
        params=model_ensemble_params,
        train_percentage=80,
        test_percentage=20,
        model_class=model_class,
    )
    backtester.run_backtest()
    return backtester


###############################################################################
backtester_ensemble_median = _run_ensemble_backtest(MedianEnsembleModel)
backtester_ensemble_errors['median'] = dict(backtester_ensemble_median.errors)

###############################################################################
backtester_ensemble_mean = _run_ensemble_backtest(MeanEnsembleModel)
backtester_ensemble_errors['mean'] = dict(backtester_ensemble_mean.errors)

###############################################################################
backtester_ensemble_weighted_average = _run_ensemble_backtest(WeightedAvgEnsemble)
backtester_ensemble_errors['weighted_average'] = dict(
    backtester_ensemble_weighted_average.errors
)

###############################################################################
backtester_ensemble_bates_granger = _run_ensemble_backtest(BatesGrangerEnsemble)
backtester_ensemble_errors['bates&granger'] = dict(
    backtester_ensemble_bates_granger.errors
)

# One column per ensemble, one row per error metric.
pd.DataFrame.from_dict(backtester_ensemble_errors)

In [None]:
# Ensemble error table rounded to whole numbers for easier reading.
pd.DataFrame.from_dict(backtester_ensemble_errors, orient='columns').round(decimals=0)


In [None]:
# Single-model error table rounded to whole numbers for easier reading.
pd.DataFrame.from_dict(backtester_simple_errors, orient='columns').round(decimals=0)