In [1]:
from src.process import read_and_split_data, subsample_features
from src.model import train_candidates, cluster_predictions, compute_cluster_representatives, root_mean_square_error, plot_clustering, get_best_num_of_clusters, save_validation_predictions
from src.drift import PageHinkley, DataDrift
from src.predict import predict_n_steps_for_ensemble, predict_one_step_for_ensemble, get_weights, final_prediction_ensemble
import os
import pandas as pd
from config import EXPERIMENT_NAME, VALIDATION_WINDOW_SIZE, WEIGHTS_WINDOW_SIZE, EVALUATION_WINDOW, TARGET_INDEX, SUBSET_SIZE, K, MODEL
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from statsmodels.tsa.api import VAR
from sklearn.preprocessing import MinMaxScaler
from tslearn.clustering import TimeSeriesKMeans, silhouette_score
from tslearn.utils import to_time_series
import numpy as np

%load_ext autoreload
%autoreload 2



In [2]:
train, val, test = read_and_split_data(EXPERIMENT_NAME + '/NEW-DATA-1.T15-PREPROCESSED.csv', val_size=VALIDATION_WINDOW_SIZE)

In [4]:
# Hyperparameters
target_index = TARGET_INDEX

In [5]:
y_train = train.iloc[:, target_index]
X_train = train.drop(train.columns[target_index], axis=1)
X_test = test.drop(test.columns[target_index], axis=1)
X_test = X_test.iloc[WEIGHTS_WINDOW_SIZE:, :]
y_test = test.iloc[WEIGHTS_WINDOW_SIZE:, target_index]

X_scaler = MinMaxScaler()
X_train = X_scaler.fit_transform(X_train)
X_test = X_scaler.fit_transform(X_test)

In [9]:
model_RF = RandomForestRegressor(max_depth=2, random_state=0)
model_RF.fit(X_train, y_train)
predictions_RF = model_RF.predict(X_test)

In [11]:
root_mean_square_error(test.iloc[WEIGHTS_WINDOW_SIZE:, target_index].to_numpy(), predictions_RF)

0.021006391451659192

In [10]:
save_name = EXPERIMENT_NAME + '_Baseline_RF_results.csv'
path = F'/content/drive/MyDrive/Colab Notebooks/oafbs_results/{save_name}'
# path = os.path.join('results\sml10-dataset', save_name)

pd.DataFrame(predictions_RF).to_csv(path)

In [15]:
model_GBM = GradientBoostingRegressor()
model_GBM.fit(X_train, y_train)
predictions_GBM = model_GBM.predict(X_test)

In [16]:
root_mean_square_error(test.iloc[WEIGHTS_WINDOW_SIZE:, target_index].to_numpy(), predictions_GBM)

0.035405366917978236

In [17]:
save_name = EXPERIMENT_NAME + '_Baseline_GBM_results.csv'
path = F'/content/drive/MyDrive/Colab Notebooks/oafbs_results/{save_name}'
# path = os.path.join('results\sml10-dataset', save_name)

pd.DataFrame(predictions_GBM).to_csv(path)

In [6]:
var_y_train = y_train.to_numpy()
var_y_train = np.reshape(var_y_train, (-1, 1))
var_train = np.concatenate((X_train, var_y_train), axis=1)
var = VAR(var_train)
model = var.fit(1)
test_horizon = y_test.shape[0]
prediction_VAR = model.forecast(y=var_train[-1:], steps=test_horizon)
prediction_VAR = prediction_VAR[:, prediction_VAR.shape[1] - 1]

In [7]:
root_mean_square_error(test.iloc[WEIGHTS_WINDOW_SIZE:, target_index].to_numpy(), prediction_VAR)

0.07986407836143426

In [22]:
save_name = EXPERIMENT_NAME + '_Baseline_VAR_results.csv'
path = F'/content/drive/MyDrive/Colab Notebooks/oafbs_results/{save_name}'
# path = os.path.join('results\sml10-dataset', save_name)

pd.DataFrame(prediction_VAR).to_csv(path)

In [8]:
# ph = PageHinkley(delta=0.005, threshold=0.025)
# hoeffding = DataDrift(threshold=0.97)

In [9]:
# train_test = pd.concat([train, test], axis=0)

In [11]:
# train_size = train.shape[0]

In [None]:
# # Drift-aware VAR
# index = 0
# test_predictions = []
# evaluation_sliding_window_start = 0
# version = 1
# while index < (len(test)):
#     pred_at_t = model.forecast(y=var_train[-1:], steps=1)
#     pred_at_t = prediction_VAR[:, prediction_VAR.shape[1] - 1]
#     test_predictions.append(pred_at_t)
#     if(len(test_predictions) >= EVALUATION_WINDOW):
#         X = test.iloc[evaluation_sliding_window_start:evaluation_sliding_window_start+EVALUATION_WINDOW, test.columns!=test.columns[target_index]]
#         y = test.iloc[evaluation_sliding_window_start:evaluation_sliding_window_start+EVALUATION_WINDOW, target_index]
#         y_predicted = test_predictions[evaluation_sliding_window_start:evaluation_sliding_window_start+EVALUATION_WINDOW]
#         error = root_mean_square_error(y.to_numpy(), y_predicted)
#         print(f'Error for window [{evaluation_sliding_window_start}:{evaluation_sliding_window_start+EVALUATION_WINDOW}] is {error}')
#         ph.add_element(error)
#         print(f'Difference: {ph.sum - ph.minimum}')

#         glob_min = hoeffding.min
#         hoeffding.add_element(X, y)
#         print(f'Hoeffding global minimum: {glob_min}. Latest minimum: {hoeffding.min2}. Sample count: {hoeffding.sample_count}')
#         if ph.detected_change():
#             print('Change in error detected.')
#         if hoeffding.detected_change():
#             print('Change in data detected')
#         if ph.detected_change() | hoeffding.detected_change():
#             new_train = train_test.iloc[:train_size+index, ]
            
#             var_y_train = y_train.to_numpy()
#             var_y_train = np.reshape(var_y_train, (-1, 1))
#             var_train = np.concatenate((X_train, var_y_train), axis=1)
#             var = VAR(var_train)
#             model = var.fit(1)
#         evaluation_sliding_window_start += 1
#     index = index + 1