In [1]:
import pandas as pd
import numpy as np
import random
random.seed(42)
from datetime import datetime, timedelta
from sklearn import preprocessing
%load_ext autoreload
%autoreload 2
from GBRT_for_TSF.utils import evaluate_with_xgboost
from mpmf.utils import get_top_1_motif, get_top_k_motifs, compute_point_after_average

from sklearn.multioutput import MultiOutputRegressor
import xgboost as xgb

In [2]:
# ================== Parameters ==================
# Covariate toggle; in this notebook it is only read when calling evaluate_with_xgboost.
include_covariates = False
# Motif feature mode:
#   0 = no motif info, 1 = Top-1 Motif, 2 = Top-K Motifs (Direct),
#   3 = Top-K Motifs (Average), 4 = Top-K Motifs (Weighted Average)
include_motif_information = 4
k_motifs = 3  # passed as `k` to get_top_k_motifs
no_points_after_motif = 1  # passed as `l` to the motif helpers (points to consider)
include_itself = False  # forwarded unchanged to the motif helpers
# ================================================


# Forecast horizon and look-back window, both in time steps.
num_periods_output = num_periods_input = 24

ALL_Test_Data, ALL_Test_Prediction = [], []

# The raw file has no header row; after transposing, each row of `data` is one time series.
file_name = "exchange_rate.txt"
data_path = "../data/" + file_name
data = pd.DataFrame(pd.read_csv(data_path, sep=",", header=None)).T

In [3]:
def _motif_feature_frame(TimeSeries):
    """Return the motif feature columns selected by ``include_motif_information``.

    Supported modes: 1 (top-1 motif), 2 (top-k motifs, direct), 3 (top-k motifs,
    unweighted average) and 4 (top-k motifs, weighted average).

    Raises:
        ValueError: if ``include_motif_information`` is not one of 1, 2, 3, 4.
    """
    mode = include_motif_information
    if mode == 1:
        motifs = get_top_1_motif(
            TimeSeries,
            num_periods_output,
            l=no_points_after_motif,
            include_itself=include_itself,
        )
    elif mode in (2, 3, 4):
        motifs = get_top_k_motifs(
            TimeSeries,
            num_periods_output,
            k=k_motifs,
            l=no_points_after_motif,
            include_itself=include_itself,
        )
    else:
        # Fail loudly here instead of hitting an unbound variable further down.
        raise ValueError(
            f"Unsupported include_motif_information={mode!r}; expected 0, 1, 2, 3 or 4"
        )

    if mode in (1, 2):
        # Direct modes: drop both the index and the distance columns.
        kept = [c for c in motifs.columns if ("idx" not in c) and ("dist" not in c)]
        return motifs[kept]

    # Averaging modes: only the index columns are dropped before averaging.
    kept = [c for c in motifs.columns if "idx" not in c]
    motifs = motifs[kept]
    if mode == 3:
        return compute_point_after_average(motifs)
    return compute_point_after_average(motifs, method="weighted")


def _window_batches(values, stride, n_features):
    """Cut a 2-D array into (input window, target window) pairs.

    Each input window holds ``num_periods_input`` consecutive rows (all columns);
    its target is column 0 of the following ``num_periods_output`` rows. The
    window start advances by ``stride`` rows per step.

    Returns:
        (inputs, targets) reshaped to ``(-1, num_periods_input, n_features)`` and
        ``(-1, num_periods_output, 1)`` respectively.
    """
    horizon = max(num_periods_input, num_periods_output)
    total_rows = len(values)
    inputs = []
    targets = []
    window_start = 0
    window_end = 0
    # The bound is checked against the previous window's end, as in the original loop.
    while window_end + horizon < total_rows:
        window_end = window_start + num_periods_input
        inputs.append(values[window_start:window_end, :])
        targets.append(values[window_end : window_end + num_periods_output, 0])
        window_start = window_start + stride
    targets = np.asarray(targets).reshape(-1, num_periods_output, 1)
    inputs = np.asarray(inputs).reshape(-1, num_periods_input, n_features)
    return inputs, targets


def New_preprocessing(TimeSeries):
    """Turn one series into windowed train/test arrays with calendar (and motif) features.

    Reads the notebook-level settings ``num_periods_input``, ``num_periods_output``,
    ``include_motif_information``, ``k_motifs``, ``no_points_after_motif`` and
    ``include_itself``.

    Args:
        TimeSeries: one series of values, indexable by integer position 0..len-1.

    Returns:
        Tuple ``(x_batches, y_batches, x_testbatches, y_testbatches)`` of numpy arrays.
        x arrays have shape ``(n, num_periods_input, n_features)``; y arrays have
        shape ``(n, num_periods_output, 1)`` and hold column 0 (the series value).

    Raises:
        ValueError: if ``include_motif_information`` is truthy but not 1, 2, 3 or 4.
    """
    # One row per observation: the value plus calendar fields of a synthetic daily
    # calendar that starts on 1990-01-01.
    first_day = datetime(1990, 1, 1, 00, 00, 00)
    rows = []
    for offset in range(0, len(TimeSeries)):
        day = first_day + timedelta(days=offset)
        rows.append(
            [
                TimeSeries[offset],
                day.month,
                day.day,
                day.weekday(),
                day.timetuple().tm_yday,
                day.isocalendar()[1],
            ]
        )

    headers = ["pems", "month", "day", "day_of_week", "day_of_year", "week_of_year"]
    Data_df = pd.DataFrame(rows, columns=headers)

    # Scale only the calendar columns to [-0.5, 0.5]; the value column is left untouched.
    calendar_scaled = preprocessing.minmax_scale(
        Data_df.iloc[:, 1:], feature_range=(-0.5, 0.5)
    )
    Normalized_Data_df = pd.DataFrame(
        np.column_stack([Data_df.iloc[:, 0], calendar_scaled]), columns=headers
    )

    if include_motif_information:
        df_motif = _motif_feature_frame(TimeSeries)
        # Scale motif features to [-0.5, 0.5], keeping the original columns and index.
        motif_scaled = pd.DataFrame(
            preprocessing.minmax_scale(df_motif, feature_range=(-0.5, 0.5)),
            columns=df_motif.columns,
            index=df_motif.index,
        )
        Normalized_Data_df = pd.concat([Normalized_Data_df, motif_scaled], axis=1)

    total = len(Normalized_Data_df)
    window_span = num_periods_output + num_periods_input

    # Training part: first 80 % of the rows, trimmed down to a multiple of window_span.
    train_split = np.floor(total * 0.8)
    train_split = int(train_split - (train_split % window_span))
    Train = Normalized_Data_df.iloc[0:train_split, :].values.astype("float32")
    print("Traing length :", len(Train))

    # Test part: last 20 % of the rows, trimmed the same way. The slice starts
    # num_periods_input rows earlier, which supplies the input window for the
    # first test target.
    test_split = np.floor(total * 0.2)
    test_split = int(test_split - (test_split % window_span))
    Test = Normalized_Data_df.iloc[(total - test_split - num_periods_input) :, :]
    Test = Test.values.astype("float32")
    print("Test length :", len(Test))

    Number_Of_Features = Normalized_Data_df.shape[1]

    # Training windows slide one row at a time; test windows advance by a whole
    # input window per step.
    x_batches, y_batches = _window_batches(Train, 1, Number_Of_Features)
    x_testbatches, y_testbatches = _window_batches(
        Test, num_periods_input, Number_Of_Features
    )
    return x_batches, y_batches, x_testbatches, y_testbatches

In [4]:
# Pool the windowed samples of every series into four flat lists
# (train inputs, train targets, test inputs, test targets).
x_batches_Full = []
y_batches_Full = []
X_Test_Full = []
Y_Test_Full = []
for i in range(len(data)):
    print("Time series: ", i)
    TimeSeries = data.iloc[i, :]  # row i of `data` is one series
    x_batches, y_batches, X_Test, Y_Test = New_preprocessing(TimeSeries)
    # extend() iterates over the first axis, so each window is appended as its own item.
    x_batches_Full.extend(x_batches)
    y_batches_Full.extend(y_batches)
    X_Test_Full.extend(X_Test)
    Y_Test_Full.extend(Y_Test)


Time series:  0


OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


Traing length : 6048
Traing length : 1512
Time series:  1
Traing length : 6048
Traing length : 1512
Time series:  2
Traing length : 6048
Traing length : 1512
Time series:  3
Traing length : 6048
Traing length : 1512
Time series:  4
Traing length : 6048
Traing length : 1512
Time series:  5
Traing length : 6048
Traing length : 1512
Time series:  6
Traing length : 6048
Traing length : 1512
Time series:  7
Traing length : 6048
Traing length : 1512


In [5]:
# Hyper-parameters handed to evaluate_with_xgboost.
# An earlier inline xgb.XGBRegressor(...) used the same values, with `seed=42` and
# `silent=False` where this dict has `random_state` and `verbosity`.
xgboost_parameters = dict(
    learning_rate=0.07,
    n_estimators=80,
    max_depth=3,
    min_child_weight=1,
    gamma=0.0,
    subsample=0.97,
    colsample_bytree=0.97,
    scale_pos_weight=1,
    random_state=42,
    verbosity=1,  # 0=Silent, 1=Warning, 2=Info, 3=Debug
)

In [6]:
# Last argument: set when covariates or any motif mode is enabled.
# NOTE(review): presumably tells the evaluator that inputs carry extra feature
# columns — confirm against evaluate_with_xgboost.
uses_extra_features = include_covariates or (include_motif_information > 0)

rmse, wape, mae, mape = evaluate_with_xgboost(
    num_periods_output,
    x_batches_Full,
    y_batches_Full,
    X_Test_Full,
    Y_Test_Full,
    xgboost_parameters,
    uses_extra_features,
)

In [7]:
# Report the four error metrics returned by the evaluation step, one per line.
for metric_label, metric_value in (
    ("RMSE: ", rmse),
    ("WAPE: ", wape),
    ("MAE: ", mae),
    ("MAPE: ", mape),
):
    print(metric_label, metric_value)

RMSE:  0.019501902
WAPE:  0.015671477
MAE:  0.0117141
MAPE:  0.04732263


In [8]:
# `datetime` is the class imported in the first cell. Importing the `datetime`
# module here would rebind that name and break New_preprocessing (which calls
# datetime(1990, 1, 1, ...)) on any later re-run.
print(f"This notebook was last run end-to-end on: {datetime.now()}\n")
###
###
# ###
# RMSE:  0.019549614
# WAPE:  0.015724955
# #~MAE:  0.011754074
# MAPE:  0.0473708

# RMSE:  0.019542774
# WAPE:  0.015714908
# #~MAE:  0.011746564
# MAPE:  0.047434438

# RMSE:  0.019506397
# WAPE:  0.015675997
# MAE:  0.01171748
# MAPE:  0.047415353

# RMSE:  0.019506397
# WAPE:  0.015675997
# MAE:  0.01171748
# MAPE:  0.047415353

# RMSE:  0.019572815
# WAPE:  0.015771963
# MAE:  0.011789212
# MAPE:  0.047519498

# RMSE:  0.019504154
# WAPE:  0.015673101
# MAE:  0.011715314
# MAPE:  0.04736448

This notebook was last run end-to-end on: 2025-12-26 16:44:05.986700

