In [None]:
!pip install pandas>=2.2 git+https://github.com/Nixtla/neuralforecast.git

In [None]:
from sqlalchemy import create_engine
import pandas as pd
import torch
import os

# SQLAlchemy URL of the SQLite database (absolute path under /content by default).
database_url = "sqlite:////content/data.sqlite"
# database_url = "sqlite:///../data/data.sqlite"
engine = create_engine(database_url)

# Notebook-level switches / hyper-parameters.
use_standard_scaler = True
save_imputed_data = False
notebook_mode = 'train'

# Pick the compute device by preference: first CUDA GPU, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Keep training from being interrupted by GPU out-of-memory errors.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'
# Presumably makes neuralforecast return `unique_id` as a regular column
# (later cells index cross_val['unique_id']) -- confirm against the library docs.
os.environ['NIXTLA_ID_AS_COL'] = '1'

In [None]:
# Read the daily entry counts (three series) with the date column aliased to `ds`.
query = "select date as ds,global_entry,hkmo_entry,tw_entry from sh_customs_daily_imputed"
original_data = pd.read_sql_query(query, engine)

# Melt to long format: one row per (ds, unique_id) with the value in `y`,
# then parse `ds` into datetimes.
df_long = (
    original_data
    .melt(id_vars=['ds'], var_name='unique_id', value_name='y')
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

In [None]:
from neuralforecast.auto import AutoLSTM, AutoInformer, AutoAutoformer, AutoPatchTST, AutoiTransformer, AutoFEDformer
from ray import tune
from neuralforecast.core import NeuralForecast

# Forecast horizon in time steps (the frame is daily, see `freq` below).
# Reused by the plotting cell further down.
horizon = 30

# Number of distinct series in the long frame; the multivariate iTransformer
# needs to be told how many series it is fed.
n_series = df_long['unique_id'].nunique()

# ---------------------------------------------------------------------------
# Search spaces (Ray Tune).
#
# NOTE: `h` and `loss` are deliberately NOT part of any search space. They are
# supplied through the Auto* constructor arguments; neuralforecast's BaseAuto
# rejects configs that also carry `h` / `loss` / `valid_loss`
# ("Please use `h` init argument instead of `config['h']`").
# ---------------------------------------------------------------------------
lstm_config = {
    "input_size": tune.choice([horizon * 4, horizon * 16, horizon * 32]),
    "encoder_hidden_size": tune.choice([50, 100, 200]),
    "encoder_n_layers": tune.randint(1, 4),
    "context_size": tune.choice([5, 10, 50]),
    "decoder_hidden_size": tune.choice([64, 128, 256]),
    "learning_rate": tune.loguniform(1e-4, 1e-1),
    "max_steps": tune.choice([300, 500, 700]),
    "batch_size": tune.choice([16, 32]),
    "random_seed": tune.randint(1, 20),
}

informer_config = {
    "input_size": tune.choice([horizon, horizon * 2]),
    "hidden_size": tune.choice([64, 128, 256]),
    "n_head": tune.choice([4, 8]),
    "learning_rate": tune.loguniform(1e-4, 1e-1),
    "scaler_type": tune.choice(["robust", "minmax"]),
    "max_steps": tune.choice([300, 500, 1000]),
    "batch_size": tune.choice([32, 64, 128]),
    "windows_batch_size": tune.choice([128, 256, 512]),
    "random_seed": tune.randint(1, 20),
}

autoformer_config = {
    "input_size": tune.choice([horizon, horizon * 2]),
    "hidden_size": tune.choice([64, 128, 256]),
    "n_head": tune.choice([4, 8]),
    "learning_rate": tune.loguniform(1e-4, 1e-1),
    "scaler_type": tune.choice(["robust", "standard", "minmax"]),
    "max_steps": tune.choice([300, 500, 1000]),
    "batch_size": tune.choice([32, 64, 128]),
    "windows_batch_size": tune.choice([128, 256, 512]),
    "random_seed": tune.randint(1, 20),
}

# (name kept as `pathtst_config` so any other cell referring to it keeps working)
pathtst_config = {
    "input_size": tune.choice([horizon, horizon * 2]),
    "hidden_size": tune.choice([64, 128, 256]),
    "n_heads": tune.choice([4, 8]),
    "patch_len": tune.choice([16, 24]),
    "learning_rate": tune.loguniform(1e-4, 1e-1),
    "scaler_type": tune.choice([None, "robust", "standard"]),
    "revin": True,
    "revin_affine": tune.choice([True, False]),
    "max_steps": tune.choice([500, 700, 1000]),
    "batch_size": tune.choice([32, 64, 128]),
    "windows_batch_size": tune.choice([128, 256, 512]),
    "random_seed": tune.randint(1, 20),
}

itransformer_config = {
    "input_size": tune.choice([horizon, horizon * 2]),
    "n_series": n_series,
    "hidden_size": tune.choice([64, 128, 256]),
    "n_heads": tune.choice([4, 8]),
    "learning_rate": tune.loguniform(1e-4, 1e-1),
    "scaler_type": tune.choice(["robust", "standard", "minmax"]),
    "max_steps": tune.choice([500, 700, 1000]),
    "batch_size": tune.choice([32, 64, 128]),
    "random_seed": tune.randint(1, 20),
}

# Constructor arguments shared by every Auto* model. `loss` is not passed, so
# each model keeps its library default.
auto_kwargs = dict(
    h=horizon,
    verbose=True,   # track progress
    num_samples=5,  # number of configurations sampled per model
    backend='ray',
)

# Order matters: the save cell addresses these models by position.
models = [
    # baseline
    AutoLSTM(config=lstm_config, **auto_kwargs),
    # transformer-based models
    AutoInformer(config=informer_config, **auto_kwargs),
    AutoAutoformer(config=autoformer_config, **auto_kwargs),
    AutoPatchTST(config=pathtst_config, **auto_kwargs),
    AutoiTransformer(n_series=n_series, config=itransformer_config, **auto_kwargs),
]

nf = NeuralForecast(
    models=models,
    freq='1D',
)

# `val_size` / `test_size` are counted in time steps per series. `df_long` is
# in long format (one row per series per date), so its row count is
# n_series * n_timestamps; sizing the hold-outs from len(df_long) would reserve
# ~30% (not 10%) of each series for validation and again for test.
n_timestamps = df_long['ds'].nunique()
holdout_size = int(0.1 * n_timestamps)
cross_val = nf.cross_validation(df_long, val_size=holdout_size, test_size=holdout_size)

In [None]:
# cross_val.to_sql('model_evaluation', con=engine, if_exists='replace', index=True,index_label='unique_id')

In [None]:
# import pandas as pd

# cross_val = pd.read_sql('model_evaluation', con=engine)
# cross_val.head()
# horizon=30

In [None]:
from matplotlib import pyplot as plt

# Series to visualise and the forecast columns to overlay (column name -> legend label).
series_id = 'global_entry'
forecast_columns = {
    'AutoLSTM': 'LSTM',
    'AutoInformer': 'Informer',
    'AutoAutoformer': 'Autoformer',
    'AutoPatchTST': 'PatchTST',
    'AutoiTransformer': 'iTransformer',
}

# Keep every `horizon`-th cutoff so that the retained forecast windows do not
# overlap (assuming consecutive cutoffs are one step apart -- TODO confirm).
cutoffs = cross_val['cutoff'].unique()[::horizon]

# Build both masks on `cross_val` itself so the boolean index is aligned with
# the frame it filters (masking a pre-filtered frame with a full-length mask
# only works through implicit reindexing and triggers a pandas warning).
selected_rows = (cross_val['unique_id'] == series_id) & cross_val['cutoff'].isin(cutoffs)
Y_plot = cross_val[selected_rows]
Y_plot = Y_plot.iloc[-365:]  # last 365 rows only, to keep the figure readable

fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(Y_plot['ds'], Y_plot['y'], label='True')
for column, label in forecast_columns.items():
    ax.plot(Y_plot['ds'], Y_plot[column], label=label)
ax.set_xlabel('Datestamp')
ax.set_ylabel(series_id)  # label the axis with the plotted series rather than the stale 'OT'
ax.grid()
ax.legend()
plt.show()


In [None]:
from neuralforecast.losses.numpy import mae, rmse, mape


# Forecast columns of the cross-validation frame to score.
model_names = [
    'AutoLSTM',
    'AutoInformer',
    'AutoAutoformer',
    'AutoPatchTST',
    'AutoiTransformer',
]


def eval_accuracy(df):
    """Print MAE, RMSE and MAPE of every model in `model_names`, per series.

    `df` must provide a `unique_id` column, the actuals in `y`, and one
    forecast column per entry of the module-level `model_names`.
    Nothing is returned; results are written to stdout.
    """
    for series_id in df['unique_id'].unique():
        series_rows = df.loc[df['unique_id'] == series_id]
        print(series_id)
        actual = series_rows['y']
        for name in model_names:
            forecast = series_rows[name]
            mae_value = mae(actual, forecast)
            rmse_value = rmse(actual, forecast)
            mape_value = mape(actual, forecast)
            print(f'{name} -- mae: {mae_value},rmse: {rmse_value}, mape: {mape_value}')


eval_accuracy(cross_val)

In [None]:
# Persist the five fitted models (positions 0-4 of `models`) together with the
# dataset under ./models. overwrite=False presumably refuses to replace files
# from an earlier run -- confirm against the NeuralForecast.save documentation.
nf.save(
    './models',
    model_index=list(range(5)),
    overwrite=False,
    save_dataset=True,
)