In [None]:
!pip install neuralforecast pandas==2.2.1

In [None]:
from sqlalchemy import create_engine
import pandas as pd
import torch

# SQLite database that holds the customs tables (relative path).
# Colab alternative: "sqlite:////content/data.sqlite"
database_url = "sqlite:///../data/data.sqlite"
engine = create_engine(database_url)

# Notebook-level switches.
use_standard_scaler = True
save_imputed_data = False
notebook_mode = 'train'

# Compute device preference: first CUDA GPU, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device_name = "cuda:0"
elif torch.backends.mps.is_available():
    device_name = "mps"
else:
    device_name = "cpu"
device = torch.device(device_name)


In [None]:
# Read the date column (aliased to `ds`) and the three entry series from the
# imputed daily table.
query = "select date as ds,global_entry,hkmo_entry,tw_entry from sh_customs_daily_imputed"
data = pd.read_sql_query(sql=query, con=engine)

# Keep an independent snapshot of the frame exactly as it was loaded.
original_data = data.copy(deep=True)

In [None]:
# Melt the wide frame into long format: one row per (ds, unique_id) with the
# value in `y`, then parse `ds` as datetimes.
df_long = (
    data
    .melt(id_vars=['ds'], var_name='unique_id', value_name='y')
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

## 使用cross validation选择模型

使用RTX 4070Ti GPU 训练3小时

In [None]:
import os

# Keep training from being interrupted by running out of GPU memory:
# ask the PyTorch CUDA allocator to use expandable segments.
os.environ.update({'PYTORCH_CUDA_ALLOC_CONF': 'expandable_segments:True'})

In [None]:
from neuralforecast.auto import AutoLSTM, AutoInformer, AutoAutoformer, AutoPatchTST, AutoiTransformer, AutoFEDformer
from ray import tune
from neuralforecast.core import NeuralForecast

# Forecast horizon in time steps (the series are daily, see freq='1D' below).
horizon = 30

# Number of distinct series in the long frame; needed by the multivariate
# iTransformer. Derived from the data instead of being hard-coded.
n_series = int(df_long['unique_id'].nunique())

# ---------------------------------------------------------------------------
# Hyper-parameter search spaces.
# "h" (and "loss" / "n_series" where present) are placeholders here; the actual
# values are passed to the Auto* constructors below.
# Every space sets "input_size" explicitly, so the look-back window is sampled
# from the listed choices.
# ---------------------------------------------------------------------------
lstm_config = {
    "input_size": tune.choice([horizon * 4, horizon * 16, horizon * 32]),
    "h": None,
    "encoder_hidden_size": tune.choice([50, 100, 200]),
    "encoder_n_layers": tune.randint(1, 4),
    "context_size": tune.choice([5, 10, 50]),
    "decoder_hidden_size": tune.choice([64, 128, 256]),
    "learning_rate": tune.loguniform(1e-4, 1e-1),
    "max_steps": tune.choice([300, 500, 700]),
    "batch_size": tune.choice([16, 32]),
    "random_seed": tune.randint(1, 20),
}

informer_config = {
    "input_size": tune.choice([horizon, horizon * 2]),
    "h": None,
    "hidden_size": tune.choice([64, 128, 256]),
    "n_head": tune.choice([4, 8]),
    "learning_rate": tune.loguniform(1e-4, 1e-1),
    "scaler_type": tune.choice(["robust", "minmax"]),
    "max_steps": tune.choice([300, 500, 1000]),
    "batch_size": tune.choice([32, 64, 128]),
    "windows_batch_size": tune.choice([128, 256, 512]),
    "random_seed": tune.randint(1, 20),
}

autoformer_config = {
    "input_size": tune.choice([horizon, horizon * 2]),
    "h": None,
    "hidden_size": tune.choice([64, 128, 256]),
    "n_head": tune.choice([4, 8]),
    "learning_rate": tune.loguniform(1e-4, 1e-1),
    "scaler_type": tune.choice(["robust", "minmax"]),
    "max_steps": tune.choice([300, 500, 1000]),
    "batch_size": tune.choice([32, 64, 128]),
    "windows_batch_size": tune.choice([128, 256, 512]),
    "random_seed": tune.randint(1, 20),
}

# Search space for PatchTST (variable name kept as-is for compatibility).
pathtst_config = {
    "input_size": tune.choice([horizon, horizon * 2]),
    "h": None,
    "hidden_size": tune.choice([64, 128, 256]),
    "n_heads": tune.choice([4, 8]),
    "patch_len": tune.choice([16, 24]),
    "learning_rate": tune.loguniform(1e-4, 1e-1),
    "scaler_type": tune.choice(["robust", "minmax"]),
    "revin": True,
    "revin_affine": tune.choice([True, False]),
    "max_steps": tune.choice([500, 700, 1000]),
    "batch_size": tune.choice([32, 64, 128]),
    "windows_batch_size": tune.choice([128, 256, 512]),
    "random_seed": tune.randint(1, 20),
}

# NOTE(review): this space is defined but no AutoFEDformer model is added to
# `models` below, so it is currently unused in this cell.
FEDformer_config = {
    "input_size": tune.choice([horizon, horizon * 2]),
    "h": None,
    "hidden_size": tune.choice([64, 128, 256]),
    "learning_rate": tune.loguniform(1e-4, 1e-1),
    "scaler_type": tune.choice([None, "robust", "standard"]),
    "max_steps": tune.choice([500, 700, 1000]),
    "batch_size": tune.choice([32, 64, 128]),
    "windows_batch_size": tune.choice([128, 256, 512]),
    "loss": None,
    "random_seed": tune.randint(1, 20),
}

itransformer_config = {
    "input_size": tune.choice([horizon, horizon * 2]),
    "h": None,
    "n_series": None,
    "hidden_size": tune.choice([64, 128, 256]),
    "n_heads": tune.choice([4, 8]),
    "learning_rate": tune.loguniform(1e-4, 1e-1),
    "scaler_type": tune.choice(["robust", "standard", "minmax"]),
    "max_steps": tune.choice([500, 700, 1000]),
    "batch_size": tune.choice([32, 64, 128]),
    "loss": None,
    "random_seed": tune.randint(1, 20),
}

# Arguments shared by every Auto* model. `loss` is not passed, so each model
# keeps its library default (the original notes say this is MAE).
search_kwargs = dict(
    h=horizon,
    verbose=True,    # track tuning progress
    num_samples=5,   # number of sampled configurations per model
    backend='ray',
)

models = [
    # Baseline.
    AutoLSTM(config=lstm_config, **search_kwargs),
    # Transformer-based models.
    AutoInformer(config=informer_config, **search_kwargs),
    AutoAutoformer(config=autoformer_config, **search_kwargs),
    AutoPatchTST(config=pathtst_config, **search_kwargs),
    AutoiTransformer(n_series=n_series, config=itransformer_config, **search_kwargs),
]

nf = NeuralForecast(
    models=models,
    freq='1D',
)

# `val_size` / `test_size` are lengths in time steps *per series*. `df_long`
# stacks all series, so `len(df_long)` would over-count by a factor of
# `n_series`; size the holdouts from the (shortest) individual series instead.
series_length = int(df_long.groupby('unique_id').size().min())
# Never go below one full horizon, otherwise no evaluation window fits.
holdout_size = max(horizon, int(0.1 * series_length))

cross_val = nf.cross_validation(
    df_long,
    val_size=holdout_size,
    test_size=holdout_size,
    n_windows=None,  # the windows are determined by test_size instead
)

In [None]:
# cross_val.to_sql('model_eval', con=engine, if_exists='replace', index=True,index_label='unique_id')

In [None]:
from matplotlib import pyplot as plt

# Depending on the neuralforecast version, `unique_id` is either the index or a
# regular column of the cross-validation frame; normalise to a column so the
# filter below works in both cases.
cv_frame = cross_val if 'unique_id' in cross_val.columns else cross_val.reset_index()

series_to_plot = 'global_entry'
Y_plot = cv_frame[cv_frame['unique_id'] == series_to_plot]
# Only the last 365 rows are drawn.
# NOTE(review): if the cross-validation produced overlapping windows, these
# rows may contain repeated dates from different cutoffs — confirm this is the
# intended view.
Y_plot = Y_plot[-365:]

fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(Y_plot['ds'], Y_plot['y'], label='True')
for column, label in (
    ('AutoLSTM', 'LSTM'),
    ('AutoInformer', 'Informer'),
    ('AutoAutoformer', 'Autoformer'),
    ('AutoPatchTST', 'PatchTST'),
    ('AutoiTransformer', 'iTransformer'),
):
    ax.plot(Y_plot['ds'], Y_plot[column], label=label)
ax.set_xlabel('Datestamp')
# The previous label 'OT' did not describe this data; label with the series.
ax.set_ylabel(series_to_plot)
ax.grid()
ax.legend();


In [None]:
from neuralforecast.losses.numpy import mae, rmse, mape


def eval_accuracy(model_name, y_true, y_pred):
    """Print MAE, RMSE and MAPE of `y_pred` against `y_true` on one line.

    Args:
        model_name: Label placed at the start of the printed line.
        y_true: Observed values.
        y_pred: Predicted values, aligned with `y_true`.

    Returns:
        None. The metrics are only printed.
    """
    mae_value = mae(y_true, y_pred)
    rmse_value = rmse(y_true, y_pred)
    mape_value = mape(y_true, y_pred)
    print(f'{model_name} -- mae: {mae_value},rmse: {rmse_value}, mape: {mape_value}')


# Report the metrics of every tuned model over the whole cross-validation
# frame (all series pooled together).
for label, prediction_column in (
    ('LSTM', 'AutoLSTM'),
    ('Informer', 'AutoInformer'),
    ('Autoformer', 'AutoAutoformer'),
    ('PatchTST', 'AutoPatchTST'),
    ('iTransformer', 'AutoiTransformer'),
):
    eval_accuracy(label, cross_val['y'], cross_val[prediction_column])

In [None]:
# NOTE(review): `reccurrent` is not used in any visible cell; kept in case
# something outside this view reads it.
reccurrent = 6

# Forecast with every fitted model.
df_pred = nf.predict(df_long, step_size=1, random_seed=42)

# Expose the series id as a column. Only copy it from the index when the
# library has not already returned a `unique_id` column; otherwise the real
# ids would be overwritten by the positional index.
if 'unique_id' not in df_pred.columns:
    df_pred['unique_id'] = df_pred.index

In [None]:
# Preview the first rows of the forecast frame.
df_pred.head()

In [None]:
from matplotlib import pyplot as plt

# Select one series. `unique_id` may be a column or the index depending on the
# neuralforecast version and on how `df_pred` was prepared; support both.
series_to_plot = 'global_entry'
if 'unique_id' in df_pred.columns:
    series_ids = df_pred['unique_id']
else:
    series_ids = df_pred.index
Y_plot = df_pred[series_ids == series_to_plot]

fig, ax = plt.subplots(figsize=(20, 5))
# One line per trained model. The last entry used to reference 'AutoNHITS',
# which is not among the trained models and raised a KeyError; the fifth model
# is AutoiTransformer.
for column, label in (
    ('AutoLSTM', 'LSTM'),
    ('AutoInformer', 'Informer'),
    ('AutoAutoformer', 'Autoformer'),
    ('AutoPatchTST', 'PatchTST'),
    ('AutoiTransformer', 'iTransformer'),
):
    ax.plot(Y_plot['ds'], Y_plot[column], label=label)
ax.set_xlabel('Datestamp')
# The previous label 'OT' did not describe this data; label with the series.
ax.set_ylabel(series_to_plot)
ax.grid()
ax.legend();

In [None]:
# Persist the five tuned models (indices 0-4) together with the dataset under
# ./models. `overwrite=False` is passed, so files already there are not
# replaced.
nf.save(
    path='./models',
    model_index=list(range(5)),
    save_dataset=True,
    overwrite=False,
)