In [None]:
# !pip install neuralforecast pandas==2.2.1

In [None]:
from sqlalchemy import create_engine
import pandas as pd
import torch

# Create the database engine (SQLite file addressed relative to the working directory).
# Alternative location that was used previously:
# database_url = "sqlite:////content/data.sqlite"
database_url = "sqlite:///../data/data.sqlite"
engine = create_engine(database_url)

# Notebook-level settings. They are not read by the cells immediately below --
# presumably they are consumed elsewhere in the notebook; TODO confirm.
use_standard_scaler = True
save_imputed_data = False
notebook_mode = 'train'

# Compute device preference: first CUDA GPU, then Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Identifiers of the selected model; `saved_table` is presumably the table
# that predictions are written to -- TODO confirm against the saving code.
model_name = 'Autoformer'
model_version = '1.0.0'
saved_table = 'sh_customs_daily_pred'

In [None]:
# Read the three entry series from the imputed table; `date` is aliased to `ds`.
query = "select date as ds,global_entry,hkmo_entry,tw_entry from sh_customs_daily_imputed"
data = pd.read_sql_query(sql=query, con=engine)

# Keep an independent copy of the frame exactly as it was loaded.
original_data = data.copy(deep=True)

In [None]:
# Melt the wide frame into long format: one row per (ds, unique_id) pair with
# the value in `y`, then parse `ds` into datetimes.
df_long = (
    data
    .melt(id_vars=['ds'], var_name='unique_id', value_name='y')
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

## 使用cross validation选择模型

使用RTX 4070Ti GPU 训练3小时

In [None]:
import os

# Keep training from being interrupted by GPU out-of-memory errors by enabling
# expandable segments in the CUDA allocator.
# NOTE(review): torch is imported in an earlier cell; confirm the allocator
# still picks this setting up when it is set at this point.
os.environ.update({'PYTORCH_CUDA_ALLOC_CONF': 'expandable_segments:True'})

In [None]:
from neuralforecast.auto import AutoLSTM, AutoInformer, AutoAutoformer, AutoPatchTST, AutoNHITS
import pandas as pd

from neuralforecast.core import NeuralForecast

# Forecast horizon, in steps of the series frequency (daily).
horizon = 60

# Search settings shared by every candidate model.
# - h: forecast horizon. Per the original author's note, the default search
#   space sizes the input window as a multiple (1~5) of h -- verify against
#   the installed neuralforecast version.
# - verbose: track tuning progress.
# - num_samples: number of hyper-parameter configurations to try per model.
# - backend: hyper-parameter search backend.
# The loss is left at the library default (MAE according to the original note).
auto_model_kwargs = dict(
    h=horizon,
    verbose=True,
    num_samples=10,
    backend='optuna',
)

models = [
    AutoLSTM(**auto_model_kwargs),        # baseline
    AutoInformer(**auto_model_kwargs),    # transformer-based model 1
    AutoAutoformer(**auto_model_kwargs),  # transformer-based model 2
    AutoPatchTST(**auto_model_kwargs),    # transformer-based model 3
    AutoNHITS(**auto_model_kwargs),       # MLP-based model 1
]

nf = NeuralForecast(
    models=models,
    freq='1D',
)

# val_size / test_size are counted in time steps of a single series, so the 10%
# shares are taken from the number of distinct timestamps. len(df_long) would
# count the stacked rows of all series and inflate both splits by the number of
# series.
n_timestamps = df_long['ds'].nunique()
cv_val_size = int(0.1 * n_timestamps)
cv_test_size = int(0.1 * n_timestamps)

cross_val = nf.cross_validation(
    df_long,
    val_size=cv_val_size,
    test_size=cv_test_size,
    n_windows=None,
)

In [None]:
from matplotlib import pyplot as plt

# Plot the cross-validation forecasts of every model against the actual values
# of one series.
series_id = 'global_entry'

# Every column that is not bookkeeping holds one model's forecast. The names
# are read from the frame instead of being hard-coded: the models are created
# from Auto* classes without an alias, so a column such as 'Informer' does not
# exist and indexing it raises KeyError.
bookkeeping_cols = {'unique_id', 'ds', 'cutoff', 'y'}
model_cols = [col for col in cross_val.columns if col not in bookkeeping_cols]

# Keep every `horizon`-th cutoff -- presumably so that the plotted forecast
# windows do not overlap.
cutoffs = cross_val['cutoff'].unique()[::horizon]

# Build a single mask on cross_val itself; applying a mask computed on the full
# frame to an already filtered frame relies on implicit index re-alignment.
row_mask = (cross_val['unique_id'] == series_id) & cross_val['cutoff'].isin(cutoffs)
Y_plot = cross_val.loc[row_mask].tail(365)  # last 365 rows only

plt.figure(figsize=(20, 5))
plt.plot(Y_plot['ds'], Y_plot['y'], label='True')
for col in model_cols:
    plt.plot(Y_plot['ds'], Y_plot[col], label=col)
plt.xlabel('Datestamp')
plt.ylabel(series_id)
plt.title(f'Cross-validation forecasts: {series_id}')
plt.grid()
plt.legend()
plt.show()


In [None]:
from neuralforecast.losses.numpy import mae

# MAE of every model over all cross-validation rows (all series pooled).
# Forecast columns are discovered from the frame: the models are created from
# Auto* classes without an alias, so hard-coded names such as 'Informer' are
# not present in cross_val and raise KeyError.
bookkeeping_cols = {'unique_id', 'ds', 'cutoff', 'y'}
mae_by_model = {
    col: mae(cross_val['y'], cross_val[col])
    for col in cross_val.columns
    if col not in bookkeeping_cols
}

# Original variable names, kept for any later cell that reads them
# (None when the corresponding model column is absent).
mae_informer = mae_by_model.get('AutoInformer')
mae_autoformer = mae_by_model.get('AutoAutoformer')
mae_patchtst = mae_by_model.get('AutoPatchTST')

for model_label, score in mae_by_model.items():
    print(f'{model_label}: {score:.3f}')


In [None]:
import matplotlib.pyplot as plt

# Collect the training-loss trajectory of every model for plotting.
# The leading entries are unstable, so the first `skip_first` of them are dropped.
skip_first = 10

train_losses = {}
for candidate in nf.models:
    # Label each curve with the model's own name. Positional indexing
    # (nf.models[0], [1], [2]) picked AutoLSTM, AutoInformer and AutoAutoformer
    # while labelling them Informer, Autoformer and PatchTST.
    trajectory = getattr(candidate, 'train_trajectories', None)
    if trajectory is None:
        # NOTE(review): Auto* wrappers appear to keep the fitted network on
        # `.model`, with the trajectory stored there -- confirm against the
        # installed neuralforecast version.
        trajectory = getattr(getattr(candidate, 'model', None), 'train_trajectories', None)
    if not trajectory:
        continue  # nothing recorded for this model
    # NOTE(review): the first field is labelled 'Epoch' as in the original cell;
    # confirm whether the library records epochs or optimizer steps here.
    train_losses[repr(candidate)] = pd.DataFrame(
        trajectory[skip_first:], columns=['Epoch', 'Train Loss']
    )

# Original variable names, kept for any later cell that reads them
# (None when the corresponding model has no recorded trajectory).
informer_loss = train_losses.get('AutoInformer')
autoformer_loss = train_losses.get('AutoAutoformer')
patchtst_loss = train_losses.get('AutoPatchTST')

# Plotting
plt.figure(figsize=(20, 5))
for model_label, loss_df in train_losses.items():
    plt.plot(loss_df['Epoch'], loss_df['Train Loss'], label=model_label)
plt.xlabel('Epoch')
plt.ylabel('Loss / Scaled MAE')
plt.title('Training Loss')
plt.legend()
plt.show()

In [None]:
# Produce forecasts from the fitted models, using df_long as the input history.
# step_size and random_seed are forwarded as given.
df_pred = nf.predict(
    df=df_long,
    step_size=horizon,
    random_seed=42,
)
df_pred.head(5)

In [None]:
from matplotlib import pyplot as plt

# Plot the forecasts of every model for one series.
series_id = 'global_entry'

# NOTE(review): depending on the neuralforecast version, predict() returns
# `unique_id` either as the index or as a regular column -- normalise to a
# column so the filter below works in both cases.
pred_frame = df_pred if 'unique_id' in df_pred.columns else df_pred.reset_index()

# Forecast columns are discovered from the frame: the models are created from
# Auto* classes without an alias, so hard-coded names such as 'Informer' are
# not present and raise KeyError.
model_cols = [col for col in pred_frame.columns if col not in {'unique_id', 'ds'}]

Y_plot = pred_frame[pred_frame['unique_id'] == series_id]

plt.figure(figsize=(20, 5))
for col in model_cols:
    plt.plot(Y_plot['ds'], Y_plot[col], label=col)
plt.xlabel('Datestamp')
plt.ylabel(series_id)
plt.title(f'Forecasts: {series_id}')
plt.grid()
plt.legend()
plt.show()