In [1]:
from transformers.models.gpt2.modeling_gpt2 import GPT2Model
import argparse
from exp.exp_short_term_forecasting import Exp_Short_Term_Forecast

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# local_model_path = "..\..\huggingface\gpt2"
# model = GPT2Model.from_pretrained(local_model_path, output_attentions=True, output_hidden_states=True)

## set configs

In [3]:
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` is a well-known argparse pitfall: ``bool('False')`` is
    ``True`` because any non-empty string is truthy. This converter accepts
    the usual spellings instead and rejects anything it does not recognise.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string is kept falsy, as it was under ``type=bool``.
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got {!r}'.format(value))


# Every experiment option is declared on one parser so the notebook can reuse
# the same ``args`` namespace that the command-line scripts expect.
parser = argparse.ArgumentParser(description='gpt4ts')

# basic config
parser.add_argument('--task_name', type=str, default='short_term_forecast',
                    help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]')
parser.add_argument('--is_training', type=int, default=0, help='status')
parser.add_argument('--model_id', type=str, default='m4_Monthly', help='model id')
parser.add_argument('--model', type=str, default='GPT4TS',
                    help='model name, options: [Autoformer, Transformer, TimesNet]')
# Raw string: same value as before, without relying on invalid escape sequences.
parser.add_argument('--local_model_path', type=str, default=r'..\..\huggingface\gpt2', help='load pretrained model from local folder')

# data loader
parser.add_argument('--data', type=str, default='m4', help='dataset type')
parser.add_argument('--root_path', type=str, default='../dataset/m4', help='root path of the data file')
parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
parser.add_argument('--features', type=str, default='M',
                    help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate')
parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
parser.add_argument('--freq', type=str, default='h',
                    help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')
parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')

# forecasting task
parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
parser.add_argument('--label_len', type=int, default=48, help='start token length')
parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length')
parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4')

# imputation task
parser.add_argument('--mask_rate', type=float, default=0.25, help='mask ratio')

# anomaly detection task
# argparse %-formats help strings, so a literal percent sign must be written as '%%'.
parser.add_argument('--anomaly_ratio', type=float, default=0.25, help='prior anomaly ratio (%%)')

# model define
parser.add_argument('--top_k', type=int, default=5, help='for TimesBlock')
parser.add_argument('--num_kernels', type=int, default=6, help='for Inception')
parser.add_argument('--enc_in', type=int, default=1, help='encoder input size')
parser.add_argument('--dec_in', type=int, default=1, help='decoder input size')
parser.add_argument('--c_out', type=int, default=1, help='output size')
parser.add_argument('--d_model', type=int, default=128, help='dimension of model')
parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
parser.add_argument('--d_ff', type=int, default=128, help='dimension of fcn')
parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
parser.add_argument('--factor', type=int, default=1, help='attn factor')
parser.add_argument('--distil', action='store_false',
                    help='whether to use distilling in encoder, using this argument means not using distilling',
                    default=True)
parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
parser.add_argument('--embed', type=str, default='timeF',
                    help='time features encoding, options:[timeF, fixed, learned]')
parser.add_argument('--activation', type=str, default='gelu', help='activation')
parser.add_argument('--output_attention', action='store_true', help='whether to output attention in ecoder')

# optimization
parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
parser.add_argument('--itr', type=int, default=1, help='experiments times')
parser.add_argument('--train_epochs', type=int, default=10, help='train epochs')
parser.add_argument('--batch_size', type=int, default=16, help='batch size of train input data')
parser.add_argument('--patience', type=int, default=3, help='early stopping patience')
parser.add_argument('--learning_rate', type=float, default=0.002, help='optimizer learning rate')
parser.add_argument('--des', type=str, default='Exp', help='exp description')
parser.add_argument('--loss', type=str, default='SMAPE', help='loss function')
parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate')
parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)

# GPU
# Parsed with _str2bool so that '--use_gpu False' really disables the GPU.
parser.add_argument('--use_gpu', type=_str2bool, default=False, help='use gpu')
parser.add_argument('--gpu', type=int, default=0, help='gpu')
parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False)
parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multile gpus')

# de-stationary projector params
parser.add_argument('--p_hidden_dims', type=int, nargs='+', default=[128, 128],
                    help='hidden layer dimensions of projector (List)')
parser.add_argument('--p_hidden_layers', type=int, default=2, help='number of hidden layers in projector')

# patching
parser.add_argument('--patch_size', type=int, default=1)
parser.add_argument('--stride', type=int, default=1)
parser.add_argument('--gpt_layers', type=int, default=6)
parser.add_argument('--ln', type=int, default=0)
parser.add_argument('--mlp', type=int, default=0)
parser.add_argument('--weight', type=float, default=0)
parser.add_argument('--percent', type=int, default=5)

# An explicit empty list keeps argparse from reading the kernel's own sys.argv,
# so every option takes its default here.
args = parser.parse_args(args=[])

def _experiment_setting(args, run_idx):
    """Return the experiment identifier built from ``args`` and a run index.

    The identifier joins the task, model, data and architecture options in a
    fixed order; later cells use it as a directory name for checkpoints and
    test results.
    """
    fields = (
        args.task_name, args.model_id, args.model, args.data,
        args.features, args.seq_len, args.label_len, args.pred_len,
        args.d_model, args.n_heads, args.e_layers, args.d_layers,
        args.d_ff, args.factor, args.embed, args.distil,
        args.des, run_idx,
    )
    return '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format(*fields)


if not args.is_training:
    # Evaluation only: a single run with index 0.
    ii = 0
    setting = _experiment_setting(args, ii)
else:
    # One identifier per repetition; only the last one remains bound afterwards.
    for ii in range(args.itr):
        # setting record of experiments
        setting = _experiment_setting(args, ii)
    


## train and test by the original classes

In [4]:
# exp = Exp_Short_Term_Forecast(args)
# # exp.model = model
# exp.train(setting)
# exp.test(setting, test=1)

## load model

In [5]:
from models import GPT4TS
from data_provider.m4 import M4Meta
from torch import nn
from data_provider.data_factory import data_provider
from utils.losses import mape_loss, mase_loss, smape_loss
import os
import time
from utils.tools import EarlyStopping, adjust_learning_rate, visual
from torch import optim
import torch
import numpy as np
import pandas as pd
from utils.m4_summary import M4Summary

In [6]:
# For M4 the horizon is dictated by the seasonal pattern, and the window
# lengths are derived from that horizon.
if args.data == 'm4':
    args.pred_len = M4Meta.horizons_map[args.seasonal_patterns]  # Up to M4 config
    args.seq_len = 2 * args.pred_len  # input_len = 2*pred_len
    args.label_len = args.pred_len
    args.frequency_map = M4Meta.frequency_map[args.seasonal_patterns]

model = GPT4TS.Model(args).float()

if args.use_multi_gpu and args.use_gpu:
    # The parser only defines the comma-separated '--devices' string, so
    # 'args.device_ids' does not exist unless something else set it. Derive it
    # here instead of failing with AttributeError.
    if getattr(args, 'device_ids', None) is None:
        args.device_ids = [
            int(token) for token in args.devices.replace(' ', '').split(',') if token
        ]
    model = nn.DataParallel(model, device_ids=args.device_ids)

In [18]:
# model

## data loading, take M4 as the example

In [8]:
# values = np.load("../dataset/m4/training.npz", allow_pickle=True)
# m4_info = pd.read_csv("../dataset/m4/M4-info.csv")

In [9]:
# values[0]

In [10]:
# m4_info.head()

In [11]:
# m4_info.SP.value_counts()

In [12]:
# values[m4_info.SP.values == 'Monthly'].shape

In [13]:
# training_values = np.array(
#     [v[~np.isnan(v)] for v in
#         values[m4_info.SP.values == 'Monthly']])  # split different frequencies

In [14]:
# Build one (dataset, loader) pair per split, calling data_provider in
# train -> val -> test order.
(train_data, train_loader), (vali_data, vali_loader), (test_data, test_loader) = (
    data_provider(args, split) for split in ('train', 'val', 'test')
)

train 48000
val 48000
test 48000


## training set

In [15]:
def select_criterion(loss_name='MSE'):
    """Return a newly constructed loss object for ``loss_name``.

    Args:
        loss_name: one of ``'MSE'``, ``'MAPE'``, ``'MASE'`` or ``'SMAPE'``.

    Returns:
        ``nn.MSELoss()`` for ``'MSE'``; otherwise an instance of the matching
        ``mape_loss`` / ``mase_loss`` / ``smape_loss`` class from
        ``utils.losses``.

    Raises:
        ValueError: if ``loss_name`` is not one of the supported names.
            Previously this case returned ``None``, which only failed later
            when the criterion was called.
    """
    if loss_name == 'MSE':
        return nn.MSELoss()
    if loss_name == 'MAPE':
        return mape_loss()
    if loss_name == 'MASE':
        return mase_loss()
    if loss_name == 'SMAPE':
        return smape_loss()
    raise ValueError(
        "Unknown loss {!r}; expected one of 'MSE', 'MAPE', 'MASE', 'SMAPE'".format(loss_name)
    )
    
# Pick the compute device from the parsed options.
if args.use_gpu:
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu) if not args.use_multi_gpu else args.devices
    device = torch.device('cuda:{}'.format(args.gpu))
    print('Use GPU: cuda:{}'.format(args.gpu))
else:
    device = torch.device('cpu')
    print('Use CPU')

# The batches are moved to `device` in the training and evaluation cells, so
# the model must be on the same device. This is done before the optimizer is
# built and is a no-op on CPU.
model = model.to(device)

# Directory for this experiment's checkpoints.
path = os.path.join(args.checkpoints, setting)
os.makedirs(path, exist_ok=True)

train_steps = len(train_loader)
early_stopping = EarlyStopping(patience=args.patience, verbose=True)

model_optim = optim.Adam(model.parameters(), lr=args.learning_rate)
criterion = select_criterion(args.loss)
mse = nn.MSELoss()

Use CPU


## training

In [16]:
def vali(args, model, train_loader, vali_loader, criterion):
    """Compute the validation loss of ``model``.

    The inputs are the last in-sample windows of ``train_loader.dataset``;
    the targets are ``vali_loader.dataset.timeseries``. Forecasts are produced
    in chunks of 500 series and collected on the CPU. The model is put in
    eval mode for the forward passes and switched back to train mode before
    returning. Tensors are placed on the module-level ``device``.

    Args:
        args: namespace providing ``pred_len``, ``label_len``, ``features``
            and ``frequency_map``.
        model: callable as ``model(x, None, dec_inp, None)``.
        train_loader: loader whose dataset offers ``last_insample_window()``.
        vali_loader: loader whose dataset offers ``timeseries``.
        criterion: called as ``criterion(insample, freq, forecast, target, mask)``.

    Returns:
        Whatever ``criterion`` returns for the full validation set.
    """
    chunk_size = 500  # number of series per forward pass

    insample, _ = train_loader.dataset.last_insample_window()
    targets = vali_loader.dataset.timeseries
    # Add a trailing channel axis so the input is 3-D.
    insample = torch.tensor(insample, dtype=torch.float32).to(device).unsqueeze(-1)

    model.eval()
    with torch.no_grad():
        n_series, _, n_channels = insample.shape

        # Decoder input: the last label_len observed steps followed by zeros
        # for the steps to be predicted.
        placeholder = torch.zeros((n_series, args.pred_len, n_channels)).float().to(device)
        dec_inp = torch.cat([insample[:, -args.label_len:, :], placeholder], dim=1).float()

        # Forecast buffer lives on the CPU; each chunk is copied into it.
        forecasts = torch.zeros((n_series, args.pred_len, n_channels)).float()
        for start in range(0, n_series, chunk_size):
            stop = min(start + chunk_size, n_series)
            chunk_out = model(insample[start:stop], None, dec_inp[start:stop], None)
            forecasts[start:stop, :, :] = chunk_out.detach().cpu()

        first_feature = -1 if args.features == 'MS' else 0
        pred = forecasts[:, -args.pred_len:, first_feature:]
        true = torch.from_numpy(np.array(targets))
        batch_y_mark = torch.ones(true.shape)

        loss = criterion(insample.detach().cpu()[:, :, 0], args.frequency_map,
                         pred[:, :, 0], true, batch_y_mark)

    model.train()
    return loss

In [None]:
# Training loop: one pass over train_loader per epoch, then a validation pass
# that drives early stopping and the learning-rate schedule.
model.train()
time_now = time.time()
for epoch in range(args.train_epochs):
    iter_count = 0
    train_loss = []

    epoch_time = time.time()
    # i is the batch index; batch_x holds the inputs and batch_y the targets.
    # batch_x_mark is yielded by the loader but not used below.
    for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
        iter_count += 1
        model_optim.zero_grad()
        # Inputs are cast to float before being fed to the model.
        batch_x = batch_x.float().to(device)

        batch_y = batch_y.float().to(device)
        batch_y_mark = batch_y_mark.float().to(device)

        # Decoder input: the first label_len target steps followed by zeros
        # for the pred_len steps to be predicted.
        dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float()
        dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(device)

        print("\tencoder输入batch的维度为{}".format(batch_x.shape))

        outputs = model(batch_x, None, dec_inp, None)
        print("\tmodel输出的维度为{}".format(outputs.shape))

        # Keep only the forecast horizon (and only the last feature for 'MS').
        f_dim = -1 if args.features == 'MS' else 0
        outputs = outputs[:, -args.pred_len:, f_dim:]
        batch_y = batch_y[:, -args.pred_len:, f_dim:].to(device)

        batch_y_mark = batch_y_mark[:, -args.pred_len:, f_dim:].to(device)
        loss_value = criterion(batch_x, args.frequency_map, outputs, batch_y, batch_y_mark)
        # The optional "sharpness" term was computed on every step but never
        # added to the loss, so the dead computation was removed. To restore it:
        #   loss_sharpness = mse(outputs[:, 1:, :] - outputs[:, :-1, :],
        #                        batch_y[:, 1:, :] - batch_y[:, :-1, :])
        #   loss = loss_value + loss_sharpness * 1e-5
        loss = loss_value
        train_loss.append(loss.item())

        if (i + 1) % 100 == 0:
            print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
            # Average time per iteration since the last report, used to
            # estimate the remaining time.
            speed = (time.time() - time_now) / iter_count
            left_time = speed * ((args.train_epochs - epoch) * train_steps - i)
            print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
            iter_count = 0
            time_now = time.time()

        loss.backward()
        model_optim.step()

    print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
    train_loss = np.average(train_loss)
    vali_loss = vali(args, model, train_loader, vali_loader, criterion)
    # No separate test pass is run here; the validation loss is reported twice.
    test_loss = vali_loss
    print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
        epoch + 1, train_steps, train_loss, vali_loss, test_loss))
    early_stopping(vali_loss, model, path)  # save model parameters to path
    if early_stopping.early_stop:
        print("Early stopping")
        break

    adjust_learning_rate(model_optim, epoch + 1, args)

# best_model_path = path + '/' + 'checkpoint.pth'
# model.load_state_dict(torch.load(best_model_path)) # load model from saved checkpoint

## testing

In [None]:
# Inputs are the last in-sample window of every training series; the targets
# are the corresponding test series.
x, _ = train_loader.dataset.last_insample_window()
y = test_loader.dataset.timeseries
x = torch.tensor(x, dtype=torch.float32).to(device)
x = x.unsqueeze(-1)

print('loading model')
# Load from the same checkpoint directory that training wrote to
# (args.checkpoints/setting). map_location places the tensors on the
# currently selected device.
checkpoint_file = os.path.join(args.checkpoints, setting, 'checkpoint.pth')
model.load_state_dict(torch.load(checkpoint_file, map_location=device))

folder_path = './test_results/' + setting + '/'
os.makedirs(folder_path, exist_ok=True)

model.eval()
with torch.no_grad():
    B, _, C = x.shape
    # Decoder input: the last label_len observed steps followed by zeros.
    dec_inp = torch.zeros((B, args.pred_len, C)).float().to(device)
    dec_inp = torch.cat([x[:, -args.label_len:, :], dec_inp], dim=1).float()
    # encoder - decoder
    outputs = torch.zeros((B, args.pred_len, C)).float().to(device)
    # Chunk boundaries with a step of 1, i.e. one series per forward pass.
    id_list = np.arange(0, B, 1)
    id_list = np.append(id_list, B)
    for i in range(len(id_list) - 1):
        outputs[id_list[i]:id_list[i + 1], :, :] = model(x[id_list[i]:id_list[i + 1]], None,
                                                                dec_inp[id_list[i]:id_list[i + 1]], None)

        if id_list[i] % 1000 == 0:
            print(id_list[i])

    f_dim = -1 if args.features == 'MS' else 0
    outputs = outputs[:, -args.pred_len:, f_dim:]
    outputs = outputs.detach().cpu().numpy()

    preds = outputs
    trues = y
    x = x.detach().cpu().numpy()

    # Plot roughly ten evenly spaced series. The stride is clamped to 1 so
    # that fewer than ten series does not produce a zero step for range().
    plot_stride = max(1, preds.shape[0] // 10)
    for i in range(0, preds.shape[0], plot_stride):
        gt = np.concatenate((x[i, :, 0], trues[i]), axis=0)
        # Deliberately not named `pd`: that would shadow the pandas module
        # and break the later pd.DataFrame(...) call.
        pred_curve = np.concatenate((x[i, :, 0], preds[i, :, 0]), axis=0)
        visual(gt, pred_curve, os.path.join(folder_path, str(i) + '.pdf'))

print('test shape:', preds.shape)

# Write this seasonal pattern's forecasts to ./m4_results/<model>/.
folder_path = './m4_results/' + args.model + '/'
if not os.path.exists(folder_path):
    os.makedirs(folder_path)

horizon_columns = ['V{}'.format(step) for step in range(1, args.pred_len + 1)]
forecasts_df = pd.DataFrame(preds[:, :, 0], columns=horizon_columns)
forecasts_df.index = test_loader.dataset.ids[:preds.shape[0]]
forecasts_df.index.name = 'id'
# Re-indexing on the first forecast column replaces the 'id' index, so the
# CSV contains only forecast values.
# NOTE(review): presumably this is the layout M4Summary expects; confirm.
forecasts_df = forecasts_df.set_index(forecasts_df.columns[0])
forecasts_df.to_csv(folder_path + args.seasonal_patterns + '_forecast.csv')

print(args.model)
file_path = './m4_results/' + args.model + '/'
# The aggregate metrics are only evaluated once all six seasonal patterns
# have a forecast file in file_path.
expected_files = (
    'Weekly_forecast.csv',
    'Monthly_forecast.csv',
    'Yearly_forecast.csv',
    'Daily_forecast.csv',
    'Hourly_forecast.csv',
    'Quarterly_forecast.csv',
)
existing_files = os.listdir(file_path)
if all(name in existing_files for name in expected_files):
    m4_summary = M4Summary(file_path, args.root_path)
    # m4_forecast.set_index(m4_winner_forecast.columns[0], inplace=True)
    smape_results, owa_results, mape, mase = m4_summary.evaluate()
    for label, value in (('smape:', smape_results), ('mape:', mape),
                         ('mase:', mase), ('owa:', owa_results)):
        print(label, value)
else:
    print('After all 6 tasks are finished, you can calculate the averaged index')

## train example

In [172]:
# from transformers import GPT2Tokenizer, GPT2LMHeadModel
# local_model_path = "..\..\huggingface\gpt2"
# model_gpt2 = GPT2LMHeadModel.from_pretrained(local_model_path, output_attentions=True, output_hidden_states=True)
# tokenizer = GPT2Tokenizer.from_pretrained(local_model_path)

# text = "what did you do last weekend?"

# # 编码输入文本
# input_ids = tokenizer.encode(text, return_tensors='pt') # [1, 7]

# # 生成文本
# output = model_gpt2.generate(input_ids, max_length=100, num_return_sequences=1) # [1,100]


# # 解码生成的文本
# generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

# # 打印生成的文本
# print(generated_text)