In [1]:
import pandas as pd
import matplotlib.pyplot as plt

import numpy as np

from transformers import TimeSeriesTransformerConfig, TimeSeriesTransformerForPrediction
import torch
from torch.utils.data import DataLoader
from datasets import Dataset, DatasetDict
from gluonts.transform import AddTimeFeatures
from gluonts.time_feature import time_features_from_frequency_str
from gluonts.time_feature import get_lags_for_frequency
from torch.optim import AdamW

In [2]:
data=pd.read_csv('./data/data_3.csv')


In [3]:
def mape(A,F,maskf_sub):
  sum=0
  length=0
  for i in range(len(A)):
   
    if maskf_sub[i]!=0:
      sum+=abs(A[i] - F[i]) / abs(A[i])
      length+=1
  if length>0:
    return 100/length*sum
  
  return 0
def smape(A, F,maskf_sub):
  sum=0
  length=0
  for i in range(len(A)):
    if maskf_sub[i]!=0:
      sum+=2 * np.abs(F[i] - A[i]) / (np.abs(A[i]) + np.abs(F[i]))
      length+=1
  if length>0:

    return 100/length * sum
  return 0

In [5]:
def get_mask(data):
    """
    data should in the form of pd.df
    gen a tenor with 0 and 1 to represent missing data
    """
    mask = ~data.isna().values
 
    mask_tensor = torch.tensor(mask, dtype=torch.float32)
    
    mask_tensor= mask_tensor.transpose(0,1)
    return mask_tensor

def get_time_feature(data):
    """
    data should in form of pd.df
    """

    ind=pd.PeriodIndex(data=data['ds'],freq='D')
    time_feature=ind.dayofyear.astype(float).values - 1
    return time_feature

def get_static_feature(inputsize):
    no=torch.arange(1,inputsize+1,1)
    no=no.unsqueeze(1)
    no=no.to(torch.long)
    return no

def normalize(data):
    mean_list=[]
    std_list=[]
    for i in data.columns:
        mean=data[i].mean()
        std=data[i].std()
        data[i]=(data[i]-mean)/std
        mean_list.append(mean)
        std_list.append(std)   
    return data,mean_list,std_list

In [6]:
def transformer(data,period,prediction_period,inputs,batchs,context_length):
    
    whole,mean_list,std_list=normalize(data.iloc[:,1:])

    time_feature=get_time_feature(data)
    past=torch.tensor([[time_feature[:period-prediction_period]]]*inputs)
    past=past.transpose(1,2)
    future=torch.tensor([[time_feature[period-prediction_period:period]]]*inputs)
    future=future.transpose(1,2)

    real=(torch.zeros((inputs,1))).to(torch.float32)

    static=get_static_feature(inputs)

    mask=get_mask(whole)
    maskf=mask[:,period-prediction_period:]
    maskp=mask[:,:period-prediction_period]
   
   
    
    whole=whole.fillna(0)
    whole=torch.tensor(whole.values)
    input=whole[:period-prediction_period].transpose(0,1)
    target=whole[period-prediction_period:].transpose(0,1)

    traindict={'target':target,'input':input,'past':past,'future':future,'maskp':maskp,'maskf':maskf,'sta':static,'real':real}
    train=Dataset.from_dict(traindict)
    train=train.with_format('torch')

    train_loader = DataLoader(train, batch_size=batchs, shuffle=False)
    lags_sequence = get_lags_for_frequency('1D')
    lags_sequence[15]=period-context_length-prediction_period


    prediction_length = prediction_period
    
    input_size = 1
    config = TimeSeriesTransformerConfig(
        prediction_length=prediction_length,
        context_length=context_length,
        input_size=input_size,
        encoder_layers=4,
        decoder_layers=4,
        d_model=22,
        num_static_categorical_features= 1,
        num_static_real_features= 1,
        num_time_features=1,
        cardinality=[inputs+1],
        lags_sequence=lags_sequence[:16]
    
        
        
    )
    model = TimeSeriesTransformerForPrediction(config)
    
    learning_rate = 0.001  
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    

    optimizer = AdamW(model.parameters(), lr=6e-4, betas=(0.9, 0.95), weight_decay=1e-1)

    model.train()
    for epoch in range(101):
        for ind,batch in enumerate(train_loader):
            optimizer.zero_grad()
            # print(batch,ind,"ind")
            outputs = model(
                static_categorical_features=batch["sta"]
                if config.num_static_categorical_features > 0
                else None,
                static_real_features=batch["real"]
                if config.num_static_real_features > 0
                else None,
                past_time_features=batch["past"],
                past_values=batch["input"],
                future_time_features=batch["future"],
                future_values=batch["target"],
                past_observed_mask=batch["maskp"],
                future_observed_mask=batch["maskf"],
            )
            loss = outputs.loss

            # Backpropagation
            loss.backward()
            optimizer.step()
        
        if epoch % 20 == 0:
            print(loss.item())

        model.eval()

    forecasts = []

    for batch in train_loader:
        outputs = model.generate(
            static_categorical_features=batch["sta"]
            if config.num_static_categorical_features > 0
            else None,
            static_real_features=batch["real"]
            if config.num_static_real_features > 0
            else None,
            past_time_features=batch["past"],
            past_values=batch["input"],
            future_time_features=batch["future"],
            past_observed_mask=batch["maskp"],
        )
        forecasts.append(outputs.sequences.cpu().numpy())
    forecasts = np.vstack(forecasts)
    foremean=np.mean(forecasts,1)
    test=train['target']

    mapel=[]
    smapel=[]
    for i in range(inputs):
        m=mape(foremean[i],test[i],maskf[i])
        mapel.append(float(m))
        sm=smape(foremean[i],test[i],maskf[i])
        smapel.append(float(sm))
    return foremean,test,mapel,smapel


In [7]:
get=transformer(data,data.shape[0],7,200,30,14)

  past=torch.tensor([[time_feature[:period-prediction_period]]]*inputs)


1.2641464471817017
0.4458208680152893
0.0040895212441682816
-0.22347934544086456
-0.38497358560562134
-0.4791935980319977


In [11]:
res= pd.DataFrame({'trans_mape':get[2],'trans_smape':get[3]})
res.to_csv('D:/timeseries/result/temp2.csv')

# result= pd.DataFrame({'name':data.columns[1:],'day1_pred':get[0][:,0], 'day1_actu':get[1][:,0]
                #    })
result= pd.DataFrame({'name':data.columns[1:],'day1/7_pred':get[0][:,0], 'day1/7_actu':get[1][:,0], 'day2/7_pred':get[0][:,1], 'day2/7_actu':get[1][:,1],
                       'day3/7_pred':get[0][:,2], 'day3/7_actu':get[1][:,2], 'day4/7_pred':get[0][:,3], 'day4/7_actu':get[1][:,3], 
                       'day5/7_pred':get[0][:,4], 'day5/7_actu':get[1][:,4], 'day6/7_pred':get[0][:,5], 'day6/7_actu':get[1][:,5],
                       'day7/7_pred':get[0][:,6], 'day7/7_actu':get[1][:,6]
                       })
result.to_csv('D:/timeseries/result/temp.csv')