In [3]:
!pip install mlforecast
!pip install statsforecast
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

from prophet import Prophet

import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Forecast horizons (number of future time steps) iterated by the Prophet
# evaluation loop further down in this notebook.
pred_length = [96, 192, 336, 720]

## GBR

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import GradientBoostingRegressor
from mlforecast import MLForecast
from utilsforecast.losses import rmse, mae

# Benchmark CSV files, in the order in which they are evaluated.
datasets = [
    'electricity.csv',
    'traffic.csv',
    'weather.csv',
    'ETTh1.csv',
    'ETTm1.csv',
    'exchange_rate.csv',
]

# Forecast horizons (number of future steps) to cross-validate.
horizons = [96, 192, 336, 720]

# Frequency string of each file, handed to the forecaster as `freq`.
frequency_mapping = dict([
    ('electricity.csv', 'H'),
    ('traffic.csv', 'H'),
    ('exchange_rate.csv', 'D'),
    ('weather.csv', '10min'),
    ('ETTh1.csv', 'H'),
    ('ETTm1.csv', '15min'),
])

def process_dataset(file_name, horizons):
    """Cross-validate a GradientBoostingRegressor forecaster on one dataset.

    The CSV must contain a ``date`` column; every other column is treated as
    one time series. All series are min-max scaled, reshaped to the long
    ``unique_id`` / ``ds`` / ``y`` layout and evaluated with
    ``MLForecast.cross_validation`` (10 windows) for each horizon.

    Args:
        file_name: Path of the CSV file. It is also the key used to look up
            the sampling frequency in the module-level ``frequency_mapping``
            (``'D'`` is used when the file is not listed there).
        horizons: Iterable of forecast horizons (number of steps).

    Returns:
        A list with one dict per horizon containing ``dataset``, ``horizon``,
        the mean ``rmse`` / ``mae`` over all (series, window) pairs and the
        standard deviation of those per-window scores (``std_dev_rmse`` /
        ``std_dev_mae``).
    """
    model_name = 'GradientBoostingRegressor'

    data = pd.read_csv(file_name)
    if file_name in ('electricity.csv', 'traffic.csv'):
        # These two files are truncated to their last 5000 rows.
        data = data.iloc[-5000:, :]
    # Build a new frame instead of mutating a slice of the one read from disk.
    data = data.assign(ds=pd.to_datetime(data['date'])).drop(columns=['date'])

    # NOTE(review): the scaler is fitted on the full series, i.e. including the
    # rows later used as cross-validation targets.
    numeric_columns = data.columns.difference(['ds'])
    data[numeric_columns] = MinMaxScaler().fit_transform(data[numeric_columns])

    df_melted = data.melt(id_vars=['ds'], var_name='unique_id', value_name='y')
    freq = frequency_mapping.get(file_name, 'D')

    results = []
    for horizon in horizons:
        mlf = MLForecast(
            models=[GradientBoostingRegressor()],
            freq=freq,
            date_features=['dayofweek', 'year', 'month'],
            lags=[2, 7],
        )
        cv_df = mlf.cross_validation(df=df_melted, h=horizon, n_windows=10)

        # One score per (series, cut-off) pair.
        cv_df['id_cutoff'] = cv_df['unique_id'].astype(str) + '_' + cv_df['cutoff'].astype(str)
        rmse_per_window = rmse(cv_df, models=[model_name], id_col='id_cutoff')[model_name]
        mae_per_window = mae(cv_df, models=[model_name], id_col='id_cutoff')[model_name]

        results.append({
            'dataset': file_name,
            'horizon': horizon,
            'rmse': rmse_per_window.mean(),
            'mae': mae_per_window.mean(),
            # Spread of the error metrics themselves (population std), not of
            # the raw predictions.
            'std_dev_rmse': rmse_per_window.std(ddof=0),
            'std_dev_mae': mae_per_window.std(ddof=0),
        })

    return results

# Evaluate every dataset and collect one row per (dataset, horizon).
all_results = []
for dataset in datasets:
    all_results.extend(process_dataset(dataset, horizons))

# Convert results to DataFrame
results_df = pd.DataFrame(all_results)
print(results_df)

# Save under a model-specific name: the ARIMA and Naive cells of this notebook
# also save a summary and would otherwise overwrite these results.
results_df.to_csv('results_summary_gbr.csv', index=False)

## ARIMA

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from statsforecast.models import AutoARIMA
from statsforecast import StatsForecast
from utilsforecast.losses import rmse, mae

# Benchmark CSV files, in the order in which they are evaluated.
datasets = [
    'exchange_rate.csv',
    'ETTh1.csv',
    'ETTm1.csv',
    'weather.csv',
    'electricity.csv',
    'traffic.csv',
]

# Forecast horizons (number of future steps) to cross-validate.
horizons = [96, 192, 336, 720]

# Frequency string of each file, handed to the forecaster as `freq`.
frequency_mapping = dict([
    ('electricity.csv', 'H'),
    ('traffic.csv', 'H'),
    ('exchange_rate.csv', 'D'),
    ('weather.csv', '10min'),
    ('ETTh1.csv', 'H'),
    ('ETTm1.csv', '15min'),
])

def process_dataset(file_name, horizons):
    """Cross-validate an AutoARIMA forecaster on one dataset.

    The CSV must contain a ``date`` column; every other column is treated as
    one time series. Only the last 5000 rows are kept, all series are min-max
    scaled, reshaped to the long ``unique_id`` / ``ds`` / ``y`` layout and
    evaluated with ``StatsForecast.cross_validation`` (5 non-overlapping
    windows, ``step_size == h``) for each horizon.

    Args:
        file_name: Path of the CSV file. It is also the key used to look up
            the sampling frequency in the module-level ``frequency_mapping``
            (``'D'`` is used when the file is not listed there).
        horizons: Iterable of forecast horizons (number of steps).

    Returns:
        A list with one dict per horizon containing ``dataset``, ``horizon``,
        the mean ``rmse`` / ``mae`` over all (series, window) pairs and the
        standard deviation of those per-window scores (``std_dev_rmse`` /
        ``std_dev_mae``).
    """
    model_name = 'AutoARIMA'

    data = pd.read_csv(file_name).iloc[-5000:, :]
    # Build a new frame instead of mutating a slice of the one read from disk.
    data = data.assign(ds=pd.to_datetime(data['date'])).drop(columns=['date'])

    # NOTE(review): the scaler is fitted on the full series, i.e. including the
    # rows later used as cross-validation targets.
    numeric_columns = data.columns.difference(['ds'])
    data[numeric_columns] = MinMaxScaler().fit_transform(data[numeric_columns])

    df_melted = data.melt(id_vars=['ds'], var_name='unique_id', value_name='y')
    freq = frequency_mapping.get(file_name, 'D')

    results = []
    for horizon in horizons:
        # The data is passed to cross_validation() only; the constructor-level
        # `df` argument is redundant and not accepted by newer releases.
        sf = StatsForecast(models=[AutoARIMA()], freq=freq)
        cv_df = sf.cross_validation(
            df=df_melted,
            h=horizon,
            step_size=horizon,
            n_windows=5,
        )

        # Depending on the statsforecast version `unique_id` is either the
        # index or a regular column; normalise to a column.
        if 'unique_id' not in cv_df.columns:
            cv_df = cv_df.reset_index()

        # One score per (series, cut-off) pair.
        cv_df['id_cutoff'] = cv_df['unique_id'].astype(str) + '_' + cv_df['cutoff'].astype(str)
        rmse_per_window = rmse(cv_df, models=[model_name], id_col='id_cutoff')[model_name]
        mae_per_window = mae(cv_df, models=[model_name], id_col='id_cutoff')[model_name]

        results.append({
            'dataset': file_name,
            'horizon': horizon,
            'rmse': rmse_per_window.mean(),
            'mae': mae_per_window.mean(),
            # Spread of the error metrics themselves (population std), not of
            # the raw predictions.
            'std_dev_rmse': rmse_per_window.std(ddof=0),
            'std_dev_mae': mae_per_window.std(ddof=0),
        })

    return results

# Evaluate every dataset and collect one row per (dataset, horizon).
all_results = []
for dataset in datasets:
    all_results.extend(process_dataset(dataset, horizons))

# Convert results to DataFrame
results_df = pd.DataFrame(all_results)
print(results_df)

# Save under a model-specific name: the GBR and Naive cells of this notebook
# also save a summary and would otherwise overwrite each other's results.
results_df.to_csv('results_summary_arima.csv', index=False)

## Naive

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from statsforecast.models import Naive
from statsforecast import StatsForecast
from utilsforecast.losses import rmse, mae

# Benchmark CSV files, in the order in which they are evaluated.
datasets = [
    'exchange_rate.csv',
    'ETTh1.csv',
    'ETTm1.csv',
    'electricity.csv',
    'traffic.csv',
    'weather.csv',
]

# Forecast horizons (number of future steps) to cross-validate.
horizons = [96, 192, 336, 720]

# Frequency string of each file, handed to the forecaster as `freq`.
frequency_mapping = dict([
    ('electricity.csv', 'H'),
    ('traffic.csv', 'H'),
    ('exchange_rate.csv', 'D'),
    ('weather.csv', '10min'),
    ('ETTh1.csv', 'H'),
    ('ETTm1.csv', '15min'),
])

def process_dataset(file_name, horizons):
    """Cross-validate the Naive (last value) forecaster on one dataset.

    The CSV must contain a ``date`` column; every other column is treated as
    one time series. All series are min-max scaled, reshaped to the long
    ``unique_id`` / ``ds`` / ``y`` layout and evaluated with
    ``StatsForecast.cross_validation`` (10 non-overlapping windows,
    ``step_size == h``) for each horizon.

    Args:
        file_name: Path of the CSV file. It is also the key used to look up
            the sampling frequency in the module-level ``frequency_mapping``
            (``'D'`` is used when the file is not listed there).
        horizons: Iterable of forecast horizons (number of steps).

    Returns:
        A list with one dict per horizon containing ``dataset``, ``horizon``,
        the mean ``rmse`` / ``mae`` over all (series, window) pairs and the
        standard deviation of those per-window scores (``std_dev_rmse`` /
        ``std_dev_mae``).
    """
    model_name = 'Naive'

    data = pd.read_csv(file_name)
    if file_name in ('electricity.csv', 'traffic.csv'):
        # These two files are truncated to their last 5000 rows.
        data = data.iloc[-5000:, :]
    # Build a new frame instead of mutating a slice of the one read from disk.
    data = data.assign(ds=pd.to_datetime(data['date'])).drop(columns=['date'])

    # NOTE(review): the scaler is fitted on the full series, i.e. including the
    # rows later used as cross-validation targets.
    numeric_columns = data.columns.difference(['ds'])
    data[numeric_columns] = MinMaxScaler().fit_transform(data[numeric_columns])

    df_melted = data.melt(id_vars=['ds'], var_name='unique_id', value_name='y')
    freq = frequency_mapping.get(file_name, 'D')

    results = []
    for horizon in horizons:
        # The data is passed to cross_validation() only; the constructor-level
        # `df` argument is redundant and not accepted by newer releases.
        sf = StatsForecast(models=[Naive()], freq=freq)
        cv_df = sf.cross_validation(
            df=df_melted,
            h=horizon,
            step_size=horizon,
            n_windows=10,
        )

        # Depending on the statsforecast version `unique_id` is either the
        # index or a regular column; normalise to a column.
        if 'unique_id' not in cv_df.columns:
            cv_df = cv_df.reset_index()

        # One score per (series, cut-off) pair.
        cv_df['id_cutoff'] = cv_df['unique_id'].astype(str) + '_' + cv_df['cutoff'].astype(str)
        rmse_per_window = rmse(cv_df, models=[model_name], id_col='id_cutoff')[model_name]
        mae_per_window = mae(cv_df, models=[model_name], id_col='id_cutoff')[model_name]

        results.append({
            'dataset': file_name,
            'horizon': horizon,
            'rmse': rmse_per_window.mean(),
            'mae': mae_per_window.mean(),
            # Spread of the error metrics themselves (population std), not of
            # the raw predictions.
            'std_dev_rmse': rmse_per_window.std(ddof=0),
            'std_dev_mae': mae_per_window.std(ddof=0),
        })

    return results

# Evaluate every dataset and collect one row per (dataset, horizon).
all_results = []
for dataset in datasets:
    all_results.extend(process_dataset(dataset, horizons))

# Convert results to DataFrame
results_df = pd.DataFrame(all_results)
print(results_df)

# Save under a model-specific name: the GBR and ARIMA cells of this notebook
# also save a summary and would otherwise be overwritten by this one.
results_df.to_csv('results_summary_naive.csv', index=False)

# Prophet


In [None]:
import logging

# Suppress debug and info messages from Prophet
logging.getLogger('prophet').setLevel(logging.WARNING)
logging.getLogger('cmdstanpy').setLevel(logging.WARNING)

# (dataset name, frequency string passed to Prophet's make_future_dataframe).
# NOTE(review): the full list below gives electricity a '15T' frequency while
# the baseline cells above map electricity.csv to 'H' - confirm before
# re-enabling it.
# datasets = [('electricity','15T'), ('traffic','H'), ('exchange_rate','D'),('weather','10T'), ('ETTh1','H'), ('ETTm1','15T')]
datasets = [('ETTm1','15T')]

for dataset, fr in datasets:
    df = pd.read_csv(f'/content/drive/MyDrive/sl project/dataset/{dataset}.csv', index_col='date', parse_dates=True)

    # Min-max normalizer
    scaler = MinMaxScaler()
    df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)

    evaluation_results = pd.DataFrame(index=pred_length, columns=['mae', 'rmse'])

    for horizon in pred_length:
        tot_mae = 0
        tot_rmse = 0

        # One Prophet model per column; the last `horizon` rows are held out.
        for col in df.columns:
            ts = df[col].reset_index()
            ts.columns = ['ds', 'y']

            train_size = len(ts) - horizon
            train_ts = ts.iloc[:train_size]
            test_ts = ts.iloc[train_size:train_size + horizon]

            # Fit Prophet model
            model = Prophet()
            model.fit(train_ts)

            # Extend the training timeline by `horizon` steps and predict
            future = model.make_future_dataframe(periods=horizon, freq=fr)
            forecast = model.predict(future)

            # Keep only the forecast for the held-out rows
            y_pred = forecast['yhat'].iloc[-horizon:].values
            y_test = test_ts['y'].values

            # Compute MAE and RMSE. Distinct names are used so that the `mae`
            # and `rmse` functions imported from utilsforecast.losses earlier
            # in the notebook are not rebound to floats.
            col_mae = mean_absolute_error(y_test, y_pred)
            col_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

            tot_mae += col_mae
            tot_rmse += col_rmse

        # Average the per-column scores
        evaluation_results.loc[horizon, 'mae'] = tot_mae / len(df.columns)
        evaluation_results.loc[horizon, 'rmse'] = tot_rmse / len(df.columns)

    print(dataset)
    print(evaluation_results)

# LTSF Linear 

Implementation of the linear model for Long-term Time Series Forecasting (LTSF-Linear).

## Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import torch
import torch.nn as nn
import torch.optim as optim

# Seed every random number generator used below for reproducibility.
# torch must be seeded as well: it draws the initial weights of nn.Linear.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Folder that holds the benchmark CSV files.
dataset_path = '/kaggle/input/sl-project/dataset/'

In [None]:
# Number of past time steps fed to the model.
lookback_window = 96

# Number of future time steps to predict; one model is trained per value.
prediction_lengths = [
    96,
    192,
    336,
    720,
]

# Names of all available datasets (CSV file names without extension).
datasets = [
    'weather',
    'exchange_rate',
    'traffic',
    'electricity',
    'ETTh1',
    'ETTm1',
]

## The LTSF-Linear model

In [None]:
# LTSF-Linear expressed as a torch module.
# Taken from https://github.com/cure-lab/LTSF-Linear/

class LTSFLinear(nn.Module):
    """Single linear layer mapping the look-back window to the forecast window.

    The layer acts along the time axis; the same weights are applied to every
    channel of the input.
    """

    def __init__(self, loopback_window, prediction_length):
        super().__init__()
        self.loopback_window = loopback_window
        self.prediction_length = prediction_length

        # The whole model: loopback_window inputs -> prediction_length outputs
        self.Linear = nn.Linear(loopback_window, prediction_length)

    def forward(self, x):
        """Map a 3-D tensor [d0, loopback_window, d2] to [d0, prediction_length, d2]."""
        # nn.Linear works on the last axis, so move time there and back again
        time_last = x.permute(0, 2, 1)
        projected = self.Linear(time_last)
        return projected.permute(0, 2, 1)

In [None]:
# Function to convert the original dataframe to numpy arrays for PyTorch model training

# Each element of X contains a sequence of data points of length lookback_window
# and the corresponding element of y contains the following sequence of length
# prediction_length, i.e. the data points to be predicted


def build_designMatrixAndPrediction(data, lookback_window, prediction_length):
    """Cut a series into (input window, target window) pairs with stride 1.

    Args:
        data: Sequence of observations ordered in time along the first axis
            (DataFrame, Series, ndarray or list).
        lookback_window: Number of consecutive rows in each input window.
        prediction_length: Number of rows that follow each input window and
            form its target.

    Returns:
        Tuple ``(X, y)`` of numpy arrays. ``X[i]`` holds rows
        ``i .. i+lookback_window-1`` and ``y[i]`` the ``prediction_length``
        rows right after them. Both are empty when ``data`` is shorter than
        ``lookback_window + prediction_length``.
    """
    # Convert once: slicing an ndarray is far cheaper than slicing a DataFrame
    # per window and converting every slice afterwards.
    values = np.asarray(data)

    n_windows = len(values) - lookback_window - prediction_length + 1

    X = []
    y = []
    for start in range(n_windows):
        split = start + lookback_window
        X.append(values[start:split])
        y.append(values[split:split + prediction_length])

    return np.array(X), np.array(y)

In [None]:
# Function to train the model with MSE Loss and Adam optimizer
# This is a modified version of the function provided in the github repository

def train_model(model, X_train, y_train, epochs, batch_size):
    """Train ``model`` in place with mini-batch Adam on an MSE objective.

    Batches are taken in order (no shuffling). Every 10th epoch the loss of
    the last batch of that epoch is printed.

    Args:
        model: torch module to optimise; its parameters are updated in place.
        X_train: Tensor of inputs, batched along the first axis.
        y_train: Tensor of targets aligned with ``X_train``.
        epochs: Number of passes over the training data.
        batch_size: Number of samples per optimisation step.

    Returns:
        None.
    """
    # MSE Loss
    criterion = nn.MSELoss()

    # Optimizer (default Adam hyper-parameters)
    optimizer = optim.Adam(model.parameters())

    # test_model() leaves a module in eval mode; make sure we train in train mode.
    model.train()

    for epoch in range(epochs):
        # Stays None when the training set is empty, so the log line below
        # cannot hit an unbound variable.
        loss = None

        for start in range(0, len(X_train), batch_size):
            batch_X = X_train[start:start + batch_size]
            batch_y = y_train[start:start + batch_size]

            outputs = model(batch_X)

            # Evaluate MSE loss and backpropagate
            loss = criterion(outputs, batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Log the loss every 10 epochs
        if loss is not None and (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}] - Loss: {loss.item():.4f}')

In [None]:
# Function to test the model
# It outputs the MAE and RMSE of the model on the test data given as input

def test_model(model, X_test, y_test, scaler=None):
    """Evaluate ``model`` on held-out windows.

    Args:
        model: Trained torch module; it is switched to eval mode.
        X_test: Tensor of input windows.
        y_test: Tensor of target windows aligned with ``X_test``.
        scaler: Optional and unused. It is accepted because the training
            loops in this notebook pass the fitted scaler as a fourth
            argument; no inverse transform is applied, so the metrics are
            computed on the data exactly as passed in.

    Returns:
        Tuple ``(mae, rmse, y_test_2d, y_pred_2d)`` where the last two items
        are the targets and predictions flattened to two dimensions (the last
        axis is kept).
    """
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test)

    # Flatten every leading axis so the metrics receive 2-D inputs
    y_test_inv = y_test.reshape(-1, y_test.shape[-1])
    y_pred_inv = y_pred.reshape(-1, y_pred.shape[-1])

    mae = mean_absolute_error(y_test_inv, y_pred_inv)
    rmse = np.sqrt(mean_squared_error(y_test_inv, y_pred_inv))

    return mae, rmse, y_test_inv, y_pred_inv

## Univariate TSs

We first train the model on the datasets that we treat as univariate time series.

In [None]:
# Univariate TS datasets
# For each of these, the training loop below fits a single LTSFLinear model on
# all columns of the file at once.

datasets_uni = ['exchange_rate', 'traffic', 'electricity']

In [None]:
# For each dataset, train the model for each prediction length
# and store the results in a dictionary that will later be converted into a dataframe

results = {}

for dataset in datasets_uni:

    print("\nDataset: ", dataset)

    df = pd.read_csv(dataset_path + dataset + '.csv', index_col='date', parse_dates=True)

    # Scaling all the columns to the range [0, 1]
    scaler = MinMaxScaler()
    df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)

    results[dataset] = {}

    for pred_len in prediction_lengths:

        print(70 * "-")
        print(f"\nTraining LTSF-Linear for prediction length: {pred_len}")

        # Converting the dataframe to numpy arrays
        # such that the model can be trained on it
        X, y = build_designMatrixAndPrediction(df.to_numpy(), lookback_window, pred_len)

        # Splitting the windows chronologically: first 80% train, last 20% test
        train_size = int(len(X) * 0.8)
        X_train, X_test = torch.FloatTensor(X[:train_size]), torch.FloatTensor(X[train_size:])
        y_train, y_test = torch.FloatTensor(y[:train_size]), torch.FloatTensor(y[train_size:])

        # Initializing and training the model
        modelLSTF = LTSFLinear(lookback_window, prediction_length = pred_len)

        train_model(modelLSTF, X_train, y_train, epochs = 100, batch_size = 32)

        # Testing the model and saving its metrics.
        # test_model takes (model, X_test, y_test); the metrics are computed on
        # the min-max scaled values, so the scaler is not passed.
        mae, rmse, y_test_inv, y_pred_inv = test_model(modelLSTF, X_test, y_test)

        results[dataset][pred_len] = {
            'MAE': mae,
            'RMSE': rmse
        }

In [None]:
# Flatten the nested {dataset: {prediction length: metrics}} dictionary into a
# table indexed by (Dataset, Prediction Length)
result_keys = [
    (name, horizon)
    for name, per_horizon in results.items()
    for horizon in per_horizon
]
multi_index = pd.MultiIndex.from_tuples(result_keys, names=['Dataset', 'Prediction Length'])

metric_rows = [
    (results[name][horizon]['MAE'], results[name][horizon]['RMSE'])
    for name, horizon in result_keys
]
df_results = pd.DataFrame(metric_rows, index=multi_index, columns=['MAE', 'RMSE'])

df_results

Unnamed: 0_level_0,Unnamed: 1_level_0,MAE,RMSE
Dataset,Prediction Length,Unnamed: 2_level_1,Unnamed: 3_level_1
exchange_rate,96,0.044368,0.059844
exchange_rate,192,0.054688,0.073013
exchange_rate,336,0.079542,0.105306
exchange_rate,720,0.185081,0.224984
traffic,96,0.040704,0.07862
traffic,192,0.037695,0.075344
traffic,336,0.039054,0.075822
electricity,96,0.05444,0.080014
electricity,192,0.058545,0.082661
electricity,336,0.056093,0.08194


In [None]:
# Persist the metrics of the first group of datasets (datasets_uni).
# The (Dataset, Prediction Length) index is written along with the MAE/RMSE columns.
df_results.to_csv('results_uni.csv')

## Multivariate TSs

Next, we train the model on the datasets that contain multivariate time series.

Each file is a single time series that has multiple features to be predicted.

In the same way that they do in the article, we will predict each feature separately using the LTSF-Linear model.

Since the datasets are very long, we keep only the last 10000 data points so that the computation time does not explode for long prediction windows.

In [None]:
# Multivariate TS datasets
# For each of these, the training loop below fits one LTSFLinear model per
# column and averages the per-column metrics.

datasets_multi = ['weather', 'ETTh1', 'ETTm1']

In [None]:
# For each dataset, train the model for each prediction length
# and store the results in a dictionary that will later be converted into a dataframe

results_multi = {}

for dataset in datasets_multi:

    print("\nDataset: ", dataset)

    df = pd.read_csv(dataset_path + dataset + '.csv', index_col='date', parse_dates=True)

    # Taking the last 10000 datapoints of the time series
    df = df.iloc[-10000:,:]

    # Scaling all the columns to the range [0, 1]
    scaler = MinMaxScaler()
    df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)

    results_multi[dataset] = {}

    for pred_len in prediction_lengths:

        print(70 * "-")
        print(f"\nTraining LTSF-Linear for prediction length: {pred_len}")


        # For each prediction length, we train one model per column of the dataset
        # and then take the average of the MAE and RMSE over all the columns
        maes = []
        rmses = []

        for col in df.columns:
            print(70 * "-")
            print(f"\nTraining for column: {col}")

            # Converting the single column (kept 2-D) to numpy arrays
            # such that the model can be trained on it
            X, y = build_designMatrixAndPrediction(df[[col]].to_numpy(), lookback_window, pred_len)

            # Split the windows chronologically: first 80% train, last 20% test
            train_size = int(len(X) * 0.8)
            X_train, X_test = torch.FloatTensor(X[:train_size]), torch.FloatTensor(X[train_size:])
            y_train, y_test = torch.FloatTensor(y[:train_size]), torch.FloatTensor(y[train_size:])

            # Initializing and training the model
            modelLSTF = LTSFLinear(lookback_window, prediction_length = pred_len)

            train_model(modelLSTF, X_train, y_train, epochs = 50, batch_size = 32)

            # Testing the model and saving its metrics.
            # test_model takes (model, X_test, y_test); the metrics are computed
            # on the min-max scaled values, so the scaler is not passed.
            mae, rmse, y_test_inv, y_pred_inv = test_model(modelLSTF, X_test, y_test)
            maes.append(mae)
            rmses.append(rmse)

        # Saving the average of the MAE and RMSE of all the columns
        # in the dictionary
        results_multi[dataset][pred_len] = {
            'MAE': np.mean(maes),
            'RMSE': np.mean(rmses)
        }


In [None]:
# Flatten the nested {dataset: {prediction length: metrics}} dictionary into a
# table indexed by (Dataset, Prediction Length)
result_keys = [
    (name, horizon)
    for name, per_horizon in results_multi.items()
    for horizon in per_horizon
]
multi_index = pd.MultiIndex.from_tuples(result_keys, names=['Dataset', 'Prediction Length'])

metric_rows = [
    (results_multi[name][horizon]['MAE'], results_multi[name][horizon]['RMSE'])
    for name, horizon in result_keys
]
df_results_multi = pd.DataFrame(metric_rows, index=multi_index, columns=['MAE', 'RMSE'])

df_results_multi

Unnamed: 0_level_0,Unnamed: 1_level_0,MAE,RMSE
Dataset,Prediction Length,Unnamed: 2_level_1,Unnamed: 3_level_1
weather,96,0.089227,0.120001
weather,192,0.108004,0.144456
weather,336,0.143208,0.183607
weather,720,0.200576,0.245031
ETTh1,96,0.076808,0.104071
ETTh1,192,0.096826,0.125046
ETTh1,336,0.109616,0.139671
ETTh1,720,0.12048,0.15515
ETTm1,96,0.073092,0.096906
ETTm1,192,0.083562,0.107376


In [None]:
# Persist the per-column-averaged metrics of the multivariate datasets.
# The (Dataset, Prediction Length) index is written along with the MAE/RMSE columns.
df_results_multi.to_csv('results_multi.csv')

## Finally merge the two dataframes with the metrics

In [None]:
# Stack the two metric tables row-wise: rows of df_results first, then rows of
# df_results_multi; both share the same index levels and columns.
df_results_final = pd.concat([df_results, df_results_multi])
df_results_final

Unnamed: 0_level_0,Unnamed: 1_level_0,MAE,RMSE
Dataset,Prediction Length,Unnamed: 2_level_1,Unnamed: 3_level_1
electricity,96,0.05444,0.080014
electricity,192,0.058545,0.082661
electricity,336,0.056093,0.08194
electricity,720,0.05882,0.084733
exchange_rate,96,0.044368,0.059844
exchange_rate,192,0.054688,0.073013
exchange_rate,336,0.079542,0.105306
exchange_rate,720,0.185081,0.224984
traffic,96,0.040704,0.07862
traffic,192,0.037695,0.075344


In [None]:
# Persist the combined LTSF-Linear metrics table (index included) as CSV
df_results_final.to_csv('results_LTSF_Linear.csv')

# FreTS

## Data Loader

In [None]:
import os
import numpy as np
import pandas as pd
import random
import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from models import FreTS
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')


class Dataset_ETT_hour(Dataset):
    """Sliding-window dataset over an hourly ETT-style CSV file.

    The file must contain a ``date`` column. Rows are split by position:
    the first ``12*30*24`` rows are the training split, the following
    ``4*30*24`` rows the validation split and the next ``4*30*24`` rows the
    test split; validation and test start ``seq_len`` rows earlier so that
    their first window has a full input.

    Args:
        root_path: Directory containing the CSV file.
        flag: One of ``'train'``, ``'val'`` or ``'test'``.
        size: ``[seq_len, label_len, pred_len]``; defaults to
            ``[384, 96, 96]`` when ``None``.
        features: ``'M'`` / ``'MS'`` use every column after the first one,
            ``'S'`` uses only ``target``.
        data_path: CSV file name inside ``root_path``.
        target: Name of the target column (used when ``features == 'S'``).
        scale: When true, values are min-max scaled with a scaler fitted on
            the training rows only.
        timeenc: ``0`` builds month/day/weekday/hour integer features; ``1``
            delegates to ``time_features``.
        freq: Frequency string forwarded to ``time_features``.
        train_only: Accepted but not used by this class.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h', train_only=False):
        # size [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV, scale the values and build the time-stamp features."""
        # Kept for attribute compatibility; this class scales with self.mms.
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path,
                                          self.data_path))

        border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len]
        border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features == 'M' or self.features == 'MS':
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(f"features must be 'M', 'MS' or 'S', got {self.features!r}")

        # Keep the fitted scaler on the instance so inverse_transform() can
        # undo exactly this scaling.
        self.mms = MinMaxScaler(feature_range=(0, 1))
        if self.scale:
            train_data = df_data[border1s[0]:border2s[0]]
            self.mms.fit(train_data.values)
            data = self.mms.transform(df_data.values)
        else:
            data = df_data.values

        dates = pd.to_datetime(df_raw['date'].iloc[border1:border2])
        if self.timeenc == 0:
            # Column order: month, day, weekday, hour
            stamp = pd.DataFrame({
                'month': dates.dt.month,
                'day': dates.dt.day,
                'weekday': dates.dt.weekday,
                'hour': dates.dt.hour,
            })
            data_stamp = stamp.to_numpy(dtype='int64')
        elif self.timeenc == 1:
            # NOTE(review): `time_features` is not defined or imported in the
            # visible part of this notebook - confirm it is available.
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)
        else:
            raise ValueError(f"timeenc must be 0 or 1, got {self.timeenc!r}")

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_x`` covers ``seq_len`` rows starting at ``index``; ``seq_y``
        starts ``label_len`` rows before the end of ``seq_x`` and spans
        ``label_len + pred_len`` rows. The ``*_mark`` items are the matching
        time-stamp features.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of complete (input, target) windows in this split."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Map scaled values back to the original units.

        Uses the min-max scaler fitted in ``__read_data__``; ``data`` must be
        2-D with as many columns as the scaler was fitted on. When the dataset
        was built with ``scale=False`` the values are returned unchanged.
        Torch tensors are converted to numpy arrays first.
        """
        if hasattr(data, 'cpu'):
            data = data.detach().cpu().numpy()
        if not self.scale:
            return data
        return self.mms.inverse_transform(data)


class Dataset_ETT_minute(Dataset):
    """Sliding-window dataset over a 15-minute ETT-style CSV file.

    The file must contain a ``date`` column. Rows are split by position:
    the first ``12*30*24*4`` rows are the training split, the following
    ``4*30*24*4`` rows the validation split and the next ``4*30*24*4`` rows
    the test split; validation and test start ``seq_len`` rows earlier so
    that their first window has a full input.

    Args:
        root_path: Directory containing the CSV file.
        flag: One of ``'train'``, ``'val'`` or ``'test'``.
        size: ``[seq_len, label_len, pred_len]``; defaults to
            ``[384, 96, 96]`` when ``None``.
        features: ``'M'`` / ``'MS'`` use every column after the first one,
            ``'S'`` uses only ``target``.
        data_path: CSV file name inside ``root_path``.
        target: Name of the target column (used when ``features == 'S'``).
        scale: When true, values are standardised with a ``StandardScaler``
            fitted on the training rows only.
        timeenc: ``0`` builds month/day/weekday/hour/quarter-hour integer
            features; ``1`` delegates to ``time_features``.
        freq: Frequency string forwarded to ``time_features``.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTm1.csv',
                 target='OT', scale=True, timeenc=0, freq='t'):
        # size [seq_len, label_len, pred_len]
        # info
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV, scale the values and build the time-stamp features."""
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path,
                                          self.data_path))

        border1s = [0, 12 * 30 * 24 * 4 - self.seq_len, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len]
        border2s = [12 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features == 'M' or self.features == 'MS':
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]
        else:
            raise ValueError(f"features must be 'M', 'MS' or 'S', got {self.features!r}")

        if self.scale:
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        dates = pd.to_datetime(df_raw['date'].iloc[border1:border2])
        if self.timeenc == 0:
            # Column order: month, day, weekday, hour, minute (as a
            # quarter-of-hour index 0..3)
            stamp = pd.DataFrame({
                'month': dates.dt.month,
                'day': dates.dt.day,
                'weekday': dates.dt.weekday,
                'hour': dates.dt.hour,
                'minute': dates.dt.minute // 15,
            })
            data_stamp = stamp.to_numpy(dtype='int64')
        elif self.timeenc == 1:
            # NOTE(review): `time_features` is not defined or imported in the
            # visible part of this notebook - confirm it is available.
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)
        else:
            raise ValueError(f"timeenc must be 0 or 1, got {self.timeenc!r}")

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_x`` covers ``seq_len`` rows starting at ``index``; ``seq_y``
        starts ``label_len`` rows before the end of ``seq_x`` and spans
        ``label_len + pred_len`` rows. The ``*_mark`` items are the matching
        time-stamp features.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of complete (input, target) windows in this split."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Map standardised values back to the original units.

        Uses the scaler fitted in ``__read_data__``. When the dataset was
        built with ``scale=False`` the scaler was never fitted, so the values
        are returned unchanged. Torch tensors are converted to numpy arrays
        first.
        """
        if hasattr(data, 'cpu'):
            data = data.detach().cpu().numpy()
        if not self.scale:
            return data
        return self.scaler.inverse_transform(data)

class Dataset_Covid(Dataset):
    """Sliding-window dataset with a ratio-based train/val/test split.

    The file must contain a ``date`` column; rows with missing values are
    dropped. The first 60% of the rows form the training split (100% when
    ``train_only`` is true), the last 20% the test split and the rows in
    between the validation split; validation and test start ``seq_len`` rows
    earlier so that their first window has a full input.

    Args:
        root_path: Directory containing the CSV file.
        flag: One of ``'train'``, ``'val'`` or ``'test'``.
        size: ``[seq_len, label_len, pred_len]``; defaults to
            ``[384, 96, 96]`` when ``None``.
        features: ``'M'`` / ``'MS'`` use every column except ``date``,
            ``'S'`` uses only ``target``.
        data_path: CSV file name inside ``root_path``.
        target: Name of the target column (used when ``features == 'S'``).
        scale: When true, values are min-max scaled with a scaler fitted on
            the training rows only.
        timeenc: ``0`` builds month/day/weekday/hour integer features; ``1``
            delegates to ``time_features``.
        freq: Frequency string forwarded to ``time_features``.
        train_only: When true, the training split spans the whole file.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h', train_only=False):
        # size [seq_len, label_len, pred_len]
        # info
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]
        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq
        self.train_only = train_only

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV, scale the values and build the time-stamp features."""
        # Kept for attribute compatibility; this class scales with self.mms.
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path,
                                          self.data_path))
        df_raw = df_raw.dropna()

        if self.features not in ('M', 'MS', 'S'):
            raise ValueError(f"features must be 'M', 'MS' or 'S', got {self.features!r}")

        cols = list(df_raw.columns)
        if self.features == 'S':
            cols.remove(self.target)
        cols.remove('date')

        num_train = int(len(df_raw) * (0.6 if not self.train_only else 1))
        num_test = int(len(df_raw) * 0.2)
        num_vali = len(df_raw) - num_train - num_test
        border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
        border2s = [num_train, num_train + num_vali, len(df_raw)]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features == 'M' or self.features == 'MS':
            df_raw = df_raw[['date'] + cols]
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        else:
            # 'S': put the target last and use it as the only data column
            df_raw = df_raw[['date'] + cols + [self.target]]
            df_data = df_raw[[self.target]]

        ## min max scaler
        # Keep the fitted scaler on the instance so inverse_transform() can
        # undo exactly this scaling.
        self.mms = MinMaxScaler(feature_range=(0, 1))
        if self.scale:
            train_data = df_data[border1s[0]:border2s[0]]
            self.mms.fit(train_data.values)
            data = self.mms.transform(df_data.values)
        else:
            data = df_data.values

        dates = pd.to_datetime(df_raw['date'].iloc[border1:border2])
        if self.timeenc == 0:
            # Column order: month, day, weekday, hour
            stamp = pd.DataFrame({
                'month': dates.dt.month,
                'day': dates.dt.day,
                'weekday': dates.dt.weekday,
                'hour': dates.dt.hour,
            })
            data_stamp = stamp.to_numpy(dtype='int64')
        elif self.timeenc == 1:
            # NOTE(review): `time_features` is not defined or imported in the
            # visible part of this notebook - confirm it is available.
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)
        else:
            raise ValueError(f"timeenc must be 0 or 1, got {self.timeenc!r}")

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_x`` covers ``seq_len`` rows starting at ``index``; ``seq_y``
        starts ``label_len`` rows before the end of ``seq_x`` and spans
        ``label_len + pred_len`` rows. The ``*_mark`` items are the matching
        time-stamp features.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of complete (input, target) windows in this split."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Map scaled values back to the original units.

        Uses the min-max scaler fitted in ``__read_data__``; ``data`` must be
        2-D with as many columns as the scaler was fitted on. When the dataset
        was built with ``scale=False`` the values are returned unchanged.
        Torch tensors are converted to numpy arrays first.
        """
        if hasattr(data, 'cpu'):
            data = data.detach().cpu().numpy()
        if not self.scale:
            return data
        return self.mms.inverse_transform(data)

#min max scaler
class Dataset_Custom_(Dataset):
    """Sliding-window forecasting dataset read from a CSV file, min-max scaled.

    The CSV must contain a ``date`` column. Rows are split chronologically:
    the first 70% form the training range (100% when ``train_only``), the last
    10% the test range and the remainder the validation range. The validation
    and test ranges start ``seq_len`` rows early so that their first sample
    has a full input window.

    Args:
        root_path: directory containing the CSV file.
        flag: one of ``'train'``, ``'val'``, ``'test'``.
        size: ``[seq_len, label_len, pred_len]``; defaults to ``[384, 96, 96]``.
        features: ``'S'`` keeps only ``target``; ``'M'``/``'MS'`` keep every
            non-date column.
        data_path: CSV file name inside ``root_path``.
        target: name of the target column.
        scale: when true, values are min-max scaled to [0, 1] with statistics
            taken from the training range only.
        timeenc: ``0`` for integer calendar fields, ``1`` for ``time_features``.
        freq: frequency string forwarded to ``time_features``.
        train_only: use the whole file as the training range.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h', train_only=False):
        # size is [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq
        self.train_only = train_only

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV, scale the values and build the time-stamp features."""
        # The scaler actually used for the data is kept on the instance so that
        # inverse_transform can undo exactly what was applied here.
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        df_raw = pd.read_csv(os.path.join(self.root_path,
                                          self.data_path))
        df_raw = df_raw.dropna()

        cols = list(df_raw.columns)
        if self.features == 'S':
            cols.remove(self.target)
        cols.remove('date')

        num_train = int(len(df_raw) * (0.7 if not self.train_only else 1))
        num_test = int(len(df_raw) * 0.1)
        num_vali = len(df_raw) - num_train - num_test
        border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
        border2s = [num_train, num_train + num_vali, len(df_raw)]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features == 'M' or self.features == 'MS':
            df_raw = df_raw[['date'] + cols]
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_raw = df_raw[['date'] + cols + [self.target]]
            df_data = df_raw[[self.target]]
        else:
            raise ValueError("features must be 'M', 'MS' or 'S', got {!r}".format(self.features))

        if self.scale:
            # Fit on the training range only to avoid leaking val/test statistics.
            train_data = df_data.iloc[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        # .copy() so the column assignments below do not write into a view of df_raw.
        df_stamp = df_raw[['date']].iloc[border1:border2].copy()
        df_stamp['date'] = pd.to_datetime(df_stamp['date'])
        if self.timeenc == 0:
            df_stamp['month'] = df_stamp['date'].dt.month
            df_stamp['day'] = df_stamp['date'].dt.day
            df_stamp['weekday'] = df_stamp['date'].dt.weekday
            df_stamp['hour'] = df_stamp['date'].dt.hour
            # Keyword form: the positional ``axis`` argument is rejected by pandas >= 2.0.
            data_stamp = df_stamp.drop(columns=['date']).values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)
        else:
            raise ValueError("timeenc must be 0 or 1, got {!r}".format(self.timeenc))

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``."""
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of complete (input window + forecast horizon) samples."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Map scaled values back to the original units.

        Uses the scaler fitted on the training range in ``__read_data__``.
        Accepts a torch tensor (moved to CPU and converted to NumPy) or an
        array-like; returns the input values unchanged when ``scale`` is false.
        """
        if hasattr(data, 'detach'):
            data = data.detach().cpu().numpy()
        if not self.scale:
            return data
        return self.scaler.inverse_transform(data)

class Dataset_Custom(Dataset):
    """Sliding-window forecasting dataset read from a CSV file, optionally standardized.

    The CSV must contain a ``date`` column. Rows are split chronologically:
    the first 70% form the training range (100% when ``train_only``), the last
    10% the test range and the remainder the validation range. The validation
    and test ranges start ``seq_len`` rows early so that their first sample
    has a full input window.

    Args:
        root_path: directory containing the CSV file.
        flag: one of ``'train'``, ``'val'``, ``'test'``.
        size: ``[seq_len, label_len, pred_len]``; defaults to ``[384, 96, 96]``.
        features: ``'S'`` keeps only ``target``; ``'M'``/``'MS'`` keep every
            non-date column.
        data_path: CSV file name inside ``root_path``.
        target: name of the target column.
        scale: when true, values are transformed with ``self.scaler`` fitted on
            the training range only (off by default).
        timeenc: ``0`` for integer calendar fields, ``1`` for ``time_features``.
        freq: frequency string forwarded to ``time_features``.
        train_only: use the whole file as the training range.
    """

    def __init__(self, root_path, flag='train', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=False, timeenc=0, freq='h', train_only=False):
        # size is [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq
        self.train_only = train_only

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV, optionally scale the values and build time-stamp features."""
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path,
                                          self.data_path))
        df_raw = df_raw.dropna()

        cols = list(df_raw.columns)
        if self.features == 'S':
            cols.remove(self.target)
        cols.remove('date')

        num_train = int(len(df_raw) * (0.7 if not self.train_only else 1))
        num_test = int(len(df_raw) * 0.1)
        num_vali = len(df_raw) - num_train - num_test
        border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
        border2s = [num_train, num_train + num_vali, len(df_raw)]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        if self.features == 'M' or self.features == 'MS':
            df_raw = df_raw[['date'] + cols]
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_raw = df_raw[['date'] + cols + [self.target]]
            df_data = df_raw[[self.target]]
        else:
            raise ValueError("features must be 'M', 'MS' or 'S', got {!r}".format(self.features))

        if self.scale:
            # Fit on the training range only to avoid leaking val/test statistics.
            train_data = df_data.iloc[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        # .copy() so the column assignments below do not write into a view of df_raw.
        df_stamp = df_raw[['date']].iloc[border1:border2].copy()
        df_stamp['date'] = pd.to_datetime(df_stamp['date'])
        if self.timeenc == 0:
            df_stamp['month'] = df_stamp['date'].dt.month
            df_stamp['day'] = df_stamp['date'].dt.day
            df_stamp['weekday'] = df_stamp['date'].dt.weekday
            df_stamp['hour'] = df_stamp['date'].dt.hour
            # Keyword form: the positional ``axis`` argument is rejected by pandas >= 2.0.
            data_stamp = df_stamp.drop(columns=['date']).values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)
        else:
            raise ValueError("timeenc must be 0 or 1, got {!r}".format(self.timeenc))

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``."""
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of complete (input window + forecast horizon) samples."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Map scaled values back to the original units.

        When ``scale`` is false the data was never transformed (and the scaler
        was never fitted), so ``data`` is returned unchanged.
        """
        if not self.scale:
            return data
        return self.scaler.inverse_transform(data)

class Dataset_Pred(Dataset):
    """Dataset exposing the last ``seq_len`` rows of a CSV for out-of-sample prediction.

    The time-stamp features cover the input window plus ``pred_len`` future
    dates generated with ``pd.date_range`` at ``freq``; those dates are also
    stored in ``self.future_dates``.

    Args:
        root_path: directory containing the CSV file.
        flag: must be ``'pred'``.
        size: ``[seq_len, label_len, pred_len]``; defaults to ``[384, 96, 96]``.
        features: ``'S'`` keeps only ``target``; ``'M'``/``'MS'`` keep the
            selected columns.
        data_path: CSV file name inside ``root_path``.
        target: name of the target column.
        scale: when true, values are transformed with ``self.scaler`` fitted on
            the whole file.
        inverse: when true, ``data_y`` holds the unscaled values.
        timeenc: ``0`` for integer calendar fields, ``1`` for ``time_features``.
        freq: frequency string used for the future dates and ``time_features``.
        cols: optional list of data columns to use; presumably excludes
            ``'date'`` since it is not removed from a user-supplied list
            -- TODO confirm against callers.
        train_only: accepted for signature compatibility; not used here.
    """

    def __init__(self, root_path, flag='pred', size=None,
                 features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, inverse=False, timeenc=0, freq='15min', cols=None, train_only=False):
        # size is [seq_len, label_len, pred_len]
        if size is None:
            self.seq_len = 24 * 4 * 4
            self.label_len = 24 * 4
            self.pred_len = 24 * 4
        else:
            self.seq_len = size[0]
            self.label_len = size[1]
            self.pred_len = size[2]

        assert flag in ['pred']

        self.features = features
        self.target = target
        self.scale = scale
        self.inverse = inverse
        self.timeenc = timeenc
        self.freq = freq
        self.cols = cols
        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV, optionally scale it and build stamps for past + future dates."""
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path,
                                          self.data_path))

        if self.cols:
            cols = self.cols.copy()
        else:
            cols = list(df_raw.columns)
            self.cols = cols.copy()
            cols.remove('date')
        if self.features == 'S':
            cols.remove(self.target)
        # Only the trailing seq_len rows are used as model input.
        border1 = len(df_raw) - self.seq_len
        border2 = len(df_raw)

        if self.features == 'M' or self.features == 'MS':
            df_raw = df_raw[['date'] + cols]
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_raw = df_raw[['date'] + cols + [self.target]]
            df_data = df_raw[[self.target]]
        else:
            raise ValueError("features must be 'M', 'MS' or 'S', got {!r}".format(self.features))

        if self.scale:
            self.scaler.fit(df_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        # .copy() so the column assignment below does not write into a view of df_raw.
        tmp_stamp = df_raw[['date']].iloc[border1:border2].copy()
        tmp_stamp['date'] = pd.to_datetime(tmp_stamp['date'])
        # periods is pred_len + 1 because the first generated date repeats the last observed one.
        pred_dates = pd.date_range(tmp_stamp['date'].values[-1], periods=self.pred_len + 1, freq=self.freq)

        all_dates = pd.DatetimeIndex(tmp_stamp['date'].values).append(pred_dates[1:])
        df_stamp = pd.DataFrame({'date': all_dates})
        self.future_dates = list(pred_dates[1:])
        if self.timeenc == 0:
            df_stamp['month'] = df_stamp['date'].dt.month
            df_stamp['day'] = df_stamp['date'].dt.day
            df_stamp['weekday'] = df_stamp['date'].dt.weekday
            df_stamp['hour'] = df_stamp['date'].dt.hour
            # Minutes are bucketed into quarter-hours.
            df_stamp['minute'] = df_stamp['date'].dt.minute // 15
            # Keyword form: the positional ``axis`` argument is rejected by pandas >= 2.0.
            data_stamp = df_stamp.drop(columns=['date']).values
        elif self.timeenc == 1:
            data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)
        else:
            raise ValueError("timeenc must be 0 or 1, got {!r}".format(self.timeenc))

        self.data_x = data[border1:border2]
        if self.inverse:
            self.data_y = df_data.values[border1:border2]
        else:
            self.data_y = data[border1:border2]
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_y`` only spans the ``label_len`` known rows; ``seq_y_mark`` also
        covers the ``pred_len`` future time stamps.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        if self.inverse:
            seq_y = self.data_x[r_begin:r_begin + self.label_len]
        else:
            seq_y = self.data_y[r_begin:r_begin + self.label_len]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of input windows available (no future targets are required)."""
        return len(self.data_x) - self.seq_len + 1

    def inverse_transform(self, data):
        """Map scaled values back to the original units.

        When ``scale`` is false the data was never transformed (and the scaler
        was never fitted), so ``data`` is returned unchanged.
        """
        if not self.scale:
            return data
        return self.scaler.inverse_transform(data)


## Data Provider

In [None]:
from torch.utils.data import DataLoader

# Registry mapping the ``args.data`` name to the Dataset class that loads it.
data_dict = dict(
    ETTh1=Dataset_Custom_,  # previously Dataset_ETT_hour
    ETTm1=Dataset_Custom_,
    traffic=Dataset_Custom,
    electricity=Dataset_Custom_,
    exchange=Dataset_Custom_,
    weather=Dataset_Custom_,
    covid=Dataset_Covid,
    ECG=Dataset_Custom_,
    metr=Dataset_Custom_,
)


def data_provider(args, flag):
    """Build the dataset and DataLoader for one split.

    Args:
        args: configuration object; reads ``data``, ``embed``, ``train_only``,
            ``batch_size``, ``freq``, ``root_path``, ``data_path``, ``seq_len``,
            ``label_len``, ``pred_len``, ``features``, ``target`` and
            ``num_workers``.
        flag: ``'test'`` and ``'pred'`` get unshuffled loaders; ``'pred'``
            additionally uses ``Dataset_Pred`` with batch size 1 and keeps the
            last batch. Any other value is shuffled and drops the last batch.

    Returns:
        tuple: ``(data_set, data_loader)``.
    """
    dataset_cls = data_dict[args.data]
    timeenc = 1 if args.embed == 'timeF' else 0
    train_only = args.train_only

    is_pred = flag == 'pred'
    # Only training/validation style splits are shuffled.
    shuffle_flag = not (flag == 'test' or is_pred)
    drop_last = not is_pred
    batch_size = 1 if is_pred else args.batch_size
    freq = args.freq
    if is_pred:
        dataset_cls = Dataset_Pred

    window = [args.seq_len, args.label_len, args.pred_len]
    data_set = dataset_cls(
        root_path=args.root_path,
        data_path=args.data_path,
        flag=flag,
        size=window,
        features=args.features,
        target=args.target,
        timeenc=timeenc,
        freq=freq,
        train_only=train_only
    )
    print(flag, len(data_set))
    data_loader = DataLoader(
        data_set,
        batch_size=batch_size,
        shuffle=shuffle_flag,
        num_workers=args.num_workers,
        drop_last=drop_last)
    return data_set, data_loader

## Exp Basic

In [None]:
import os
import torch
import numpy as np


class Exp_Basic(object):
    """Base class for experiments: picks a device and moves the built model onto it.

    Subclasses must implement ``_build_model``; the remaining hooks are no-ops
    here.
    """

    def __init__(self, args):
        self.args = args
        self.device = self._acquire_device()
        self.model = self._build_model().to(self.device)

    def _build_model(self):
        """Return the model instance; must be overridden."""
        raise NotImplementedError

    def _acquire_device(self):
        """Select the torch device from ``args`` and announce the choice."""
        if not self.args.use_gpu:
            device = torch.device('cpu')
            print('Use CPU')
            return device

        # Multi-GPU runs expose the configured device list, otherwise only one GPU.
        if self.args.use_multi_gpu:
            visible_devices = self.args.devices
        else:
            visible_devices = str(self.args.gpu)
        os.environ["CUDA_VISIBLE_DEVICES"] = visible_devices
        device = torch.device('cuda:{}'.format(self.args.gpu))
        print('Use GPU: cuda:{}'.format(self.args.gpu))
        return device

    def _get_data(self):
        return None

    def vali(self):
        return None

    def train(self):
        return None

    def test(self):
        return None

## Utilities

In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import time

# Use the non-interactive 'agg' backend for every figure created after this point.
plt.switch_backend('agg')


def adjust_learning_rate(optimizer, epoch, args):
    """Set the optimizer's learning rate for ``epoch`` according to ``args.lradj``.

    Schedules:
        ``'type1'``: ``learning_rate * 0.5 ** (epoch - 1)``.
        ``'type2'``: fixed values at specific epochs, unchanged otherwise.
        ``'3'``/``'4'``/``'5'``/``'6'``: ``learning_rate`` until epoch
        10/15/25/5 respectively, then ``learning_rate * 0.1``.

    Any other ``args.lradj`` value leaves the learning rate untouched
    (previously this raised ``UnboundLocalError``).

    Args:
        optimizer: object exposing ``param_groups`` (list of dicts with ``'lr'``).
        epoch: epoch number used to look up the schedule.
        args: configuration with ``lradj`` and ``learning_rate``.
    """
    # Epoch at which the step schedules drop the rate by 10x.
    step_thresholds = {'3': 10, '4': 15, '5': 25, '6': 5}

    lr_adjust = {}
    if args.lradj == 'type1':
        lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}
    elif args.lradj == 'type2':
        lr_adjust = {
            2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
            10: 5e-7, 15: 1e-7, 20: 5e-8
        }
    elif args.lradj in step_thresholds:
        threshold = step_thresholds[args.lradj]
        lr_adjust = {epoch: args.learning_rate if epoch < threshold else args.learning_rate*0.1}

    if epoch in lr_adjust:
        lr = lr_adjust[epoch]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('Updating learning rate to {}'.format(lr))


class EarlyStopping:
    """Stop training when the monitored loss has not improved for ``patience`` calls.

    Each call compares ``-val_loss`` with the best score seen so far. An
    improvement of at least ``delta`` saves a checkpoint and resets the
    counter; otherwise the counter is incremented and ``early_stop`` becomes
    true once it reaches ``patience``.

    Args:
        patience: number of consecutive non-improving calls tolerated.
        verbose: print a message whenever a checkpoint is saved.
        delta: minimum score increase that counts as an improvement.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, val_loss, model, path):
        """Record ``val_loss`` and checkpoint ``model`` under ``path`` on improvement."""
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        """Write ``model.state_dict()`` to ``<path>/checkpoint.pth`` and remember the loss."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')
        self.val_loss_min = val_loss


class dotdict(dict):
    """dot.notation access to dictionary attributes"""

    def __getattr__(self, name):
        # Missing keys yield None instead of raising, mirroring dict.get.
        return self.get(name)

    def __setattr__(self, name, value):
        self[name] = value

    def __delattr__(self, name):
        del self[name]


class StandardScaler():
    """Standardize data as ``(data - mean) / std`` and back.

    ``mean`` and ``std`` can be passed directly or estimated with ``fit``.
    The defaults (0 and 1) make an unfitted scaler an identity transform.

    Args:
        mean: offset subtracted by ``transform``.
        std: divisor applied by ``transform``.
    """

    def __init__(self, mean=0.0, std=1.0):
        self.mean = mean
        self.std = std

    def fit(self, data):
        """Estimate per-column mean and population std along axis 0.

        Columns with zero spread get a std of 1 so ``transform`` never divides
        by zero. Returns ``self`` to allow chaining.
        """
        values = np.asarray(data, dtype=float)
        self.mean = values.mean(0)
        spread = values.std(0)
        self.std = np.where(spread == 0, 1.0, spread)
        return self

    def transform(self, data):
        """Return ``data`` standardized with the stored mean and std."""
        return (data - self.mean) / self.std

    def inverse_transform(self, data):
        """Undo ``transform``."""
        return (data * self.std) + self.mean


def visual(true, preds=None, name='./pic/test.pdf'):
    """
    Results visualization

    Plots ``true`` (and ``preds`` when given) on a new figure, saves it to
    ``name`` and closes the figure so repeated calls do not accumulate open
    figures in memory.
    """
    fig = plt.figure()
    plt.plot(true, label='GroundTruth', linewidth=2)
    if preds is not None:
        plt.plot(preds, label='Prediction', linewidth=2)
    plt.legend()
    plt.savefig(name, bbox_inches='tight')
    # The figure is only needed on disk; release it.
    plt.close(fig)

def test_params_flop(model,x_shape):
    """
    Print the model's parameter count and its complexity as reported by ptflops.

    If you want to test a Former-style model's FLOPs, give default values to
    the extra inputs of model.forward(); this code can only pass one argument
    to forward().

    Args:
        model: module whose ``parameters()`` are counted; moved to CUDA for
            the complexity measurement.
        x_shape: input shape forwarded to ``get_model_complexity_info``.
    """
    model_params = sum(parameter.numel() for parameter in model.parameters())
    # Report the total once, after all parameters have been counted.
    print('INFO: Trainable parameter count: {:.2f}M'.format(model_params / 1000000.0))
    from ptflops import get_model_complexity_info
    with torch.cuda.device(0):
        macs, params = get_model_complexity_info(model.cuda(), x_shape, as_strings=True, print_per_layer_stat=True)
        # print('Flops:' + flops)
        # print('Params:' + params)
        print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
        print('{:<30}  {:<8}'.format('Number of parameters: ', params))



from typing import List

import numpy as np
import pandas as pd
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset


class TimeFeature:
    """Base class for callables that map a DatetimeIndex to one numeric feature."""

    def __init__(self):
        return None

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # The base implementation produces nothing; subclasses override it.
        return None

    def __repr__(self):
        return "{}()".format(self.__class__.__name__)


class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        fraction = index.second / 59.0
        return fraction - 0.5


class MinuteOfHour(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        fraction = index.minute / 59.0
        return fraction - 0.5


class HourOfDay(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        fraction = index.hour / 23.0
        return fraction - 0.5


class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        fraction = index.dayofweek / 6.0
        return fraction - 0.5


class DayOfMonth(TimeFeature):
    """Day of month encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        zero_based_day = index.day - 1
        return zero_based_day / 30.0 - 0.5


class DayOfYear(TimeFeature):
    """Day of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        zero_based_day = index.dayofyear - 1
        return zero_based_day / 365.0 - 0.5


class MonthOfYear(TimeFeature):
    """Month of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        zero_based_month = index.month - 1
        return zero_based_month / 11.0 - 0.5


class WeekOfYear(TimeFeature):
    """Week of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # ISO week numbers start at 1, hence the shift before scaling.
        zero_based_week = index.isocalendar().week - 1
        return zero_based_week / 52.0 - 0.5


def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Pick the time features suited to a sampling frequency.

    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.

    Returns
    -------
    One instance of every feature class registered for the first offset type
    that ``to_offset(freq_str)`` is an instance of.

    Raises
    ------
    RuntimeError
        If the parsed offset matches none of the registered offset types.
    """

    # Checked in order; finer granularities add features on top of coarser ones.
    calendar_features = [DayOfWeek, DayOfMonth, DayOfYear]
    hourly_features = [HourOfDay] + calendar_features
    minutely_features = [MinuteOfHour] + hourly_features
    secondly_features = [SecondOfMinute] + minutely_features
    candidates = [
        (offsets.YearEnd, []),
        (offsets.QuarterEnd, [MonthOfYear]),
        (offsets.MonthEnd, [MonthOfYear]),
        (offsets.Week, [DayOfMonth, WeekOfYear]),
        (offsets.Day, list(calendar_features)),
        (offsets.BusinessDay, list(calendar_features)),
        (offsets.Hour, hourly_features),
        (offsets.Minute, minutely_features),
        (offsets.Second, secondly_features),
    ]

    parsed_offset = to_offset(freq_str)

    for offset_cls, feature_types in candidates:
        if not isinstance(parsed_offset, offset_cls):
            continue
        return [feature_type() for feature_type in feature_types]

    supported_freq_msg = f"""
    Unsupported frequency {freq_str}
    The following frequencies are supported:
        Y   - yearly
            alias: A
        M   - monthly
        W   - weekly
        D   - daily
        B   - business days
        H   - hourly
        T   - minutely
            alias: min
        S   - secondly
    """
    raise RuntimeError(supported_freq_msg)


def time_features(dates, freq='h'):
    """Stack the features selected for ``freq`` into one array, one row per feature."""
    extractors = time_features_from_frequency_str(freq)
    feature_rows = [extract(dates) for extract in extractors]
    return np.vstack(feature_rows)


## Metrics

In [None]:
import numpy as np


def RSE(pred, true):
    """Root relative squared error: residual norm over the norm of ``true`` about its mean."""
    residual_norm = np.sqrt(np.sum((true - pred) ** 2))
    baseline_norm = np.sqrt(np.sum((true - true.mean()) ** 2))
    return residual_norm / baseline_norm


def CORR(pred, true):
    """Scaled Pearson correlation between ``pred`` and ``true`` along axis 0.

    The correlation is computed per trailing position as
    ``sum(a * b) / sqrt(sum(a**2) * sum(b**2))`` with ``a`` and ``b`` the
    inputs centred over axis 0, averaged over the last axis and multiplied
    by 0.01 (the scale factor of the previous implementation is preserved).

    The previous denominator used ``sum(a**2 * b**2)``, which is not the
    product of the two norms and does not yield a correlation.
    """
    true_centered = true - true.mean(0)
    pred_centered = pred - pred.mean(0)
    u = (true_centered * pred_centered).sum(0)
    d = np.sqrt((true_centered ** 2).sum(0) * (pred_centered ** 2).sum(0))
    # Guard against division by zero for constant series.
    d = d + 1e-12
    return 0.01*(u / d).mean(-1)


def MAE(pred, true):
    """Mean absolute error over all elements."""
    abs_error = np.abs(pred - true)
    return np.mean(abs_error)


def MSE(pred, true):
    """Mean squared error over all elements."""
    squared_error = (pred - true) ** 2
    return np.mean(squared_error)


def RMSE(pred, true):
    """Root mean squared error over all elements."""
    mean_squared = np.mean((pred - true) ** 2)
    return np.sqrt(mean_squared)


def MAPE(pred, true):
    """Mean absolute error relative to ``true`` (as a fraction, not a percentage)."""
    relative_error = (pred - true) / true
    return np.mean(np.abs(relative_error))


def MSPE(pred, true):
    """Mean squared error relative to ``true`` (as a fraction, not a percentage)."""
    relative_error = (pred - true) / true
    return np.mean(np.square(relative_error))


def metric(pred, true):
    """Evaluate every metric on ``(pred, true)``.

    Returns:
        tuple: ``(mae, mse, rmse, mape, mspe, rse, corr)``.
    """
    scorers = (MAE, MSE, RMSE, MAPE, MSPE, RSE, CORR)
    return tuple(score(pred, true) for score in scorers)

## Exp Main

In [None]:
# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

class Exp_Main(Exp_Basic):
    def __init__(self, args):
        """Delegate to ``Exp_Basic.__init__``, which selects the device and builds the model."""
        super().__init__(args)

    def _build_model(self):
        """Instantiate the model named by ``args.model`` as float32.

        The model is wrapped in ``nn.DataParallel`` when both ``use_multi_gpu``
        and ``use_gpu`` are set.
        """
        registry = {
            'FreLinear': FreTS
        }
        net = registry[self.args.model].Model(self.args).float()

        wants_data_parallel = self.args.use_multi_gpu and self.args.use_gpu
        if wants_data_parallel:
            net = nn.DataParallel(net, device_ids=self.args.device_ids)
        return net

    def _get_data(self, flag):
        """Return ``(dataset, loader)`` for the split named by ``flag``."""
        dataset, loader = data_provider(self.args, flag)
        return dataset, loader

    def _select_optimizer(self):
        """Create an Adam optimizer over the model parameters at ``args.learning_rate``."""
        return optim.Adam(self.model.parameters(), lr=self.args.learning_rate)

    def _select_criterion(self):
        """Create the training loss (mean squared error)."""
        return nn.MSELoss()

    def vali(self, vali_data, vali_loader, criterion):
        """Average ``criterion`` over ``vali_loader`` without tracking gradients.

        The model is put in eval mode for the pass and returned to train mode
        afterwards. ``vali_data`` is accepted but not used.

        Returns:
            The ``np.average`` of the per-batch losses.
        """
        args = self.args
        horizon = args.pred_len
        batch_losses = []

        self.model.eval()
        with torch.no_grad():
            for batch_x, batch_y, batch_x_mark, batch_y_mark in vali_loader:
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float()

                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                # Decoder input: the known label segment followed by zeros for the horizon.
                horizon_zeros = torch.zeros_like(batch_y[:, -horizon:, :]).float()
                dec_inp = torch.cat([batch_y[:, :args.label_len, :], horizon_zeros], dim=1).float().to(self.device)

                def forward_pass():
                    # 'Linear' models take the input window only.
                    if 'Linear' in args.model:
                        return self.model(batch_x)
                    if args.output_attention:
                        return self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                    return self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                if args.use_amp:
                    with torch.cuda.amp.autocast():
                        outputs = forward_pass()
                else:
                    outputs = forward_pass()

                # 'MS' keeps only the last channel, otherwise all channels are kept.
                f_dim = -1 if args.features == 'MS' else 0
                outputs = outputs[:, -horizon:, f_dim:]
                batch_y = batch_y[:, -horizon:, f_dim:].to(self.device)

                pred = outputs.detach().cpu()
                true = batch_y.detach().cpu()

                batch_losses.append(criterion(pred, true))

        mean_loss = np.average(batch_losses)
        self.model.train()
        return mean_loss

    def train(self, setting):
        """Train the model with early stopping and reload the best checkpoint.

        Args:
            setting: run identifier; checkpoints are written under
                ``os.path.join(self.args.checkpoints, setting)``.

        Returns:
            ``self.model`` after loading ``checkpoint.pth`` from that directory.

        Early stopping monitors the validation loss, or the training loss when
        ``args.train_only`` is set (no validation/test loaders are built then).
        """
        train_data, train_loader = self._get_data(flag='train')
        if not self.args.train_only:
            vali_data, vali_loader = self._get_data(flag='val')
            test_data, test_loader = self._get_data(flag='test')

        path = os.path.join(self.args.checkpoints, setting)
        if not os.path.exists(path):
            os.makedirs(path)

        time_now = time.time()

        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        # Count only parameters that will receive gradients.
        total_params = 0
        for name, parameter in self.model.named_parameters():
            if not parameter.requires_grad: continue
            param = parameter.numel()
            total_params += param
        print(f"Total Trainable Params: {total_params}")

        # The gradient scaler only exists in AMP mode; every later use is guarded by use_amp.
        if self.args.use_amp:
            scaler = torch.cuda.amp.GradScaler()

        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []

            self.model.train()
            epoch_time = time.time()
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)

                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                # decoder input
                # (label segment of batch_y followed by zeros for the prediction horizon)
                dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
                dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)

                # encoder - decoder
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        if 'Linear' in self.args.model:
                            outputs = self.model(batch_x)
                        else:
                            if self.args.output_attention:
                                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                            else:
                                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                        # 'MS' keeps only the last channel, otherwise all channels are kept.
                        f_dim = -1 if self.args.features == 'MS' else 0
                        outputs = outputs[:, -self.args.pred_len:, f_dim:]
                        batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
                        loss = criterion(outputs, batch_y)
                        train_loss.append(loss.item())
                else:
                    if 'Linear' in self.args.model:
                            outputs = self.model(batch_x)
                    else:
                        if self.args.output_attention:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                        else:
                            # NOTE(review): this is the only call that also passes batch_y to the
                            # model; the AMP path and vali() do not -- confirm this is intended.
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark, batch_y)
                    # print(outputs.shape,batch_y.shape)
                    f_dim = -1 if self.args.features == 'MS' else 0
                    outputs = outputs[:, -self.args.pred_len:, f_dim:]
                    batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
                    loss = criterion(outputs, batch_y)
                    train_loss.append(loss.item())

                # Progress report every 100 iterations, with a remaining-time estimate
                # based on the average speed since the previous report.
                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                if self.args.use_amp:
                    scaler.scale(loss).backward()
                    scaler.step(model_optim)
                    scaler.update()
                else:
                    loss.backward()
                    model_optim.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            if not self.args.train_only:
                vali_loss = self.vali(vali_data, vali_loader, criterion)
                test_loss = self.vali(test_data, test_loader, criterion)

                print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                    epoch + 1, train_steps, train_loss, vali_loss, test_loss))
                early_stopping(vali_loss, self.model, path)
            else:
                print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f}".format(
                    epoch + 1, train_steps, train_loss))
                early_stopping(train_loss, self.model, path)

            if early_stopping.early_stop:
                print("Early stopping")
                break

            # The schedule is keyed on the 1-based number of the epoch just finished.
            adjust_learning_rate(model_optim, epoch + 1, self.args)

        # Restore the weights saved at the best monitored loss.
        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))

        return self.model

    def test(self, setting, test=0):
        """Evaluate the model on the test split and persist predictions and metrics.

        Args:
            setting: Experiment identifier; used as the sub-directory name for
                the checkpoint, ``./test_results/`` and ``./results/`` folders.
            test: When truthy, load ``checkpoint.pth`` for ``setting`` before
                evaluating instead of using the weights currently in memory.

        Returns:
            None. Side effects: calls ``visual`` with a ``.pdf`` path for every
            20th batch, appends the metrics to ``result.txt`` in the working
            directory and saves ``pred.npy`` under ``./results/<setting>/``.
        """
        test_data, test_loader = self._get_data(flag='test')

        if test:
            print('loading model')
            # Resolve the checkpoint from the configured directory (the same
            # location predict() reads from) and map tensors onto the active
            # device so a GPU-saved checkpoint also loads on a CPU-only run.
            checkpoint_path = os.path.join(self.args.checkpoints, setting, 'checkpoint.pth')
            self.model.load_state_dict(torch.load(checkpoint_path, map_location=self.device))

        preds = []
        trues = []
        inputx = []
        folder_path = './test_results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)

        def forward(x, x_mark, dec, y_mark):
            # One forward pass, shared by the AMP and full-precision paths.
            # Models whose name contains 'Linear' take only the input window.
            if 'Linear' in self.args.model:
                return self.model(x)
            out = self.model(x, x_mark, dec, y_mark)
            # With output_attention the model's first element is the forecast.
            return out[0] if self.args.output_attention else out

        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)

                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                # decoder input: the first label_len steps of batch_y followed by
                # zeros for the pred_len steps to be forecast
                dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
                dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)

                # encoder - decoder
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        outputs = forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                else:
                    outputs = forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                # 'MS' keeps only the last feature column; otherwise keep all.
                f_dim = -1 if self.args.features == 'MS' else 0
                outputs = outputs[:, -self.args.pred_len:, f_dim:]
                batch_y = batch_y[:, -self.args.pred_len:, f_dim:]

                pred = outputs.detach().cpu().numpy()
                true = batch_y.detach().cpu().numpy()
                history = batch_x.detach().cpu().numpy()

                preds.append(pred)
                trues.append(true)
                inputx.append(history)
                if i % 20 == 0:
                    # Plot the last feature of the first sample: input window
                    # followed by ground truth vs. input window followed by forecast.
                    gt = np.concatenate((history[0, :, -1], true[0, :, -1]), axis=0)
                    forecast = np.concatenate((history[0, :, -1], pred[0, :, -1]), axis=0)
                    visual(gt, forecast, os.path.join(folder_path, str(i) + '.pdf'))

        if self.args.test_flop:
            test_params_flop((batch_x.shape[1], batch_x.shape[2]))
            # NOTE(review): exit() ends the run here; inside a notebook this
            # presumably stops the kernel - confirm that is intended.
            exit()

        # Concatenate along the batch axis. Unlike np.array(...).reshape(...),
        # this also works when the final batch is smaller than the others.
        preds = np.concatenate(preds, axis=0)
        trues = np.concatenate(trues, axis=0)
        inputx = np.concatenate(inputx, axis=0)

        # result save
        folder_path = './results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)

        mae, mse, rmse, mape, mspe, rse, corr = metric(preds, trues)
        print('mse:{}, mae:{}, rmse:{}'.format(mse, mae, rmse))
        # Context manager guarantees the log file is closed even if a write fails.
        with open("result.txt", 'a') as f:
            f.write(setting + "  \n")
            f.write('mse:{}, mae:{}, rse:{}, rmse:{}'.format(mse, mae, rse, rmse))
            f.write('\n')
            f.write('\n')

        # np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe,rse, corr]))
        np.save(folder_path + 'pred.npy', preds)
        # np.save(folder_path + 'true.npy', trues)
        # np.save(folder_path + 'x.npy', inputx)
        return

    def predict(self, setting, load=False):
        """Run the model over the 'pred' split and save the forecasts to disk.

        Args:
            setting: Experiment identifier; names the checkpoint sub-directory
                and the ``./results/<setting>/`` output folder.
            load: When true, load ``checkpoint.pth`` for ``setting`` from
                ``self.args.checkpoints`` before predicting.

        Returns:
            None. Writes ``real_prediction.npy`` (all forecasts) and
            ``real_prediction.csv`` (first forecast window with its dates).
        """
        pred_data, pred_loader = self._get_data(flag='pred')

        if load:
            best_model_path = os.path.join(self.args.checkpoints, setting, 'checkpoint.pth')
            # map_location lets a checkpoint saved on GPU load on the active device.
            self.model.load_state_dict(torch.load(best_model_path, map_location=self.device))

        def forward(x, x_mark, dec, y_mark):
            # One forward pass, shared by the AMP and full-precision paths.
            # Models whose name contains 'Linear' take only the input window.
            if 'Linear' in self.args.model:
                return self.model(x)
            out = self.model(x, x_mark, dec, y_mark)
            # With output_attention the model's first element is the forecast.
            return out[0] if self.args.output_attention else out

        preds = []

        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(pred_loader):
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float()
                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                # decoder input: the first label_len steps of batch_y followed by
                # zeros for the pred_len steps to be forecast
                dec_inp = torch.zeros([batch_y.shape[0], self.args.pred_len, batch_y.shape[2]]).float().to(batch_y.device)
                dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)

                # encoder - decoder
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        outputs = forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                else:
                    outputs = forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                preds.append(outputs.detach().cpu().numpy())

        # Concatenate along the batch axis. Unlike np.array(...).reshape(...),
        # this also works when the final batch is smaller than the others.
        preds = np.concatenate(preds, axis=0)
        if pred_data.scale:
            preds = pred_data.inverse_transform(preds)

        # result save
        folder_path = './results/' + setting + '/'
        os.makedirs(folder_path, exist_ok=True)

        np.save(folder_path + 'real_prediction.npy', preds)
        # CSV layout: future dates as the first column, then the columns of the
        # first forecast window (preds[0]); headers come from pred_data.cols.
        dated_forecast = np.append(np.transpose([pred_data.future_dates]), preds[0], axis=1)
        pd.DataFrame(dated_forecast, columns=pred_data.cols).to_csv(folder_path + 'real_prediction.csv', index=False)

        return

## Run Experiment

In [None]:
# Seed Python's, PyTorch's and NumPy's random number generators with the same
# value so repeated runs of the notebook draw the same random numbers.
fix_seed = 2021
for seed_rng in (random.seed, torch.manual_seed, np.random.seed):
    seed_rng(fix_seed)

In [None]:
class args:
    # Plain attribute container used as the experiment configuration; the class
    # object itself is handed to the experiment runner (Exp(args)).

    # --- run mode and identification (model_id/model feed the setting string) ---
    is_training = 1
    train_only = False          # when True, validation/test passes are skipped
    model_id = 'ExchangeTrial96'
    model = 'FreLinear'         # names containing 'Linear' are called with batch_x only

    # --- data location and selection ---
    data = 'exchange'
    root_path = './dataset/'
    data_path = 'exchange_rate.csv'
    channel_independence = 0
    features = 'M'              # 'MS' makes evaluation keep only the last feature column
    target = 'OT'
    freq = 'h'
    checkpoints = './checkpoints/'   # root directory for saved model checkpoints

    # --- window lengths ---
    seq_len = 96
    label_len = 48              # steps of batch_y copied into the decoder input
    pred_len = 96               # trailing steps of the output that are evaluated

    # --- model hyperparameters (consumed by model code outside this cell) ---
    individual = False
    embed_type = 0
    enc_in = 7
    dec_in = 7
    c_out = 7
    d_model = 512
    n_heads = 8
    e_layers = 2
    d_layers = 1
    d_ff = 2048
    moving_avg = 25
    factor = 1
    distil = True
    dropout = 0.05
    embed = 'timeF'
    activation = 'gelu'
    output_attention = False    # when True the model's first return element is the forecast
    do_predict = False          # run exp.predict(...) in addition to train/test

    # --- optimisation ---
    num_workers = 0
    itr = 1                     # number of repetitions of the experiment loop
    train_epochs = 500
    batch_size = 50
    patience = 3                # early-stopping patience
    learning_rate = 1e-4
    des = 'Exp'                 # free-form tag included in the setting string
    loss = 'mse'
    lradj = 'type1'
    use_amp = False             # wrap forward passes in torch.cuda.amp.autocast()

    # --- hardware ---
    use_gpu = True
    gpu = 0
    use_multi_gpu = False
    devices = '0,1,2'           # comma-separated device ids, parsed when use_multi_gpu is set
    test_flop = False

In [None]:
# Use the GPU only when it was requested *and* CUDA is actually available.
args.use_gpu = bool(torch.cuda.is_available() and args.use_gpu)

if args.use_gpu and args.use_multi_gpu:
    # Strip spaces from the comma-separated device list and store it back on
    # args.devices (previously the cleaned value was written to a misspelled
    # attribute, leaving args.devices unnormalised).
    args.devices = args.devices.replace(' ', '')
    device_ids = args.devices.split(',')
    args.device_ids = [int(id_) for id_ in device_ids]
    # The first listed device becomes the primary GPU.
    args.gpu = args.device_ids[0]

print('Args in experiment:')
print(args)

Args in experiment:
<class '__main__.args'>


In [None]:
Exp = Exp_Main


def _make_setting(run_idx):
    """Build the experiment identifier string for repetition ``run_idx``.

    The identifier concatenates the key configuration values from ``args`` and
    is used by the experiment object to name its output locations.
    """
    fields = (
        args.model_id,
        args.model,
        args.data,
        args.features,
        args.seq_len,
        args.label_len,
        args.pred_len,
        args.d_model,
        args.n_heads,
        args.e_layers,
        args.d_layers,
        args.d_ff,
        args.factor,
        args.embed,
        args.distil,
        args.des,
        run_idx,
    )
    return '{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format(*fields)


if not args.is_training:
    # Evaluation-only mode: a single run that either predicts or tests using
    # the checkpoint stored for run index 0.
    ii = 0
    setting = _make_setting(ii)

    exp = Exp(args)  # set experiments

    if args.do_predict:
        print('>>>>>>>predicting : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
        exp.predict(setting, True)
    else:
        print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
        exp.test(setting, test=1)
    torch.cuda.empty_cache()
else:
    # Training mode: repeat the full train -> test -> (optional) predict cycle
    # args.itr times, each with a fresh experiment object.
    for ii in range(args.itr):
        # setting record of experiments
        setting = _make_setting(ii)

        exp = Exp(args)  # set experiments
        print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
        exp.train(setting)

        if not args.train_only:
            print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
            exp.test(setting)

        if args.do_predict:
            print('>>>>>>>predicting : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
            exp.predict(setting, True)

        torch.cuda.empty_cache()

Use GPU: cuda:0
>>>>>>>start training : ExchangeTrial96_FreLinear_exchange_ftM_sl96_ll48_pl96_dm512_nh8_el2_dl1_df2048_fc1_ebtimeF_dtTrue_Exp_0>>>>>>>>>>>>>>>>>>>>>>>>>>
train 5120
val 1424
test 663
Total Trainable Params: 3236832
	iters: 100, epoch: 1 | loss: 0.0065862
	speed: 0.0151s/iter; left time: 768.9238s
Epoch: 1 cost time: 1.5426125526428223
Epoch: 1, Steps: 102 | Train Loss: 0.0157684 Vali Loss: 0.0069405 Test Loss: 0.0050341
Validation loss decreased (inf --> 0.006940).  Saving model ...
Updating learning rate to 0.0001
	iters: 100, epoch: 2 | loss: 0.0055324
	speed: 0.0187s/iter; left time: 948.7056s
Epoch: 2 cost time: 1.5090439319610596
Epoch: 2, Steps: 102 | Train Loss: 0.0073909 Vali Loss: 0.0070563 Test Loss: 0.0048450
EarlyStopping counter: 1 out of 3
Updating learning rate to 5e-05
	iters: 100, epoch: 3 | loss: 0.0070791
	speed: 0.0181s/iter; left time: 916.8483s
Epoch: 3 cost time: 1.5088677406311035
Epoch: 3, Steps: 102 | Train Loss: 0.0067811 Vali Loss: 0.0059217 